Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
d4b251678e
@ -61,6 +61,9 @@ jobs:
|
||||
|
||||
- script: |
|
||||
sudo apt --assume-yes install libusb-1.0-0-dev
|
||||
# For opencv-python: setuptools and upgrade
|
||||
sudo apt-get install python3-setuptools
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt
|
||||
# For running Python API tests
|
||||
python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt
|
||||
|
@ -30,7 +30,7 @@ message (STATUS "CMAKE_C_COMPILER_ID ................... " ${CMAKE_C_COMPILER_ID
|
||||
message (STATUS "CMAKE_BUILD_TYPE ...................... " ${CMAKE_BUILD_TYPE})
|
||||
|
||||
# remove file with exported developer targets to force its regeneration
|
||||
file(REMOVE "${CMAKE_BINARY_DIR}/inference_engine_targets.cmake")
|
||||
file(REMOVE "${CMAKE_BINARY_DIR}/InferenceEngineTargets.cmake")
|
||||
foreach(component IN LISTS openvino_export_components)
|
||||
file(REMOVE "${CMAKE_BINARY_DIR}/${component}_dev_targets.cmake")
|
||||
unset(${component} CACHE)
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
set_temp_directory(TEMP "${IE_MAIN_SOURCE_DIR}")
|
||||
|
||||
if(CMAKE_CROSSCOMPILING AND LINUX AND X86_64)
|
||||
if(CMAKE_CROSSCOMPILING AND CMAKE_HOST_SYSTEM_NAME MATCHES Linux AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
|
||||
set(protoc_version "3.7.1")
|
||||
|
||||
RESOLVE_DEPENDENCY(SYSTEM_PROTOC_ROOT
|
||||
|
@ -12,7 +12,7 @@
|
||||
# 2) ${TBBROOT} with IE own version of TBBConfig.cmake (actual for TBB < 2017.7)
|
||||
#
|
||||
|
||||
## Path to IE own version of TBBConfig.cmake old TBB version without cmake config.
|
||||
# Path to IE own version of TBBConfig.cmake old TBB version without cmake config.
|
||||
if(APPLE)
|
||||
set(IE_OWN_TBB_CONFIG tbb/mac)
|
||||
elseif(UNIX)
|
||||
@ -27,6 +27,7 @@ find_package(TBB
|
||||
CONFIG
|
||||
PATHS ${TBBROOT}/cmake
|
||||
${IEDevScripts_DIR}/${IE_OWN_TBB_CONFIG}
|
||||
NO_CMAKE_FIND_ROOT_PATH
|
||||
NO_DEFAULT_PATH
|
||||
)
|
||||
|
@ -56,7 +56,7 @@ ie_option (VERBOSE_BUILD "shows extra information about build" OFF)
|
||||
|
||||
ie_option (ENABLE_UNSAFE_LOCATIONS "skip check for MD5 for dependency" OFF)
|
||||
|
||||
ie_option (ENABLE_ALTERNATIVE_TEMP "in case of dependency conflict, to avoid modification in master, use local copy of dependency" ON)
|
||||
ie_option (ENABLE_ALTERNATIVE_TEMP "in case of dependency conflict, to avoid modification in master, use local copy of dependency" OFF)
|
||||
|
||||
ie_dependent_option (ENABLE_FUZZING "instrument build for fuzzing" OFF "CMAKE_CXX_COMPILER_ID MATCHES ^(Apple)?Clang$; NOT WIN32" OFF)
|
||||
|
||||
|
@ -139,6 +139,8 @@ for more details and command line parameters used for the model conversion.
|
||||
```bash
|
||||
./<MO_INSTALL_DIR>/mo.py --input_model <PATH_TO_MODEL>/wnet_20.pb -b 1
|
||||
```
|
||||
> **NOTE:** This conversion guide is applicable for the 2021.3 release of OpenVINO and that starting from 2021.4
|
||||
> the OpenVINO supports this model out of the box.
|
||||
|
||||
Model Optimizer produces the following error:
|
||||
```bash
|
||||
|
@ -160,6 +160,9 @@ Standard TensorFlow\* operations:
|
||||
| EuclideanNorm | No |
|
||||
| FakeQuantWithMinMaxVars | No |
|
||||
| FakeQuantWithMinMaxVarsPerChannel | No |
|
||||
| FFT | Supported only when it is part of a sub-graph of the special form |
|
||||
| FFT2D | Supported only when it is part of a sub-graph of the special form |
|
||||
| FFT3D | Supported only when it is part of a sub-graph of the special form |
|
||||
| Fill | No |
|
||||
| Floor | No |
|
||||
| FloorDiv | No |
|
||||
@ -172,6 +175,9 @@ Standard TensorFlow\* operations:
|
||||
| Greater | No |
|
||||
| GreaterEqual | No |
|
||||
| Identity | Not needed for shape inference |
|
||||
| IFFT | Supported only when it is part of a sub-graph of the special form |
|
||||
| IFFT2D | Supported only when it is part of a sub-graph of the special form |
|
||||
| IFFT3D | Supported only when it is part of a sub-graph of the special form |
|
||||
| LRN | No |
|
||||
| Less | No |
|
||||
| Log | No |
|
||||
|
@ -13,7 +13,7 @@ OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applicatio
|
||||
| Component | Description |
|
||||
|-----------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| [Model Optimizer](https://docs.openvinotoolkit.org/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) | This tool imports, converts, and optimizes models that were trained in popular frameworks to a format usable by Intel tools, especially the Inference Engine. <br>Popular frameworks include Caffe\*, TensorFlow\*, MXNet\*, and ONNX\*. |
|
||||
| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](https://docs.openvinotoolkit.org/latest/omz_tools_accuracy_checker.html), [Post-Training Optimization Tool](https://docs.openvinotoolkit.org/latest/pot_README.html) |
|
||||
| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](https://docs.openvinotoolkit.org/latest/omz_tools_accuracy_checker.html), [Post-Training Optimization Tool](https://docs.openvinotoolkit.org/latest/pot_README.html), [Benchmark Tool](../../inference-engine/samples/benchmark_app/README.md) |
|
||||
|
||||
**The Runtime Package Includes the Following Components Installed by Dependency:**
|
||||
|
||||
@ -23,37 +23,46 @@ OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applicatio
|
||||
|
||||
|
||||
## System Requirements
|
||||
The complete list of supported hardware is available in the [Release Notes](https://software.intel.com/content/www/us/en/develop/articles/openvino-relnotes.html#inpage-nav-8).
|
||||
|
||||
The table below lists the supported operating systems and Python* versions required to run the installation.
|
||||
|
||||
| Supported Operating System | [Python* Version (64-bit)](https://www.python.org/) |
|
||||
| :------------------------------------------------------------| :---------------------------------------------------|
|
||||
| Ubuntu* 18.04 long-term support (LTS), 64-bit | 3.6, 3.7 |
|
||||
| Ubuntu* 20.04 long-term support (LTS), 64-bit | 3.6, 3.7 |
|
||||
| Red Hat* Enterprise Linux* 8, 64-bit | 3.6, 3.7 |
|
||||
| CentOS* 7, 64-bit | 3.6, 3.7 |
|
||||
| Ubuntu* 18.04 long-term support (LTS), 64-bit | 3.6, 3.7, 3.8 |
|
||||
| Ubuntu* 20.04 long-term support (LTS), 64-bit | 3.6, 3.7, 3.8 |
|
||||
| Red Hat* Enterprise Linux* 8, 64-bit | 3.6, 3.8 |
|
||||
| CentOS* 7, 64-bit | 3.6, 3.7, 3.8 |
|
||||
| macOS* 10.15.x versions | 3.6, 3.7, 3.8 |
|
||||
| Windows 10*, 64-bit | 3.6, 3.7, 3.8 |
|
||||
|
||||
> **NOTE**: This package can be installed on other versions of Linux and Windows OSes, but only the specific versions above are fully validated.
|
||||
> **NOTE**: This package can be installed on other versions of macOS, Linux and Windows, but only the specific versions above are fully validated.
|
||||
|
||||
## Install the Developer Package
|
||||
|
||||
### Step 1. Set Up Python Virtual Environment
|
||||
### Step 1. Install External Software Dependencies
|
||||
|
||||
On Windows* OS you are required to install [Microsoft* Visual C++ Redistributable Package (x64)](https://visualstudio.microsoft.com/downloads/#microsoft-visual-c-redistributable-for-visual-studio-2019) to be able to run OpenVINO™ applications.
|
||||
|
||||
### Step 2. Set Up Python Virtual Environment
|
||||
|
||||
To avoid dependency conflicts, use a virtual environment. Skip this
|
||||
step only if you do want to install all dependencies globally.
|
||||
|
||||
Create virtual environment:
|
||||
|
||||
On Linux and macOS:
|
||||
```sh
|
||||
# Depending on your OS, this step may require installing python3-venv
|
||||
python3 -m venv openvino_env
|
||||
```
|
||||
|
||||
On Windows:
|
||||
```sh
|
||||
python -m pip install --user virtualenv
|
||||
python -m venv openvino_env
|
||||
```
|
||||
|
||||
> **NOTE**: On Linux and macOS, you may need to type `python3` instead of
|
||||
`python`. You may also need to [install pip](https://pip.pypa.io/en/stable/installing/).
|
||||
|
||||
### Step 2. Activate Virtual Environment
|
||||
### Step 3. Activate Virtual Environment
|
||||
|
||||
On Linux and macOS:
|
||||
```sh
|
||||
@ -64,14 +73,14 @@ On Windows:
|
||||
openvino_env\Scripts\activate
|
||||
```
|
||||
|
||||
### Step 3. Set Up and Update pip to the Highest Version
|
||||
### Step 4. Set Up and Update pip to the Highest Version
|
||||
|
||||
Run the command below:
|
||||
```sh
|
||||
python -m pip install --upgrade pip
|
||||
```
|
||||
|
||||
### Step 4. Install the Package
|
||||
### Step 5. Install the Package
|
||||
|
||||
Run the command below: <br>
|
||||
|
||||
@ -79,7 +88,7 @@ Run the command below: <br>
|
||||
pip install openvino-dev
|
||||
```
|
||||
|
||||
### Step 5. Verify that the Package is Installed
|
||||
### Step 6. Verify that the Package is Installed
|
||||
|
||||
Run the command below (this may take a few seconds):
|
||||
```sh
|
||||
@ -92,4 +101,3 @@ You will see the help message for Post-Training Optimization Tool if installatio
|
||||
|
||||
- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit)
|
||||
- OpenVINO™ toolkit online documentation: [https://docs.openvinotoolkit.org](https://docs.openvinotoolkit.org)
|
||||
|
||||
|
@ -8,7 +8,7 @@ license terms for third party or open source software included in or with the So
|
||||
|
||||
OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applications and solutions that solve a variety of tasks including emulation of human vision, automatic speech recognition, natural language processing, recommendation systems, and many others. Based on latest generations of artificial neural networks, including Convolutional Neural Networks (CNNs), recurrent and attention-based networks, the toolkit extends computer vision and non-vision workloads across Intel® hardware, maximizing performance. It accelerates applications with high-performance, AI and deep learning inference deployed from edge to cloud.
|
||||
|
||||
The Intel® Distribution of OpenVINO™ toolkit for Linux\*:
|
||||
The Intel® Distribution of OpenVINO™ toolkit\*:
|
||||
- Enables CNN-based deep learning inference on the edge
|
||||
- Supports heterogeneous execution across Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
|
||||
- Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels
|
||||
@ -20,15 +20,16 @@ The Intel® Distribution of OpenVINO™ toolkit for Linux\*:
|
||||
| [Inference Engine](https://docs.openvinotoolkit.org/latest/openvino_docs_IE_DG_inference_engine_intro.html) | This is the engine that runs the deep learning model. It includes a set of libraries for an easy inference integration into your applications. |
|
||||
|
||||
## System Requirements
|
||||
The complete list of supported hardware is available in the [Release Notes](https://software.intel.com/content/www/us/en/develop/articles/openvino-relnotes.html#inpage-nav-8).
|
||||
|
||||
The table below lists the supported operating systems and Python* versions required to run the installation.
|
||||
The table below lists supported operating systems and Python* versions required to run the installation.
|
||||
|
||||
| Supported Operating System | [Python* Version (64-bit)](https://www.python.org/) |
|
||||
| :------------------------------------------------------------| :---------------------------------------------------|
|
||||
| Ubuntu* 18.04 long-term support (LTS), 64-bit | 3.6, 3.7 |
|
||||
| Ubuntu* 20.04 long-term support (LTS), 64-bit | 3.6, 3.7 |
|
||||
| Red Hat* Enterprise Linux* 8, 64-bit | 3.6, 3.7 |
|
||||
| CentOS* 7, 64-bit | 3.6, 3.7 |
|
||||
| Ubuntu* 18.04 long-term support (LTS), 64-bit | 3.6, 3.7, 3.8 |
|
||||
| Ubuntu* 20.04 long-term support (LTS), 64-bit | 3.6, 3.7, 3.8 |
|
||||
| Red Hat* Enterprise Linux* 8, 64-bit | 3.6, 3.8 |
|
||||
| CentOS* 7, 64-bit | 3.6, 3.7, 3.8 |
|
||||
| macOS* 10.15.x versions | 3.6, 3.7, 3.8 |
|
||||
| Windows 10*, 64-bit | 3.6, 3.7, 3.8 |
|
||||
|
||||
@ -36,7 +37,11 @@ The table below lists the supported operating systems and Python* versions requi
|
||||
|
||||
## Install the Runtime Package
|
||||
|
||||
### Step 1. Set Up Python Virtual Environment
|
||||
### Step 1. Install External Software Dependencies
|
||||
|
||||
On Windows* OS you are required to install [Microsoft* Visual C++ Redistributable Package (x64)](https://visualstudio.microsoft.com/downloads/#microsoft-visual-c-redistributable-for-visual-studio-2019) to be able to run OpenVINO™ applications.
|
||||
|
||||
### Step 2. Set Up Python Virtual Environment
|
||||
|
||||
To avoid dependency conflicts, use a virtual environment. Skip this
|
||||
step only if you do want to install all dependencies globally.
|
||||
@ -50,7 +55,7 @@ python -m venv openvino_env
|
||||
> **NOTE**: On Linux and macOS, you may need to type `python3` instead of
|
||||
`python`. You may also need to [install pip](https://pip.pypa.io/en/stable/installing/).
|
||||
|
||||
### Step 2. Activate Virtual Environment
|
||||
### Step 3. Activate Virtual Environment
|
||||
|
||||
On Linux and macOS:
|
||||
```sh
|
||||
@ -61,14 +66,14 @@ On Windows:
|
||||
openvino_env\Scripts\activate
|
||||
```
|
||||
|
||||
### Step 3. Set Up and Update pip to the Highest Version
|
||||
### Step 4. Set Up and Update pip to the Highest Version
|
||||
|
||||
Run the command below:
|
||||
```sh
|
||||
python -m pip install --upgrade pip
|
||||
```
|
||||
|
||||
### Step 4. Install the Package
|
||||
### Step 5. Install the Package
|
||||
|
||||
Run the command below: <br>
|
||||
|
||||
@ -76,7 +81,7 @@ Run the command below: <br>
|
||||
pip install openvino
|
||||
```
|
||||
|
||||
### Step 5. Verify that the Package is Installed
|
||||
### Step 6. Verify that the Package is Installed
|
||||
|
||||
Run the command below:
|
||||
```sh
|
||||
|
@ -4,39 +4,33 @@
|
||||
|
||||
**Category**: *Normalization*
|
||||
|
||||
**Short description**: *BatchNormInference* layer normalizes a `input` tensor by `mean` and `variance`, and applies a scale (`gamma`) to it, as well as an offset (`beta`).
|
||||
**Short description**: *BatchNormInference* performs Batch Normalization operation described in the [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](https://arxiv.org/abs/1502.03167v2) article.
|
||||
|
||||
**Attributes**:
|
||||
**Detailed Description**
|
||||
|
||||
* *epsilon*
|
||||
* **Description**: *epsilon* is the number to be added to the variance to avoid division by zero when normalizing a value. For example, *epsilon* equal to 0.001 means that 0.001 is added to the variance.
|
||||
* **Range of values**: a positive floating-point number
|
||||
* **Type**: `float`
|
||||
* **Default value**: None
|
||||
* **Required**: *yes*
|
||||
*BatchNormInference* performs the following operations on a given data batch input tensor `data`:
|
||||
|
||||
**Inputs**
|
||||
* Normalizes each activation \f$x^{(k)}\f$ by the mean and variance.
|
||||
\f[
|
||||
\hat{x}^{(k)}=\frac{x^{(k)} - E[x^{(k)}]}{\sqrt{Var(x^{(k)}) + \epsilon}}
|
||||
\f]
|
||||
where \f$E[x^{(k)}]\f$ and \f$Var(x^{(k)})\f$ are the mean and variance, calculated per channel axis of `data` input, and correspond to `mean` and `variance` inputs, respectively. Additionally, \f$\epsilon\f$ is a value added to the variance for numerical stability and corresponds to `epsilon` attribute.
|
||||
|
||||
* **1**: `input` - input tensor with data for normalization. At least a 2D tensor of type T, the second dimension represents the channel axis and must have a span of at least 1. **Required.**
|
||||
* **2**: `gamma` - gamma scaling for normalized value. A 1D tensor of type T with the same span as input's channel axis. **Required.**
|
||||
* **3**: `beta` - bias added to the scaled normalized value. A 1D tensor of type T with the same span as input's channel axis.. **Required.**
|
||||
* **4**: `mean` - value for mean normalization. A 1D tensor of type T with the same span as input's channel axis.. **Required.**
|
||||
* **5**: `variance` - value for variance normalization. A 1D tensor of type T with the same span as input's channel axis.. **Required.**
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: The result of normalization. A tensor of the same type and shape with 1st input tensor.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: any numeric type.
|
||||
* Performs linear transformation of each normalized activation based on `gamma` and `beta` input, representing the scaling factor and shift, respectively.
|
||||
\f[
|
||||
\hat{y}^{(k)}=\gamma^{(k)}\hat{x}^{(k)} + \beta^{(k)}
|
||||
\f]
|
||||
where \f$\gamma^{(k)}\f$ and \f$\beta^{(k)}\f$ are learnable parameters, calculated per channel axis, and correspond to `gamma` and `beta` inputs.
|
||||
|
||||
**Mathematical Formulation**
|
||||
|
||||
*BatchNormInference* normalizes the output in each hidden layer.
|
||||
Let `x` be a *d*-dimensional input, \f$x=(x_{1}\dotsc x_{d})\f$. Since normalization is applied to each activation \f$E[x^{(k)}]\f$, you can focus on a particular activation and omit k.
|
||||
|
||||
For a particular activation, consider a mini-batch \f$\mathcal{B}\f$ of m values. *BatchNormInference* performs Batch Normalization algorithm as follows:
|
||||
|
||||
* **Input**: Values of \f$x\f$ over a mini-batch:
|
||||
\f[
|
||||
\beta = \{ x_{1...m} \}
|
||||
\mathcal{B} = \{ x_{1...m} \}
|
||||
\f]
|
||||
* **Parameters to learn**: \f$ \gamma, \beta\f$
|
||||
* **Output**:
|
||||
@ -45,22 +39,81 @@
|
||||
\f]
|
||||
* **Mini-batch mean**:
|
||||
\f[
|
||||
\mu_{\beta} \leftarrow \frac{1}{m}\sum_{i=1}^{m}b_{i}
|
||||
\mu_{\mathcal{B}} \leftarrow \frac{1}{m}\sum_{i=1}^{m}b_{i}
|
||||
\f]
|
||||
* **Mini-batch variance**:
|
||||
\f[
|
||||
\sigma_{\beta }^{2}\leftarrow \frac{1}{m}\sum_{i=1}^{m} ( b_{i} - \mu_{\beta} )^{2}
|
||||
\sigma_{\mathcal{B}}^{2}\leftarrow \frac{1}{m}\sum_{i=1}^{m} ( b_{i} - \mu_{\mathcal{B}})^{2}
|
||||
\f]
|
||||
* **Normalize**:
|
||||
\f[
|
||||
\hat{b_{i}} \leftarrow \frac{b_{i} - \mu_{\beta}}{\sqrt{\sigma_{\beta }^{2} + \epsilon }}
|
||||
\hat{b_{i}} \leftarrow \frac{b_{i} - \mu_{\mathcal{B}}}{\sqrt{\sigma_{\mathcal{B}}^{2} + \epsilon }}
|
||||
\f]
|
||||
* **Scale and shift**:
|
||||
\f[
|
||||
o_{i} \leftarrow \gamma\hat{b_{i}} + \beta = BN_{\gamma ,\beta } ( b_{i} )
|
||||
\f]
|
||||
|
||||
**Example**
|
||||
**Attributes**:
|
||||
|
||||
* *epsilon*
|
||||
* **Description**: *epsilon* is a constant added to the variance for numerical stability.
|
||||
* **Range of values**: a positive floating-point number
|
||||
* **Type**: `float`
|
||||
* **Default value**: none
|
||||
* **Required**: *yes*
|
||||
|
||||
**Inputs**
|
||||
|
||||
* **1**: `data` - A tensor of type *T* and at least rank 2. The second dimension represents the channel axis and must have a span of at least 1. **Required.**
|
||||
* **2**: `gamma` - Scaling factor for normalized value. A 1D tensor of type *T* with the same span as `data` channel axis. **Required.**
|
||||
* **3**: `beta` - Bias added to the scaled normalized value. A 1D tensor of type *T* with the same span as `data` channel axis. **Required.**
|
||||
* **4**: `mean` - Value for mean normalization. A 1D tensor of type *T* with the same span as `data` channel axis. **Required.**
|
||||
* **5**: `variance` - Value for variance normalization. A 1D tensor of type *T* with the same span as `data` channel axis. **Required.**
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: The result of element-wise Batch Normalization operation applied to the input tensor `data`. A tensor of type *T* and the same shape as `data` input tensor.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: any supported floating-point type.
|
||||
|
||||
**Examples**
|
||||
|
||||
*Example: 2D input tensor `data`*
|
||||
|
||||
```xml
|
||||
<layer ... type="BatchNormInference" ...>
|
||||
<data epsilon="9.99e-06" />
|
||||
<input>
|
||||
<port id="0"> <!-- input -->
|
||||
<dim>10</dim>
|
||||
<dim>128</dim>
|
||||
</port>
|
||||
<port id="1"> <!-- gamma -->
|
||||
<dim>128</dim>
|
||||
</port>
|
||||
<port id="2"> <!-- beta -->
|
||||
<dim>128</dim>
|
||||
</port>
|
||||
<port id="3"> <!-- mean -->
|
||||
<dim>128</dim>
|
||||
</port>
|
||||
<port id="4"> <!-- variance -->
|
||||
<dim>128</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="5">
|
||||
<dim>10</dim>
|
||||
<dim>128</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
|
||||
*Example: 4D input tensor `data`*
|
||||
|
||||
```xml
|
||||
<layer ... type="BatchNormInference" ...>
|
||||
|
@ -1,42 +1,36 @@
|
||||
## BatchNormInference <a name="BatchNormInference"></a> {#openvino_docs_ops_normalization_BatchNormInference_5}
|
||||
|
||||
**Versioned name**: *BatchNormInference-5
|
||||
**Versioned name**: *BatchNormInference-5*
|
||||
|
||||
**Category**: *Normalization*
|
||||
|
||||
**Short description**: *BatchNormInference* layer normalizes a `input` tensor by `mean` and `variance`, and applies a scale (`gamma`) to it, as well as an offset (`beta`).
|
||||
**Short description**: *BatchNormInference* performs Batch Normalization operation described in the [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](https://arxiv.org/abs/1502.03167v2) article.
|
||||
|
||||
**Attributes**:
|
||||
**Detailed Description**
|
||||
|
||||
* *epsilon*
|
||||
* **Description**: *epsilon* is the number to be added to the variance to avoid division by zero when normalizing a value. For example, *epsilon* equal to 0.001 means that 0.001 is added to the variance.
|
||||
* **Range of values**: a positive floating-point number
|
||||
* **Type**: `float`
|
||||
* **Default value**: None
|
||||
* **Required**: *yes*
|
||||
*BatchNormInference* performs the following operations on a given data batch input tensor `data`:
|
||||
|
||||
**Inputs**
|
||||
* Normalizes each activation \f$x^{(k)}\f$ by the mean and variance.
|
||||
\f[
|
||||
\hat{x}^{(k)}=\frac{x^{(k)} - E[x^{(k)}]}{\sqrt{Var(x^{(k)}) + \epsilon}}
|
||||
\f]
|
||||
where \f$E[x^{(k)}]\f$ and \f$Var(x^{(k)})\f$ are the mean and variance, calculated per channel axis of `data` input, and correspond to `mean` and `variance` inputs, respectively. Additionally, \f$\epsilon\f$ is a value added to the variance for numerical stability and corresponds to `epsilon` attribute.
|
||||
|
||||
* **1**: `input` - input tensor with data for normalization. At least a 2D tensor of type T, the second dimension represents the channel axis and must have a span of at least 1. **Required.**
|
||||
* **2**: `gamma` - gamma scaling for normalized value. A 1D tensor of type T with the same span as input's channel axis. **Required.**
|
||||
* **3**: `beta` - bias added to the scaled normalized value. A 1D tensor of type T with the same span as input's channel axis.. **Required.**
|
||||
* **4**: `mean` - value for mean normalization. A 1D tensor of type T with the same span as input's channel axis.. **Required.**
|
||||
* **5**: `variance` - value for variance normalization. A 1D tensor of type T with the same span as input's channel axis.. **Required.**
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: The result of normalization. A tensor of the same type and shape with 1st input tensor.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: any numeric type.
|
||||
* Performs linear transformation of each normalized activation based on `gamma` and `beta` input, representing the scaling factor and shift, respectively.
|
||||
\f[
|
||||
\hat{y}^{(k)}=\gamma^{(k)}\hat{x}^{(k)} + \beta^{(k)}
|
||||
\f]
|
||||
where \f$\gamma^{(k)}\f$ and \f$\beta^{(k)}\f$ are learnable parameters, calculated per channel axis, and correspond to `gamma` and `beta` inputs.
|
||||
|
||||
**Mathematical Formulation**
|
||||
|
||||
*BatchNormInference* normalizes the output in each hidden layer.
|
||||
Let `x` be a *d*-dimensional input, \f$x=(x_{1}\dotsc x_{d})\f$. Since normalization is applied to each activation \f$E[x^{(k)}]\f$, you can focus on a particular activation and omit k.
|
||||
|
||||
For a particular activation, consider a mini-batch \f$\mathcal{B}\f$ of m values. *BatchNormInference* performs Batch Normalization algorithm as follows:
|
||||
|
||||
* **Input**: Values of \f$x\f$ over a mini-batch:
|
||||
\f[
|
||||
\beta = \{ x_{1...m} \}
|
||||
\mathcal{B} = \{ x_{1...m} \}
|
||||
\f]
|
||||
* **Parameters to learn**: \f$ \gamma, \beta\f$
|
||||
* **Output**:
|
||||
@ -45,22 +39,81 @@
|
||||
\f]
|
||||
* **Mini-batch mean**:
|
||||
\f[
|
||||
\mu_{\beta} \leftarrow \frac{1}{m}\sum_{i=1}^{m}b_{i}
|
||||
\mu_{\mathcal{B}} \leftarrow \frac{1}{m}\sum_{i=1}^{m}b_{i}
|
||||
\f]
|
||||
* **Mini-batch variance**:
|
||||
\f[
|
||||
\sigma_{\beta }^{2}\leftarrow \frac{1}{m}\sum_{i=1}^{m} ( b_{i} - \mu_{\beta} )^{2}
|
||||
\sigma_{\mathcal{B}}^{2}\leftarrow \frac{1}{m}\sum_{i=1}^{m} ( b_{i} - \mu_{\mathcal{B}})^{2}
|
||||
\f]
|
||||
* **Normalize**:
|
||||
\f[
|
||||
\hat{b_{i}} \leftarrow \frac{b_{i} - \mu_{\beta}}{\sqrt{\sigma_{\beta }^{2} + \epsilon }}
|
||||
\hat{b_{i}} \leftarrow \frac{b_{i} - \mu_{\mathcal{B}}}{\sqrt{\sigma_{\mathcal{B}}^{2} + \epsilon }}
|
||||
\f]
|
||||
* **Scale and shift**:
|
||||
\f[
|
||||
o_{i} \leftarrow \gamma\hat{b_{i}} + \beta = BN_{\gamma ,\beta } ( b_{i} )
|
||||
\f]
|
||||
|
||||
**Example**
|
||||
**Attributes**:
|
||||
|
||||
* *epsilon*
|
||||
* **Description**: *epsilon* is a constant added to the variance for numerical stability.
|
||||
* **Range of values**: a positive floating-point number
|
||||
* **Type**: `float`
|
||||
* **Default value**: none
|
||||
* **Required**: *yes*
|
||||
|
||||
**Inputs**
|
||||
|
||||
* **1**: `data` - A tensor of type *T* and at least rank 2. The second dimension represents the channel axis and must have a span of at least 1. **Required.**
|
||||
* **2**: `gamma` - Scaling factor for normalized value. A 1D tensor of type *T* with the same span as `data` channel axis. **Required.**
|
||||
* **3**: `beta` - Bias added to the scaled normalized value. A 1D tensor of type *T* with the same span as `data` channel axis. **Required.**
|
||||
* **4**: `mean` - Value for mean normalization. A 1D tensor of type *T* with the same span as `data` channel axis. **Required.**
|
||||
* **5**: `variance` - Value for variance normalization. A 1D tensor of type *T* with the same span as `data` channel axis. **Required.**
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: The result of element-wise Batch Normalization operation applied to the input tensor `data`. A tensor of type *T* and the same shape as `data` input tensor.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: any supported floating-point type.
|
||||
|
||||
**Examples**
|
||||
|
||||
*Example: 2D input tensor `data`*
|
||||
|
||||
```xml
|
||||
<layer ... type="BatchNormInference" ...>
|
||||
<data epsilon="9.99e-06" />
|
||||
<input>
|
||||
<port id="0"> <!-- input -->
|
||||
<dim>10</dim>
|
||||
<dim>128</dim>
|
||||
</port>
|
||||
<port id="1"> <!-- gamma -->
|
||||
<dim>128</dim>
|
||||
</port>
|
||||
<port id="2"> <!-- beta -->
|
||||
<dim>128</dim>
|
||||
</port>
|
||||
<port id="3"> <!-- mean -->
|
||||
<dim>128</dim>
|
||||
</port>
|
||||
<port id="4"> <!-- variance -->
|
||||
<dim>128</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="5">
|
||||
<dim>10</dim>
|
||||
<dim>128</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
|
||||
*Example: 4D input tensor `data`*
|
||||
|
||||
```xml
|
||||
<layer ... type="BatchNormInference" ...>
|
||||
@ -95,4 +148,3 @@
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
|
||||
|
@ -4,7 +4,21 @@
|
||||
|
||||
**Category**: *Reduction*
|
||||
|
||||
**Short description**: *ReduceSum* operation performs reduction with addition of the 1st input tensor in slices specified by the 2nd input.
|
||||
**Short description**: *ReduceSum* operation performs reduction with addition, on a given input `data`, along dimensions specified by `axes` input.
|
||||
|
||||
**Detailed Description**
|
||||
|
||||
*ReduceSum* operation performs reduction with addition, on a given input `data`, along dimensions specified by `axes` additional input.
|
||||
Each element in the output is calculated as follows:
|
||||
|
||||
output[i0, i1, ..., iN] = sum[j0,..., jN](x[j0, ..., jN]))
|
||||
|
||||
where indices i0, ..., iN run through all valid indices for input `data` and summation `sum[j0, ..., jN]` has `jk = ik` for those dimensions `k` that are not in the set of indices specified by `axes` input.
|
||||
|
||||
Particular cases:
|
||||
|
||||
1. If `axes` is an empty list, then *ReduceSum* corresponds to identity operation.
|
||||
2. If `axes` contains all dimensions of input `data`, a single reduction value is calculated for entire input tensor.
|
||||
|
||||
**Attributes**
|
||||
|
||||
@ -18,32 +32,20 @@
|
||||
|
||||
**Inputs**
|
||||
|
||||
* **1**: Input tensor x of type *T1*. **Required.**
|
||||
* **1**: `data` - A tensor of type *T* and arbitrary shape. **Required.**
|
||||
|
||||
* **2**: Scalar or 1D tensor of type *T_IND* with axis indices for the 1st input along which reduction is performed. Accepted range is `[-r, r-1]` where where `r` is the rank of input tensor, all values must be unique, repeats are not allowed. **Required.**
|
||||
* **2**: `axes` - Axis indices of `data` input tensor, along which reduction is performed. A scalar or 1D tensor of unique elements and type *T_IND*. The range of elements is `[-r, r-1]` where `r` is the rank of `data` input tensor. **Required.**
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: Tensor of the same type as the 1st input tensor and `shape[i] = shapeOf(input1)[i]` for all `i` that is not in the list of axes from the 2nd input. For dimensions from the 2nd input tensor, `shape[i] == 1` if `keep_dims == true`, or `i`-th dimension is removed from the output otherwise.
|
||||
* **1**: A tensor of type *T* and `shape[i] = shapeOf(data)[i]` for all `i` dimensions not in `axes` input tensor. For dimensions in `axes`, `shape[i] == 1` if `keep_dims == true`, otherwise the `i`-th dimension is removed from the output.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T1*: any supported numeric type.
|
||||
* *T*: any supported numeric type.
|
||||
* *T_IND*: `int64` or `int32`.
|
||||
|
||||
**Detailed Description**
|
||||
|
||||
Each element in the output is the result of reduction with addition operation along dimensions specified by the 2nd input:
|
||||
|
||||
output[i0, i1, ..., iN] = sum[j0,..., jN](x[j0, ..., jN]))
|
||||
|
||||
Where indices i0, ..., iN run through all valid indices for the 1st input and summation `sum[j0, ..., jN]` have `jk = ik` for those dimensions `k` that are not in the set of indices specified by the 2nd input of the operation.
|
||||
Corner cases:
|
||||
|
||||
1. When the 2nd input is an empty list, then this operation does nothing, it is an identity.
|
||||
2. When the 2nd input contains all dimensions of the 1st input, this means that a single reduction value is calculated for entire input tensor.
|
||||
|
||||
**Example**
|
||||
**Examples**
|
||||
|
||||
```xml
|
||||
<layer id="1" type="ReduceSum" ...>
|
||||
|
@ -10,9 +10,9 @@
|
||||
|
||||
**Inputs**:
|
||||
|
||||
* **1**: Multidimensional input tensor of type *T*. *Required*.
|
||||
* **1**: Tensor of type *T* and arbitrary shape. **Required**.
|
||||
|
||||
* **2**: 0D or 1D tensor of type *T_SHAPE* with dimensions indices to be set to 1. Values could be negative. *Required*.
|
||||
* **2**: Scalar or 1D tensor of type *T_INT* with indices of dimensions to unsqueeze. Values could be negative (have to be from range `[-R, R-1]`, where `R` is the rank of the output). **Required**.
|
||||
|
||||
**Outputs**:
|
||||
|
||||
@ -20,13 +20,13 @@
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: supported type.
|
||||
* *T*: any numeric type.
|
||||
|
||||
* *T_SHAPE*: supported integer type.
|
||||
* *T_INT*: any supported integer type.
|
||||
|
||||
**Example**
|
||||
|
||||
*Example 1:*
|
||||
*Example 1: unsqueeze 2D tensor to a 4D tensor*
|
||||
```xml
|
||||
<layer ... type="Unsqueeze">
|
||||
<input>
|
||||
@ -51,7 +51,7 @@
|
||||
</layer>
|
||||
```
|
||||
|
||||
*Example 2: (unsqueeze 0D tensor (constant) to 1D tensor)*
|
||||
*Example 2: unsqueeze 0D tensor (constant) to 1D tensor*
|
||||
```xml
|
||||
<layer ... type="Unsqueeze">
|
||||
<input>
|
||||
|
@ -2,41 +2,37 @@
|
||||
|
||||
**Versioned name**: *ConvertLike-1*
|
||||
|
||||
**Category**: type conversion
|
||||
**Category**: *Type conversion*
|
||||
|
||||
**Short description**: Operation converts all elements of the 1st input tensor to a type of elements of 2nd input tensor.
|
||||
|
||||
**Attributes**:
|
||||
|
||||
No attributes available.
|
||||
|
||||
**Inputs**
|
||||
|
||||
* **1**: `data` - A tensor of type T1. **Required.**
|
||||
* **2**: `like` - A tensor of type T2. **Required.**
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: The result of element-wise *"ConvertLike"* operation. A tensor of the same type with `like` tensor and the same shape with `data` tensor.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T1*: u8, u16, u32, u64, i8, i16, i32, i64, f16, f32, boolean, bf16
|
||||
* *T2*: u8, u16, u32, u64, i8, i16, i32, i64, f16, f32, boolean, bf16
|
||||
**Short description**: *ConvertLike* operation performs element-wise conversion on a given input tensor `data` to the element type of an additional input tensor `like`.
|
||||
|
||||
**Detailed description**
|
||||
|
||||
Conversion from one supported type to another supported type is always allowed. User must be aware of precision loss and value change caused by range difference between two types. For example, a 32-bit float *3.141592* may be rounded to a 32-bit int *3*.
|
||||
Conversion from one supported type to another supported type is always allowed. User must be aware of precision loss and value change caused by range difference between two types. For example, a 32-bit float *3.141592* may be rounded to a 32-bit int *3*. The result of unsupported conversions is undefined, e.g. conversion of negative signed integer value to any unsigned integer type.
|
||||
|
||||
*a* - `data` input tensor, *b* - `like` input tensor.
|
||||
Output elements are represented as follows:
|
||||
|
||||
\f[
|
||||
o_{i} = Convert[destination_type=type(b)](a_{i})
|
||||
\f]
|
||||
o[i] = Convert[destination_type=type(b)](a[i])
|
||||
|
||||
**Examples**
|
||||
where `a` and `b` correspond to `data` and `like` input tensors, respectively.
|
||||
|
||||
*Example 1*
|
||||
**Attributes**: *ConvertLike* operation has no attributes.
|
||||
|
||||
**Inputs**
|
||||
|
||||
* **1**: `data` - A tensor of type *T1* and arbitrary shape. **Required.**
|
||||
* **2**: `like` - A tensor of type *T2* and arbitrary shape. **Required.**
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: The result of element-wise *ConvertLike* operation applied to input tensor `data`. A tensor of type *T2* and the same shape as `data` input tensor.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T1*: any supported type
|
||||
* *T2*: any supported type
|
||||
|
||||
**Example**
|
||||
|
||||
```xml
|
||||
<layer ... type="ConvertLike">
|
||||
|
@ -8,18 +8,21 @@
|
||||
|
||||
**Detailed description**
|
||||
|
||||
Conversion from one supported type to another supported type is always allowed. User must be aware of precision loss and value change caused by range difference between two types. For example, a 32-bit float `3.141592` may be rounded to a 32-bit int `3`. The result of unsupported conversions is undefined, e.g. conversion of negative signed integer value to any unsigned integer type.
|
||||
Conversion from one supported type to another supported type is always allowed. User must be aware of precision loss and value change caused by range difference between two types. For example, a 32-bit float `3.141592` may be rounded to a 32-bit int `3`. The result of unsupported conversions is undefined, e.g. conversion of negative signed integer value to any unsigned integer type.
|
||||
|
||||
Output elements are represented as follows:
|
||||
|
||||
\f[
|
||||
o_{i} = convert(a_{i})
|
||||
o_{i} = Convert(a_{i})
|
||||
\f]
|
||||
|
||||
where `a` corresponds to the input tensor.
|
||||
|
||||
**Attributes**:
|
||||
|
||||
* *destination_type*
|
||||
|
||||
* **Description**: the destination type
|
||||
* **Description**: the destination type.
|
||||
* **Range of values**: one of the supported types *T*
|
||||
* **Type**: `string`
|
||||
* **Default value**: None
|
||||
@ -35,11 +38,9 @@ o_{i} = convert(a_{i})
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: `u8`, `u16`, `u32`, `u64`, `i8`, `i16`, `i32`, `i64`, `f16`, `f32`, `boolean`, `bf16`
|
||||
* *T*: any supported type
|
||||
|
||||
**Examples**
|
||||
|
||||
*Example 1*
|
||||
**Example**
|
||||
|
||||
```xml
|
||||
<layer ... type="Convert">
|
||||
|
@ -32,6 +32,8 @@ add_subdirectory(thirdparty)
|
||||
|
||||
add_subdirectory(src)
|
||||
|
||||
add_subdirectory(ie_bridges/c)
|
||||
|
||||
if(ENABLE_TESTS)
|
||||
add_subdirectory(tests_deprecated)
|
||||
add_subdirectory(tests)
|
||||
@ -62,8 +64,6 @@ if (ENABLE_PYTHON)
|
||||
add_subdirectory(ie_bridges/python)
|
||||
endif()
|
||||
|
||||
add_subdirectory(ie_bridges/c)
|
||||
|
||||
#
|
||||
# Install
|
||||
#
|
||||
|
@ -18,7 +18,6 @@ else()
|
||||
set(MODELS_BRANCH "master")
|
||||
endif()
|
||||
|
||||
|
||||
if (ENABLE_DATA)
|
||||
add_models_repo(${ENABLE_DATA} "data:https://github.com/openvinotoolkit/testdata.git")
|
||||
set(MODELS_PATH "${TEMP}/models/src/data")
|
||||
@ -294,8 +293,6 @@ else()
|
||||
reset_deps_cache(OpenCV_DIR)
|
||||
endif()
|
||||
|
||||
# TODO: remove global CMAKE_MODULE_PATH
|
||||
list(APPEND CMAKE_MODULE_PATH "${IEDevScripts_DIR}")
|
||||
include(cmake/ie_parallel.cmake)
|
||||
|
||||
if (ENABLE_GNA)
|
||||
|
@ -3,8 +3,28 @@
|
||||
#
|
||||
|
||||
function(set_ie_threading_interface_for TARGET_NAME)
|
||||
macro(ext_message TRACE_LEVEL)
|
||||
if (TRACE_LEVEL STREQUAL FATAL_ERROR)
|
||||
if(InferenceEngine_FIND_REQUIRED)
|
||||
message(FATAL_ERROR "${ARGN}")
|
||||
elseif(NOT InferenceEngine_FIND_QUIETLY)
|
||||
message(WARNING "${ARGN}")
|
||||
endif()
|
||||
return()
|
||||
elseif(NOT InferenceEngine_FIND_QUIETLY)
|
||||
message(${TRACE_LEVEL} "${ARGN}")
|
||||
endif ()
|
||||
endmacro()
|
||||
|
||||
if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO" AND NOT TBB_FOUND)
|
||||
find_package(TBB COMPONENTS tbb tbbmalloc)
|
||||
if(IEDevScripts_DIR)
|
||||
find_package(TBB COMPONENTS tbb tbbmalloc
|
||||
PATHS IEDevScripts_DIR
|
||||
NO_CMAKE_FIND_ROOT_PATH
|
||||
NO_DEFAULT_PATH)
|
||||
else()
|
||||
find_dependency(TBB COMPONENTS tbb tbbmalloc)
|
||||
endif()
|
||||
set("TBB_FOUND" ${TBB_FOUND} PARENT_SCOPE)
|
||||
set("TBB_IMPORTED_TARGETS" ${TBB_IMPORTED_TARGETS} PARENT_SCOPE)
|
||||
set("TBB_VERSION" ${TBB_VERSION} PARENT_SCOPE)
|
||||
@ -15,12 +35,22 @@ function(set_ie_threading_interface_for TARGET_NAME)
|
||||
endif()
|
||||
|
||||
get_target_property(target_type ${TARGET_NAME} TYPE)
|
||||
|
||||
if(target_type STREQUAL "INTERFACE_LIBRARY")
|
||||
set(LINK_TYPE "INTERFACE")
|
||||
elseif(target_type STREQUAL "EXECUTABLE" OR target_type STREQUAL "OBJECT_LIBRARY")
|
||||
elseif(target_type STREQUAL "EXECUTABLE" OR target_type STREQUAL "OBJECT_LIBRARY" OR
|
||||
target_type STREQUAL "MODULE_LIBRARY")
|
||||
set(LINK_TYPE "PRIVATE")
|
||||
elseif(target_type STREQUAL "STATIC_LIBRARY")
|
||||
# Affected libraries: inference_engine_s, inference_engine_preproc_s
|
||||
# they don't have TBB in public headers => PRIVATE
|
||||
set(LINK_TYPE "PRIVATE")
|
||||
elseif(target_type STREQUAL "SHARED_LIBRARY")
|
||||
# TODO: inference_engine only
|
||||
# Why TBB propogates its headers to inference_engine?
|
||||
set(LINK_TYPE "PRIVATE")
|
||||
else()
|
||||
set(LINK_TYPE "PUBLIC")
|
||||
ext_message(WARNING "Unknown target type")
|
||||
endif()
|
||||
|
||||
function(ie_target_link_libraries TARGET_NAME LINK_TYPE)
|
||||
|
@ -1,62 +0,0 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
#
|
||||
# FindIE
|
||||
# ------
|
||||
#
|
||||
# This will define the following variables:
|
||||
#
|
||||
# InferenceEngine_FOUND - True if the system has the Inference Engine library
|
||||
# InferenceEngine_INCLUDE_DIRS - Inference Engine include directories
|
||||
# InferenceEngine_LIBRARIES - Inference Engine libraries
|
||||
#
|
||||
# and the following imported targets:
|
||||
#
|
||||
# IE::inference_engine - The Inference Engine library
|
||||
# IE::inference_engine_c_api - The Inference Engine C API library
|
||||
#
|
||||
|
||||
if(DEFINED IE_MAIN_SOURCE_DIR AND TARGET inference_engine)
|
||||
set(InferenceEngine_LIBRARIES inference_engine inference_engine_c_api)
|
||||
if(NOT TARGET IE::inference_engine)
|
||||
add_library(IE::inference_engine ALIAS inference_engine)
|
||||
endif()
|
||||
if(TARGET inference_engine_c_api AND NOT TARGET IE::inference_engine_c_api)
|
||||
add_library(IE::inference_engine_c_api ALIAS inference_engine_c_api)
|
||||
endif()
|
||||
else()
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/inference_engine_targets.cmake")
|
||||
|
||||
file(TO_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}" cache_path)
|
||||
set (ie_options THREADING)
|
||||
load_cache("${cache_path}" READ_WITH_PREFIX "" ${ie_options})
|
||||
message(STATUS "The following CMake options are exported from the Inference Engine build tree")
|
||||
message("")
|
||||
foreach(option IN LISTS ie_options)
|
||||
message(" ${option}: ${${option}}")
|
||||
endforeach()
|
||||
message("")
|
||||
|
||||
# inherit TBB from main IE project if enabled
|
||||
if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
|
||||
load_cache("${cache_path}" READ_WITH_PREFIX "" TBB_DIR;ENABLE_TBB_RELEASE_ONLY)
|
||||
set(TBB_FIND_RELEASE_ONLY ${ENABLE_TBB_RELEASE_ONLY})
|
||||
find_package(TBB)
|
||||
endif()
|
||||
|
||||
get_target_property(InferenceEngine_INCLUDE_DIRS IE::inference_engine INTERFACE_INCLUDE_DIRECTORIES)
|
||||
set(InferenceEngine_LIBRARIES IE::inference_engine IE::inference_engine_c_api)
|
||||
|
||||
foreach(library IN LISTS InferenceEngine_LIBRARIES)
|
||||
if(CMAKE_CROSSCOMPILING AND NOT MSVC)
|
||||
set_property(TARGET ${library} PROPERTY
|
||||
INTERFACE_LINK_OPTIONS "-Wl,--allow-shlib-undefined")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
if(NOT MSVC)
|
||||
set_target_properties(${InferenceEngine_LIBRARIES} PROPERTIES
|
||||
INTERFACE_COMPILE_OPTIONS "-Wno-error=deprecated-declarations")
|
||||
endif()
|
||||
endif()
|
@ -2,8 +2,13 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
set(InferenceEngine_VERSION 2.1.0)
|
||||
set(PACKAGE_VERSION ${InferenceEngine_VERSION})
|
||||
# TODO: hardcode will be fixed separatelly
|
||||
set(PACKAGE_VERSION_MAJOR 2)
|
||||
set(PACKAGE_VERSION_MINOR 1)
|
||||
set(PACKAGE_VERSION_PATCH 0)
|
||||
set(PACKAGE_VERSION_COUNT 3)
|
||||
|
||||
set(PACKAGE_VERSION "${PACKAGE_VERSION_MAJOR}.${PACKAGE_VERSION_MINOR}.${PACKAGE_VERSION_PATCH}")
|
||||
|
||||
set(PACKAGE_VERSION_EXACT False)
|
||||
set(PACKAGE_VERSION_COMPATIBLE False)
|
||||
@ -13,6 +18,7 @@ if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
|
||||
set(PACKAGE_VERSION_COMPATIBLE True)
|
||||
endif()
|
||||
|
||||
if(PACKAGE_FIND_VERSION VERSION_LESS PACKAGE_VERSION)
|
||||
if(PACKAGE_FIND_VERSION_MAJOR EQUAL PACKAGE_VERSION_MAJOR AND
|
||||
PACKAGE_FIND_VERSION VERSION_LESS PACKAGE_VERSION)
|
||||
set(PACKAGE_VERSION_COMPATIBLE True)
|
||||
endif()
|
||||
|
@ -1,12 +1,10 @@
|
||||
# Copyright (C) 2018-2020 Intel Corporation
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
#
|
||||
# FindIE
|
||||
# Inference Engine cmake config
|
||||
# ------
|
||||
#
|
||||
# You can specify the path to Inference Engine files in IE_ROOT_DIR
|
||||
#
|
||||
# This will define the following variables:
|
||||
#
|
||||
# InferenceEngine_FOUND - True if the system has the Inference Engine library
|
||||
@ -19,150 +17,55 @@
|
||||
# IE::inference_engine_c_api - The Inference Engine C API library
|
||||
#
|
||||
|
||||
macro(ext_message TRACE_LEVEL)
|
||||
if (${TRACE_LEVEL} STREQUAL FATAL_ERROR)
|
||||
if(InferenceEngine_FIND_REQUIRED)
|
||||
message(FATAL_ERROR "${ARGN}")
|
||||
elseif(NOT InferenceEngine_FIND_QUIETLY)
|
||||
message(WARNING "${ARGN}")
|
||||
endif()
|
||||
return()
|
||||
elseif(NOT InferenceEngine_FIND_QUIETLY)
|
||||
message(${TRACE_LEVEL} "${ARGN}")
|
||||
endif ()
|
||||
endmacro()
|
||||
@PACKAGE_INIT@
|
||||
|
||||
set(InferenceEngine_FOUND FALSE)
|
||||
include(CMakeFindDependencyMacro)
|
||||
|
||||
if(TARGET IE::inference_engine)
|
||||
set(InferenceEngine_FOUND TRUE)
|
||||
get_target_property(InferenceEngine_INCLUDE_DIRS IE::inference_engine INTERFACE_INCLUDE_DIRECTORIES)
|
||||
set(InferenceEngine_LIBRARIES IE::inference_engine
|
||||
IE::inference_engine_c_api)
|
||||
else()
|
||||
if (WIN32)
|
||||
set(_ARCH intel64)
|
||||
else()
|
||||
string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} _ARCH)
|
||||
if(_ARCH STREQUAL "x86_64" OR _ARCH STREQUAL "amd64") # Windows detects Intel's 64-bit CPU as AMD64
|
||||
set(_ARCH intel64)
|
||||
elseif(_ARCH STREQUAL "i386")
|
||||
set(_ARCH ia32)
|
||||
endif()
|
||||
endif()
|
||||
# need to store current PACKAGE_PREFIX_DIR, because it's overwritten by ngraph one
|
||||
set(IE_PACKAGE_PREFIX_DIR "${PACKAGE_PREFIX_DIR}")
|
||||
|
||||
set(THREADING "@THREADING@")
|
||||
|
||||
# check whether setvars.sh is sourced
|
||||
if(NOT IE_ROOT_DIR AND (DEFINED ENV{InferenceEngine_DIR} OR InferenceEngine_DIR OR DEFINED ENV{INTEL_OPENVINO_DIR}))
|
||||
if (EXISTS "${InferenceEngine_DIR}")
|
||||
# InferenceEngine_DIR manually set via command line params
|
||||
set(IE_ROOT_DIR "${InferenceEngine_DIR}/..")
|
||||
elseif (EXISTS "$ENV{InferenceEngine_DIR}")
|
||||
# InferenceEngine_DIR manually set via env
|
||||
set(IE_ROOT_DIR "$ENV{InferenceEngine_DIR}/..")
|
||||
elseif (EXISTS "$ENV{INTEL_OPENVINO_DIR}/inference_engine")
|
||||
# if we installed DL SDK
|
||||
set(IE_ROOT_DIR "$ENV{INTEL_OPENVINO_DIR}/inference_engine")
|
||||
elseif (EXISTS "$ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine")
|
||||
# CV SDK is installed
|
||||
set(IE_ROOT_DIR "$ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine")
|
||||
endif()
|
||||
if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
|
||||
set_and_check(_tbb_dir "@PACKAGE_IE_TBB_DIR@")
|
||||
find_dependency(TBB
|
||||
COMPONENTS tbb tbbmalloc
|
||||
CONFIG
|
||||
PATHS ${TBBROOT}/cmake
|
||||
${_tbb_dir}
|
||||
NO_CMAKE_FIND_ROOT_PATH
|
||||
NO_DEFAULT_PATH)
|
||||
endif()
|
||||
|
||||
if(NOT IE_ROOT_DIR)
|
||||
ext_message(FATAL_ERROR "inference_engine root directory is not found")
|
||||
endif()
|
||||
set_and_check(_ngraph_dir "@PACKAGE_IE_NGRAPH_DIR@")
|
||||
find_dependency(ngraph
|
||||
CONFIG
|
||||
PATHS ${_ngraph_dir}
|
||||
NO_CMAKE_FIND_ROOT_PATH
|
||||
NO_DEFAULT_PATH)
|
||||
|
||||
find_path(IE_INCLUDE_DIR inference_engine.hpp "${IE_ROOT_DIR}/include" NO_DEFAULT_PATH)
|
||||
|
||||
set(IE_LIB_DIR "${IE_ROOT_DIR}/lib/${_ARCH}")
|
||||
set(IE_LIB_REL_DIR "${IE_LIB_DIR}/Release")
|
||||
set(IE_LIB_DBG_DIR "${IE_LIB_DIR}/Debug")
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
if(WIN32)
|
||||
find_library(IE_RELEASE_LIBRARY inference_engine@IE_RELEASE_POSTFIX_WIN@ "${IE_LIB_REL_DIR}" NO_DEFAULT_PATH)
|
||||
find_library(IE_C_API_RELEASE_LIBRARY inference_engine_c_api@IE_RELEASE_POSTFIX_WIN@ "${IE_LIB_REL_DIR}" NO_DEFAULT_PATH)
|
||||
elseif(APPLE)
|
||||
find_library(IE_RELEASE_LIBRARY inference_engine@IE_RELEASE_POSTFIX_MAC@ "${IE_LIB_DIR}" NO_DEFAULT_PATH)
|
||||
find_library(IE_C_API_RELEASE_LIBRARY inference_engine_c_api@IE_RELEASE_POSTFIX_MAC@ "${IE_LIB_DIR}" NO_DEFAULT_PATH)
|
||||
else()
|
||||
find_library(IE_RELEASE_LIBRARY inference_engine@IE_RELEASE_POSTFIX_LIN@ "${IE_LIB_DIR}" NO_DEFAULT_PATH)
|
||||
find_library(IE_C_API_RELEASE_LIBRARY inference_engine_c_api@IE_RELEASE_POSTFIX_LIN@ "${IE_LIB_DIR}" NO_DEFAULT_PATH)
|
||||
endif()
|
||||
|
||||
find_package_handle_standard_args( InferenceEngine
|
||||
FOUND_VAR INFERENCEENGINE_FOUND
|
||||
REQUIRED_VARS IE_RELEASE_LIBRARY IE_C_API_RELEASE_LIBRARY IE_INCLUDE_DIR
|
||||
FAIL_MESSAGE "Some of mandatory Inference Engine components are not found. Please consult InferenceEgnineConfig.cmake module's help page.")
|
||||
|
||||
if(INFERENCEENGINE_FOUND)
|
||||
# to keep this line for successful execution in CMake 2.8
|
||||
set(InferenceEngine_FOUND TRUE)
|
||||
|
||||
foreach(ie_library_suffix "" "_c_api")
|
||||
string(TOUPPER "${ie_library_suffix}" ie_library_usuffix)
|
||||
add_library(IE::inference_engine${ie_library_suffix} SHARED IMPORTED GLOBAL)
|
||||
|
||||
if (WIN32)
|
||||
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
|
||||
IMPORTED_CONFIGURATIONS RELEASE
|
||||
IMPORTED_IMPLIB_RELEASE "${IE${ie_library_usuffix}_RELEASE_LIBRARY}"
|
||||
MAP_IMPORTED_CONFIG_RELEASE Release
|
||||
MAP_IMPORTED_CONFIG_RELWITHDEBINFO Release
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}")
|
||||
|
||||
# Debug binaries are optional
|
||||
find_library(IE${ie_library_usuffix}_DEBUG_LIBRARY inference_engine${ie_library_suffix}@IE_DEBUG_POSTFIX_WIN@
|
||||
"${IE_LIB_DBG_DIR}" NO_DEFAULT_PATH)
|
||||
if (IE${ie_library_usuffix}_DEBUG_LIBRARY)
|
||||
set_property(TARGET IE::inference_engine${ie_library_suffix} APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG)
|
||||
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
|
||||
IMPORTED_IMPLIB_DEBUG "${IE${ie_library_usuffix}_DEBUG_LIBRARY}"
|
||||
MAP_IMPORTED_CONFIG_DEBUG Debug)
|
||||
else()
|
||||
ext_message(WARNING "Inference Engine DEBUG binaries are missed.")
|
||||
endif()
|
||||
elseif (APPLE)
|
||||
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
|
||||
IMPORTED_LOCATION_RELEASE "${IE${ie_library_usuffix}_RELEASE_LIBRARY}"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}"
|
||||
INTERFACE_COMPILE_OPTIONS "-Wno-error=deprecated-declarations")
|
||||
|
||||
# Debug binaries are optional
|
||||
find_library(IE${ie_library_usuffix}_DEBUG_LIBRARY inference_engine${ie_library_suffix}@IE_DEBUG_POSTFIX_MAC@ "${IE_LIB_DIR}" NO_DEFAULT_PATH)
|
||||
if (IE${ie_library_usuffix}_DEBUG_LIBRARY)
|
||||
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
|
||||
IMPORTED_LOCATION_DEBUG "${IE${ie_library_usuffix}_DEBUG_LIBRARY}")
|
||||
else()
|
||||
ext_message(WARNING "Inference Engine DEBUG binaries are missed")
|
||||
endif()
|
||||
else()
|
||||
# Only Release binaries are distributed for Linux systems
|
||||
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
|
||||
IMPORTED_LOCATION "${IE${ie_library_usuffix}_RELEASE_LIBRARY}"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}")
|
||||
function(_ie_target_no_deprecation_error)
|
||||
if(NOT MSVC)
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
||||
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
|
||||
INTERFACE_COMPILE_OPTIONS "-diag-warning=1786")
|
||||
set(flags "-diag-warning=1786")
|
||||
else()
|
||||
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
|
||||
INTERFACE_COMPILE_OPTIONS "-Wno-error=deprecated-declarations")
|
||||
if(CMAKE_CROSSCOMPILING AND NOT MSVC)
|
||||
set_property(TARGET IE::inference_engine${ie_library_suffix} PROPERTY
|
||||
INTERFACE_LINK_OPTIONS "-Wl,--allow-shlib-undefined")
|
||||
set(flags "-Wno-error=deprecated-declarations")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(InferenceEngine_INCLUDE_DIRS ${IE_INCLUDE_DIR})
|
||||
set(InferenceEngine_LIBRARIES IE::inference_engine
|
||||
IE::inference_engine_c_api)
|
||||
set_target_properties(${ARGV} PROPERTIES INTERFACE_COMPILE_OPTIONS ${flags})
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
set(IE_EXTERNAL_DIR "${IE_ROOT_DIR}/external")
|
||||
include("${IE_ROOT_DIR}/share/ie_parallel.cmake")
|
||||
endif()
|
||||
if(TARGET inference_engine)
|
||||
set(InferenceEngine_LIBRARIES inference_engine inference_engine_c_api)
|
||||
else()
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/InferenceEngineTargets.cmake")
|
||||
set(InferenceEngine_LIBRARIES IE::inference_engine IE::inference_engine_c_api)
|
||||
_ie_target_no_deprecation_error(${InferenceEngine_LIBRARIES})
|
||||
endif()
|
||||
|
||||
# restore PACKAGE_PREFIX_DIR
|
||||
set(PACKAGE_PREFIX_DIR ${IE_PACKAGE_PREFIX_DIR})
|
||||
|
||||
set_and_check(InferenceEngine_INCLUDE_DIRS "@PACKAGE_IE_INCLUDE_DIR@")
|
||||
|
||||
check_required_components(InferenceEngine)
|
||||
|
@ -13,7 +13,9 @@ add_library(${TARGET_NAME} SHARED ${HEADERS} ${SOURCES})
|
||||
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine)
|
||||
|
||||
target_include_directories(${TARGET_NAME} PUBLIC "${InferenceEngine_C_API_SOURCE_DIR}/include")
|
||||
target_include_directories(${TARGET_NAME} PUBLIC
|
||||
$<INSTALL_INTERFACE:${IE_CPACK_IE_DIR}/include>
|
||||
$<BUILD_INTERFACE:${InferenceEngine_C_API_SOURCE_DIR}/include>)
|
||||
|
||||
add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
|
||||
|
||||
@ -25,13 +27,17 @@ ie_add_vs_version_file(NAME ${TARGET_NAME}
|
||||
# export
|
||||
|
||||
export(TARGETS ${TARGET_NAME} NAMESPACE IE::
|
||||
APPEND FILE "${CMAKE_BINARY_DIR}/inference_engine_targets.cmake")
|
||||
APPEND FILE "${CMAKE_BINARY_DIR}/InferenceEngineTargets.cmake")
|
||||
|
||||
# WA for CI issue
|
||||
export(TARGETS ${TARGET_NAME} NAMESPACE IE::
|
||||
APPEND FILE "${CMAKE_BINARY_DIR}/share/InferenceEngineTargets.cmake")
|
||||
|
||||
# install
|
||||
|
||||
ie_cpack_add_component(core_c DEPENDS core)
|
||||
|
||||
install(TARGETS ${TARGET_NAME}
|
||||
install(TARGETS ${TARGET_NAME} EXPORT InferenceEngineTargets
|
||||
RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core_c
|
||||
ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core_c
|
||||
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core_c)
|
||||
|
@ -77,6 +77,27 @@ DECLARE_GNA_CONFIG_VALUE(AVX1_EXACT);
|
||||
DECLARE_GNA_CONFIG_VALUE(AVX2);
|
||||
DECLARE_GNA_CONFIG_VALUE(AVX2_EXACT);
|
||||
|
||||
/**
|
||||
* @brief The option to override the GNA HW execution target. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0.
|
||||
* By default (in case of no value set) the behavior depends on GNA HW availability:
|
||||
* If GNA HW is present, use the option corresponding to this HW.
|
||||
* If HW is not present, use the option corresponding to the latest fully supported GNA HW generation.
|
||||
 * A fully supported GNA HW generation means it must be supported by both the OV GNA Plugin and the core GNA Library.
|
||||
* For the GNA Library 2.0.X.Y, the latest supported GNA HW generation corresponds to GNA_TARGET_2_0.
|
||||
* For the GNA Library 2.1.X.Y, the latest supported GNA HW generation corresponds to GNA_TARGET_3_0.
|
||||
* For the OV GNA Plugin 2021.4, the latest supported GNA HW generation corresponds to GNA_TARGET_3_0.
|
||||
*/
|
||||
DECLARE_GNA_CONFIG_KEY(EXEC_TARGET);
|
||||
|
||||
DECLARE_GNA_CONFIG_VALUE(TARGET_2_0);
|
||||
DECLARE_GNA_CONFIG_VALUE(TARGET_3_0);
|
||||
|
||||
/**
|
||||
* @brief The option to override the GNA HW compile target. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0.
|
||||
* By default the same as GNA_EXEC_TARGET.
|
||||
*/
|
||||
DECLARE_GNA_CONFIG_KEY(COMPILE_TARGET);
|
||||
|
||||
/**
|
||||
* @brief if enabled produced minimum memory footprint for loaded network in GNA memory, default value is YES
|
||||
*/
|
||||
|
@ -133,15 +133,7 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
|
||||
endif()
|
||||
|
||||
if(IE_NOT_FOUND_MESSAGE)
|
||||
# the flag is used to throw a custom message in case if the IE package is not found.
|
||||
find_package(InferenceEngine 2.1 QUIET)
|
||||
if (NOT(InferenceEngine_FOUND))
|
||||
message(FATAL_ERROR ${IE_NOT_FOUND_MESSAGE})
|
||||
endif()
|
||||
else()
|
||||
find_package(InferenceEngine 2.1 REQUIRED)
|
||||
endif()
|
||||
find_package(InferenceEngine 2.1.0 EXACT REQUIRED)
|
||||
|
||||
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/common/utils")
|
||||
add_subdirectory(common/utils)
|
||||
|
@ -760,6 +760,8 @@ int main(int argc, char* argv[]) {
|
||||
gnaPluginConfig[GNAConfigParams::KEY_GNA_PRECISION] = "I16";
|
||||
}
|
||||
|
||||
gnaPluginConfig[GNAConfigParams::KEY_GNA_EXEC_TARGET] = FLAGS_exec_target;
|
||||
gnaPluginConfig[GNAConfigParams::KEY_GNA_COMPILE_TARGET] = FLAGS_compile_target;
|
||||
gnaPluginConfig[GNAConfigParams::KEY_GNA_LIB_N_THREADS] = std::to_string((FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : FLAGS_nthreads);
|
||||
gnaPluginConfig[GNA_CONFIG_KEY(COMPACT_MODE)] = CONFIG_VALUE(NO);
|
||||
gnaPluginConfig[GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT)] = std::to_string(FLAGS_pwl_me);
|
||||
|
@ -27,6 +27,20 @@ static const char target_device_message[] = "Optional. Specify a target device t
|
||||
"below. "
|
||||
"The sample will look for a suitable plugin for device specified.";
|
||||
|
||||
/// @brief message for execution target
|
||||
static const char execution_target_message[] = "Optional. Specify GNA execution target generation. "
|
||||
"May be one of GNA_TARGET_2_0, GNA_TARGET_3_0. "
|
||||
"By default, generation corresponds to the GNA HW available in the system "
|
||||
"or the latest fully supported generation by the software. "
|
||||
"See the GNA Plugin's GNA_EXEC_TARGET config option description.";
|
||||
|
||||
/// @brief message for execution target
|
||||
static const char compile_target_message[] = "Optional. Specify GNA compile target generation. "
|
||||
"May be one of GNA_TARGET_2_0, GNA_TARGET_3_0. "
|
||||
"By default, generation corresponds to the GNA HW available in the system "
|
||||
"or the latest fully supported generation by the software. "
|
||||
"See the GNA Plugin's GNA_COMPILE_TARGET config option description.";
|
||||
|
||||
/// @brief message for performance counters
|
||||
static const char performance_counter_message[] = "Optional. Enables per-layer performance report.";
|
||||
|
||||
@ -109,6 +123,12 @@ DEFINE_string(m, "", model_message);
|
||||
/// \brief device the target device to infer on (default CPU) <br>
|
||||
DEFINE_string(d, "CPU", target_device_message);
|
||||
|
||||
/// \brief GNA execution target <br>
|
||||
DEFINE_string(exec_target, "", execution_target_message);
|
||||
|
||||
/// \brief GNA compile target <br>
|
||||
DEFINE_string(compile_target, "", compile_target_message);
|
||||
|
||||
/// \brief Enable per-layer performance report
|
||||
DEFINE_bool(pc, false, performance_counter_message);
|
||||
|
||||
|
@ -1736,8 +1736,8 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
|
||||
outputTensor.Shape.Dimensions[beginOfHInNHWC + dimHW] =
|
||||
outputFromPooling(outFromConv, poolWindow->Dimensions[beginOfHInHW + dimHW], poolStride->Dimensions[beginOfHInHW + dimHW]);
|
||||
}
|
||||
AdvanceOperationIfAllApplied(component, i, gnaOperation);
|
||||
}
|
||||
AdvanceOperationIfAllApplied(component, i, gnaOperation);
|
||||
}
|
||||
#else
|
||||
} else if (pLayer->nLayerKind == INTEL_CONVOLUTIONAL) {
|
||||
|
@ -6,12 +6,58 @@
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
|
||||
#include <runtime/pwl.h>
|
||||
#include <gna_slope_scale.h>
|
||||
#include "runtime/pwl.h"
|
||||
#include "gna_slope_scale.h"
|
||||
#include "dnn_types.h"
|
||||
#include "backend/gna_types.h"
|
||||
#include "round_float_define.hpp"
|
||||
|
||||
|
||||
// This function performs emulation of HW saturation of PWL segments in SW
|
||||
// by inserting additional segments when overflow would happen
|
||||
static void insert_extra_pwl_segments(std::vector<gna_pwl_segment_t>& gna_pwl,
|
||||
const int16_t y_min,
|
||||
const int16_t y_max) {
|
||||
std::map<size_t, gna_pwl_segment_t> extra_segments;
|
||||
gna_pwl_segment_t extra_segment;
|
||||
size_t gna_pwl_size = gna_pwl.size();
|
||||
|
||||
if (gna_pwl_size == 0)
|
||||
return;
|
||||
|
||||
// We're adding a segment at the beginning if the first one doesn't cover min value
|
||||
if ((gna_pwl[0].xBase & XBASEMASK) != INT32_MIN) {
|
||||
extra_segment.xBase = INT32_MIN & XBASEMASK;
|
||||
extra_segment.yBase = gna_pwl[0].yBase;
|
||||
extra_segment.slope = 0;
|
||||
extra_segments[0] = extra_segment;
|
||||
}
|
||||
|
||||
// We're checking here if saturation could potentially happen at the trailing segments
|
||||
if (gna_pwl[gna_pwl_size - 1].slope != 0) {
|
||||
int16_t slope = gna_pwl[gna_pwl_size - 1].slope;
|
||||
int32_t xBase = gna_pwl[gna_pwl_size - 1].xBase & XBASEMASK;
|
||||
int16_t yBase = gna_pwl[gna_pwl_size - 1].yBase;
|
||||
float scale = pow(2, ((gna_pwl[gna_pwl_size - 1].xBase & ~XBASEMASK) + 1) * 8);
|
||||
float y_value = ((static_cast<float>(INT32_MAX) - xBase) * slope) / scale + yBase;
|
||||
|
||||
if (y_value > static_cast<float>(INT16_MAX) || y_value < static_cast<float>(INT16_MIN)) {
|
||||
float x_value = ((static_cast<float>(y_max) - yBase) * scale) / slope + xBase;
|
||||
extra_segment.xBase = FLOAT_TO_INT32(x_value) & XBASEMASK;
|
||||
extra_segment.yBase = slope > 0 ? y_max : y_min;
|
||||
extra_segment.slope = 0;
|
||||
extra_segments[gna_pwl_size] = extra_segment;
|
||||
}
|
||||
}
|
||||
|
||||
if (!extra_segments.empty())
|
||||
gnalog() << "Additional segment(s) added to protect against saturation\n";
|
||||
|
||||
for (auto i = extra_segments.rbegin(); i != extra_segments.rend(); i++) {
|
||||
gna_pwl.insert(gna_pwl.begin() + i->first, i->second);
|
||||
}
|
||||
}
|
||||
|
||||
void make_gna_pwl(const DnnActivation fun,
|
||||
const std::vector<pwl_t>& pwl,
|
||||
const double l_bound,
|
||||
@ -583,6 +629,7 @@ void make_gna_pwl(const DnnActivation fun,
|
||||
}
|
||||
default:
|
||||
gnalog() << "Unexpected function activation!\n";
|
||||
std::cerr << "Unexpected function activation!\n";
|
||||
THROW_GNA_EXCEPTION << "Unexpected function activation!" << fun;
|
||||
}
|
||||
insert_extra_pwl_segments(gna_pwl, y_min, y_max);
|
||||
}
|
||||
|
@ -5,9 +5,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <runtime/pwl.h>
|
||||
#include "backend/gna_types.h"
|
||||
|
||||
#include "runtime/pwl.h"
|
||||
|
||||
void make_gna_pwl(const DnnActivation fun,
|
||||
const std::vector<pwl_t>& pwl,
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "gna-api.h"
|
||||
#endif
|
||||
|
||||
#include "gna/gna_config.hpp"
|
||||
#include "gna_plugin_log.hpp"
|
||||
|
||||
//#define MODEL_DUMP
|
||||
@ -130,7 +131,7 @@ void GNADeviceHelper::enforceLegacyCnns(Gna2Model& gnaModel) {
|
||||
uint32_t GNADeviceHelper::createModel(Gna2Model& gnaModel) const {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
uint32_t modelId;
|
||||
if (isUpTo20GnaHwDevice() && isGnaLibVersion2_1) {
|
||||
if (enforceLegacyCnnNeeded()) {
|
||||
enforceLegacyCnns(gnaModel);
|
||||
}
|
||||
#if GNA_LIB_VER == 2 && defined MODEL_DUMP
|
||||
@ -154,15 +155,40 @@ void GNADeviceHelper::releaseModel(const uint32_t model_id) {
|
||||
checkGna2Status(status, "Gna2ModelRelease");
|
||||
}
|
||||
|
||||
bool GNADeviceHelper::enforceLegacyCnnNeeded() const {
|
||||
auto devVersion = getExecutionTargetDevice();
|
||||
return isGnaLibVersion2_1 && isUpTo20HwGnaDevice(devVersion);
|
||||
}
|
||||
|
||||
Gna2DeviceVersion GNADeviceHelper::getExecutionTargetDevice() const {
|
||||
const volatile auto Gna2DeviceVersion3_0 = static_cast<Gna2DeviceVersion>(0x30);
|
||||
if (executionTarget.empty()) {
|
||||
if (detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation)
|
||||
return isGnaLibVersion2_1 ? Gna2DeviceVersion3_0 : Gna2DeviceVersion2_0;
|
||||
return detectedGnaDevVersion;
|
||||
} else if (executionTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) {
|
||||
if (!isGnaLibVersion2_1)
|
||||
THROW_GNA_EXCEPTION << "Unsupported GNA execution target " << executionTarget << " when GNA Library version is 2.0.X.Y";
|
||||
return Gna2DeviceVersion3_0;
|
||||
} else if (executionTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) {
|
||||
return Gna2DeviceVersion2_0;
|
||||
}
|
||||
THROW_GNA_EXCEPTION << "Unknown execution target: \"" << executionTarget << "\"";
|
||||
}
|
||||
|
||||
uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
uint32_t reqConfId;
|
||||
auto status = Gna2RequestConfigCreate(model_id, &reqConfId);
|
||||
checkGna2Status(status, "Gna2RequestConfigCreate");
|
||||
if (gna2HwConsistency != Gna2DeviceVersionSoftwareEmulation && !isGnaLibVersion2_1) {
|
||||
status = Gna2RequestConfigEnableHardwareConsistency(reqConfId,
|
||||
isUpTo20GnaDevice() ? gna2HwConsistency : detectedGnaDevVersion);
|
||||
checkGna2Status(status, "Gna2RequestConfigEnableHardwareConsistency");
|
||||
|
||||
// When the GNA_SW_EXACT mode is chosen inference results should be computed exactly the same way
|
||||
// (bit exactly) as on the selected GNA execution target generation.
|
||||
// See the GNA Plugin's GNA_EXEC_TARGET config option description.
|
||||
if (swExactMode) {
|
||||
const auto consistentDevice = getExecutionTargetDevice();
|
||||
status = Gna2RequestConfigEnableHardwareConsistency(reqConfId, consistentDevice);
|
||||
checkGna2Status(status, "Gna2RequestConfigEnableHardwareConsistency(" + std::to_string(static_cast<long>(consistentDevice)) + ")");
|
||||
}
|
||||
status = Gna2InstrumentationConfigAssignToRequestConfig(instrumentationConfigId, reqConfId);
|
||||
checkGna2Status(status, "Gna2InstrumentationConfigAssignToRequestConfig");
|
||||
|
@ -51,8 +51,10 @@ class GNADeviceHelper {
|
||||
intel_gna_perf_t nGNAPerfResultsTotal;
|
||||
#else
|
||||
uint32_t nGnaDeviceIndex = 0;
|
||||
Gna2DeviceVersion gna2HwConsistency = Gna2DeviceVersionSoftwareEmulation;
|
||||
bool swExactMode = false;
|
||||
Gna2DeviceVersion detectedGnaDevVersion = Gna2DeviceVersionSoftwareEmulation;
|
||||
std::string executionTarget;
|
||||
std::string compileTarget;
|
||||
bool isGnaLibVersion2_1 = false;
|
||||
|
||||
static const uint32_t TotalGna2InstrumentationPoints = 2;
|
||||
@ -75,11 +77,15 @@ public:
|
||||
bool isPerformanceMeasuring = false) :
|
||||
isPerformanceMeasuring(isPerformanceMeasuring) {
|
||||
#else
|
||||
explicit GNADeviceHelper(Gna2DeviceVersion gna2HwConsistency = Gna2DeviceVersionSoftwareEmulation,
|
||||
explicit GNADeviceHelper(std::string executionTargetIn = "",
|
||||
std::string compileTargetIn = "",
|
||||
bool swExactModeIn = false,
|
||||
uint8_t lib_async_n_threads = 1,
|
||||
bool use_openmp = false,
|
||||
bool isPerformanceMeasuring = false) :
|
||||
gna2HwConsistency(gna2HwConsistency),
|
||||
swExactMode(swExactModeIn),
|
||||
executionTarget(executionTargetIn),
|
||||
compileTarget(compileTargetIn),
|
||||
isPerformanceMeasuring(isPerformanceMeasuring),
|
||||
nGnaDeviceIndex{selectGnaDevice()} {
|
||||
#endif
|
||||
@ -129,15 +135,17 @@ public:
|
||||
uint32_t createRequestConfig(const uint32_t model_id);
|
||||
static uint32_t getNumberOfGnaDevices();
|
||||
static uint32_t selectGnaDevice();
|
||||
static bool isGnaHw(const Gna2DeviceVersion dev) {
|
||||
return Gna2DeviceVersionSoftwareEmulation != dev;
|
||||
}
|
||||
bool hasGnaHw() const {
|
||||
return Gna2DeviceVersionSoftwareEmulation != detectedGnaDevVersion;
|
||||
return isGnaHw(detectedGnaDevVersion);
|
||||
}
|
||||
bool isUpTo20GnaDevice() const {
|
||||
return detectedGnaDevVersion <= Gna2DeviceVersion2_0;
|
||||
}
|
||||
bool isUpTo20GnaHwDevice() const {
|
||||
return isUpTo20GnaDevice() && detectedGnaDevVersion != Gna2DeviceVersionSoftwareEmulation;
|
||||
static bool isUpTo20HwGnaDevice(const Gna2DeviceVersion dev) {
|
||||
return dev <= Gna2DeviceVersion2_0 && isGnaHw(dev);
|
||||
}
|
||||
bool enforceLegacyCnnNeeded() const;
|
||||
Gna2DeviceVersion getExecutionTargetDevice() const;
|
||||
static void checkGna2Status(Gna2Status status, const std::string& from);
|
||||
static void checkGna2Status(Gna2Status status, const Gna2Model& gnaModel);
|
||||
#endif
|
||||
|
@ -397,7 +397,9 @@ void GNAPlugin::InitGNADevice() {
|
||||
gnaFlags->gna_openmp_multithreading,
|
||||
gnaFlags->performance_counting);
|
||||
#else
|
||||
gnadevice = std::make_shared<GNADeviceHelper>(config.pluginGna2DeviceConsistent,
|
||||
gnadevice = std::make_shared<GNADeviceHelper>(config.gnaExecTarget,
|
||||
config.gnaCompileTarget,
|
||||
config.swExactMode,
|
||||
gnaFlags->gna_lib_async_threads_num,
|
||||
gnaFlags->gna_openmp_multithreading,
|
||||
gnaFlags->performance_counting);
|
||||
|
@ -33,22 +33,28 @@ static const std::vector<std::string> supported_values_on_gna2 = {
|
||||
GNAConfigParams::GNA_AVX2_EXACT
|
||||
};
|
||||
#else
|
||||
static const caseless_unordered_map <std::string, std::pair<Gna2AccelerationMode, Gna2DeviceVersion>> supported_values = {
|
||||
{GNAConfigParams::GNA_AUTO, {Gna2AccelerationModeAuto, Gna2DeviceVersionSoftwareEmulation}},
|
||||
{GNAConfigParams::GNA_HW, {Gna2AccelerationModeHardware, Gna2DeviceVersionSoftwareEmulation}},
|
||||
{GNAConfigParams::GNA_SW, {Gna2AccelerationModeSoftware, Gna2DeviceVersionSoftwareEmulation}},
|
||||
{GNAConfigParams::GNA_SW_EXACT, {Gna2AccelerationModeSoftware, Gna2DeviceVersion1_0}},
|
||||
{GNAConfigParams::GNA_GEN, {Gna2AccelerationModeGeneric, Gna2DeviceVersionSoftwareEmulation}},
|
||||
{GNAConfigParams::GNA_GEN_EXACT, {Gna2AccelerationModeGeneric, Gna2DeviceVersion1_0}},
|
||||
{GNAConfigParams::GNA_SSE, {Gna2AccelerationModeSse4x2, Gna2DeviceVersionSoftwareEmulation}},
|
||||
{GNAConfigParams::GNA_SSE_EXACT, {Gna2AccelerationModeSse4x2, Gna2DeviceVersion1_0}},
|
||||
{GNAConfigParams::GNA_AVX1, {Gna2AccelerationModeAvx1, Gna2DeviceVersionSoftwareEmulation}},
|
||||
{GNAConfigParams::GNA_AVX1_EXACT, {Gna2AccelerationModeAvx1, Gna2DeviceVersion1_0}},
|
||||
{GNAConfigParams::GNA_AVX2, {Gna2AccelerationModeAvx2, Gna2DeviceVersionSoftwareEmulation}},
|
||||
{GNAConfigParams::GNA_AVX2_EXACT, {Gna2AccelerationModeAvx2, Gna2DeviceVersion1_0}},
|
||||
static const caseless_unordered_map <std::string, std::pair<Gna2AccelerationMode, bool>> supported_values = {
|
||||
{GNAConfigParams::GNA_AUTO, {Gna2AccelerationModeAuto, false}},
|
||||
{GNAConfigParams::GNA_HW, {Gna2AccelerationModeHardware, false}},
|
||||
{GNAConfigParams::GNA_SW, {Gna2AccelerationModeSoftware, false}},
|
||||
{GNAConfigParams::GNA_SW_EXACT, {Gna2AccelerationModeSoftware, true}},
|
||||
{GNAConfigParams::GNA_GEN, {Gna2AccelerationModeGeneric, false}},
|
||||
{GNAConfigParams::GNA_GEN_EXACT, {Gna2AccelerationModeGeneric, true}},
|
||||
{GNAConfigParams::GNA_SSE, {Gna2AccelerationModeSse4x2, false}},
|
||||
{GNAConfigParams::GNA_SSE_EXACT, {Gna2AccelerationModeSse4x2, true}},
|
||||
{GNAConfigParams::GNA_AVX1, {Gna2AccelerationModeAvx1, false}},
|
||||
{GNAConfigParams::GNA_AVX1_EXACT, {Gna2AccelerationModeAvx1, true}},
|
||||
{GNAConfigParams::GNA_AVX2, {Gna2AccelerationModeAvx2, false}},
|
||||
{GNAConfigParams::GNA_AVX2_EXACT, {Gna2AccelerationModeAvx2, true}},
|
||||
};
|
||||
#endif
|
||||
|
||||
static const std::set<std::string> supportedTargets = {
|
||||
GNAConfigParams::GNA_TARGET_2_0,
|
||||
GNAConfigParams::GNA_TARGET_3_0,
|
||||
""
|
||||
};
|
||||
|
||||
void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
|
||||
for (auto&& item : config) {
|
||||
auto key = item.first;
|
||||
@ -116,9 +122,14 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
|
||||
gna_proc_type = static_cast<intel_gna_proc_t>(procType->second);
|
||||
#else
|
||||
pluginGna2AccMode = procType->second.first;
|
||||
pluginGna2DeviceConsistent = procType->second.second;
|
||||
swExactMode = procType->second.second;
|
||||
#endif
|
||||
}
|
||||
} else if (key == GNA_CONFIG_KEY(EXEC_TARGET) || key == GNA_CONFIG_KEY(COMPILE_TARGET)) {
|
||||
if (supportedTargets.count(value) == 0) {
|
||||
THROW_GNA_EXCEPTION << "Unsupported GNA config value (key, value): (" << key << ", " << value << ")";
|
||||
}
|
||||
(key == GNA_CONFIG_KEY(EXEC_TARGET) ? gnaExecTarget : gnaCompileTarget) = value;
|
||||
} else if (key == GNA_CONFIG_KEY(COMPACT_MODE)) {
|
||||
if (value == PluginConfigParams::YES) {
|
||||
gnaFlags.compact_mode = true;
|
||||
@ -255,7 +266,7 @@ void Config::AdjustKeyMapValues() {
|
||||
}
|
||||
#else
|
||||
if (value.second.first == pluginGna2AccMode &&
|
||||
value.second.second == pluginGna2DeviceConsistent) {
|
||||
value.second.second == swExactMode) {
|
||||
device_mode = value.first;
|
||||
break;
|
||||
}
|
||||
@ -264,6 +275,8 @@ void Config::AdjustKeyMapValues() {
|
||||
}
|
||||
IE_ASSERT(!device_mode.empty());
|
||||
keyConfigMap[GNA_CONFIG_KEY(DEVICE_MODE)] = device_mode;
|
||||
keyConfigMap[GNA_CONFIG_KEY(EXEC_TARGET)] = gnaExecTarget;
|
||||
keyConfigMap[GNA_CONFIG_KEY(COMPILE_TARGET)] = gnaCompileTarget;
|
||||
keyConfigMap[GNA_CONFIG_KEY(COMPACT_MODE)] =
|
||||
gnaFlags.compact_mode ? PluginConfigParams::YES: PluginConfigParams::NO;
|
||||
keyConfigMap[CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)] =
|
||||
|
@ -37,7 +37,7 @@ struct Config {
|
||||
gna_proc_type = r.gna_proc_type;
|
||||
#else
|
||||
pluginGna2AccMode = r.pluginGna2AccMode;
|
||||
pluginGna2DeviceConsistent = r.pluginGna2DeviceConsistent;
|
||||
swExactMode = r.swExactMode;
|
||||
#endif
|
||||
inputScaleFactors = r.inputScaleFactors;
|
||||
gnaFlags = r.gnaFlags;
|
||||
@ -55,11 +55,14 @@ struct Config {
|
||||
std::string dumpXNNPath;
|
||||
std::string dumpXNNGeneration;
|
||||
|
||||
std::string gnaExecTarget;
|
||||
std::string gnaCompileTarget;
|
||||
|
||||
#if GNA_LIB_VER == 1
|
||||
intel_gna_proc_t gna_proc_type = static_cast<intel_gna_proc_t>(GNA_SOFTWARE & GNA_HARDWARE);
|
||||
#else
|
||||
Gna2AccelerationMode pluginGna2AccMode = Gna2AccelerationModeSoftware;
|
||||
Gna2DeviceVersion pluginGna2DeviceConsistent = Gna2DeviceVersion1_0;
|
||||
bool swExactMode = true;
|
||||
#endif
|
||||
|
||||
std::vector<float> inputScaleFactors;
|
||||
|
@ -777,7 +777,39 @@ void RemovePermutationsNHWCToNCHWPass::run() {
|
||||
|
||||
void InsertIdentityLayerPass::run() {
|
||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
|
||||
auto createIdentityLayer = [quantized, this](const TensorDesc& tensorDesc) {
|
||||
int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++;
|
||||
auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers);
|
||||
CNNLayerPtr activationLayer =
|
||||
std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32}));
|
||||
CNNLayerPtr activationLayerWithQuant = quantized ?
|
||||
InferenceEngine::injectData<QuantizedLayerParams>(activationLayer) :
|
||||
activationLayer;
|
||||
auto dataPtr = std::make_shared<Data>("identity_data_" + std::to_string(numOfIdentityLayers), tensorDesc);
|
||||
getCreatorLayer(dataPtr) = activationLayerWithQuant;
|
||||
activationLayerWithQuant->outData.push_back(dataPtr);
|
||||
return activationLayerWithQuant;
|
||||
};
|
||||
|
||||
for (auto & l : *pLayers) {
|
||||
if (LayerInfo(l).isPooling()) {
|
||||
// Identity should be inserted after 1D pooling if it's the last functional layer.
|
||||
auto pooling = LayerInfo(l).as<PoolingLayer*>();
|
||||
IE_ASSERT(pooling != nullptr);
|
||||
if (is2D(pooling->_kernel)) continue;
|
||||
|
||||
auto hasNextFuncLayer = CNNNetHasNextLayerSkipCertain(l, 0, 0, [](CNNLayerPtr layer) {
|
||||
return LayerInfo(layer).isNonFunctional();
|
||||
});
|
||||
if (hasNextFuncLayer) continue;
|
||||
|
||||
auto identityLayer = createIdentityLayer(l->outData[0]->getTensorDesc());
|
||||
gnalog() << "Inserted "<< identityLayer->name << " after " << l->name << std::endl;
|
||||
|
||||
auto nextLayer = CNNNetCheckNextLayerSkipCertain(l, 0, 0, true, [](CNNLayerPtr layer) { return false; }).first;
|
||||
CNNNetworkInsertLayer(l, nextLayer, identityLayer);
|
||||
}
|
||||
|
||||
for (auto && prev : getCandidatesForIdentityInsertion(l, getPassManager())) {
|
||||
// Do an upstream search until Functional layer is found
|
||||
auto original_prev_layer = prev;
|
||||
@ -817,15 +849,6 @@ void InsertIdentityLayerPass::run() {
|
||||
if (reconnected)
|
||||
continue;
|
||||
|
||||
int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++;
|
||||
// actual insertion
|
||||
auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers);
|
||||
|
||||
gnalog() << "Inserted "<< activationName << " between: " << prev->name << " and " << true_layer->name << "\n" << std::flush;
|
||||
|
||||
CNNLayerPtr activationLayer =
|
||||
std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32}));
|
||||
|
||||
// TODO: why index is 0 ? - better use direct indexing in getCandidateFunction
|
||||
// detecting ins-data-idx
|
||||
size_t insDataIdx = std::numeric_limits<size_t>::max();
|
||||
@ -840,13 +863,10 @@ void InsertIdentityLayerPass::run() {
|
||||
}
|
||||
|
||||
auto inputData = true_layer->insData[insDataIdx].lock();
|
||||
auto identityLayer = createIdentityLayer(inputData->getTensorDesc());
|
||||
|
||||
gnalog() << "Inserted "<< identityLayer->name << " between: " << prev->name << " and " << true_layer->name << "\n" << std::flush;
|
||||
|
||||
auto dataPtr = std::make_shared<Data>("identity_data_" + std::to_string(numOfIdentityLayers), inputData->getTensorDesc());
|
||||
auto activationLayerWithQuant = quantized ?
|
||||
InferenceEngine::injectData<QuantizedLayerParams>(activationLayer) :
|
||||
activationLayer;
|
||||
getCreatorLayer(dataPtr) = activationLayerWithQuant;
|
||||
activationLayerWithQuant->outData.push_back(dataPtr);
|
||||
// wether 1 identity or all outputs TODO possible grouping here, need to implement special grouped inserter
|
||||
bool notAll = false;
|
||||
for (auto && nextData : prev->outData) {
|
||||
@ -860,14 +880,14 @@ void InsertIdentityLayerPass::run() {
|
||||
}
|
||||
// copy offset - to be used while connecting outputs
|
||||
if (prev->params.find("output_offset") != prev->params.end()) {
|
||||
activationLayerWithQuant->params["output_offset"] = prev->params["output_offset"];
|
||||
identityLayer->params["output_offset"] = prev->params["output_offset"];
|
||||
}
|
||||
// copy offset - to be used while connecting outputs
|
||||
if (prev->params.find("original_num_rows") != prev->params.end()) {
|
||||
activationLayerWithQuant->params["original_num_rows"] = prev->params["original_num_rows"];
|
||||
identityLayer->params["original_num_rows"] = prev->params["original_num_rows"];
|
||||
}
|
||||
|
||||
CNNNetworkInsertLayer(prev, notAll ? true_layer : CNNLayerPtr(nullptr), activationLayerWithQuant);
|
||||
CNNNetworkInsertLayer(prev, notAll ? true_layer : CNNLayerPtr(nullptr), identityLayer);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1662,6 +1682,10 @@ void BreakFusingOfOutputLayersPass::run() {
|
||||
#endif
|
||||
OutputsDataMap outputsMap = this->getPassManager()->getNetwork().getOutputsInfo();
|
||||
for (auto layer : *pLayers) {
|
||||
/* Inserion of the second activation after pooling will break Conv - Pooling - Activation component
|
||||
* since scaleshift layers will be inserted between the pooling and activations
|
||||
*/
|
||||
if (LayerInfo(layer).isPooling()) continue;
|
||||
for (int output_idx = 0; output_idx < layer->outData.size(); output_idx++) {
|
||||
auto& output = layer->outData[output_idx];
|
||||
auto& input_to = getInputTo(output);
|
||||
|
@ -9,11 +9,10 @@
|
||||
#include <limits>
|
||||
#include <cstdint>
|
||||
#include <algorithm>
|
||||
#include "backend/gna_types.h"
|
||||
|
||||
#ifdef _NO_MKL_
|
||||
#include <cmath>
|
||||
#include <backend/make_pwl.hpp>
|
||||
#include "backend/make_pwl.hpp"
|
||||
|
||||
#define SCOPY(num, in, inci, out, inco) for (int i_ = 0; i_ < *(num); i_++) *(out + i_ * *(inco)) = *(in + i_ * *(inci));
|
||||
#define SSCAL(num, scale, inout, inco) for (int i_ = 0; i_ < *(num); i_++) *(inout + i_ * *(inco)) = *(scale) * *(inout + i_ * *(inco));
|
||||
@ -27,7 +26,6 @@
|
||||
|
||||
#include "pwl.h"
|
||||
#include "gna_plugin_log.hpp"
|
||||
#include "backend/dnn_types.h"
|
||||
#include "gna_slope_scale.h"
|
||||
#include "round_float_define.hpp"
|
||||
|
||||
|
@ -164,7 +164,9 @@ endif()
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE pugixml openvino::itt ${CMAKE_DL_LIBS} Threads::Threads
|
||||
${NGRAPH_LIBRARIES} inference_engine_transformations)
|
||||
|
||||
target_include_directories(${TARGET_NAME} INTERFACE ${PUBLIC_HEADERS_DIR}
|
||||
target_include_directories(${TARGET_NAME} INTERFACE
|
||||
$<BUILD_INTERFACE:${PUBLIC_HEADERS_DIR}>
|
||||
$<INSTALL_INTERFACE:${IE_CPACK_IE_DIR}/include>
|
||||
PRIVATE $<TARGET_PROPERTY:${TARGET_NAME}_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>
|
||||
$<TARGET_PROPERTY:${TARGET_NAME}_legacy,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
|
||||
@ -213,31 +215,10 @@ set_target_properties(${TARGET_NAME}_s PROPERTIES EXCLUDE_FROM_ALL ON)
|
||||
set_target_properties(${TARGET_NAME} ${TARGET_NAME}_obj ${TARGET_NAME}_s
|
||||
PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
|
||||
|
||||
# InferenceEngineConfig.cmake for install tree
|
||||
|
||||
configure_file("${IE_MAIN_SOURCE_DIR}/cmake/templates/InferenceEngineConfig.cmake.in"
|
||||
"${CMAKE_BINARY_DIR}/share/InferenceEngineConfig.cmake" @ONLY)
|
||||
|
||||
configure_file("${IE_MAIN_SOURCE_DIR}/cmake/templates/InferenceEngineConfig-version.cmake.in"
|
||||
"${CMAKE_BINARY_DIR}/share/InferenceEngineConfig-version.cmake"
|
||||
COPYONLY)
|
||||
|
||||
configure_file("${IE_MAIN_SOURCE_DIR}/cmake/ie_parallel.cmake"
|
||||
"${CMAKE_BINARY_DIR}/share/ie_parallel.cmake"
|
||||
COPYONLY)
|
||||
|
||||
# Export Inference Engine targets
|
||||
# Export for build tree
|
||||
|
||||
export(TARGETS ${TARGET_NAME} NAMESPACE IE::
|
||||
APPEND FILE "${CMAKE_BINARY_DIR}/inference_engine_targets.cmake")
|
||||
|
||||
configure_file("${IE_MAIN_SOURCE_DIR}/cmake/templates/InferenceEngineConfig-build.cmake.in"
|
||||
"${CMAKE_BINARY_DIR}/InferenceEngineConfig.cmake"
|
||||
COPYONLY)
|
||||
|
||||
configure_file("${IE_MAIN_SOURCE_DIR}/cmake/templates/InferenceEngineConfig-version.cmake.in"
|
||||
"${CMAKE_BINARY_DIR}/InferenceEngineConfig-version.cmake"
|
||||
COPYONLY)
|
||||
APPEND FILE "${CMAKE_BINARY_DIR}/InferenceEngineTargets.cmake")
|
||||
|
||||
# Export for developer package
|
||||
|
||||
@ -246,6 +227,8 @@ ie_developer_export_targets(${TARGET_NAME} ${TARGET_NAME}_plugin_api)
|
||||
# install TBB
|
||||
|
||||
list(APPEND core_components ngraph)
|
||||
list(APPEND PATH_VARS "IE_INCLUDE_DIR" "IE_NGRAPH_DIR"
|
||||
"IE_PARALLEL_CMAKE")
|
||||
|
||||
if((THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") AND TBBROOT MATCHES ${TEMP})
|
||||
ie_cpack_add_component(tbb REQUIRED)
|
||||
@ -265,9 +248,14 @@ if((THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") AND TBBROOT MATCH
|
||||
install(FILES "${TBB}/LICENSE"
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/external/tbb
|
||||
COMPONENT tbb)
|
||||
|
||||
set(IE_TBB_DIR_INSTALL "external/tbb/cmake")
|
||||
set(IE_TBB_DIR "${TBB_DIR}")
|
||||
list(APPEND PATH_VARS "IE_TBB_DIR")
|
||||
|
||||
install(FILES "${TBB}/cmake/TBBConfig.cmake"
|
||||
"${TBB}/cmake/TBBConfigVersion.cmake"
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/external/tbb/cmake
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/${IE_TBB_DIR_INSTALL}
|
||||
COMPONENT tbb)
|
||||
endif()
|
||||
|
||||
@ -277,15 +265,58 @@ ie_cpack_add_component(core REQUIRED DEPENDS ${core_components})
|
||||
|
||||
install(DIRECTORY "${IE_MAIN_SOURCE_DIR}/include" DESTINATION ${IE_CPACK_IE_DIR}
|
||||
COMPONENT core)
|
||||
install(TARGETS ${TARGET_NAME}
|
||||
|
||||
install(TARGETS ${TARGET_NAME} EXPORT InferenceEngineTargets
|
||||
RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core
|
||||
ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core
|
||||
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core)
|
||||
install(FILES "${OpenVINO_BINARY_DIR}/share/ie_parallel.cmake"
|
||||
"${OpenVINO_BINARY_DIR}/share/InferenceEngineConfig.cmake"
|
||||
"${OpenVINO_BINARY_DIR}/share/InferenceEngineConfig-version.cmake"
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/share
|
||||
COMPONENT core)
|
||||
|
||||
install(FILES $<TARGET_FILE_DIR:${TARGET_NAME}>/plugins.xml
|
||||
DESTINATION ${IE_CPACK_RUNTIME_PATH}
|
||||
COMPONENT core)
|
||||
|
||||
# Install cmake scripts
|
||||
|
||||
install(EXPORT InferenceEngineTargets
|
||||
FILE InferenceEngineTargets.cmake
|
||||
NAMESPACE IE::
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/share
|
||||
COMPONENT core)
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
||||
set(IE_NGRAPH_DIR "${CMAKE_BINARY_DIR}/ngraph")
|
||||
set(IE_INCLUDE_DIR "${PUBLIC_HEADERS_DIR}")
|
||||
set(IE_PARALLEL_CMAKE "${InferenceEngine_SOURCE_DIR}/cmake/ie_parallel.cmake")
|
||||
|
||||
configure_package_config_file("${InferenceEngine_SOURCE_DIR}/cmake/templates/InferenceEngineConfig.cmake.in"
|
||||
"${CMAKE_BINARY_DIR}/InferenceEngineConfig.cmake"
|
||||
INSTALL_DESTINATION "${CMAKE_INSTALL_PREFIX}"
|
||||
PATH_VARS ${PATH_VARS})
|
||||
|
||||
set(IE_INCLUDE_DIR "include")
|
||||
set(IE_NGRAPH_DIR "../ngraph/cmake")
|
||||
set(IE_TBB_DIR "${IE_TBB_DIR_INSTALL}")
|
||||
set(IE_PARALLEL_CMAKE "share/ie_parallel.cmake")
|
||||
|
||||
configure_package_config_file("${InferenceEngine_SOURCE_DIR}/cmake/templates/InferenceEngineConfig.cmake.in"
|
||||
"${CMAKE_BINARY_DIR}/share/InferenceEngineConfig.cmake"
|
||||
INSTALL_DESTINATION share
|
||||
PATH_VARS ${PATH_VARS})
|
||||
|
||||
configure_file("${IE_MAIN_SOURCE_DIR}/cmake/templates/InferenceEngineConfig-version.cmake.in"
|
||||
"${CMAKE_BINARY_DIR}/InferenceEngineConfig-version.cmake"
|
||||
COPYONLY)
|
||||
|
||||
# WA for CI
|
||||
configure_file("${IE_MAIN_SOURCE_DIR}/cmake/templates/InferenceEngineConfig-version.cmake.in"
|
||||
"${CMAKE_BINARY_DIR}/share/InferenceEngineConfig-version.cmake"
|
||||
COPYONLY)
|
||||
export(TARGETS ${TARGET_NAME} NAMESPACE IE::
|
||||
APPEND FILE "${CMAKE_BINARY_DIR}/share/InferenceEngineTargets.cmake")
|
||||
|
||||
install(FILES "${CMAKE_BINARY_DIR}/share/InferenceEngineConfig.cmake"
|
||||
"${CMAKE_BINARY_DIR}/InferenceEngineConfig-version.cmake"
|
||||
"${InferenceEngine_SOURCE_DIR}/cmake/ie_parallel.cmake"
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/share
|
||||
COMPONENT core)
|
||||
|
@ -24,7 +24,7 @@ using cpu_set_t = void;
|
||||
*
|
||||
* @param mask The mask
|
||||
*/
|
||||
INFERENCE_ENGINE_API_CPP(void) ReleaseProcessMask(cpu_set_t* mask);
|
||||
void ReleaseProcessMask(cpu_set_t* mask);
|
||||
|
||||
/**
|
||||
* @brief Deleter for process mask
|
||||
@ -52,7 +52,7 @@ using CpuSet = std::unique_ptr<cpu_set_t, ReleaseProcessMaskDeleter>;
|
||||
* @ingroup ie_dev_api_threading
|
||||
* @return A core affinity mask
|
||||
*/
|
||||
INFERENCE_ENGINE_API_CPP(std::tuple<CpuSet, int>) GetProcessMask();
|
||||
std::tuple<CpuSet, int> GetProcessMask();
|
||||
|
||||
/**
|
||||
* @brief Pins current thread to a set of cores determined by the mask
|
||||
@ -64,7 +64,7 @@ INFERENCE_ENGINE_API_CPP(std::tuple<CpuSet, int>) GetProcessMask();
|
||||
* @param[in] processMask The process mask
|
||||
* @return `True` in case of success, `false` otherwise
|
||||
*/
|
||||
INFERENCE_ENGINE_API_CPP(bool) PinThreadToVacantCore(int thrIdx, int hyperThreads, int ncores, const CpuSet& processMask);
|
||||
bool PinThreadToVacantCore(int thrIdx, int hyperThreads, int ncores, const CpuSet& processMask);
|
||||
|
||||
/**
|
||||
* @brief Pins thread to a spare core in the round-robin scheme, while respecting the given process mask.
|
||||
@ -75,7 +75,7 @@ INFERENCE_ENGINE_API_CPP(bool) PinThreadToVacantCore(int thrIdx, int hyperThread
|
||||
* @param[in] processMask The process mask
|
||||
* @return `True` in case of success, `false` otherwise
|
||||
*/
|
||||
INFERENCE_ENGINE_API_CPP(bool) PinCurrentThreadByMask(int ncores, const CpuSet& processMask);
|
||||
bool PinCurrentThreadByMask(int ncores, const CpuSet& processMask);
|
||||
|
||||
/**
|
||||
* @brief Pins a current thread to a socket.
|
||||
@ -84,5 +84,5 @@ INFERENCE_ENGINE_API_CPP(bool) PinCurrentThreadByMask(int ncores, const CpuSet&
|
||||
* @param[in] socket The socket id
|
||||
* @return `True` in case of success, `false` otherwise
|
||||
*/
|
||||
INFERENCE_ENGINE_API_CPP(bool) PinCurrentThreadToSocket(int socket);
|
||||
bool PinCurrentThreadToSocket(int socket);
|
||||
} // namespace InferenceEngine
|
@ -34,8 +34,6 @@ ie_faster_build(${TARGET_NAME}_obj
|
||||
PCH PRIVATE "src/precomp.hpp"
|
||||
)
|
||||
|
||||
set_ie_threading_interface_for(${TARGET_NAME}_obj)
|
||||
|
||||
target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_INFERENCE_ENGINE_API)
|
||||
|
||||
target_include_directories(${TARGET_NAME}_obj PRIVATE
|
||||
@ -63,8 +61,6 @@ add_library(${TARGET_NAME} SHARED
|
||||
ie_add_vs_version_file(NAME ${TARGET_NAME}
|
||||
FILEDESCRIPTION "Inference Engine Legacy library")
|
||||
|
||||
set_ie_threading_interface_for(${TARGET_NAME})
|
||||
|
||||
target_link_libraries(${TARGET_NAME} PUBLIC inference_engine inference_engine_snippets
|
||||
PRIVATE pugixml openvino::itt
|
||||
${NGRAPH_LIBRARIES} inference_engine_transformations)
|
||||
|
@ -15,7 +15,6 @@
|
||||
|
||||
#include <threading/ie_cpu_streams_executor.hpp>
|
||||
#include <ie_system_conf.h>
|
||||
#include <threading/ie_thread_affinity.hpp>
|
||||
#include <algorithm>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
@ -30,7 +30,7 @@ ie_add_vs_version_file(NAME ${TARGET_NAME}
|
||||
target_link_libraries(${TARGET_NAME} PUBLIC ${NGRAPH_LIBRARIES}
|
||||
PRIVATE ${NGRAPH_REF_LIBRARIES} openvino::itt ngraph::builder pugixml)
|
||||
|
||||
target_include_directories(${TARGET_NAME} PUBLIC ${PUBLIC_HEADERS_DIR}
|
||||
target_include_directories(${TARGET_NAME} PUBLIC $<BUILD_INTERFACE:${PUBLIC_HEADERS_DIR}>
|
||||
PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src")
|
||||
|
||||
add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
|
||||
@ -49,7 +49,7 @@ ie_developer_export_targets(${TARGET_NAME})
|
||||
|
||||
# install
|
||||
|
||||
install(TARGETS ${TARGET_NAME}
|
||||
install(TARGETS ${TARGET_NAME} EXPORT InferenceEngineTargets
|
||||
RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core
|
||||
ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core
|
||||
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core)
|
||||
|
@ -11,8 +11,6 @@ function(add_common_target TARGET_NAME STATIC_IE)
|
||||
UNITY
|
||||
)
|
||||
|
||||
set_ie_threading_interface_for(${TARGET_NAME})
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
# TODO: enable some day and fix all warnings
|
||||
# target_compile_options(${TARGET_NAME} PRIVATE "-Wall")
|
||||
|
@ -11,7 +11,6 @@
|
||||
|
||||
#include <cpp/ie_cnn_network.h>
|
||||
#include <precision_utils.h>
|
||||
#include <ie_parallel.hpp>
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
@ -8,8 +8,6 @@
|
||||
#include <unordered_map>
|
||||
#include <algorithm>
|
||||
|
||||
#include <ie_parallel.hpp>
|
||||
|
||||
#include <vpu/model/stage.hpp>
|
||||
#include <vpu/utils/numeric.hpp>
|
||||
#include <vpu/utils/profiling.hpp>
|
||||
|
@ -14,7 +14,6 @@
|
||||
#include <vpu/model/data_contents/hw_const_data_content.hpp>
|
||||
|
||||
#include <precision_utils.h>
|
||||
#include <ie_parallel.hpp>
|
||||
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
|
@ -6,8 +6,6 @@
|
||||
#include <vpu/stages/stub_stage.hpp>
|
||||
#include <vpu/model/data_contents/merge_fc_content.hpp>
|
||||
|
||||
#include <ie_parallel.hpp>
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include <vpu/stages/stub_stage.hpp>
|
||||
#include <vpu/model/data_contents/priorbox_contents.hpp>
|
||||
|
||||
#include <ie_parallel.hpp>
|
||||
#include <precision_utils.h>
|
||||
|
||||
#include <cmath>
|
||||
|
@ -89,6 +89,7 @@ void PassImpl::run(const Model& model) {
|
||||
}
|
||||
} // namespace
|
||||
|
||||
|
||||
Pass::Ptr PassManager::replaceWithReduceMean() {
|
||||
return std::make_shared<PassImpl>(_stageBuilder);
|
||||
}
|
||||
|
@ -8,8 +8,6 @@
|
||||
#include <vpu/utils/numeric.hpp>
|
||||
#include <vpu/model/data_contents/deconvolution_contents.hpp>
|
||||
|
||||
#include <ie_parallel.hpp>
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include <vpu/compile_env.hpp>
|
||||
|
||||
#include <precision_utils.h>
|
||||
#include <ie_parallel.hpp>
|
||||
|
||||
#include <array>
|
||||
#include <algorithm>
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include <vpu/utils/profiling.hpp>
|
||||
#include <vpu/middleend/sw/utility.hpp>
|
||||
|
||||
#include <ie_parallel.hpp>
|
||||
#include <precision_utils.h>
|
||||
|
||||
namespace vpu {
|
||||
|
@ -10,7 +10,6 @@
|
||||
#include <vpu/model/data_contents/batch_norm_contents.hpp>
|
||||
|
||||
#include <precision_utils.h>
|
||||
#include <ie_parallel.hpp>
|
||||
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
|
@ -9,8 +9,6 @@
|
||||
#include <vpu/utils/profiling.hpp>
|
||||
#include <vpu/model/data_contents/prelu_blob_content.hpp>
|
||||
|
||||
#include <ie_parallel.hpp>
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
|
@ -45,6 +45,8 @@ addIeTargetTest(
|
||||
IE
|
||||
)
|
||||
|
||||
set_ie_threading_interface_for(${TARGET_NAME})
|
||||
|
||||
if(NGRAPH_ONNX_IMPORT_ENABLE)
|
||||
target_compile_definitions(${TARGET_NAME} PRIVATE
|
||||
NGRAPH_ONNX_IMPORT_ENABLE
|
||||
|
@ -192,6 +192,8 @@ public:
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
EXPECT_TRUE(Mock::VerifyAndClearExpectations(net.get()));
|
||||
EXPECT_TRUE(Mock::VerifyAndClearExpectations(mockPlugin.get()));
|
||||
CommonTestUtils::removeIRFiles(modelName, weightsName);
|
||||
}
|
||||
|
||||
@ -766,8 +768,6 @@ TEST_P(CachingTest, TestThrowOnExport) {
|
||||
// TODO: temporary behavior is to no re-throw exception on import error (see 54335)
|
||||
// In future add separate 'no throw' test for 'blob_outdated' exception from plugin
|
||||
TEST_P(CachingTest, TestThrowOnImport) {
|
||||
ON_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).WillByDefault(Throw(1));
|
||||
ON_CALL(*mockPlugin, ImportNetworkImpl(_, _)).WillByDefault(Throw(1));
|
||||
EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(SUPPORTED_METRICS), _)).Times(AnyNumber());
|
||||
EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(IMPORT_EXPORT_SUPPORT), _)).Times(AnyNumber());
|
||||
EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(DEVICE_ARCHITECTURE), _)).Times(AnyNumber());
|
||||
@ -785,20 +785,25 @@ TEST_P(CachingTest, TestThrowOnImport) {
|
||||
{
|
||||
EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0);
|
||||
EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0);
|
||||
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0);
|
||||
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0);
|
||||
if (m_remoteContext) {
|
||||
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(1).WillOnce(Throw(1));
|
||||
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0);
|
||||
} else {
|
||||
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0);
|
||||
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(1).WillOnce(Throw(1));
|
||||
}
|
||||
EXPECT_CALL(*net, ExportImpl(_)).Times(1);
|
||||
testLoad([&](Core &ie) {
|
||||
ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}});
|
||||
EXPECT_NO_THROW(m_testFunction(ie));
|
||||
});
|
||||
}
|
||||
{ // Step 3: same load, cache should be deleted due to unsuccessful import on step 2
|
||||
EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0);
|
||||
EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0);
|
||||
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0);
|
||||
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0);
|
||||
EXPECT_CALL(*net, ExportImpl(_)).Times(1);
|
||||
{ // Step 3: same load, cache is re-created on export on step 2 and shall be successfully imported now
|
||||
EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0);
|
||||
EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0);
|
||||
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0);
|
||||
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0);
|
||||
EXPECT_CALL(*net, ExportImpl(_)).Times(0);
|
||||
testLoad([&](Core &ie) {
|
||||
ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}});
|
||||
EXPECT_NO_THROW(m_testFunction(ie));
|
||||
|
@ -26,6 +26,14 @@ typedef std::tuple<
|
||||
std::vector<size_t> // Input shape
|
||||
> removePermutationsPassParams;
|
||||
|
||||
typedef std::tuple<
|
||||
InferenceEngine::Precision, // Network Precision
|
||||
std::string, // Target Device
|
||||
std::map<std::string, std::string>, // Configuration
|
||||
std::vector<size_t>, // Input shape
|
||||
bool // with activation
|
||||
> removePermutationsWithPoolPassParams;
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
class RemovePermutationsNHWCToNCHWPassTest : public testing::WithParamInterface<removePermutationsPassParams>,
|
||||
@ -137,15 +145,16 @@ protected:
|
||||
}
|
||||
};
|
||||
|
||||
class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<removePermutationsPassParams>,
|
||||
class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<removePermutationsWithPoolPassParams>,
|
||||
public LayerTestsUtils::LayerTestsCommon {
|
||||
public:
|
||||
static std::string getTestCaseName(testing::TestParamInfo<removePermutationsPassParams> obj) {
|
||||
static std::string getTestCaseName(testing::TestParamInfo<removePermutationsWithPoolPassParams> obj) {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
std::string targetDevice;
|
||||
std::map<std::string, std::string> configuration;
|
||||
std::vector<size_t> inputShape;
|
||||
std::tie(netPrecision, targetDevice, configuration, inputShape) = obj.param;
|
||||
bool withActivation;
|
||||
std::tie(netPrecision, targetDevice, configuration, inputShape, withActivation) = obj.param;
|
||||
|
||||
std::ostringstream result;
|
||||
result << "netPRC=" << netPrecision.name() << "_";
|
||||
@ -154,6 +163,7 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
|
||||
result << "_configItem=" << configItem.first << "_" << configItem.second;
|
||||
}
|
||||
result << "_IS=" << CommonTestUtils::vec2str(inputShape);
|
||||
result << "_withActivation=" << withActivation;
|
||||
return result.str();
|
||||
}
|
||||
|
||||
@ -175,8 +185,6 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
|
||||
// |
|
||||
// Permute (order: [0, 3, 1, 2])
|
||||
// |
|
||||
// Relu
|
||||
// |
|
||||
// Convolution
|
||||
// |
|
||||
// Pooling
|
||||
@ -188,7 +196,8 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
|
||||
// Reshape
|
||||
InferenceEngine::Precision netPrecision;
|
||||
std::vector<size_t> inputShape;
|
||||
std::tie(netPrecision, targetDevice, configuration, inputShape) = this->GetParam();
|
||||
bool withActivation;
|
||||
std::tie(netPrecision, targetDevice, configuration, inputShape, withActivation) = this->GetParam();
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
|
||||
size_t in_total_dims_size = std::accumulate(std::begin(inputShape), std::end(inputShape), 1, std::multiplies<double>());
|
||||
@ -199,14 +208,12 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
|
||||
auto permute1 = std::make_shared<ngraph::opset1::Transpose>(reshape1,
|
||||
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 3, 1, 2 }));
|
||||
|
||||
auto relu1 = std::make_shared<ngraph::opset3::Relu>(permute1);
|
||||
|
||||
size_t num_out_channels = 12;
|
||||
size_t kernal_size = 8;
|
||||
auto kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, kernal_size} : std::vector<size_t>{kernal_size, 1});
|
||||
std::vector<float> filter_weights = CommonTestUtils::generate_float_numbers(num_out_channels * inputShape[3] * kernal_size,
|
||||
-0.2f, 0.2f);
|
||||
auto conv1 = ngraph::builder::makeConvolution(relu1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
|
||||
auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
|
||||
ngraph::op::PadType::VALID, num_out_channels, false, filter_weights);
|
||||
auto pool_kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, 2} : std::vector<size_t>{2, 1});
|
||||
auto pool = ngraph::builder::makePooling(conv1, pool_kernal_shape, {0, 0}, {0, 0}, pool_kernal_shape, ngraph::op::RoundingType::FLOOR,
|
||||
@ -214,9 +221,14 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
|
||||
|
||||
size_t out_width = ((inputShape[2] - kernal_shape[1]) + 1) / pool_kernal_shape[1];
|
||||
size_t out_height = ((inputShape[1] - kernal_shape[0]) + 1) / pool_kernal_shape[0];
|
||||
auto relu2 = std::make_shared<ngraph::opset3::Relu>(pool);
|
||||
|
||||
auto permute2 = std::make_shared<ngraph::opset1::Transpose>(relu2,
|
||||
auto pool_output = pool;
|
||||
if (withActivation) {
|
||||
auto relu2 = std::make_shared<ngraph::opset3::Relu>(pool);
|
||||
pool_output = relu2;
|
||||
}
|
||||
|
||||
auto permute2 = std::make_shared<ngraph::opset1::Transpose>(pool_output,
|
||||
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
|
||||
|
||||
std::vector<size_t> outFormShapes = { 1, out_width * out_height * num_out_channels };
|
||||
@ -480,8 +492,9 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
::testing::ValuesIn(configs),
|
||||
::testing::ValuesIn(inputShapes)),
|
||||
RemovePermutationsNHWCToNCHWPassTest::getTestCaseName);
|
||||
::testing::ValuesIn(inputShapes),
|
||||
::testing::ValuesIn(std::vector<bool>{false, true})), // with activation
|
||||
RemovePermutationsWithPoolAndActTest::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_PermutationPass, RemovePermutationsWithTwoConvTest,
|
||||
::testing::Combine(
|
||||
@ -489,7 +502,7 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
::testing::ValuesIn(configs),
|
||||
::testing::ValuesIn(inputShapes)),
|
||||
RemovePermutationsNHWCToNCHWPassTest::getTestCaseName);
|
||||
RemovePermutationsWithTwoConvTest::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_PermutationPass, RemovePermutationsWithEltwiseTest,
|
||||
::testing::Combine(
|
||||
@ -497,7 +510,7 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
::testing::ValuesIn(configs),
|
||||
::testing::ValuesIn(inputShapes)),
|
||||
RemovePermutationsNHWCToNCHWPassTest::getTestCaseName);
|
||||
RemovePermutationsWithEltwiseTest::getTestCaseName);
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
||||
|
||||
|
@ -16,6 +16,8 @@ const std::map<std::string, std::string> supportedConfigKeysWithDefaults = {
|
||||
{GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_0"), "1.000000"},
|
||||
{GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE), ""},
|
||||
{GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE_GENERATION), ""},
|
||||
{GNA_CONFIG_KEY(EXEC_TARGET), ""},
|
||||
{GNA_CONFIG_KEY(COMPILE_TARGET), ""},
|
||||
{GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW_EXACT},
|
||||
{GNA_CONFIG_KEY(COMPACT_MODE), CONFIG_VALUE(NO)},
|
||||
{CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(NO)},
|
||||
@ -104,28 +106,28 @@ TEST_F(GNAPluginConfigTest, GnaConfigDeviceModeTest) {
|
||||
EXPECT_EQ(config.gna_proc_type, static_cast<intel_gna_proc_t>(GNA_HARDWARE));
|
||||
#else
|
||||
EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeHardware);
|
||||
EXPECT_EQ(config.pluginGna2DeviceConsistent, Gna2DeviceVersionSoftwareEmulation);
|
||||
EXPECT_EQ(config.swExactMode, false);
|
||||
#endif
|
||||
SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW);
|
||||
#if GNA_LIB_VER == 1
|
||||
EXPECT_EQ(config.gna_proc_type, static_cast<intel_gna_proc_t>(GNA_SOFTWARE));
|
||||
#else
|
||||
EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeSoftware);
|
||||
EXPECT_EQ(config.pluginGna2DeviceConsistent, Gna2DeviceVersionSoftwareEmulation);
|
||||
EXPECT_EQ(config.swExactMode, false);
|
||||
#endif
|
||||
SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW_EXACT);
|
||||
#if GNA_LIB_VER == 1
|
||||
EXPECT_EQ(config.gna_proc_type, static_cast<intel_gna_proc_t>(GNA_SOFTWARE & GNA_HARDWARE));
|
||||
#else
|
||||
EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeSoftware);
|
||||
EXPECT_EQ(config.pluginGna2DeviceConsistent, Gna2DeviceVersion1_0);
|
||||
EXPECT_EQ(config.swExactMode, true);
|
||||
#endif
|
||||
SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_AUTO);
|
||||
#if GNA_LIB_VER == 1
|
||||
EXPECT_EQ(config.gna_proc_type, static_cast<intel_gna_proc_t>(GNA_AUTO));
|
||||
#else
|
||||
EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeAuto);
|
||||
EXPECT_EQ(config.pluginGna2DeviceConsistent, Gna2DeviceVersionSoftwareEmulation);
|
||||
EXPECT_EQ(config.swExactMode, false);
|
||||
#endif
|
||||
ExpectThrow(GNA_CONFIG_KEY(DEVICE_MODE), "");
|
||||
ExpectThrow(GNA_CONFIG_KEY(DEVICE_MODE), "abc");
|
||||
@ -187,3 +189,25 @@ TEST_F(GNAPluginConfigTest, GnaConfigSingleThreadTest) {
|
||||
config.gnaFlags.gna_openmp_multithreading,
|
||||
true);
|
||||
}
|
||||
|
||||
TEST_F(GNAPluginConfigTest, GnaConfigGnaExecTargetTest) {
|
||||
SetAndCompare(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_2_0");
|
||||
EXPECT_EQ(config.gnaExecTarget, "GNA_TARGET_2_0");
|
||||
SetAndCompare(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_3_0");
|
||||
EXPECT_EQ(config.gnaExecTarget, "GNA_TARGET_3_0");
|
||||
ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_3_5");
|
||||
ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "0");
|
||||
ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_1_5");
|
||||
ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET");
|
||||
}
|
||||
|
||||
TEST_F(GNAPluginConfigTest, GnaConfigGnaCompileTargetTest) {
|
||||
SetAndCompare(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET_2_0");
|
||||
EXPECT_EQ(config.gnaCompileTarget, "GNA_TARGET_2_0");
|
||||
SetAndCompare(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET_3_0");
|
||||
EXPECT_EQ(config.gnaCompileTarget, "GNA_TARGET_3_0");
|
||||
ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET_3_5");
|
||||
ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "0");
|
||||
ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET_1_5");
|
||||
ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET");
|
||||
}
|
||||
|
@ -48,6 +48,8 @@ target_include_directories(${TARGET_NAME} PRIVATE
|
||||
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE ${LIBRARIES})
|
||||
|
||||
set_ie_threading_interface_for(${TARGET_NAME})
|
||||
|
||||
add_dependencies(${TARGET_NAME} ${DEPENDENCIES})
|
||||
|
||||
add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME})
|
||||
|
@ -21,6 +21,8 @@ file(GLOB SHARED_TESTS_SRC
|
||||
add_library(${TARGET_NAME} STATIC ${SHARED_TESTS_SRC})
|
||||
add_dependencies(${TARGET_NAME} inference_engine_preproc MultiDevicePlugin mock_engine)
|
||||
|
||||
set_ie_threading_interface_for(${TARGET_NAME})
|
||||
|
||||
ie_faster_build(${TARGET_NAME}
|
||||
UNITY
|
||||
PCH PRIVATE "precomp.hpp"
|
||||
|
@ -25,8 +25,6 @@
|
||||
#include <ngraph_functions/builders.hpp>
|
||||
#include <functional_test_utils/blob_utils.hpp>
|
||||
|
||||
#include "ie_parallel.hpp"
|
||||
|
||||
using namespace ::testing;
|
||||
using namespace InferenceEngine;
|
||||
|
||||
|
@ -49,6 +49,7 @@ protected:
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
PluginCache::get().reset();
|
||||
}
|
||||
|
||||
std::string ConvNet(const int batch, TBlob<uint8_t>::Ptr &weights) {
|
||||
|
@ -139,6 +139,7 @@ protected:
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
PluginCache::get().reset();
|
||||
}
|
||||
|
||||
template <Precision::ePrecision PRC>
|
||||
|
@ -83,7 +83,6 @@ source_group("include" FILES ${TEST_INCLUDE})
|
||||
# create target
|
||||
|
||||
add_executable(${TARGET_NAME} ${TEST_SRC} ${TEST_INCLUDE})
|
||||
set_ie_threading_interface_for(${TARGET_NAME})
|
||||
|
||||
target_include_directories(${TARGET_NAME} PRIVATE
|
||||
${IE_MAIN_SOURCE_DIR}/src/gna_plugin
|
||||
|
@ -147,7 +147,7 @@ TEST_F(PWLAproximationTest, forReLUonRecursiveAlgoWithSegmentThresholdIsSuccess)
|
||||
.propagate_forward()
|
||||
.called_with()
|
||||
.pwl_quantization_activation(DnnActivationType::kActRelu)
|
||||
.pwl_quantization_segments_threshold(2);
|
||||
.pwl_quantization_segments_threshold(4);
|
||||
}
|
||||
|
||||
TEST_F(PWLAproximationTest, forLeakyReLUonRecursiveAlgoWithSegmentThresholdIsSuccess) {
|
||||
@ -157,7 +157,7 @@ TEST_F(PWLAproximationTest, forLeakyReLUonRecursiveAlgoWithSegmentThresholdIsSuc
|
||||
.propagate_forward()
|
||||
.called_with()
|
||||
.pwl_quantization_activation(DnnActivationType::kActLeakyRelu)
|
||||
.pwl_quantization_segments_threshold(2);
|
||||
.pwl_quantization_segments_threshold(4);
|
||||
}
|
||||
|
||||
TEST_F(PWLAproximationTest, DISABLED_forIdentityOnRecursiveAlgoWithSegmentThresholdIsSuccess) {
|
||||
|
@ -102,8 +102,14 @@ class PWLMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
|
||||
}
|
||||
|
||||
switch (slopeChangedTimes) {
|
||||
case 2 : return kActRelu; // also relu has y=0 segment while identity doenst have
|
||||
case 3 : return kActIdentity;
|
||||
case 3 :
|
||||
if (comp.op.pwl.num_segments == 4) {
|
||||
// ReLU has y=0 segment while identity doesn't have
|
||||
// 2 segments are added: one at the begining and one at the end, due to saturation errata
|
||||
return kActRelu;
|
||||
} else {
|
||||
return kActIdentity;
|
||||
}
|
||||
default:
|
||||
// currently cannot determine between sigmoid or tanh etc
|
||||
if (slopeChangedTimes > 3) {
|
||||
|
@ -36,8 +36,15 @@ void prepare_quantization::prepare_packed_quantize(program_impl& p) {
|
||||
|
||||
auto levels = quantize_node.get_primitive()->levels;
|
||||
|
||||
auto &input_low = quantize_node.get_dependency(1).template as<data>();
|
||||
auto &input_high = quantize_node.get_dependency(2).template as<data>();
|
||||
program_node &input_low_node = quantize_node.get_dependency(1);
|
||||
program_node &input_high_node = quantize_node.get_dependency(2);
|
||||
|
||||
if (!input_low_node.is_type<data>() || !input_high_node.is_type<data>()) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto &input_low = input_low_node.as<data>();
|
||||
auto &input_high = input_high_node.as<data>();
|
||||
|
||||
auto &mem_input_low = input_low.get_attached_memory();
|
||||
auto &mem_input_high = input_high.get_attached_memory();
|
||||
@ -99,10 +106,20 @@ void prepare_quantization::prepare_scale_shift_opt(program_impl &p) {
|
||||
if (levels == 2 || levels > 256 || quantize_node.get_scale_shift_opt() || quantize_node.is_constant())
|
||||
return;
|
||||
|
||||
auto &input_low = quantize_node.get_dependency(1).template as<data>();
|
||||
auto &input_high = quantize_node.get_dependency(2).template as<data>();
|
||||
auto &output_low = quantize_node.get_dependency(3).template as<data>();
|
||||
auto &output_high = quantize_node.get_dependency(4).template as<data>();
|
||||
program_node &input_low_node = quantize_node.get_dependency(1);
|
||||
program_node &input_high_node = quantize_node.get_dependency(2);
|
||||
program_node &output_low_node = quantize_node.get_dependency(3);
|
||||
program_node &output_high_node = quantize_node.get_dependency(4);
|
||||
|
||||
if (!input_low_node.is_type<data>() || !input_high_node.is_type<data>() ||
|
||||
!output_low_node.is_type<data>() || !output_high_node.is_type<data>()) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto &input_low = input_low_node.as<data>();
|
||||
auto &input_high = input_high_node.as<data>();
|
||||
auto &output_low = output_low_node.as<data>();
|
||||
auto &output_high = output_high_node.as<data>();
|
||||
|
||||
auto &mem_input_low = input_low.get_attached_memory();
|
||||
auto &mem_input_high = input_high.get_attached_memory();
|
||||
|
@ -60,6 +60,7 @@ extensions/back/ShufflenetReLUReorder.py
|
||||
extensions/back/SpecialNodesFinalization.py
|
||||
extensions/back/StridedSliceMasksNormalizer.py
|
||||
extensions/back/TopKNormalizer.py
|
||||
extensions/back/TransposeDFT.py
|
||||
extensions/back/TransposeReduceFusing.py
|
||||
extensions/back/UselessConcatRemoval.py
|
||||
extensions/front/__init__.py
|
||||
@ -196,6 +197,7 @@ extensions/front/mxnet/elementwise_ext.py
|
||||
extensions/front/mxnet/eltwise_scalar_replacers.py
|
||||
extensions/front/mxnet/exp_ext.py
|
||||
extensions/front/mxnet/expand_dims_ext.py
|
||||
extensions/front/mxnet/fft_ext.py
|
||||
extensions/front/mxnet/flatten_ext.py
|
||||
extensions/front/mxnet/fully_connected_ext.py
|
||||
extensions/front/mxnet/gather.py
|
||||
@ -208,6 +210,7 @@ extensions/front/mxnet/max_ext.py
|
||||
extensions/front/mxnet/multibox_detection_ext.py
|
||||
extensions/front/mxnet/mx_reshape_reverse.py
|
||||
extensions/front/mxnet/mx_reshape_to_reshape.py
|
||||
extensions/front/mxnet/MXFFTToDFT.py
|
||||
extensions/front/mxnet/MXRepeatReplacer.py
|
||||
extensions/front/mxnet/null_ext.py
|
||||
extensions/front/mxnet/pad_ext.py
|
||||
@ -383,10 +386,13 @@ extensions/front/tf/broadcast_ext.py
|
||||
extensions/front/tf/bucketize.py
|
||||
extensions/front/tf/bucketize_ext.py
|
||||
extensions/front/tf/Cast_ext.py
|
||||
extensions/front/tf/ComplexAbs.py
|
||||
extensions/front/tf/ComplexAbsAfterComplex.py
|
||||
extensions/front/tf/concat.py
|
||||
extensions/front/tf/concat_ext.py
|
||||
extensions/front/tf/const_ext.py
|
||||
extensions/front/tf/conv_ext.py
|
||||
extensions/front/tf/CorrectRollAxes.py
|
||||
extensions/front/tf/crop_and_resize_ext.py
|
||||
extensions/front/tf/CropAndResizeReplacement.py
|
||||
extensions/front/tf/CTCGreedyDecoder_ext.py
|
||||
@ -413,6 +419,7 @@ extensions/front/tf/faster_rcnn_support_api_v1.15.json
|
||||
extensions/front/tf/faster_rcnn_support_api_v1.7.json
|
||||
extensions/front/tf/faster_rcnn_support_api_v2.0.json
|
||||
extensions/front/tf/faster_rcnn_support_api_v2.4.json
|
||||
extensions/front/tf/fft_ext.py
|
||||
extensions/front/tf/fifo_queue_v2_ext.py
|
||||
extensions/front/tf/fifo_replacer.py
|
||||
extensions/front/tf/fill_ext.py
|
||||
@ -471,6 +478,7 @@ extensions/front/tf/rfcn_support_api_v1.10.json
|
||||
extensions/front/tf/rfcn_support_api_v1.13.json
|
||||
extensions/front/tf/rfcn_support_api_v1.14.json
|
||||
extensions/front/tf/roll_ext.py
|
||||
extensions/front/tf/RollRealImagPack.py
|
||||
extensions/front/tf/select_ext.py
|
||||
extensions/front/tf/sign_ext.py
|
||||
extensions/front/tf/SizeReplacer.py
|
||||
@ -495,12 +503,14 @@ extensions/front/tf/ssd_toolbox_detection_output.json
|
||||
extensions/front/tf/ssd_toolbox_multihead_detection_output.json
|
||||
extensions/front/tf/ssd_v2_support.json
|
||||
extensions/front/tf/SSDToolboxDetectionOutput.py
|
||||
extensions/front/tf/SSliceComplex.py
|
||||
extensions/front/tf/swap_deconv_inputs.py
|
||||
extensions/front/tf/swish_ext.py
|
||||
extensions/front/tf/SwitchMergeOptimization.py
|
||||
extensions/front/tf/TensorArrayExtractors.py
|
||||
extensions/front/tf/TensorArrayGatherV3.py
|
||||
extensions/front/tf/tensorflow_custom_operations_config_update.py
|
||||
extensions/front/tf/TFFFTToDFT.py
|
||||
extensions/front/tf/TFResizeToInterpolate.py
|
||||
extensions/front/tf/TFSliceToSlice.py
|
||||
extensions/front/tf/tile_ext.py
|
||||
@ -667,6 +677,7 @@ extensions/ops/depth_to_space.py
|
||||
extensions/ops/dequantize_linear.py
|
||||
extensions/ops/DetectionOutput.py
|
||||
extensions/ops/detectionoutput_onnx.py
|
||||
extensions/ops/dft.py
|
||||
extensions/ops/elementwise.py
|
||||
extensions/ops/embedding_bag.py
|
||||
extensions/ops/Enter.py
|
||||
@ -695,6 +706,7 @@ extensions/ops/lstm_sequence.py
|
||||
extensions/ops/MatMul.py
|
||||
extensions/ops/merge.py
|
||||
extensions/ops/mvn.py
|
||||
extensions/ops/mxfft.py
|
||||
extensions/ops/mxrepeat.py
|
||||
extensions/ops/mxreshape.py
|
||||
extensions/ops/NextIteration.py
|
||||
@ -760,6 +772,7 @@ extensions/ops/TensorArrayScatter.py
|
||||
extensions/ops/TensorArraySize.py
|
||||
extensions/ops/TensorArrayWrite.py
|
||||
extensions/ops/TensorIterator_ops.py
|
||||
extensions/ops/TFFFT.py
|
||||
extensions/ops/TFResize.py
|
||||
extensions/ops/topk.py
|
||||
extensions/ops/topkrois_onnx.py
|
||||
|
@ -1,13 +1,8 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import numpy as np
|
||||
|
||||
from extensions.ops.transpose import Transpose
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.front.tf.graph_utils import create_op_with_const_inputs
|
||||
from mo.graph.graph import Graph, Port
|
||||
from mo.back.replacement import BackReplacementPattern
|
||||
from mo.graph.graph import Graph
|
||||
|
||||
|
||||
class LayoutChangeForGatherND(BackReplacementPattern):
|
||||
@ -19,31 +14,9 @@ class LayoutChangeForGatherND(BackReplacementPattern):
|
||||
force_shape_inference = True
|
||||
graph_condition = [lambda graph: graph.graph['fw'] == 'tf']
|
||||
|
||||
@staticmethod
|
||||
def insert_transpose(graph: Graph, input_port: Port, before_input=True):
|
||||
input_rank = len(input_port.data.get_shape())
|
||||
if input_rank > 3:
|
||||
if before_input:
|
||||
axis_order = np.concatenate((int64_array([0]),
|
||||
int64_array(list(range(2, input_rank))),
|
||||
int64_array([1])))
|
||||
source_node = input_port.get_source().node
|
||||
transpose_name = source_node.soft_get('name', source_node.id) + '/TransposeToNHWC'
|
||||
else:
|
||||
axis_order = np.concatenate(
|
||||
(int64_array([0]),
|
||||
int64_array([input_rank - 1]),
|
||||
int64_array(list(range(1, input_rank - 1)))))
|
||||
transpose_name = input_port.node.soft_get('name', input_port.node.id) + '/TransposeToNCHW'
|
||||
input_port.node['need_shape_inference'] = True
|
||||
input_port.node['override_output_shape'] = True
|
||||
transpose = create_op_with_const_inputs(graph, Transpose, {1: axis_order}, {'name': transpose_name})
|
||||
input_port.get_connection().insert_node(transpose)
|
||||
transpose['need_shape_inference'] = True
|
||||
transpose['override_output_shape'] = True
|
||||
|
||||
def find_and_replace_pattern(self, graph: Graph):
|
||||
import extensions.middle.InsertLayoutPropagationTransposes as InsertTransposes
|
||||
for gathernd in graph.get_op_nodes(type='GatherND'):
|
||||
self.insert_transpose(graph, gathernd.in_port(0), before_input=True)
|
||||
self.insert_transpose(graph, gathernd.in_port(1), before_input=True)
|
||||
self.insert_transpose(graph, gathernd.out_port(0), before_input=False)
|
||||
InsertTransposes.insert_transpose(graph, gathernd.in_port(0), before_input=True)
|
||||
InsertTransposes.insert_transpose(graph, gathernd.in_port(1), before_input=True)
|
||||
InsertTransposes.insert_transpose(graph, gathernd.out_port(0), before_input=False)
|
||||
|
@ -310,6 +310,12 @@ class ReverseChannelsPropagationUp(BackReplacementPattern):
|
||||
reverse_channels_copy = reverse_channels.copy_node({'axis': np.array(axis)})
|
||||
|
||||
src = port.get_connection().get_source()
|
||||
if src.node.soft_get('type') == 'Parameter':
|
||||
# For Parameter nodes tensor debug attributes should not move to the last node
|
||||
# of subgraph. It is needed for the proper mapping of input framework name.
|
||||
# For this reason "source" mode is used to keep tensor debug attributes at Parameter node.
|
||||
port.get_connection().set_source(reverse_channels_copy.out_port(0), attributes_save_mode="source")
|
||||
else:
|
||||
port.get_connection().set_source(reverse_channels_copy.out_port(0))
|
||||
src.connect(reverse_channels_copy.in_port(0))
|
||||
|
||||
|
34
model-optimizer/extensions/back/TransposeDFT.py
Normal file
34
model-optimizer/extensions/back/TransposeDFT.py
Normal file
@ -0,0 +1,34 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
from mo.back.replacement import BackReplacementPattern
|
||||
from mo.graph.graph import Graph
|
||||
|
||||
|
||||
class TransposeDFT(BackReplacementPattern):
|
||||
"""
|
||||
In TF models, operation (I)FFTxD has some input shape, [N_0, ..., N_{r - 1}].
|
||||
|
||||
After the transformation SSliceComplexRolledFFTPackBlockReplacement, we have an input shape [N_0, ..., N_{r - 1}, 2]
|
||||
for operation DFT or IDFT.
|
||||
|
||||
If the input rank in the TF model was greater than 2, we have [N_0, 2, N_1, ..., N_{r - 1}] as the input shape of
|
||||
(I)DFT after the layout conversion, if the option '--disable_nhwc_to_nchw' is not specified.
|
||||
|
||||
But, generally speaking, according to DFT and IDFT specifications, the input shape [N_0, 2, N_1, ..., N_{r - 1}]
|
||||
is not correct input shape for DFT and IDFT. Hence, we need to insert Transpose operations before and after (I)DFT
|
||||
in such cases.
|
||||
|
||||
This transformation inserts such Transpose nodes, when the source model was the TF model, (I)DFT node has the
|
||||
attribute 'need_insert_transposes_for_dft', and this attribute is True.
|
||||
"""
|
||||
enabled = True
|
||||
force_shape_inference = True
|
||||
graph_condition = [lambda graph: graph.graph['fw'] == 'tf']
|
||||
|
||||
def find_and_replace_pattern(self, graph: Graph):
|
||||
import extensions.middle.InsertLayoutPropagationTransposes as InsertTransposes
|
||||
for dft in graph.get_op_nodes(need_insert_transposes_for_dft=True):
|
||||
InsertTransposes.insert_transpose(graph, dft.in_port(0), before_input=True)
|
||||
InsertTransposes.insert_transpose(graph, dft.out_port(0), before_input=False)
|
@ -36,3 +36,4 @@ class ThresholdedReluDecomposition(FrontReplacementPattern):
|
||||
mul.in_port(1).connect(float_greater.out_port(0))
|
||||
|
||||
rename_nodes([(node, name + '/TBR'), (mul, name)])
|
||||
graph.remove_node(node.id)
|
||||
|
140
model-optimizer/extensions/front/mxnet/MXFFTToDFT.py
Normal file
140
model-optimizer/extensions/front/mxnet/MXFFTToDFT.py
Normal file
@ -0,0 +1,140 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import numpy as np
|
||||
|
||||
from extensions.ops.dft import DFT, IDFT
|
||||
from extensions.ops.elementwise import Add, Sub
|
||||
from extensions.ops.rank import Rank
|
||||
from extensions.ops.scatter import ScatterUpdate
|
||||
from extensions.ops.split import Split
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.front.tf.graph_utils import create_op_with_const_inputs
|
||||
from mo.graph.graph import Graph, Node, rename_nodes
|
||||
from mo.front.common.replacement import FrontReplacementSubgraph
|
||||
from mo.ops.broadcast import Broadcast
|
||||
from mo.ops.concat import Concat
|
||||
from mo.ops.pad import Pad
|
||||
from mo.ops.reshape import Reshape
|
||||
from mo.ops.squeeze import Squeeze
|
||||
from mo.ops.unsqueeze import Unsqueeze
|
||||
|
||||
|
||||
class MXFFTToDFT(FrontReplacementSubgraph):
|
||||
"""
|
||||
This transformation converts the operation MXFFT into OpenVINO DFT (if the attribute 'is_inverse' is False),
|
||||
or into OpenVINO IDFT (otherwise).
|
||||
|
||||
According to https://mxnet.apache.org/versions/1.0.0/api/python/symbol/contrib.html#mxnet.symbol.contrib.fft,
|
||||
MxNet operation FFT accept 2 input data shapes: [N, d] or [N_1, N_2, N_3, d], data can only be real numbers.
|
||||
The output data has shape: [N, 2*d] or [N_1, N_2, N_3, 2*d]. The format is: [real0, imag0, real1, imag1, ...].
|
||||
|
||||
Next, MxNet operation IFFT accept 2 input data shapes: [N, d] or [N_1, N_2, N_3, d]. Data is in format:
|
||||
[real0, imag0, real1, imag1, ...]. Last dimension must be an even number. The output data has shape: [N, d/2] or
|
||||
[N_1, N_2, N_3, d/2]. It is only the real part of the result.
|
||||
|
||||
But OpenVINO DFT and IDFT operations uses complex input data represented as real tensors of the shape
|
||||
[N_1, ..., N_r, 2]. Also, the result of OpenVINO DFT and IDFT operations is always complex but represented as
|
||||
a real tensor of the shape [M_1, ..., M_r, 2]. If OpenVINO DFT or IDFT have no input signal_size, the output shape
|
||||
and the input shape are the same.
|
||||
|
||||
Hence, to convert MxNet FFT to OpenVINO DFT, we need
|
||||
1) to convert input data from the shape [N, d] or [N_1, N_2, N_3, d] to shape [N, d, 1] or [N_1, N_2, N_3, d, 1]
|
||||
respectively;
|
||||
2) to pad converted data using pads_begin = [0, 0, 0] and pads_end = [0, 0, 1] for MxNet FFT input shape [N, d], or
|
||||
using pads_begin [0, 0, 0, 0, 0] and pads_end = [0, 0, 0, 0, 1] for MxNet FFT input shape [N_1, N_2, N_3, d],
|
||||
with mode=constant;
|
||||
3) to put padded data into DFT input 0, using (-1) in 'axes' input;
|
||||
4) to reshape calculated DFT output to the shape [N, 2 * d] for for MxNet FFT input shape [N, d], or to the shape
|
||||
[N_1, N_2, N_3, 2 * d]
|
||||
|
||||
Finally, to convert MxNet IFFT to OpenVINO IDFT, we need
|
||||
1) to reshape input data from the shape [N, d] or [N_1, N_2, N_3, d] to shape [N, d // 2, 2] or
|
||||
[N_1, N_2, N_3, d // 2, 2] respectively;
|
||||
2) to put reshaped input data to the input 0 of IDFT, using (-1) in 'axes' input;
|
||||
3) to get real parts using Split + Squeeze.
|
||||
"""
|
||||
enabled = True
|
||||
|
||||
def find_and_replace_pattern(self, graph: Graph):
|
||||
for mx_fft in graph.get_op_nodes(op='MXFFT'):
|
||||
if mx_fft.soft_get('is_inverse', False):
|
||||
self.convert_ifft_to_dft(graph, mx_fft)
|
||||
else:
|
||||
self.convert_fft_to_dft(graph, mx_fft)
|
||||
|
||||
def convert_fft_to_dft(self, graph: Graph, mx_fft: Node):
    """
    Replaces a forward MXFFT node with an Unsqueeze + Pad + DFT + Reshape sub-graph.

    The input of shape [..., d] is unsqueezed to [..., d, 1], padded with one trailing zero to
    [..., d, 2] (real/imaginary pair layout expected by DFT), transformed along axis -1, and
    the complex result [..., d, 2] is reshaped back to the MxNet layout [..., 2 * d].
    """
    mx_fft_name = mx_fft.soft_get('name', mx_fft.id)
    # Add a trailing dimension of size 1: [..., d] -> [..., d, 1].
    unsqueeze_node = create_op_with_const_inputs(graph, Unsqueeze, {1: int64_array([-1])},
                                                 {'name': mx_fft_name + '/Unsqueeze'})
    # Rank of the ORIGINAL input; used below to build pads and the final reshape target shape.
    rank_node = Rank(graph, {'name': mx_fft_name + '/Rank'}).create_node()

    # Re-route the MXFFT input into Unsqueeze, and also feed the same source into Rank.
    mx_fft_connection = mx_fft.in_port(0).get_connection()
    mx_fft_connection.set_destination(unsqueeze_node.in_port(0))
    mx_fft_connection.get_source().connect(rank_node.in_port(0))

    # pads_begin = zeros of length rank+1 (rank after the Unsqueeze above).
    add_node = create_op_with_const_inputs(graph, Add, {1: int64_array(1)},
                                           {'name': mx_fft_name + '/Add'}, rank_node)
    broadcast_node1 = create_op_with_const_inputs(graph, Broadcast, {0: int64_array(0)},
                                                  {'name': mx_fft_name + '/Pad_broadcast'})
    add_node.out_port(0).connect(broadcast_node1.in_port(1))

    # pads_end = same zero vector but with 1 written at position 'rank' (the new trailing axis),
    # i.e. pad exactly one zero after the last axis: [..., d, 1] -> [..., d, 2].
    scatter_node = create_op_with_const_inputs(graph, ScatterUpdate,
                                               {2: int64_array(1), 3: int64_array(0)},
                                               {'name': mx_fft_name + '/ScatterUpdate'})
    broadcast_node1.out_port(0).connect(scatter_node.in_port(0))
    rank_node.out_port(0).connect(scatter_node.in_port(1))

    pad_node = Pad(graph, {'name': mx_fft_name + '/Pad', 'mode': 'constant'}).create_node([unsqueeze_node,
                                                                                           broadcast_node1,
                                                                                           scatter_node])

    # DFT along the last data axis (axis -1 in the complex interpretation).
    dft_node = create_op_with_const_inputs(graph, DFT, {1: int64_array([-1])},
                                           {'name': mx_fft_name + '/DFT', 'in_ports_count': 2},
                                           pad_node)

    # Target shape for the final Reshape: (rank - 1) zeros (0 = "keep dimension") followed by [-1, 2],
    # which collapses the complex pair into the last MxNet dimension of size 2 * d.
    sub_node = create_op_with_const_inputs(graph, Sub, {1: int64_array(1)}, {'name': mx_fft_name + '/Sub'})
    rank_node.out_port(0).connect(sub_node.in_port(0))
    broadcast_node2 = create_op_with_const_inputs(graph, Broadcast, {0: int64_array(0)},
                                                  {'name': mx_fft_name + '/Reshape_broadcast'})
    sub_node.out_port(0).connect(broadcast_node2.in_port(1))
    concat_node = create_op_with_const_inputs(graph, Concat, {1: int64_array([-1, 2])},
                                              {'name': mx_fft_name + '/New_shape', 'in_ports_count': 2, 'axis': 0},
                                              broadcast_node2)

    reshape_node = Reshape(graph, {}).create_node([dft_node, concat_node])

    # Swap the MXFFT output over to the new sub-graph and transfer the original node name.
    mx_fft.out_port(0).get_connection().set_source(reshape_node.out_port(0))
    rename_nodes([(mx_fft, mx_fft_name + '/to_be_removed'), (reshape_node, mx_fft_name)])
|
||||
def convert_ifft_to_dft(self, graph: Graph, mx_fft: Node):
    """
    Replaces an inverse MXFFT node with a Reshape + IDFT + Split + Squeeze sub-graph.

    The input of shape [..., d] (interleaved real/imag pairs) is reshaped to [..., d // 2, 2],
    transformed by IDFT along the last complex axis, and the real part of the result is
    extracted with Split + Squeeze, giving the MxNet output shape [..., d // 2].
    """
    mx_fft_name = mx_fft.soft_get('name', mx_fft.id)

    # Rank of the original input; used to build the reshape target shape.
    rank_node = Rank(graph, {'name': mx_fft_name + '/rank'}).create_node()
    # New shape = (rank - 1) zeros (0 = "keep dimension") followed by [-1, 2]:
    # splits the trailing interleaved dimension d into (d // 2) complex pairs.
    sub_node = create_op_with_const_inputs(graph, Sub, {1: int64_array(1)}, {'name': mx_fft_name + '/Sub'})
    rank_node.out_port(0).connect(sub_node.in_port(0))
    broadcast_node0 = create_op_with_const_inputs(graph, Broadcast, {0: int64_array(0)},
                                                  {'name': mx_fft_name + '/broadcast'})
    sub_node.out_port(0).connect(broadcast_node0.in_port(1))
    concat_node = create_op_with_const_inputs(graph, Concat, {1: int64_array([-1, 2])},
                                              {'name': mx_fft_name + '/new_shape', 'in_ports_count': 2, 'axis': 0},
                                              broadcast_node0)

    reshape_node = Reshape(graph, {'name': mx_fft_name + '/reshape'}).create_node()
    concat_node.out_port(0).connect(reshape_node.in_port(1))

    # Re-route the MXFFT input into Reshape, and also feed the same source into Rank.
    mx_fft_connection = mx_fft.in_port(0).get_connection()
    mx_fft_connection.set_destination(reshape_node.in_port(0))
    mx_fft_connection.get_source().connect(rank_node.in_port(0))

    # IDFT along the last data axis (axis -1 in the complex interpretation).
    dft_node = create_op_with_const_inputs(graph, IDFT, {1: int64_array([-1])},
                                           {'name': mx_fft_name + '/idft', 'in_ports_count': 2},
                                           reshape_node)

    # Split the trailing [real, imag] pair and keep only the real part (output 0 of Split),
    # then drop the now-size-1 trailing axis.
    split_node = create_op_with_const_inputs(graph, Split, {1: int64_array(-1)},
                                             {'name': mx_fft_name + '/split', 'num_splits': 2},
                                             dft_node)
    squeeze_node = create_op_with_const_inputs(graph, Squeeze, {1: int64_array([-1])}, {}, split_node)

    # Swap the MXFFT output over to the new sub-graph and transfer the original node name.
    mx_fft.out_port(0).get_connection().set_source(squeeze_node.out_port(0))
    rename_nodes([(mx_fft, mx_fft_name + '/to_be_removed'), (squeeze_node, mx_fft_name)])
25
model-optimizer/extensions/front/mxnet/fft_ext.py
Normal file
25
model-optimizer/extensions/front/mxnet/fft_ext.py
Normal file
@ -0,0 +1,25 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from extensions.ops.mxfft import MXFFT
|
||||
from mo.front.extractor import FrontExtractorOp
|
||||
|
||||
|
||||
class FFTFrontExtractor(FrontExtractorOp):
    """Extractor for the MxNet 'fft' operation: maps it onto the internal MXFFT op as a forward transform."""
    op = 'fft'
    enabled = True

    @classmethod
    def extract(cls, node):
        # Forward FFT, hence is_inverse=False.
        attrs = {'is_inverse': False}
        MXFFT.update_node_stat(node, attrs)
        return cls.enabled
||||
class IFFTFrontExtractor(FrontExtractorOp):
    """Extractor for the MxNet 'ifft' operation: maps it onto the internal MXFFT op as an inverse transform."""
    op = 'ifft'
    enabled = True

    @classmethod
    def extract(cls, node):
        # Inverse FFT, hence is_inverse=True.
        attrs = {'is_inverse': True}
        MXFFT.update_node_stat(node, attrs)
        return cls.enabled
36
model-optimizer/extensions/front/tf/ComplexAbs.py
Normal file
36
model-optimizer/extensions/front/tf/ComplexAbs.py
Normal file
@ -0,0 +1,36 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
from extensions.ops.elementwise import Pow
|
||||
from extensions.ops.ReduceOps import ReduceSum
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.front.common.replacement import FrontReplacementSubgraph
|
||||
from mo.front.tf.graph_utils import create_op_with_const_inputs
|
||||
from mo.graph.graph import Graph, rename_nodes
|
||||
from mo.middle.passes.convert_data_type import data_type_str_to_np
|
||||
|
||||
|
||||
class ComplexAbs(FrontReplacementSubgraph):
    """
    Replaces a ComplexAbs node with sqrt(ReduceSum(x ** 2, axis=-1)), where x is the real tensor
    representation of the complex input (real and imaginary parts in the last dimension).
    Both the square and the square root are expressed via Pow nodes.
    """
    enabled = True

    def run_after(self):
        from extensions.front.tf.ComplexAbsAfterComplex import ComplexAbsAfterComplex
        return [ComplexAbsAfterComplex]

    def find_and_replace_pattern(self, graph: Graph):
        for complex_abs in graph.get_op_nodes(op='ComplexAbs'):
            complex_abs_name = complex_abs.soft_get('name', complex_abs.id)
            # Use the numeric type requested on the command line for the Pow exponents.
            power_type = data_type_str_to_np(graph.graph['cmd_params'].data_type)

            # x ** 2, element-wise over the real/imag parts.
            squared = create_op_with_const_inputs(graph, Pow, {1: power_type(2.0)},
                                                  {'name': complex_abs_name + '/squared_parts'})
            complex_abs.in_port(0).get_connection().set_destination(squared.in_port(0))
            # Sum the squared real and imaginary parts over the trailing (complex) axis.
            # Renamed from 'sum' to avoid shadowing the Python builtin.
            squared_sum = create_op_with_const_inputs(graph, ReduceSum, {1: int64_array(-1)},
                                                      {'name': complex_abs_name + '/squared_abs'},
                                                      squared)
            # sqrt(y) expressed as y ** 0.5.
            sqrt = create_op_with_const_inputs(graph, Pow, {1: power_type(0.5)}, {}, squared_sum)

            complex_abs.out_port(0).get_connection().set_source(sqrt.out_port(0))

            rename_nodes([(complex_abs, complex_abs_name + '/to_be_removed'), (sqrt, complex_abs_name)])
@ -0,0 +1,69 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
from extensions.ops.elementwise import Add, Pow
|
||||
from mo.front.common.replacement import FrontReplacementSubgraph
|
||||
from mo.front.subgraph_matcher import SubgraphMatch
|
||||
from mo.front.tf.graph_utils import create_op_with_const_inputs
|
||||
from mo.graph.graph import Graph, rename_nodes
|
||||
from mo.middle.passes.convert_data_type import data_type_str_to_np
|
||||
|
||||
|
||||
class ComplexAbsAfterComplex(FrontReplacementSubgraph):
    """
    This transformation converts a sub-graph

        SomeOp1  SomeOp2
           |        |
           ----------
             Complex
                |
            ComplexAbs

    into the sub-graph

         SomeOp1            SomeOp2
            |                  |
        Constant[2]--Pow   Pow--Constant[2]
            |                  |
            -------------------
                    Add
                     |
                    Pow--Constant[0.5]

    i.e. |a + bi| is computed directly from the two real inputs of Complex as sqrt(a^2 + b^2).
    """
    enabled = True

    def pattern(self):
        # Match ComplexAbs fed directly (input 0) by a Complex node.
        return dict(
            nodes=[
                ('complex', dict(op='Complex')),
                ('abs', dict(op='ComplexAbs')),
            ],
            edges=[
                ('complex', 'abs', {'in': 0}),
            ])

    def replace_sub_graph(self, graph: Graph, match: [dict, SubgraphMatch]):
        cmp = match['complex']
        complex_abs = match['abs']
        complex_abs_name = complex_abs.soft_get('name', complex_abs.id)

        # Use the numeric type requested on the command line for the Pow exponents.
        power_type = data_type_str_to_np(graph.graph['cmd_params'].data_type)

        # a ** 2 and b ** 2 for the real and imaginary inputs of Complex.
        pow0 = create_op_with_const_inputs(graph, Pow, {1: power_type(2.0)},
                                           {'name': complex_abs_name + '/real_part_squared'})
        pow1 = create_op_with_const_inputs(graph, Pow, {1: power_type(2.0)},
                                           {'name': complex_abs_name + '/imag_part_squared'})

        # Re-route the two Complex inputs into the squaring nodes, bypassing Complex entirely.
        cmp.in_port(0).get_connection().set_destination(pow0.in_port(0))
        cmp.in_port(1).get_connection().set_destination(pow1.in_port(0))

        # a^2 + b^2, then sqrt expressed as (...) ** 0.5.
        add = Add(graph, {'name': complex_abs_name + '/squared_abs'}).create_node([pow0, pow1])
        sqrt = create_op_with_const_inputs(graph, Pow, {1: power_type(0.5)}, {})
        add.out_port(0).connect(sqrt.in_port(0))

        complex_abs.out_port(0).get_connection().set_source(sqrt.out_port(0))

        rename_nodes([(complex_abs, complex_abs_name + '/to_be_removed'), (sqrt, complex_abs_name)])
27
model-optimizer/extensions/front/tf/CorrectRollAxes.py
Normal file
27
model-optimizer/extensions/front/tf/CorrectRollAxes.py
Normal file
@ -0,0 +1,27 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.front.common.replacement import FrontReplacementSubgraph
|
||||
from mo.front.tf.graph_utils import add_constant_to_negative_values
|
||||
from mo.graph.graph import Graph
|
||||
|
||||
|
||||
class CorrectRollAxes(FrontReplacementSubgraph):
    """
    Fixes negative 'axes' of Roll nodes whose input rank grew by one after SSliceComplex.

    SSliceComplex removes two StridedSlice nodes and a Complex node; a Roll that used to consume
    the complex tensor now consumes a real tensor with an extra trailing dimension of size 2
    (real/imag parts). Negative axis indices must therefore be shifted by -1 to keep pointing at
    the same logical dimensions.
    """
    enabled = True

    def run_after(self):
        from extensions.front.tf.SSliceComplex import SSliceComplex
        return [SSliceComplex]

    def find_and_replace_pattern(self, graph: Graph):
        # Only Roll nodes flagged by SSliceComplex need the correction.
        for roll_node in graph.get_op_nodes(op='Roll', input_rank_changed=True):
            # Input port 2 of Roll holds the axes; shift each negative axis by -1.
            add_constant_to_negative_values(roll_node, 2, int64_array(-1))
            # Drop the marker so the node is not processed twice.
            del roll_node['input_rank_changed']
@ -765,6 +765,7 @@ class ObjectDetectionAPIPreprocessor2Replacement(FrontReplacementFromConfigFileG
|
||||
else: # case 1
|
||||
# change output of the end_node to be produced with the last preprocessing op
|
||||
end_node.out_port(0).get_connection().set_source(pre_processing_ops[-1][0].out_port(0))
|
||||
start_node.in_port(0).disconnect()
|
||||
else: # simply remove the nodes in between start_node and end_node (including them). Case 3 and 6
|
||||
end_node.out_port(0).get_connection().set_source(start_node.in_port(0).get_source())
|
||||
|
||||
|
73
model-optimizer/extensions/front/tf/RollRealImagPack.py
Normal file
73
model-optimizer/extensions/front/tf/RollRealImagPack.py
Normal file
@ -0,0 +1,73 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.front.common.replacement import FrontReplacementSubgraph
|
||||
from mo.front.subgraph_matcher import SubgraphMatch
|
||||
from mo.front.tf.graph_utils import add_constant_to_negative_values
|
||||
from mo.graph.graph import Graph
|
||||
|
||||
|
||||
class RollRealImagPack(FrontReplacementSubgraph):
    """
    Some TF models apply Roll to complex data as a part of the sub-graph

        input  shift  axes
          |      |      |
          ---------------
               Roll
                |
          ---------------
          |             |
         Real         Imag
          |             |
          -----   ------
              |   |
              Pack
                |
              SomeOp

    Since the complex tensor is represented as a real tensor with a trailing real/imag dimension,
    the Real/Imag/Pack trio is an identity and can be removed, leaving Roll feeding SomeOp directly.
    After the removal, negative Roll axes must be decremented by 1 because the real representation
    has one extra trailing dimension.
    """
    enabled = True

    def run_after(self):
        from extensions.front.tf.SSliceComplex import SSliceComplex
        return [SSliceComplex]

    def run_before(self):
        from extensions.front.Pack import Pack
        return [Pack]

    def pattern(self):
        # Roll whose output goes through Real and Imag into a two-input Pack.
        return dict(
            nodes=[
                ('unroll', dict(op='Roll')),
                ('real', dict(op='Real')),
                ('imag', dict(op='Imag')),
                ('pack', dict(op='Pack')),
            ],
            edges=[
                ('unroll', 'real', {'in': 0}),
                ('unroll', 'imag', {'in': 0}),
                ('real', 'pack', {'in': 0}),
                ('imag', 'pack', {'in': 1}),
            ])

    def replace_sub_graph(self, graph: Graph, match: [dict, SubgraphMatch]):
        roll_node = match['unroll']
        # The Roll input is real now: shift every negative axis (input port 2) by -1.
        add_constant_to_negative_values(roll_node, 2, int64_array(-1))
        # Bypass Real/Imag/Pack: consumers of Pack now read directly from Roll.
        match['pack'].out_port(0).get_connection().set_source(roll_node.out_port(0))
        graph.remove_nodes_from([match['real'].id, match['imag'].id])
70
model-optimizer/extensions/front/tf/SSliceComplex.py
Normal file
70
model-optimizer/extensions/front/tf/SSliceComplex.py
Normal file
@ -0,0 +1,70 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
import logging as log
|
||||
|
||||
from mo.front.common.replacement import FrontReplacementSubgraph
|
||||
from mo.front.subgraph_matcher import SubgraphMatch
|
||||
from mo.graph.graph import Graph
|
||||
|
||||
|
||||
class SSliceComplex(FrontReplacementSubgraph):
    """
    Some TF models contain the sub-graph

              SomeOp
                 |
        ------------------
        |                |
     StridedSlice  StridedSlice
        |                |
        ------------------
             Complex
                |
                |    other inputs
                |     | ...  |
             -------------------
                   SomeOp1

    Here SomeOp is some node with real output and with the shape [N_0, ..., N_{r - 1}, 2], and
    StridedSlice nodes have output shapes [N_0, ..., N_{r - 1}].

    But MO and Inference Engine do not support complex tensors. Hence, we need to replace this
    sub-graph with

        SomeOp   other inputs
           |      | ...  |
        -------------------
              SomeOp1

    After this transformation we need to mark SomeOp1 operation that its input rank has changed
    because its inputs/attributes should probably be updated. Currently we have such a case for
    a Roll operation.
    """
    enabled = True

    def pattern(self):
        # A Complex node fed by two StridedSlice nodes (real part into input 0, imag into input 1).
        return dict(
            nodes=[
                ('strided_slice_real', dict(op='StridedSlice')),
                ('strided_slice_imag', dict(op='StridedSlice')),
                ('complex', dict(op='Complex')),
            ],
            edges=[
                ('strided_slice_real', 'complex', {'in': 0}),
                ('strided_slice_imag', 'complex', {'in': 1}),
            ])

    def replace_sub_graph(self, graph: Graph, match: [dict, SubgraphMatch]):
        strided_slice_real = match['strided_slice_real']
        strided_slice_imag = match['strided_slice_imag']

        # The replacement is only valid when both slices read the same tensor; otherwise the
        # matched sub-graph is not the complex-unpacking idiom and must be left untouched.
        # NOTE(review): the slice indices themselves are not checked here — presumably real is
        # element 0 and imag is element 1 of the trailing dimension; verify against the model.
        real_input = strided_slice_real.in_port(0).get_source().node
        imag_input = strided_slice_imag.in_port(0).get_source().node
        if real_input.id != imag_input.id:
            log.debug('The pattern does not correspond to operation for complex tensor. Different inputs.')
            return

        complex_node = match['complex']
        # Flag every consumer of Complex: their input rank grows by one (the trailing real/imag
        # dimension of SomeOp's output) and downstream passes may need to adjust attributes.
        for dst in complex_node.out_port(0).get_connection().get_destinations():
            after_complex_node = dst.node
            after_complex_node['input_rank_changed'] = True
        # Bypass the StridedSlice/Complex trio: consumers read SomeOp's output directly.
        complex_node.out_port(0).get_connection().set_source(strided_slice_real.in_port(0).get_source())
38
model-optimizer/extensions/front/tf/TFFFTToDFT.py
Normal file
38
model-optimizer/extensions/front/tf/TFFFTToDFT.py
Normal file
@ -0,0 +1,38 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
from extensions.ops.dft import DFT, IDFT
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.front.common.replacement import FrontReplacementSubgraph
|
||||
from mo.front.tf.graph_utils import create_op_with_const_inputs
|
||||
from mo.graph.graph import Graph, rename_nodes
|
||||
|
||||
|
||||
class TFFFTToDFT(FrontReplacementSubgraph):
    """
    This transformation converts the operation TFFFT into OpenVINO DFT (if the attribute
    'is_inverse' is False), or into OpenVINO IDFT (otherwise).
    """
    enabled = True

    def run_after(self):
        from extensions.front.tf.RollRealImagPack import RollRealImagPack
        return [RollRealImagPack]

    def find_and_replace_pattern(self, graph: Graph):
        for fft_node in graph.get_op_nodes(op='TFFFT'):
            fft_name = fft_node.soft_get('name', fft_node.id)

            # Transform the last 'num_of_dimensions' complex axes, e.g. [-2, -1] for FFT2D.
            dims = fft_node.soft_get('num_of_dimensions', 1)
            transformed_axes = int64_array(range(-dims, 0))
            # Forward transform maps to DFT, inverse to IDFT.
            dft_cls = IDFT if fft_node.soft_get('is_inverse', False) else DFT
            dft_node = create_op_with_const_inputs(graph, dft_cls, {1: transformed_axes},
                                                   {'in_ports_count': 2},
                                                   fft_node.in_port(0).get_source().node)

            fft_node.out_port(0).get_connection().set_source(dft_node.out_port(0))

            rename_nodes([(fft_node, fft_name + '/to_be_removed'), (dft_node, fft_name)])

            # In NHWC graphs the layout passes must insert transposes around the DFT later.
            if graph.graph['layout'] == 'NHWC':
                dft_node['need_insert_transposes_for_dft'] = True
71
model-optimizer/extensions/front/tf/fft_ext.py
Normal file
71
model-optimizer/extensions/front/tf/fft_ext.py
Normal file
@ -0,0 +1,71 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from extensions.ops.TFFFT import TFFFT
|
||||
from mo.front.extractor import FrontExtractorOp
|
||||
|
||||
|
||||
class FFT1DOpFrontExtractor(FrontExtractorOp):
    """Maps the TF 'FFT' operation onto the internal TFFFT op (1-D forward transform)."""
    op = 'FFT'
    enabled = True

    @classmethod
    def extract(cls, node):
        TFFFT.update_node_stat(node, {'num_of_dimensions': 1, 'is_inverse': False})
        return cls.enabled
||||
class FFT2DOpFrontExtractor(FrontExtractorOp):
    """Maps the TF 'FFT2D' operation onto the internal TFFFT op (2-D forward transform)."""
    op = 'FFT2D'
    enabled = True

    @classmethod
    def extract(cls, node):
        TFFFT.update_node_stat(node, {'num_of_dimensions': 2, 'is_inverse': False})
        return cls.enabled
||||
class FFT3DOpFrontExtractor(FrontExtractorOp):
    """Maps the TF 'FFT3D' operation onto the internal TFFFT op (3-D forward transform)."""
    op = 'FFT3D'
    enabled = True

    @classmethod
    def extract(cls, node):
        TFFFT.update_node_stat(node, {'num_of_dimensions': 3, 'is_inverse': False})
        return cls.enabled
||||
class IFFT1DOpFrontExtractor(FrontExtractorOp):
    """Maps the TF 'IFFT' operation onto the internal TFFFT op (1-D inverse transform)."""
    op = 'IFFT'
    enabled = True

    @classmethod
    def extract(cls, node):
        TFFFT.update_node_stat(node, {'num_of_dimensions': 1, 'is_inverse': True})
        return cls.enabled
||||
class IFFT2DOpFrontExtractor(FrontExtractorOp):
    """Maps the TF 'IFFT2D' operation onto the internal TFFFT op (2-D inverse transform)."""
    op = 'IFFT2D'
    enabled = True

    @classmethod
    def extract(cls, node):
        TFFFT.update_node_stat(node, {'num_of_dimensions': 2, 'is_inverse': True})
        return cls.enabled
||||
class IFFT3DOpFrontExtractor(FrontExtractorOp):
    """Maps the TF 'IFFT3D' operation onto the internal TFFFT op (3-D inverse transform)."""
    op = 'IFFT3D'
    enabled = True

    @classmethod
    def extract(cls, node):
        TFFFT.update_node_stat(node, {'num_of_dimensions': 3, 'is_inverse': True})
        return cls.enabled
@ -1,10 +1,12 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import numpy as np
|
||||
|
||||
from extensions.middle.pass_separator import PostMiddleStart
|
||||
from extensions.ops.transpose import Transpose
|
||||
|
||||
from mo.graph.graph import Graph, Node
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.graph.graph import Graph, Node, Port
|
||||
from mo.middle.replacement import MiddleReplacementPattern
|
||||
from mo.ops.op import PermuteAttrs
|
||||
|
||||
@ -159,3 +161,28 @@ def mark_as_correct_data_layout(node: Node):
|
||||
|
||||
for ind, port in node.out_ports().items():
|
||||
mark_output_as_in_correct_layout(node, ind)
|
||||
|
||||
|
||||
def insert_transpose(graph: Graph, input_port: Port, before_input=True):
    """
    Inserts a Transpose node on the connection attached to 'input_port' for tensors of rank > 3.

    :param graph: graph to modify
    :param input_port: port whose connection gets the Transpose inserted
    :param before_input: if True, permute NC... -> N...C (channels-last); if False,
                         permute N...C -> NC... (channels-first)
    """
    # Local import to avoid a circular dependency at module load time.
    from mo.front.tf.graph_utils import create_op_with_const_inputs

    input_rank = len(input_port.data.get_shape())
    # Rank <= 3 tensors are left untouched.
    if input_rank > 3:
        if before_input:
            # [0, 2, 3, ..., r-1, 1]: move the channel axis (1) to the end.
            axis_order = np.concatenate((int64_array([0]),
                                         int64_array(list(range(2, input_rank))),
                                         int64_array([1])))
            source_node = input_port.get_source().node
            transpose_name = source_node.soft_get('name', source_node.id) + '/TransposeToNHWC'
        else:
            # [0, r-1, 1, 2, ..., r-2]: move the last axis to the channel position.
            axis_order = np.concatenate(
                (int64_array([0]),
                 int64_array([input_rank - 1]),
                 int64_array(list(range(1, input_rank - 1)))))
            transpose_name = input_port.node.soft_get('name', input_port.node.id) + '/TransposeToNCHW'
            # The consumer's shape changes, so force its shape re-inference.
            input_port.node['need_shape_inference'] = True
            input_port.node['override_output_shape'] = True
        transpose = create_op_with_const_inputs(graph, Transpose, {1: axis_order}, {'name': transpose_name})
        input_port.get_connection().insert_node(transpose)
        # The new node also needs its output shape inferred.
        transpose['need_shape_inference'] = True
        transpose['override_output_shape'] = True
|
31
model-optimizer/extensions/ops/TFFFT.py
Normal file
31
model-optimizer/extensions/ops/TFFFT.py
Normal file
@ -0,0 +1,31 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from mo.graph.graph import Graph
|
||||
from mo.ops.op import Op
|
||||
|
||||
|
||||
class TFFFT(Op):
    """
    This operation is intended to read TF operations FFT, FFT2D, FFT3D, IFFT, IFFT2D, IFFT3D.
    The operation TFFFT has two attributes: an integer attribute num_of_dimensions and a boolean
    attribute is_inverse.

    If an operation to read is FFT, FFT2D, or FFT3D, then the attribute 'is_inverse' is False,
    and True otherwise.
    The attribute 'num_of_dimensions' is equal to number of transformed axes, i.e. 1 for FFT and
    IFFT, 2 for FFT2D and IFFT2D, 3 for FFT3D and IFFT3D.

    The transformation TFFFTToDFT converts the operation TFFFT into MO DFT (if the attribute
    'is_inverse' is False), or into MO IDFT (otherwise).
    """
    op = 'TFFFT'
    enabled = False

    def __init__(self, graph: Graph, attrs: dict):
        mandatory_props = {
            'op': self.op,
            'out_ports_count': 1,
            'in_ports_count': 1,
        }
        # Both attributes are required by TFFFTToDFT; fail early if the extractor omitted them.
        assert 'is_inverse' in attrs, 'Attribute is_inverse is not given for the operation TFFFT.'
        assert 'num_of_dimensions' in attrs, 'Attribute num_of_dimensions is not given for the operation TFFFT.'
        super().__init__(graph, mandatory_props, attrs)
127
model-optimizer/extensions/ops/dft.py
Normal file
127
model-optimizer/extensions/ops/dft.py
Normal file
@ -0,0 +1,127 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.graph.graph import Node, Graph
|
||||
from mo.ops.op import Op
|
||||
|
||||
|
||||
class FFTBase(Op):
    """
    Common base for the OpenVINO opset7 DFT and IDFT operations: holds the shared constructor
    properties and the shape inference with axes/signal_size canonicalization.
    """
    enabled = False
    op = None
    version = 'opset7'

    def __init__(self, graph: Graph, attrs: dict):
        mandatory_props = {
            'out_ports_count': 1,
            'in_ports_count': 3,
            'version': self.version,
            'infer': self.infer
        }
        super().__init__(graph, mandatory_props, attrs)

    def infer(self, node: Node):
        """Validates (I)DFT inputs and sets the output shape (input shape, with transformed
        dimensions overridden by signal_size when input 2 is connected)."""
        # Bug fix: was node.soft_get(node.name, node.id) — the attribute name must be the
        # string 'name', as used everywhere else in this file.
        node_name = node.soft_get('name', node.id)
        assert len([p for p in node.in_ports().values() if not p.disconnected()]) in [2, 3], \
            '(I)DFT node {} must have 2 or 3 inputs'.format(node_name)

        src_shape = node.in_port(0).data.get_shape()
        assert src_shape is not None, 'The input data shape of (I)DFT node {} must not be None'.format(node_name)
        # Complex data is represented as a real tensor with a trailing dimension of size 2.
        assert src_shape[-1] == 2, \
            'The last dimension of input shape of (I)DFT node {} should be equal to 2'.format(node_name)

        input_rank = len(src_shape)
        assert input_rank >= 2, 'The input rank of (I)DFT node {} should be greater or equal to 2'.format(node_name)

        axes = FFTBase.get_axes(node)
        assert input_rank >= len(axes) + 1, \
            'The input rank must be greater than number of (I)DFT node {} axes'.format(node_name)
        # Map negative axes to non-negative ones in the complex-tensor interpretation.
        axes = FFTBase.canonicalize_axes(axes, input_rank)
        # The trailing real/imag dimension is not a data axis and cannot be transformed.
        assert (input_rank - 1) not in axes, '(I)DFT node {} axes cannot contain the last axis'.format(node_name)
        assert len(set(axes)) == len(axes), '(I)DFT node {} axes must be unique.'.format(node_name)

        output_shape = int64_array(src_shape)
        # Optional input 2 (signal_size) overrides the sizes of the transformed dimensions.
        if node.is_in_port_connected(2):
            signal_size = FFTBase.get_signal_size(node)
            signal_size = FFTBase.canonicalize_signal_size(signal_size, axes, src_shape)
            output_shape[axes] = signal_size

        node.out_port(0).data.set_shape(output_shape)

    @staticmethod
    def canonicalize_axes(axes, input_rank):
        """
        FFT operation supports for negative axes to transform. More precisely, according to the
        FFT operation specification, axes should be integers from -(r - 1) to (r - 2) inclusively,
        where r = rank(data). A negative axis 'a' is interpreted as an axis 'r - 1 + a'. The reason
        is the following: real input tensor of the shape [n_0, ..., n_{r - 1}, 2] is interpreted as
        a complex tensor with the shape [n_0, ..., n_{r - 1}]. Hence, we need to 'canonicalize'
        axes using the formula 'r - 1 + a'.

        :param axes: axes to canonicalize
        :param input_rank: input tensor rank
        :return: canonicalized axes
        """
        result = axes.copy()
        for i, axis in enumerate(axes):
            if axis < 0:
                result[i] = axis + input_rank - 1
        return result

    @staticmethod
    def canonicalize_signal_size(signal_size, axes, input_shape):
        """Replaces -1 entries of signal_size with the current size of the corresponding axis."""
        result = signal_size.copy()
        for i, axis in enumerate(axes):
            size = signal_size[i]
            if size == -1:
                result[i] = input_shape[axis]
        return result

    @staticmethod
    def get_axes(node: Node):
        """Returns the constant value of input 1 (axes); asserts it is a compile-time constant."""
        axes = node.in_port(1).get_source().data.get_value()
        node_name = node.soft_get('name', node.id)
        assert axes is not None, 'The input with axes is not constant for node {}'.format(node_name)
        return int64_array(axes)

    @staticmethod
    def get_signal_size(node: Node):
        """Returns signal_size: the constant value of input 2 when connected, otherwise the
        current sizes of the transformed axes taken from the input shape."""
        src_shape = node.in_port(0).data.get_shape()
        assert src_shape is not None
        input_rank = len(src_shape)
        if node.is_in_port_connected(2):
            signal_size = node.in_port(2).get_source().data.get_value()
        else:
            # Drop the trailing real/imag dimension, then pick the sizes of the transformed axes.
            axes = FFTBase.get_axes(node)
            signal_size = [src_shape[: input_rank - 1][a] for a in axes]

        node_name = node.soft_get('name', node.id)
        assert signal_size is not None, 'The input with signal_size is not constant for node {}'.format(node_name)

        return int64_array(signal_size)
|
||||
class DFT(FFTBase):
    """Forward Discrete Fourier Transform operation (opset7 DFT)."""
    op = 'DFT'
    enabled = False

    def __init__(self, graph: Graph, attrs: dict):
        # Fix 'type' and 'op' to DFT; caller attrs may extend/override the rest.
        props = dict(type=self.op, op=self.op)
        props.update(attrs)
        super().__init__(graph, props)
||||
class IDFT(FFTBase):
    """Inverse Discrete Fourier Transform operation (opset7 IDFT)."""
    op = 'IDFT'
    enabled = False

    def __init__(self, graph: Graph, attrs: dict):
        # Fix 'type' and 'op' to IDFT; caller attrs may extend/override the rest.
        props = dict(type=self.op, op=self.op)
        props.update(attrs)
        super().__init__(graph, props)
42
model-optimizer/extensions/ops/mxfft.py
Normal file
42
model-optimizer/extensions/ops/mxfft.py
Normal file
@ -0,0 +1,42 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.graph.graph import Graph, Node
|
||||
from mo.ops.op import Op
|
||||
|
||||
|
||||
class MXFFT(Op):
    """
    Internal op representing the MxNet operations FFT and IFFT.

    It carries a single boolean attribute 'is_inverse': False when the original op is FFT,
    True when it is IFFT. The MXFFTToDFT transformation later lowers MXFFT into MO DFT
    (forward) or MO IDFT (inverse).
    """
    op = 'MXFFT'
    enabled = False

    def __init__(self, graph: Graph, attrs: dict):
        mandatory_props = {
            'op': self.op,
            'out_ports_count': 1,
            'in_ports_count': 1,
            'infer': self.infer
        }
        assert 'is_inverse' in attrs, 'Attribute is_inverse is not given for the operation MXFFT.'
        super().__init__(graph, mandatory_props, attrs)

    def infer(self, node: Node):
        """Output shape equals the input shape with the last dimension halved (inverse,
        interleaved complex -> real) or doubled (forward, real -> interleaved complex)."""
        node_name = node.soft_get('name', node.id)
        input_shape = node.in_port(0).data.get_shape()
        assert input_shape is not None, 'Input shape of MXFFT node {} must not be None'.format(node_name)
        output_shape = input_shape.copy()
        if node.soft_get('is_inverse', False):
            output_shape[-1] = output_shape[-1] // 2
        else:
            output_shape[-1] = output_shape[-1] * 2
        node.out_port(0).data.set_shape(int64_array(output_shape))
@ -8,7 +8,8 @@ from mo.ops.op import Op
|
||||
|
||||
|
||||
class Roll(Op):
|
||||
""" Roll operation that shifts elements of a tensor along specified axes.
|
||||
"""
|
||||
Roll operation that shifts elements of a tensor along specified axes.
|
||||
"""
|
||||
op = 'Roll'
|
||||
enabled = False
|
||||
@ -26,7 +27,6 @@ class Roll(Op):
|
||||
|
||||
class AttributedRoll(Op):
|
||||
""" Roll operation that shifts elements of a tensor along specified axes.
|
||||
|
||||
This operation uses the same semantics as Roll but with shift and axes specified as attributes.
|
||||
Shift and axes are specified as attributes in MxNet.
|
||||
"""
|
||||
|
@ -10,6 +10,7 @@ import numpy as np
|
||||
from extensions.middle.InsertLayoutPropagationTransposes import mark_input_as_in_correct_layout, \
|
||||
mark_output_as_in_correct_layout
|
||||
from extensions.ops.activation_ops import Sigmoid
|
||||
from extensions.ops.elementwise import Add, Less, Mul
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.graph.graph import Node, Graph
|
||||
from mo.ops.concat import Concat
|
||||
@ -185,3 +186,31 @@ def add_activation_function_after_node(graph: Graph, node: Node, activation_func
|
||||
else:
|
||||
raise Error('Unknown post-processing activation function "{}".'.format(activation_function))
|
||||
return activation_node
|
||||
|
||||
|
||||
def add_constant_to_negative_values(node: Node, port_idx: int, added_value: np.array):
|
||||
"""
|
||||
This function adds the given values to negative elements of value from the given input port.
|
||||
:param node: node with corrected values in the input port port_idx
|
||||
:param port_idx: input port index for negative values
|
||||
:param added_value: the value to add
|
||||
:return: None
|
||||
"""
|
||||
negative_values_source = node.in_port(port_idx).get_source()
|
||||
negative_values_node = node.in_port(port_idx).get_source().node
|
||||
negative_values_node_name = negative_values_node.soft_get('name', negative_values_node.id)
|
||||
|
||||
graph = node.graph
|
||||
|
||||
less_node = create_op_with_const_inputs(graph, Less,
|
||||
{1: np.array(0, dtype=added_value.dtype)},
|
||||
{'name': negative_values_node_name + '/Less'})
|
||||
mul_node = create_op_with_const_inputs(graph, Mul, {1: added_value}, {'name': negative_values_node_name + '/Mul'})
|
||||
|
||||
node.in_port(port_idx).get_connection().set_destination(less_node.in_port(0))
|
||||
less_node.out_port(0).connect(mul_node.in_port(0))
|
||||
|
||||
add_node = Add(graph, {}).create_node()
|
||||
mul_node.out_port(0).connect(add_node.in_port(1))
|
||||
negative_values_source.connect(add_node.in_port(0))
|
||||
add_node.out_port(0).connect(node.in_port(port_idx))
|
||||
|
@ -107,9 +107,40 @@ def _fuse_mul(graph: Graph, node: Node, fuse_nodes: list, backward: bool = True)
|
||||
w_mul = node.copy_node({'name': mul_name, 'in_ports_count': len(node.in_ports()),
|
||||
'out_ports_count': len(node.out_ports()), 'can_be_fused': False})
|
||||
w_mul.in_port(const_port.idx).connect(mul_const.out_port(0))
|
||||
w_const = weights_port.get_source()
|
||||
weights_port.get_connection().set_source(w_mul.out_port(0))
|
||||
w_const.connect(w_mul.in_port(tensor_port.idx))
|
||||
|
||||
r"""
|
||||
In this transformation we remove Mul or Div node (node) that goes after fuse_node and
|
||||
create new Mul node (w_mul), connect it with the corrected const value (mul_const) and
|
||||
insert w_mul before the fuse_node. So the input data of fuse_node becomes different.
|
||||
For this reason we need to use set_destination from previous operation to w_mul which
|
||||
guaranties that data node will be reused on previous_op -> w_mul connection and its
|
||||
attributes won't be copied to the data node of w_mul -> fuse_node connection.
|
||||
|
||||
BEFORE AFTER
|
||||
|
||||
previous_op mul_const
|
||||
\ /
|
||||
previous_op w_mul
|
||||
| |
|
||||
fuse_node const fuse_node
|
||||
\ / |
|
||||
node next_op
|
||||
|
|
||||
next_op
|
||||
"""
|
||||
weights_port.get_connection().set_destination(w_mul.in_port(tensor_port.idx))
|
||||
w_mul.out_port(0).connect(weights_port)
|
||||
|
||||
# As fusing is applied to convolutions it is important to keep 'permutation' and 'input_permutation' attributes
|
||||
# which were obtained from original model. These attributes are stored on the incoming edge to the operation
|
||||
# node and during the reconnection they are moved to the new connection. But during reconnection in this
|
||||
# transformation these attributes are moved to the previous node. So we need manually set them at the
|
||||
# incoming edge to fuse_node.
|
||||
in_edge = w_mul.in_edge(tensor_port.idx)
|
||||
if 'permutation' in in_edge:
|
||||
fuse_node.in_edge(weights_port.idx)['permutation'] = in_edge['permutation']
|
||||
if 'input_permutation' in in_edge:
|
||||
fuse_node.in_edge(weights_port.idx)['input_permutation'] = in_edge['input_permutation']
|
||||
|
||||
# If we fuse in backward direction we should multiply biases if they exists
|
||||
if backward and len(fuse_node.in_ports()) == 3 and not fuse_node.in_port(2).disconnected() and \
|
||||
|
@ -133,12 +133,18 @@ def compare_graphs(graph: Graph, graph_ref: Graph, last_node: str, last_node_ref
|
||||
if in_node.id not in checked_nodes_ref and in_node.id not in q_ref:
|
||||
q_ref.append(in_node.id)
|
||||
|
||||
out_nodes = node.out_nodes().values() if node.kind == 'op' else sorted_by_name(node.out_nodes())
|
||||
if node.kind == 'op':
|
||||
out_nodes = sorted_by_name([Node(graph, v) for v, _ in node.get_outputs()])
|
||||
else:
|
||||
out_nodes = sorted_by_name(node.out_nodes())
|
||||
for out_node in out_nodes:
|
||||
if out_node.id not in checked_nodes and out_node.id not in q:
|
||||
q.append(out_node.id)
|
||||
|
||||
out_nodes = node_ref.out_nodes().values() if node_ref.kind == 'op' else sorted_by_name(node_ref.out_nodes())
|
||||
if node_ref.kind == 'op':
|
||||
out_nodes = sorted_by_name([Node(graph_ref, v) for v, _ in node_ref.get_outputs()])
|
||||
else:
|
||||
out_nodes = sorted_by_name(node_ref.out_nodes())
|
||||
for out_node in out_nodes:
|
||||
if out_node.id not in checked_nodes_ref and out_node.id not in q_ref:
|
||||
q_ref.append(out_node.id)
|
||||
|
@ -11,6 +11,7 @@ from extensions.middle.FakeSplitOutputs import AddFakeOutputsToSplit
|
||||
from extensions.ops.Cast import Cast
|
||||
from extensions.ops.ReduceOps import ReduceOp
|
||||
from extensions.ops.activation_ops import Activation
|
||||
from extensions.ops.dft import FFTBase
|
||||
from extensions.ops.elementwise import Elementwise, UnaryElementwise, LogicalElementwise, BiasAdd, Div, Mul, Pow, Sub
|
||||
from extensions.ops.embedding_bag import EmbeddingBagBase
|
||||
from extensions.ops.loop import Loop
|
||||
@ -60,7 +61,7 @@ def collect_ops(path: str):
|
||||
import_by_path(os.path.join(path, 'mo', 'ops'), ['mo', 'ops'])
|
||||
import_by_path(os.path.join(path, 'extensions', 'ops'), ['extensions', 'ops'])
|
||||
update_registration(classes=[Op, Activation, Elementwise, UnaryElementwise, LogicalElementwise,
|
||||
EmbeddingBagBase, ReduceOp, Scatter, ScatterNDBase],
|
||||
EmbeddingBagBase, ReduceOp, Scatter, ScatterNDBase, FFTBase],
|
||||
enabled_transforms=[], disabled_transforms=[])
|
||||
|
||||
|
||||
|
@ -0,0 +1,49 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import unittest
|
||||
|
||||
from extensions.back.ReverseInputChannels import ReverseChannelsPropagationUp
|
||||
from mo.graph.graph import Node, Graph
|
||||
from unit_tests.utils.graph import build_graph, result, connect, regular_op_with_shaped_data
|
||||
|
||||
nodes = {
|
||||
**regular_op_with_shaped_data('placeholder1', [1, 3, 10, 10], {'type': 'Parameter'}),
|
||||
**regular_op_with_shaped_data('placeholder2', [1, 1, 1, 1], {'type': 'Parameter'}),
|
||||
|
||||
**regular_op_with_shaped_data('mul', [1, 3, 10, 10], {'type': 'Multiply'}),
|
||||
**regular_op_with_shaped_data('reverse_channels', [1, 3, 10, 10], {'type': 'ReverseChannels', 'axis': 1}),
|
||||
|
||||
**result('result'),
|
||||
}
|
||||
|
||||
|
||||
class ReverseInputChannelsTest(unittest.TestCase):
|
||||
def check_graph_attrs(self, graph: Graph, parameter_node_names: list):
|
||||
for node in graph.get_op_nodes():
|
||||
if node.soft_get('name') in parameter_node_names:
|
||||
self.assertTrue(node.soft_get('type') == 'Parameter')
|
||||
out_node = node.out_node(0)
|
||||
self.assertTrue(out_node['fw_tensor_debug_info'] == ['fw_name', 0])
|
||||
else:
|
||||
for idx in node.out_nodes():
|
||||
out_node = node.out_node(idx)
|
||||
self.assertFalse('fw_tensor_debug_info' in out_node)
|
||||
|
||||
def set_graph_attrs(self, graph: Graph, parameter_node_names: list):
|
||||
for node in graph.get_op_nodes():
|
||||
if node.soft_get('name') in parameter_node_names:
|
||||
self.assertTrue(node.soft_get('type') == 'Parameter')
|
||||
out_node = node.out_node(0)
|
||||
out_node['fw_tensor_debug_info'] = ['fw_name', 0]
|
||||
|
||||
def test_lift_up_through_eltwise(self):
|
||||
graph = build_graph(nodes, [*connect('placeholder1', '0:mul'), *connect('placeholder2', '1:mul'),
|
||||
*connect('mul', 'reverse_channels'), *connect('reverse_channels', 'result')])
|
||||
self.set_graph_attrs(graph, ['placeholder1', 'placeholder2'])
|
||||
|
||||
node = Node(graph, 'mul')
|
||||
reverse_channels = Node(graph, 'reverse_channels')
|
||||
|
||||
ReverseChannelsPropagationUp.lift_up_through_eltwise(node, reverse_channels)
|
||||
self.check_graph_attrs(graph, ['placeholder1', 'placeholder2'])
|
@ -0,0 +1,84 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import unittest
|
||||
|
||||
from extensions.back.TransposeDFT import TransposeDFT
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.utils.ir_engine.compare_graphs import compare_graphs
|
||||
from unit_tests.utils.graph import build_graph, result, regular_op_with_shaped_data, valued_const_with_data, connect, \
|
||||
regular_op_with_empty_data
|
||||
|
||||
dft_graph_node_attrs = {
|
||||
**regular_op_with_shaped_data('placeholder', [8, 2, 40, 56], {'type': 'Parameter', 'op': 'Parameter'}),
|
||||
**valued_const_with_data('axes', int64_array([-2, -1])),
|
||||
**regular_op_with_shaped_data('dft', [8, 2, 40, 56], {'op': 'DFT', 'need_insert_transposes_for_dft': True}),
|
||||
**regular_op_with_shaped_data('abs', [8, 2, 40, 56], {'type': 'Abs', 'op': 'Abs'}),
|
||||
**result(),
|
||||
}
|
||||
|
||||
dft_graph_edges = [
|
||||
*connect('placeholder', '0:dft'),
|
||||
*connect('axes', '1:dft'),
|
||||
*connect('dft', 'abs'),
|
||||
*connect('abs', 'output'),
|
||||
]
|
||||
|
||||
|
||||
transposed_dft_graph_node_attrs = {
|
||||
**regular_op_with_shaped_data('placeholder', [8, 2, 40, 56], {'type': 'Parameter', 'op': 'Parameter'}),
|
||||
**regular_op_with_empty_data('transpose_before',
|
||||
{'type': 'Transpose', 'op': 'Transpose', 'need_shape_inference': True}),
|
||||
**valued_const_with_data('transpose_before_axis_const', int64_array([0, 2, 3, 1])),
|
||||
**regular_op_with_empty_data('transpose_after',
|
||||
{'type': 'Transpose', 'op': 'Transpose', 'need_shape_inference': True}),
|
||||
**valued_const_with_data('transpose_after_axis_const', int64_array([0, 3, 1, 2])),
|
||||
**valued_const_with_data('dft_axes', int64_array([-2, -1])),
|
||||
**regular_op_with_shaped_data('dft', [8, 2, 40, 56], {'op': 'DFT', 'need_insert_transposes_for_dft': True}),
|
||||
**regular_op_with_shaped_data('abs', [8, 2, 40, 56], {'type': 'Abs', 'op': 'Abs'}),
|
||||
**result(),
|
||||
}
|
||||
|
||||
transposed_dft_graph_edges = [
|
||||
*connect('placeholder', '0:transpose_before'),
|
||||
*connect('transpose_before_axis_const', '1:transpose_before'),
|
||||
*connect('transpose_before', '0:dft'),
|
||||
*connect('dft_axes', '1:dft'),
|
||||
*connect('dft', '0:transpose_after'),
|
||||
*connect('transpose_after_axis_const', '1:transpose_after'),
|
||||
*connect('transpose_after', 'abs'),
|
||||
*connect('abs', 'output'),
|
||||
]
|
||||
|
||||
|
||||
nontransposed_dft_graph_node_attrs = {
|
||||
**regular_op_with_shaped_data('placeholder', [8, 2, 40, 56], {'type': 'Parameter', 'op': 'Parameter'}),
|
||||
**valued_const_with_data('axes', int64_array([-2, -1])),
|
||||
**regular_op_with_shaped_data('dft', [8, 2, 40, 56], {'op': 'DFT'}),
|
||||
**regular_op_with_shaped_data('abs', [8, 2, 40, 56], {'type': 'Abs', 'op': 'Abs'}),
|
||||
**result(),
|
||||
}
|
||||
|
||||
nontransposed_dft_graph_edges = [
|
||||
*connect('placeholder', '0:dft'),
|
||||
*connect('axes', '1:dft'),
|
||||
*connect('dft', 'abs'),
|
||||
*connect('abs', 'output'),
|
||||
]
|
||||
|
||||
|
||||
class TransposeDFTTest(unittest.TestCase):
|
||||
def test_dft_transpose(self):
|
||||
graph = build_graph(nodes_attrs=dft_graph_node_attrs, edges=dft_graph_edges)
|
||||
ref_graph = build_graph(nodes_attrs=transposed_dft_graph_node_attrs, edges=transposed_dft_graph_edges)
|
||||
graph.graph['fw'] = 'tf'
|
||||
TransposeDFT().find_and_replace_pattern(graph)
|
||||
(flag, resp) = compare_graphs(graph, ref_graph, 'output')
|
||||
self.assertTrue(flag, resp)
|
||||
|
||||
def test_dft_nontranspose(self):
|
||||
graph = build_graph(nodes_attrs=nontransposed_dft_graph_node_attrs, edges=nontransposed_dft_graph_edges)
|
||||
ref_graph = build_graph(nodes_attrs=nontransposed_dft_graph_node_attrs, edges=nontransposed_dft_graph_edges)
|
||||
TransposeDFT().find_and_replace_pattern(graph)
|
||||
(flag, resp) = compare_graphs(graph, ref_graph, 'output')
|
||||
self.assertTrue(flag, resp)
|
@ -0,0 +1,191 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
import unittest
|
||||
|
||||
from generator import generator, generate
|
||||
|
||||
from extensions.front.mxnet.MXFFTToDFT import MXFFTToDFT
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.utils.ir_engine.compare_graphs import compare_graphs
|
||||
from unit_tests.utils.graph import build_graph
|
||||
|
||||
|
||||
fft_graph_node_attrs = {
|
||||
'placeholder': {'shape': int64_array([3, 100, 100]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'fft': {'kind': 'op', 'op': 'MXFFT', 'is_inverse': False},
|
||||
'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
|
||||
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
|
||||
}
|
||||
|
||||
fft_graph_edges = [
|
||||
('placeholder', 'fft', {'in': 0}),
|
||||
('fft', 'abs'),
|
||||
('abs', 'output'),
|
||||
]
|
||||
|
||||
|
||||
ref_converted_fft_graph_node_attrs = {
|
||||
'placeholder': {'shape': int64_array([3, 100, 100]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'rank': {'kind': 'op', 'op': 'Rank'},
|
||||
'unsqueeze': {'type': 'Unsqueeze', 'kind': 'op', 'op': 'Unsqueeze'},
|
||||
'unsqueeze_axis': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([1]), 'value': int64_array([-1])
|
||||
},
|
||||
'one': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(1)
|
||||
},
|
||||
'add': {'type': 'Add', 'kind': 'op', 'op': 'Add'},
|
||||
'zero1': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(0)
|
||||
},
|
||||
'broadcast1': {'type': 'Broadcast', 'kind': 'op', 'op': 'Broadcast'},
|
||||
'one2': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(1)
|
||||
},
|
||||
'zero2': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(0)
|
||||
},
|
||||
'scatter': {'type': 'ScatterUpdate', 'kind': 'op', 'op': 'ScatterUpdate'},
|
||||
'pad': {'type': 'Pad', 'kind': 'op', 'op': 'Pad', 'mode': 'constant'},
|
||||
'fft_axes': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([1]), 'value': int64_array([-1])
|
||||
},
|
||||
'fft': {'kind': 'op', 'op': 'DFT', 'type': 'DFT'},
|
||||
'one3': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(1)
|
||||
},
|
||||
'sub': {'type': 'Subtract', 'kind': 'op', 'op': 'Sub'},
|
||||
'zero3': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(0)
|
||||
},
|
||||
'broadcast2': {'type': 'Broadcast', 'kind': 'op', 'op': 'Broadcast'},
|
||||
'm1_2': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([-1, 2])
|
||||
},
|
||||
'concat': {'type': 'Concat', 'kind': 'op', 'op': 'Concat', 'axis': 0},
|
||||
'reshape': {'kind': 'op', 'op': 'Reshape', 'type': 'Reshape'},
|
||||
'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
|
||||
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
|
||||
}
|
||||
|
||||
ref_converted_fft_graph_edges = [
|
||||
('placeholder', 'rank', {'in': 0, 'out': 0}),
|
||||
('placeholder', 'unsqueeze', {'in': 0, 'out': 0}),
|
||||
('unsqueeze_axis', 'unsqueeze', {'in': 1, 'out': 0}),
|
||||
('rank', 'add', {'in': 0, 'out': 0}),
|
||||
('one', 'add', {'in': 1, 'out': 0}),
|
||||
('zero1', 'broadcast1', {'in': 0, 'out': 0}),
|
||||
('add', 'broadcast1', {'in': 1, 'out': 0}),
|
||||
('broadcast1', 'scatter', {'in': 0, 'out': 0}),
|
||||
('rank', 'scatter', {'in': 1, 'out': 0}),
|
||||
('one2', 'scatter', {'in': 2, 'out': 0}),
|
||||
('zero2', 'scatter', {'in': 3, 'out': 0}),
|
||||
('unsqueeze', 'pad', {'in': 0, 'out': 0}),
|
||||
('broadcast1', 'pad', {'in': 1, 'out': 0}),
|
||||
('scatter', 'pad', {'in': 2, 'out': 0}),
|
||||
('pad', 'fft', {'in': 0, 'out': 0}),
|
||||
('fft_axes', 'fft', {'in': 1, 'out': 0}),
|
||||
('rank', 'sub', {'in': 0, 'out': 0}),
|
||||
('one3', 'sub', {'in': 1, 'out': 0}),
|
||||
('zero3', 'broadcast2', {'in': 0, 'out': 0}),
|
||||
('sub', 'broadcast2', {'in': 1, 'out': 0}),
|
||||
('broadcast2', 'concat', {'in': 0, 'out': 0}),
|
||||
('m1_2', 'concat', {'in': 1, 'out': 0}),
|
||||
('fft', 'reshape', {'in': 0, 'out': 0}),
|
||||
('concat', 'reshape', {'in': 1, 'out': 0}),
|
||||
('reshape', 'abs'),
|
||||
('abs', 'output'),
|
||||
]
|
||||
|
||||
|
||||
ref_converted_ifft_graph_node_attrs = {
|
||||
'placeholder': {'shape': int64_array([3, 100, 100]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'rank': {'kind': 'op', 'op': 'Rank'},
|
||||
'subtracted_one': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(1)
|
||||
},
|
||||
'sub': {'type': 'Subtract', 'kind': 'op', 'op': 'Sub'},
|
||||
'broadcast': {'type': 'Broadcast', 'kind': 'op', 'op': 'Broadcast'},
|
||||
'broadcasted_value': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(0)
|
||||
},
|
||||
'new_shape': {'type': 'Concat', 'kind': 'op', 'op': 'Concat', 'axis': 0},
|
||||
'new_shape_const': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([-1, 2])
|
||||
},
|
||||
'reshape': {'kind': 'op', 'op': 'Reshape', 'type': 'Reshape'},
|
||||
'fft': {'kind': 'op', 'op': 'IDFT', 'type': 'IDFT'},
|
||||
'fft_axes': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([1]), 'value': int64_array([-1])
|
||||
},
|
||||
'split': {'kind': 'op', 'op': 'Split', 'type': 'Split', 'num_splits': 2},
|
||||
'split_axes': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(-1)
|
||||
},
|
||||
'squeeze': {'kind': 'op', 'op': 'Squeeze', 'type': 'Squeeze'},
|
||||
'squeeze_axes': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([1]), 'value': int64_array([-1])
|
||||
},
|
||||
'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
|
||||
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
|
||||
}
|
||||
|
||||
ref_converted_ifft_graph_edges = [
|
||||
('placeholder', 'rank', {'out': 0}),
|
||||
('placeholder', 'reshape', {'out': 0}),
|
||||
('rank', 'sub'),
|
||||
('subtracted_one', 'sub'),
|
||||
('broadcasted_value', 'broadcast'),
|
||||
('sub', 'broadcast'),
|
||||
('broadcast', 'new_shape'),
|
||||
('new_shape_const', 'new_shape'),
|
||||
('new_shape', 'reshape'),
|
||||
('reshape', 'fft'),
|
||||
('fft_axes', 'fft'),
|
||||
('fft', 'split'),
|
||||
('split_axes', 'split'),
|
||||
('split', 'squeeze', {'out': 0}),
|
||||
('squeeze_axes', 'squeeze'),
|
||||
('squeeze', 'abs'),
|
||||
('abs', 'output'),
|
||||
]
|
||||
|
||||
|
||||
@generator
|
||||
class MXFFTToDFTTest(unittest.TestCase):
|
||||
@generate(*[int64_array([3, 100, 100, 8]), int64_array([5, 60])])
|
||||
def test_fft_replacement(self, input_shape):
|
||||
graph = build_graph(nodes_attrs=fft_graph_node_attrs,
|
||||
edges=fft_graph_edges,
|
||||
update_attributes={
|
||||
'placeholder': {'shape': input_shape}
|
||||
})
|
||||
graph.stage = 'front'
|
||||
MXFFTToDFT().find_and_replace_pattern(graph)
|
||||
ref_graph = build_graph(nodes_attrs=ref_converted_fft_graph_node_attrs,
|
||||
edges=ref_converted_fft_graph_edges,
|
||||
update_attributes={
|
||||
'placeholder': {'shape': input_shape}
|
||||
})
|
||||
(flag, resp) = compare_graphs(graph, ref_graph, 'output')
|
||||
self.assertTrue(flag, resp)
|
||||
|
||||
@generate(*[int64_array([3, 100, 100, 8]), int64_array([5, 60])])
|
||||
def test_ifft_replacement(self, input_shape):
|
||||
graph = build_graph(nodes_attrs=fft_graph_node_attrs,
|
||||
edges=fft_graph_edges,
|
||||
update_attributes={
|
||||
'placeholder': {'shape': input_shape},
|
||||
'fft': {'is_inverse': True}
|
||||
})
|
||||
graph.stage = 'front'
|
||||
MXFFTToDFT().find_and_replace_pattern(graph)
|
||||
ref_graph = build_graph(nodes_attrs=ref_converted_ifft_graph_node_attrs,
|
||||
edges=ref_converted_ifft_graph_edges,
|
||||
update_attributes={
|
||||
'placeholder': {'shape': input_shape}
|
||||
})
|
||||
(flag, resp) = compare_graphs(graph, ref_graph, 'output')
|
||||
self.assertTrue(flag, resp)
|
@ -0,0 +1,74 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
|
||||
from extensions.front.tf.ComplexAbsAfterComplex import ComplexAbsAfterComplex
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.utils.ir_engine.compare_graphs import compare_graphs
|
||||
from unit_tests.utils.graph import build_graph
|
||||
|
||||
|
||||
graph_node_attrs = {
|
||||
'placeholder_0': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'placeholder_1': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'complex': {'kind': 'op', 'op': 'Complex'},
|
||||
'complex_abs': {'kind': 'op', 'op': 'ComplexAbs'},
|
||||
'relu': {'type': 'ReLU', 'kind': 'op', 'op': 'ReLU'},
|
||||
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
|
||||
}
|
||||
|
||||
graph_edges = [
|
||||
('placeholder_0', 'complex', {'in': 0}),
|
||||
('placeholder_1', 'complex', {'in': 1}),
|
||||
('complex', 'complex_abs', {'in': 0}),
|
||||
('complex_abs', 'relu'),
|
||||
('relu', 'output'),
|
||||
]
|
||||
|
||||
|
||||
ref_graph_node_attrs = {
|
||||
'placeholder_0': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'placeholder_1': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'pow0_const': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': np.float32(2.0)
|
||||
},
|
||||
'pow1_const': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': np.float32(2.0)
|
||||
},
|
||||
'pow0': {'type': 'Power', 'kind': 'op', 'op': 'Pow'},
|
||||
'pow1': {'type': 'Power', 'kind': 'op', 'op': 'Pow'},
|
||||
'add': {'type': 'Add', 'kind': 'op', 'op': 'Add'},
|
||||
'sqrt_const': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': np.float32(0.5)
|
||||
},
|
||||
'sqrt': {'type': 'Power', 'kind': 'op', 'op': 'Pow'},
|
||||
'relu': {'type': 'ReLU', 'kind': 'op', 'op': 'ReLU'},
|
||||
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
|
||||
}
|
||||
|
||||
ref_graph_edges = [
|
||||
('placeholder_0', 'pow0', {'in': 0}),
|
||||
('placeholder_1', 'pow1', {'in': 0}),
|
||||
('pow0_const', 'pow0', {'in': 1}),
|
||||
('pow1_const', 'pow1', {'in': 1}),
|
||||
('pow0', 'add', {'in': 0}),
|
||||
('pow1', 'add', {'in': 1}),
|
||||
('add', 'sqrt', {'in': 0}),
|
||||
('sqrt_const', 'sqrt', {'in': 1}),
|
||||
('sqrt', 'relu'),
|
||||
('relu', 'output'),
|
||||
]
|
||||
|
||||
|
||||
class ComplexAbsAfterComplexTest(unittest.TestCase):
|
||||
def test_replacement(self):
|
||||
graph = build_graph(nodes_attrs=graph_node_attrs, edges=graph_edges)
|
||||
graph.stage = 'front'
|
||||
ComplexAbsAfterComplex().find_and_replace_pattern(graph)
|
||||
ref_graph = build_graph(nodes_attrs=ref_graph_node_attrs, edges=ref_graph_edges)
|
||||
(flag, resp) = compare_graphs(graph, ref_graph, 'output', check_op_attrs=True)
|
||||
self.assertTrue(flag, resp)
|
@ -0,0 +1,66 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
|
||||
from extensions.front.tf.ComplexAbs import ComplexAbs
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.utils.ir_engine.compare_graphs import compare_graphs
|
||||
from unit_tests.utils.graph import build_graph
|
||||
|
||||
|
||||
graph_node_attrs = {
|
||||
'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'complex_abs': {'kind': 'op', 'op': 'ComplexAbs'},
|
||||
'relu': {'type': 'ReLU', 'kind': 'op', 'op': 'ReLU'},
|
||||
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
|
||||
}
|
||||
|
||||
graph_edges = [
|
||||
('placeholder', 'complex_abs'),
|
||||
('complex_abs', 'relu'),
|
||||
('relu', 'output'),
|
||||
]
|
||||
|
||||
|
||||
ref_graph_node_attrs = {
|
||||
'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'pow2_const': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': np.float32(2.0)
|
||||
},
|
||||
'pow2': {'type': 'Power', 'kind': 'op', 'op': 'Pow'},
|
||||
'sum_axis': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': int64_array(-1)
|
||||
},
|
||||
'sum': {'type': 'ReduceSum', 'kind': 'op', 'op': 'ReduceSum'},
|
||||
'sqrt_const': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': np.float32(0.5)
|
||||
},
|
||||
'sqrt': {'type': 'Power', 'kind': 'op', 'op': 'Pow'},
|
||||
'relu': {'type': 'ReLU', 'kind': 'op', 'op': 'ReLU'},
|
||||
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
|
||||
}
|
||||
|
||||
ref_graph_edges = [
|
||||
('placeholder', 'pow2', {'in': 0}),
|
||||
('pow2_const', 'pow2', {'in': 1}),
|
||||
('sum_axis', 'sum', {'in': 1}),
|
||||
('pow2', 'sum', {'in': 0}),
|
||||
('sum', 'sqrt', {'in': 0}),
|
||||
('sqrt_const', 'sqrt', {'in': 1}),
|
||||
('sqrt', 'relu'),
|
||||
('relu', 'output'),
|
||||
]
|
||||
|
||||
|
||||
class ComplexAbsTest(unittest.TestCase):
|
||||
def test_replacement(self):
|
||||
graph = build_graph(nodes_attrs=graph_node_attrs, edges=graph_edges)
|
||||
graph.stage = 'front'
|
||||
ComplexAbs().find_and_replace_pattern(graph)
|
||||
ref_graph = build_graph(nodes_attrs=ref_graph_node_attrs, edges=ref_graph_edges)
|
||||
(flag, resp) = compare_graphs(graph, ref_graph, 'output', check_op_attrs=True)
|
||||
self.assertTrue(flag, resp)
|
@ -0,0 +1,89 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
import unittest
|
||||
|
||||
from extensions.front.tf.CorrectRollAxes import CorrectRollAxes
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.utils.ir_engine.compare_graphs import compare_graphs
|
||||
from unit_tests.utils.graph import build_graph
|
||||
|
||||
|
||||
graph_node_attrs = {
|
||||
'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'roll': {'kind': 'op', 'op': 'Roll', 'type': 'Roll', 'input_rank_changed': True},
|
||||
'roll_shift': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([50, 50])
|
||||
},
|
||||
'roll_axes': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([-2, -1])
|
||||
},
|
||||
'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
|
||||
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
|
||||
}
|
||||
|
||||
graph_edges = [
|
||||
('placeholder', 'roll', {'in': 0}),
|
||||
('roll', 'abs'),
|
||||
('abs', 'output'),
|
||||
('roll_shift', 'roll', {'in': 1}),
|
||||
('roll_axes', 'roll', {'in': 2}),
|
||||
]
|
||||
|
||||
|
||||
ref_graph_node_attrs = {
|
||||
'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'roll': {'kind': 'op', 'op': 'Roll', 'type': 'Roll'},
|
||||
'roll_shift': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([50, 50])
|
||||
},
|
||||
'roll_axes': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([-2, -1])
|
||||
},
|
||||
'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
|
||||
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
|
||||
'add': {'type': 'Add', 'kind': 'op', 'op': 'Add'},
|
||||
'mul': {'type': 'Multiply', 'kind': 'op', 'op': 'Mul'},
|
||||
'less': {'type': 'Less', 'kind': 'op', 'op': 'Less'},
|
||||
'zero': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(0)
|
||||
},
|
||||
'minus_one': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(-1)
|
||||
},
|
||||
}
|
||||
|
||||
ref_graph_edges = [
|
||||
('placeholder', 'roll', {'out': 0, 'in': 0}),
|
||||
('roll', 'abs'),
|
||||
('abs', 'output'),
|
||||
('roll_shift', 'roll', {'in': 1}),
|
||||
('mul', 'add', {'in': 1}),
|
||||
('add', 'roll', {'in': 2}),
|
||||
('zero', 'less', {'in': 1}),
|
||||
('minus_one', 'mul', {'in': 1}),
|
||||
('less', 'mul', {'in': 0}),
|
||||
('roll_axes', 'less', {'out': 0, 'in': 0}),
|
||||
('roll_axes', 'add', {'out': 0, 'in': 0}),
|
||||
]
|
||||
|
||||
|
||||
class CorrectRollAxesTest(unittest.TestCase):
|
||||
def test_replacement(self):
|
||||
graph = build_graph(nodes_attrs=graph_node_attrs, edges=graph_edges)
|
||||
graph.stage = 'front'
|
||||
CorrectRollAxes().find_and_replace_pattern(graph)
|
||||
ref_graph = build_graph(nodes_attrs=ref_graph_node_attrs, edges=ref_graph_edges)
|
||||
(flag, resp) = compare_graphs(graph, ref_graph, 'output', check_op_attrs=True)
|
||||
self.assertTrue(flag, resp)
|
||||
|
||||
def test_nonreplacement(self):
|
||||
graph = build_graph(nodes_attrs=graph_node_attrs, edges=graph_edges,
|
||||
update_attributes={'roll': {'input_rank_changed': False}})
|
||||
graph.stage = 'front'
|
||||
CorrectRollAxes().find_and_replace_pattern(graph)
|
||||
ref_graph = build_graph(nodes_attrs=graph_node_attrs, edges=graph_edges,
|
||||
update_attributes={'roll': {'input_rank_changed': False}})
|
||||
(flag, resp) = compare_graphs(graph, ref_graph, 'output', check_op_attrs=True)
|
||||
self.assertTrue(flag, resp)
|
@ -0,0 +1,88 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
import unittest
|
||||
|
||||
from extensions.front.tf.RollRealImagPack import RollRealImagPack
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.utils.ir_engine.compare_graphs import compare_graphs
|
||||
from unit_tests.utils.graph import build_graph
|
||||
|
||||
|
||||
graph_node_attrs = {
|
||||
'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'unroll': {'kind': 'op', 'op': 'Roll', 'type': 'Roll'},
|
||||
'real': {'kind': 'op', 'op': 'Real'},
|
||||
'imag': {'kind': 'op', 'op': 'Imag'},
|
||||
'pack': {'kind': 'op', 'op': 'Pack'},
|
||||
'unroll_shift': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([50, 50])
|
||||
},
|
||||
'unroll_axes': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([-2, -1])
|
||||
},
|
||||
'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
|
||||
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
|
||||
}
|
||||
|
||||
graph_edges = [
|
||||
('placeholder', 'unroll', {'in': 0}),
|
||||
('unroll', 'real', {'out': 0, 'in': 0}),
|
||||
('unroll', 'imag', {'out': 0, 'in': 0}),
|
||||
('real', 'pack', {'in': 0}),
|
||||
('imag', 'pack', {'in': 1}),
|
||||
('pack', 'abs'),
|
||||
('abs', 'output'),
|
||||
('unroll_shift', 'unroll', {'in': 1}),
|
||||
('unroll_axes', 'unroll', {'in': 2}),
|
||||
]
|
||||
|
||||
|
||||
ref_graph_node_attrs = {
|
||||
'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'unroll': {'kind': 'op', 'op': 'Roll', 'type': 'Roll'},
|
||||
'unroll_shift': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([50, 50])
|
||||
},
|
||||
'unroll_axes': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([-2, -1])
|
||||
},
|
||||
'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
|
||||
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
|
||||
'add': {'type': 'Add', 'kind': 'op', 'op': 'Add'},
|
||||
'mul': {'type': 'Multiply', 'kind': 'op', 'op': 'Mul'},
|
||||
'less': {'type': 'Less', 'kind': 'op', 'op': 'Less'},
|
||||
'zero': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(0)
|
||||
},
|
||||
'minus_one': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(-1)
|
||||
},
|
||||
}
|
||||
|
||||
ref_graph_edges = [
|
||||
('placeholder', 'unroll', {'out': 0, 'in': 0}),
|
||||
('unroll', 'abs'),
|
||||
('abs', 'output'),
|
||||
('unroll_shift', 'unroll', {'in': 1}),
|
||||
('unroll_axes', 'unroll', {'in': 2}),
|
||||
|
||||
('mul', 'add', {'in': 1}),
|
||||
('add', 'unroll', {'in': 2}),
|
||||
('zero', 'less', {'in': 1}),
|
||||
('minus_one', 'mul', {'in': 1}),
|
||||
('less', 'mul', {'in': 0}),
|
||||
('unroll_axes', 'less', {'out': 0, 'in': 0}),
|
||||
('unroll_axes', 'add', {'out': 0, 'in': 0}),
|
||||
]
|
||||
|
||||
|
||||
class RollRealImagPackTest(unittest.TestCase):
|
||||
def test_replacement(self):
|
||||
graph = build_graph(nodes_attrs=graph_node_attrs, edges=graph_edges)
|
||||
graph.stage = 'front'
|
||||
RollRealImagPack().find_and_replace_pattern(graph)
|
||||
ref_graph = build_graph(nodes_attrs=ref_graph_node_attrs, edges=ref_graph_edges)
|
||||
(flag, resp) = compare_graphs(graph, ref_graph, 'output', check_op_attrs=True)
|
||||
self.assertTrue(flag, resp)
|
@ -0,0 +1,143 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
import unittest
|
||||
|
||||
from extensions.front.tf.SSliceComplex import SSliceComplex
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.utils.ir_engine.compare_graphs import compare_graphs
|
||||
from unit_tests.utils.graph import build_graph
|
||||
|
||||
|
||||
graph_node_attrs = {
|
||||
'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'strided_slice_real': {
|
||||
'type': 'StridedSlice', 'kind': 'op', 'op': 'StridedSlice', 'begin_mask': int64_array([1]),
|
||||
'end_mask': int64_array([1]), 'ellipsis_mask': int64_array([1]), 'new_axis_mask': int64_array([0]),
|
||||
'shrink_axis_mask': int64_array([0, 1]),
|
||||
},
|
||||
'strided_slice_imag': {
|
||||
'type': 'StridedSlice', 'kind': 'op', 'op': 'StridedSlice', 'begin_mask': int64_array([1]),
|
||||
'end_mask': int64_array([1]), 'ellipsis_mask': int64_array([1]), 'new_axis_mask': int64_array([0]),
|
||||
'shrink_axis_mask': int64_array([0, 1]),
|
||||
},
|
||||
'complex': {'kind': 'op', 'op': 'Complex'},
|
||||
'real_begin': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([0, 0])
|
||||
},
|
||||
'imag_begin': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([0, 1])
|
||||
},
|
||||
'real_end': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([0, 1])
|
||||
},
|
||||
'imag_end': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([0, 2])
|
||||
},
|
||||
'real_strides': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([1, 1])
|
||||
},
|
||||
'imag_strides': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([1, 1])
|
||||
},
|
||||
'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
|
||||
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
|
||||
}
|
||||
|
||||
graph_edges = [
|
||||
('placeholder', 'strided_slice_real', {'out': 0, 'in': 0}),
|
||||
('placeholder', 'strided_slice_imag', {'out': 0, 'in': 0}),
|
||||
('strided_slice_real', 'complex', {'in': 0}),
|
||||
('strided_slice_imag', 'complex', {'in': 1}),
|
||||
('complex', 'abs'),
|
||||
('abs', 'output'),
|
||||
('real_begin', 'strided_slice_real', {'in': 1}),
|
||||
('imag_begin', 'strided_slice_imag', {'in': 1}),
|
||||
('real_end', 'strided_slice_real', {'in': 2}),
|
||||
('imag_end', 'strided_slice_imag', {'in': 2}),
|
||||
('real_strides', 'strided_slice_real', {'in': 3}),
|
||||
('imag_strides', 'strided_slice_imag', {'in': 3}),
|
||||
]
|
||||
|
||||
|
||||
ref_graph_node_attrs = {
|
||||
'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
|
||||
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
|
||||
}
|
||||
|
||||
ref_graph_edges = [
|
||||
('placeholder', 'abs'),
|
||||
('abs', 'output'),
|
||||
]
|
||||
|
||||
|
||||
non_transformed_graph_node_attrs = {
|
||||
'placeholder_0': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'placeholder_1': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'strided_slice_real': {
|
||||
'type': 'StridedSlice', 'kind': 'op', 'op': 'StridedSlice', 'begin_mask': int64_array([1]),
|
||||
'end_mask': int64_array([1]), 'ellipsis_mask': int64_array([1]), 'new_axis_mask': int64_array([0]),
|
||||
'shrink_axis_mask': int64_array([0, 1]),
|
||||
},
|
||||
'strided_slice_imag': {
|
||||
'type': 'StridedSlice', 'kind': 'op', 'op': 'StridedSlice', 'begin_mask': int64_array([1]),
|
||||
'end_mask': int64_array([1]), 'ellipsis_mask': int64_array([1]), 'new_axis_mask': int64_array([0]),
|
||||
'shrink_axis_mask': int64_array([0, 1]),
|
||||
},
|
||||
'complex': {'kind': 'op', 'op': 'Complex'},
|
||||
'real_begin': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([0, 0])
|
||||
},
|
||||
'imag_begin': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([0, 1])
|
||||
},
|
||||
'real_end': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([0, 1])
|
||||
},
|
||||
'imag_end': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([0, 2])
|
||||
},
|
||||
'real_strides': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([1, 1])
|
||||
},
|
||||
'imag_strides': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([1, 1])
|
||||
},
|
||||
'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
|
||||
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
|
||||
}
|
||||
|
||||
non_transformed_graph_edges = [
|
||||
('placeholder_0', 'strided_slice_real', {'out': 0, 'in': 0}),
|
||||
('placeholder_1', 'strided_slice_imag', {'out': 0, 'in': 0}),
|
||||
('strided_slice_real', 'complex', {'in': 0}),
|
||||
('strided_slice_imag', 'complex', {'in': 1}),
|
||||
('complex', 'abs'),
|
||||
('abs', 'output'),
|
||||
('real_begin', 'strided_slice_real', {'in': 1}),
|
||||
('imag_begin', 'strided_slice_imag', {'in': 1}),
|
||||
('real_end', 'strided_slice_real', {'in': 2}),
|
||||
('imag_end', 'strided_slice_imag', {'in': 2}),
|
||||
('real_strides', 'strided_slice_real', {'in': 3}),
|
||||
('imag_strides', 'strided_slice_imag', {'in': 3}),
|
||||
]
|
||||
|
||||
|
||||
class SSliceComplexTest(unittest.TestCase):
|
||||
def test_replacement(self):
|
||||
graph = build_graph(nodes_attrs=graph_node_attrs, edges=graph_edges)
|
||||
graph.stage = 'front'
|
||||
SSliceComplex().find_and_replace_pattern(graph)
|
||||
ref_graph = build_graph(nodes_attrs=ref_graph_node_attrs, edges=ref_graph_edges)
|
||||
(flag, resp) = compare_graphs(graph, ref_graph, 'output', check_op_attrs=True)
|
||||
self.assertTrue(flag, resp)
|
||||
|
||||
def test_nonreplacement(self):
|
||||
graph = build_graph(nodes_attrs=non_transformed_graph_node_attrs, edges=non_transformed_graph_edges)
|
||||
ref_graph = build_graph(nodes_attrs=non_transformed_graph_node_attrs, edges=non_transformed_graph_edges)
|
||||
graph.stage = 'front'
|
||||
SSliceComplex().find_and_replace_pattern(graph)
|
||||
(flag, resp) = compare_graphs(graph, ref_graph, 'output', check_op_attrs=True)
|
||||
self.assertTrue(flag, resp)
|
@ -0,0 +1,72 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
import unittest
|
||||
|
||||
from generator import generator, generate
|
||||
|
||||
from extensions.front.tf.TFFFTToDFT import TFFFTToDFT
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.utils.ir_engine.compare_graphs import compare_graphs
|
||||
from unit_tests.utils.graph import build_graph
|
||||
|
||||
|
||||
dft_graph_node_attrs = {
|
||||
'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'fft': {'kind': 'op', 'op': 'TFFFT', 'num_of_dimensions': 2, 'is_inverse': False},
|
||||
'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
|
||||
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
|
||||
}
|
||||
|
||||
dft_graph_edges = [
|
||||
('placeholder', 'fft', {'in': 0}),
|
||||
('fft', 'abs'),
|
||||
('abs', 'output'),
|
||||
]
|
||||
|
||||
|
||||
ref_dft_graph_node_attrs = {
|
||||
'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
'fft': {'kind': 'op', 'op': 'DFT'},
|
||||
'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
|
||||
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
|
||||
'fft_axes': {
|
||||
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([-2, -1])
|
||||
},
|
||||
}
|
||||
|
||||
ref_dft_graph_edges = [
|
||||
('placeholder', 'fft', {'in': 0}),
|
||||
('fft', 'abs'),
|
||||
('abs', 'output'),
|
||||
('fft_axes', 'fft', {'in': 1}),
|
||||
]
|
||||
|
||||
|
||||
@generator
|
||||
class TFFFTToDFTTest(unittest.TestCase):
|
||||
@generate(*[(2, False, 'DFT', int64_array([-2, -1])),
|
||||
(2, True, 'IDFT', int64_array([-2, -1])),
|
||||
(1, False, 'DFT', int64_array([-1])),
|
||||
(1, True, 'IDFT', int64_array([-1])),
|
||||
(3, False, 'DFT', int64_array([-3, -2, -1])),
|
||||
(3, True, 'IDFT', int64_array([-3, -2, -1]))])
|
||||
def test_replacement(self, num_of_dimensions, is_inverse, dft_type, fft_axes):
|
||||
graph = build_graph(nodes_attrs=dft_graph_node_attrs,
|
||||
edges=dft_graph_edges,
|
||||
update_attributes={
|
||||
'fft': {'num_of_dimensions': num_of_dimensions, 'is_inverse': is_inverse},
|
||||
})
|
||||
graph.stage = 'front'
|
||||
setattr(graph.graph['cmd_params'], 'disable_nhwc_to_nchw', False)
|
||||
graph.graph['layout'] = 'NHWC'
|
||||
TFFFTToDFT().find_and_replace_pattern(graph)
|
||||
ref_graph = build_graph(nodes_attrs=ref_dft_graph_node_attrs,
|
||||
edges=ref_dft_graph_edges,
|
||||
update_attributes={
|
||||
'fft': {'kind': 'op', 'op': dft_type},
|
||||
'fft_axes': {'value': fft_axes, 'shape': int64_array(fft_axes.shape)},
|
||||
})
|
||||
(flag, resp) = compare_graphs(graph, ref_graph, 'output', check_op_attrs=True)
|
||||
self.assertTrue(flag, resp)
|
@ -29,44 +29,6 @@ nodes = {
|
||||
|
||||
|
||||
class SliceReplacerTest(unittest.TestCase):
|
||||
# test case when input goes besides from TFSlice to other nodes
|
||||
def test_slice_replacer_begin_with_2_inputs(self):
|
||||
graph = build_graph(nodes_attrs=nodes, edges=[
|
||||
('input', 'tfslice'),
|
||||
*connect_front('begin:0', '1:tfslice'),
|
||||
*connect_front('begin:0', '0:john_doe'),
|
||||
*connect_front('size:0', '2:tfslice'),
|
||||
*connect_front('tfslice:0', 'output'),
|
||||
], nodes_with_edges_only=True)
|
||||
graph.stage = 'front'
|
||||
|
||||
TFSliceToSliceReplacer().find_and_replace_pattern(graph)
|
||||
|
||||
graph_ref = build_graph(nodes_attrs=nodes, edges=[
|
||||
*connect_front('input:0', 'slice'),
|
||||
*connect_front('input:0', 'shapeof'),
|
||||
*connect_front('begin:0', 'slice:1'),
|
||||
*connect_front('begin:0', 'john_doe:1'),
|
||||
|
||||
*connect_front('begin:0', 'end_const:0'),
|
||||
*connect_front('size:0', 'end_const:1'),
|
||||
*connect_front('size:0', 'equal:0'),
|
||||
|
||||
*connect_front('shapeof:0', 'select:1'),
|
||||
*connect_front('minus_one:0', 'equal:1'),
|
||||
|
||||
*connect_front('equal:0', 'select:0'),
|
||||
|
||||
*connect_front('end_const:0', 'cast:0'),
|
||||
*connect_front('cast:0', 'select:2'),
|
||||
*connect_front('select:0', 'slice:2'),
|
||||
|
||||
*connect_front('slice:0', 'output'),
|
||||
], nodes_with_edges_only=True)
|
||||
|
||||
(flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
|
||||
self.assertTrue(flag, resp)
|
||||
|
||||
def test_slice_replacer(self):
|
||||
graph = build_graph(nodes_attrs=nodes, edges=[
|
||||
*connect_front('input:0', '0:tfslice'),
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user