diff --git a/.ci/azure/linux.yml b/.ci/azure/linux.yml index f45f4e410c6..146775f6189 100644 --- a/.ci/azure/linux.yml +++ b/.ci/azure/linux.yml @@ -112,6 +112,7 @@ jobs: -DNGRAPH_ONNX_IMPORT_ENABLE=ON -DNGRAPH_ONNX_EDITOR_ENABLE=ON -DENABLE_FASTER_BUILD=ON + -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR) workingDirectory: $(BUILD_DIR) diff --git a/.ci/azure/mac.yml b/.ci/azure/mac.yml index 680ef281ac2..04d4c16ea23 100644 --- a/.ci/azure/mac.yml +++ b/.ci/azure/mac.yml @@ -90,7 +90,7 @@ jobs: # Disable errors with Ninja export CXXFLAGS="-Wno-error=unused-command-line-argument" export CFLAGS="-Wno-error=unused-command-line-argument" - cmake -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR) + cmake -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR) workingDirectory: $(BUILD_DIR) displayName: 'CMake' diff --git a/.ci/azure/windows.yml b/.ci/azure/windows.yml index 6b4e5203dd0..21a36392e33 100644 --- a/.ci/azure/windows.yml +++ b/.ci/azure/windows.yml @@ -92,7 +92,7 @@ jobs: - script: | set PATH=$(WORK_DIR)\ninja-win;%PATH% - call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_FASTER_BUILD=ON -DENABLE_TEMPLATE_PLUGIN=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR) + call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_FASTER_BUILD=ON -DENABLE_TEMPLATE_PLUGIN=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR) workingDirectory: $(BUILD_DIR) displayName: 'CMake' diff --git a/.ci/openvino-onnx/Jenkinsfile b/.ci/openvino-onnx/Jenkinsfile index 48529879ef1..5fe24928798 100644 --- a/.ci/openvino-onnx/Jenkinsfile +++ b/.ci/openvino-onnx/Jenkinsfile @@ -113,8 +113,8 @@ def buildDockerImage(Map configuration, String workdir) { --build-arg BUILD_TYPE=${configuration.build_type} \ --build-arg PROTOBUF_LITE=${configuration.protobuf_lite} \ --file=.ci/openvino-onnx/Dockerfile \ - --build-arg http_proxy=http://proxy-chain.intel.com:911/ \ - --build-arg https_proxy=http://proxy-chain.intel.com:912/ . + --build-arg http_proxy=http://proxy-ir.intel.com:911/ \ + --build-arg https_proxy=http://proxy-ir.intel.com:911/ . 
""" } diff --git a/.github/org_control/check_pr.py b/.github/org_control/check_pr.py index e0b48832ead..7bb8f89dd60 100644 --- a/.github/org_control/check_pr.py +++ b/.github/org_control/check_pr.py @@ -139,7 +139,7 @@ def update_labels(gh_api, pull, non_org_intel_pr_users, non_org_pr_users): def get_wrong_commits(pull): """Returns commits with incorrect user and email""" - pr_author_email = pull.user.email.lower() + pr_author_email = (pull.user.email or "").lower() print("GitHub PR author email:", pr_author_email) print("Check commits:") wrong_commits = set() @@ -147,7 +147,7 @@ def get_wrong_commits(pull): # import pprint; pprint.pprint(commit.raw_data) print("Commit SHA:", commit.sha) # Use raw data because commit author can be non GitHub user - commit_email = commit.raw_data["commit"]["author"]["email"].lower() + commit_email = (commit.raw_data["commit"]["author"]["email"] or "").lower() print(" Commit email:", commit_email) if not github_api.is_valid_user(commit.author): print( @@ -229,7 +229,7 @@ def main(): if wrong_pulls: for pull_number, wrong_commits in wrong_pulls.items(): print( - f"\nERROR: Remove or replace wrong commits in the PR {pull_number}:\n ", + f"\nERROR: Remove or replace wrong commits in the PR {pull_number}:\n ", "\n ".join(wrong_commits), ) print( diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index b538a179339..607fe2cb64a 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -15,14 +15,17 @@ jobs: - name: Install dependencies run: | sudo apt --assume-yes install libusb-1.0-0-dev + python3 -m pip install --upgrade pip python3 -m pip install -r ./inference-engine/ie_bridges/python/requirements.txt + # Add for -DENABLE_PYTHON=ON, no cython + python3 -m pip install -r ./inference-engine/ie_bridges/python/src/requirements-dev.txt # Run cmake with -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT in order to enable codestyle check for ITT collector - name: CMake run: | mkdir build cd build - cmake -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT .. + cmake -DENABLE_PYTHON=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT .. - name: Check code style run: cmake --build build --target clang_format_check_all diff --git a/cmake/features.cmake b/cmake/features.cmake index fe1b8919b51..aff805adb15 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -6,6 +6,8 @@ ie_dependent_option (ENABLE_MKL_DNN "MKL-DNN plugin for inference engine" ON "X8 ie_option (ENABLE_TESTS "unit, behavior and functional tests" OFF) +ie_option (ENABLE_STRICT_DEPENDENCIES "Skip configuring \"convinient\" dependencies for efficient parallel builds" ON) + ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF) ie_option (ENABLE_PROFILING_ITT "Build with ITT tracing. Optionally configure pre-built ittnotify library though INTEL_VTUNE_DIR variable." OFF) @@ -18,8 +20,6 @@ Supported values:\ ie_option (ENABLE_PROFILING_FIRST_INFERENCE "Build with ITT tracing of first inference time." ON) -ie_option (ENABLE_DOCS "Build docs using Doxygen" OFF) - ie_option(ENABLE_TEMPLATE_PLUGIN "Register template plugin into plugins.xml" OFF) ie_option_enum(SELECTIVE_BUILD "Enable OpenVINO conditional compilation or statistics collection. 
\ @@ -33,6 +33,9 @@ ie_option(ENABLE_ERROR_HIGHLIGHT "Highlight errors and warnings during compile t find_package(PythonLibs 3 QUIET) ie_dependent_option (ENABLE_PYTHON "enables ie python bridge build" OFF "PYTHONLIBS_FOUND" OFF) +find_package(PythonInterp 3 QUIET) +ie_dependent_option (ENABLE_DOCS "Build docs using Doxygen" OFF "PYTHONINTERP_FOUND" OFF) + # # enable or disable output from NGRAPH_DEBUG statements # diff --git a/docs/IE_DG/API_Changes.md b/docs/IE_DG/API_Changes.md index a234471c13e..2534a4a6c38 100644 --- a/docs/IE_DG/API_Changes.md +++ b/docs/IE_DG/API_Changes.md @@ -14,6 +14,15 @@ The sections below contain detailed list of changes made to the Inference Engine * InferenceEngine::Parameter(std::shared_ptr& var) * std::shared_ptr InferenceEngine::Parameter::asVariant() const * InferenceEngine::Parameter::operator std::shared_ptr() const + * KEY_CLDNN_NV12_TWO_INPUTS GPU plugin option. Use KEY_GPU_NV12_TWO_INPUTS instead + * KEY_CLDNN_PLUGIN_PRIORITY GPU plugin option. Use KEY_GPU_PLUGIN_PRIORITY instead + * KEY_CLDNN_PLUGIN_THROTTLE GPU plugin option. Use KEY_GPU_PLUGIN_THROTTLE instead + * KEY_CLDNN_MEM_POOL GPU plugin option + * KEY_CLDNN_GRAPH_DUMPS_DIR GPU plugin option + * KEY_CLDNN_SOURCES_DUMPS_DIR GPU plugin option + * KEY_DUMP_KERNELS GPU plugin option + * KEY_TUNING_MODE GPU plugin option + * KEY_TUNING_FILE GPU plugin option ## 2021.3 @@ -528,7 +537,7 @@ The sections below contain detailed list of changes made to the Inference Engine * DLIA_CONFIG_KEY(ENABLE_STREAMING) config key ### Removed API - + * InferenceEngine::EltwiseLayer::Select from InferenceEngine::EltwiseLayer::eOperation enumeration ## 2019 R2 @@ -577,7 +586,7 @@ The sections below contain detailed list of changes made to the Inference Engine * DLIA_CONFIG_KEY(IO_TRANSFORMATIONS_NATIVE) config key * DLIA_CONFIG_KEY(DUMP_SUPPORTED_LAYERS_INFORMATION) config key * GNA_CONFIG_VALUE(SW_FP32) config value for GNA_CONFIG_KEY(DEVICE_MODE) key - * MULTI_CONFIG_KEY(DEVICE_PRIORITIES) config key for `MULTI` device + * MULTI_CONFIG_KEY(DEVICE_PRIORITIES) config key for `MULTI` device * InferenceEngine::CNNNetReader::ReadNetwork(const std::wstring &filepath) new method * InferenceEngine::CNNNetReader::ReadWeights(const std::wstring &filepath) new method * InferenceEngine::ExecutableNetwork::ExecutableNetwork(IExecutableNetwork::Ptr actual, InferenceEnginePluginPtr plg) constructor with additional `plg` parameter @@ -593,7 +602,7 @@ The sections below contain detailed list of changes made to the Inference Engine * InferenceEngine::EltwiseLayer::Logical_NOT, InferenceEngine::EltwiseLayer::Mean, InferenceEngine::EltwiseLayer::Select extensions to InferenceEngine::EltwiseLayer::eOperation enumeration * InferenceEngine::OneHotLayer new class * InferenceEngine::SelectLayer new class - * InferenceEngine::BroadcastLayer new class + * InferenceEngine::BroadcastLayer new class * InferenceEngine::MathLayer new class * InferenceEngine::ReduceLayer new class * InferenceEngine::TopKLayer new class diff --git a/docs/IE_DG/Extensibility_DG/GPU_Kernel.md b/docs/IE_DG/Extensibility_DG/GPU_Kernel.md index 09ace6f0a29..d9fd809f8e4 100644 --- a/docs/IE_DG/Extensibility_DG/GPU_Kernel.md +++ b/docs/IE_DG/Extensibility_DG/GPU_Kernel.md @@ -219,22 +219,6 @@ __kernel void example_relu_kernel( ## Debugging Tips -* **Dumping the Resulting Kernels**. -It is recommended to get a dump of the kernel with all of -the values set by the Inference Engine, such as tensor sizes, -floating-point, and integer kernel parameters. 
To get the dump, add the -following line to your code that configures the GPU plugin to output the -custom kernels: - -@snippet snippets/GPU_Kernel.cpp part1 - -When the Inference Engine compiles the kernels for the specific network, -it also outputs the resulting code for the custom kernels. In the -directory of your executable, find files like -`clDNN_program0.cl`, `clDNN_program1.cl`. There are as many files as -distinct sets of parameters for your custom kernel: different input -tensor sizes and kernel parameters. - * **Using `printf` in the OpenCL™ Kernels**. To debug the specific values, you can use `printf` in your kernels. However, be careful: for instance, do not output excessively diff --git a/docs/IE_DG/GPU_Kernels_Tuning.md b/docs/IE_DG/GPU_Kernels_Tuning.md deleted file mode 100644 index 5bb6a8334b2..00000000000 --- a/docs/IE_DG/GPU_Kernels_Tuning.md +++ /dev/null @@ -1,39 +0,0 @@ -Using GPU Kernels Tuning {#openvino_docs_IE_DG_GPU_Kernels_Tuning} -====================== - -GPU Kernels Tuning allows you to tune models, so the heavy computational layers are configured to fit better into -hardware, which the tuning was done on. It is required to achieve best performance on GPU. -> **NOTE** Currently only convolution and fully connected layers undergo tuning process. It means that the performance boost depends on the amount of that layers in the model. - -OpenVINO™ releases include the `/inference_engine/bin/intel64/Release/cache.json` file with pretuned data for current state of the art models. It is highly recommended to do the -tuning for new kind of models, hardwares or drivers. - -## Tuned data - -GPU tuning data is saved in JSON format. The file is composed of 2 types of attributes and 1 type of value: -* Execution units number (attribute): splits the content into different EU sections -* Hash (attribute): hashed tuned kernel data -* Key (value): Array with kernel name and kernel's mode index - -## Usage - ---- - -You can activate Kernels Tuning process by setting `KEY_TUNING_MODE` flag to `TUNING_CREATE` and `KEY_TUNING_FILE` to `<"filename">` in a configuration map that is -passed to the plugin while loading a network. -This configuration modifies the behavior of the `ExecutableNetwork` object. Instead of standard network compilation, it will run the tuning process. -Please keep in mind that the tuning can be very time consuming. The bigger the network, the longer it will take. -File with tuned data is the result of this step. - -> **NOTE** If a filename passed to `KEY_TUNING_FILE` points to existing tuned data and you are tuning a new model, then this file will be extended by new data. This allows you to extend existing `cache.json` provided in the OpenVINO™ release package. - -The example below shows how to set and use the key files: - -@snippet snippets/GPU_Kernels_Tuning.cpp part0 - ---- - -You can activate the inference with tuned data by setting `KEY_TUNING_MODE` flag to `TUNING_USE_EXISTING` and -`KEY_TUNING_FILE` flag to `<"filename">`. - -GPU backend will process the content of the file during network compilation to configure the OpenCL kernels for the best performance. 
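The deleted guide above described passing these (now deprecated) tuning keys in a configuration map when loading a network; that general mechanism is unchanged. Below is a minimal, hypothetical Python sketch of passing a GPU configuration map using non-deprecated keys from the GPU plugin table later in this change, assuming the 2021.x `openvino.inference_engine` API; the `model.xml`/`model.bin` file names and the cache directory are placeholders.

```python
# Hypothetical sketch: passing a GPU plugin configuration map at load time.
# Assumes the 2021.x Python API (openvino.inference_engine) and a local IR pair
# (model.xml / model.bin). Raw key strings omit the KEY_ prefix, as the GPU
# plugin documentation below notes for the Python API.
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")

gpu_config = {
    "CACHE_DIR": "./cl_cache",        # cache compiled OpenCL kernels between runs
    "GPU_THROUGHPUT_STREAMS": "2",    # two execution streams for the throughput mode
}
exec_net = ie.load_network(network=net, device_name="GPU", config=gpu_config)
```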
diff --git a/docs/IE_DG/Intro_to_Performance.md b/docs/IE_DG/Intro_to_Performance.md index 66fcf48c34f..0c9457ed4bf 100644 --- a/docs/IE_DG/Intro_to_Performance.md +++ b/docs/IE_DG/Intro_to_Performance.md @@ -1,24 +1,29 @@ # Introduction to the Performance Topics {#openvino_docs_IE_DG_Intro_to_Performance} This section is a shorter version of the -[Optimization Guide](supported_plugins/MULTI.md) for the Intel Deep Learning Deployment Toolkit. +[Optimization Guide](../optimization_guide/dldt_optimization_guide.md) for the Intel® Distribution of OpenVINO™ Toolkit. ## Precision Inference precision directly affects the performance. -Model Optimizer can produce an IR with different precision. For example, float16 IR initially targets VPU and GPU devices, while, for example, the CPU can also execute regular float32. -Also, further device-specific inference precision settings are available, for example, [8-bit integer](Int8Inference.md) or [bfloat16](Bfloat16Inference.md) inference on the CPU. -Note that for [MULTI device](supported_plugins/MULTI.md) that supports automatic inference on multiple devices in parallel, you can use the FP16 IR. +Model Optimizer can produce an IR with different precision. For example, an FP16 IR initially targets VPU and GPU devices, while for the CPU an FP16 IR is typically up-scaled to the regular FP32 automatically upon loading. Notice that further device-specific inference precision settings are available, +for example, [8-bit integer](Int8Inference.md) or [bfloat16](Bfloat16Inference.md), which are specific to CPU inference and described below. +Note that for the [MULTI device](supported_plugins/MULTI.md) plugin that supports automatic inference on multiple devices in parallel, you can use an FP16 IR (no need for FP32). You can find more information, including preferred data types for specific devices, in the -[Supported Devices](supported_plugins/Supported_Devices.md) section. +[Supported Devices](supported_plugins/Supported_Devices.md) document. -## Lowering Inference Precision -Default optimization is used for CPU and implies that inference is made with lower precision if it is possible on a given platform to reach better performance with acceptable range of accuracy. -This approach can be used for CPU devices where the platform supports the AVX512_BF16 instruction. In this case, a regular float32 model is converted to [bfloat16](Bfloat16Inference.md) internal representation and inference is provided with bfloat16 layers usage. -Below is the example command line to disable this feature on the CPU device with the AVX512_BF16 instruction and execute regular float32. +## Automatic Lowering of the Inference Precision +By default, plugins enable the optimizations that allow lower precision if the acceptable range of accuracy is preserved. +For example, for the CPU that supports the AVX512_BF16 instructions, an FP16/FP32 model is converted to a [bfloat16](Bfloat16Inference.md) IR to accelerate inference. +To compare the associated speedup, run the example command below to disable this feature on the CPU device with the AVX512_BF16 support and get regular FP32 execution: ``` $ benchmark_app -m -enforcebf16=false ``` +Notice that for quantized (e.g. INT8) models, the bfloat16 calculations (of the layers that remain in FP32) are disabled by default. +Refer to the [CPU Plugin documentation](supported_plugins/CPU.md) for more details. 
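The same switch that `benchmark_app -enforcebf16=false` exposes can also be applied programmatically. A hedged Python sketch, assuming the 2021.x `openvino.inference_engine` API and that the CPU plugin accepts the `ENFORCE_BF16` configuration key with `YES`/`NO` values; the model file names are placeholders:

```python
# Hypothetical sketch: disabling automatic bfloat16 lowering on a BF16-capable CPU,
# mirroring `benchmark_app -enforcebf16=false`. Assumes the 2021.x Python API and
# that the CPU plugin accepts the ENFORCE_BF16 key with YES/NO values.
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")

# "NO" keeps regular FP32 execution; the default on AVX512_BF16 hardware allows bfloat16.
exec_net = ie.load_network(network=net, device_name="CPU",
                           config={"ENFORCE_BF16": "NO"})
```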
+ +Similarly, the GPU device automatically executes FP16 for the layers that remain in FP16 in the quantized models (assuming that the FP16 model was quantized). +Refer to the ENABLE_FP16_FOR_QUANTIZED_MODELS key in the [GPU Plugin documentation](supported_plugins/GPU.md). ## Latency vs. Throughput One way to increase computational efficiency is batching, which combines many (potentially tens) of @@ -44,17 +49,17 @@ Below is the example command line that limits the execution to the single socket limited to the single socket). $ numactl -m 0 --physcpubind 0-27 benchmark_app -m -api sync -nthreads 28 ``` -Note that if you have more than one input, running as many inference requests as you have NUMA nodes (or sockets) +Note that if you have more than one input, running as many inference streams as you have NUMA nodes (or sockets) usually gives the same best latency as a single request on the single socket, but much higher throughput. Assuming two NUMA nodes machine: ``` $ benchmark_app -m -nstreams 2 ``` Number of NUMA nodes on the machine can be queried via 'lscpu'. -Please see more on the NUMA support in the [Optimization Guide](supported_plugins/MULTI.md). +Please see more on the NUMA support in the [Optimization Guide](../optimization_guide/dldt_optimization_guide.md). ## Throughput Mode for CPU Unlike most accelerators, CPU is perceived as an inherently latency-oriented device. -Since 2018 R5 release, the Inference Engine introduced the "throughput" mode, which allows the Inference Engine to efficiently run multiple inference requests on the CPU simultaneously, greatly improving the throughput. +OpenVINO™ toolkit provides a "throughput" mode that allows running multiple inference requests on the CPU simultaneously, which greatly improves the throughput. Internally, the execution resources are split/pinned into execution "streams". Using this feature gains much better performance for the networks that originally are not scaled well with a number of threads (for example, lightweight topologies). This is especially pronounced for the many-core server machines. @@ -62,38 +67,26 @@ Using this feature gains much better performance for the networks that originall Run the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) and play with number of infer requests running in parallel, next section. Try different values of the `-nstreams` argument from `1` to a number of CPU cores and find one that provides the best performance. -In addition to the number of streams, it is also possible to play with the batch size to find the throughput sweet-spot. - The throughput mode relaxes the requirement to saturate the CPU by using a large batch: running multiple independent inference requests in parallel often gives much better performance, than using a batch only. This allows you to simplify the app-logic, as you don't need to combine multiple inputs into a batch to achieve good CPU performance. Instead, it is possible to keep a separate infer request per camera or another source of input and process the requests in parallel using Async API. ## Benchmark App [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample is the best performance reference. -It has a lot of device-specific knobs, but the primary usage is as simple as: +It has a lot of device-specific knobs, but the primary usage is as simple as: ```bash $ ./benchmark_app –d GPU –m -i ``` -to measure the performance of the model on the GPU. +to measure the performance of the model on the GPU. 
Or ```bash $ ./benchmark_app –d CPU –m -i ``` to execute on the CPU instead. -For example, for the CPU throughput mode from the previous section, you can play with number of streams (`-nstreams` command-line param). -Try different values of the `-nstreams` argument from `1` to a number of CPU cores and find one that provides the best performance. For example, on a 8-core CPU, compare the `-nstreams 1` (which is a latency-oriented scenario) to the `2`, `4` and `8` streams. Notice that `benchmark_app` automatically queries/creates/runs number of requests required to saturate the given number of streams. +For example, for the CPU throughput mode from the previous section, you can play with the number of streams (`-nstreams` command-line param). +Try different values of the `-nstreams` argument from `1` to a number of CPU cores and find one that provides the best performance. For example, on an 8-core CPU, compare the `-nstreams 1` (which is a latency-oriented scenario) to the `2`, `4` and `8` streams. Notice that `benchmark_app` automatically queries/creates/runs number of requests required to saturate the given number of streams. Finally, notice that when you don't specify the number of streams with `-nstreams`, the "AUTO" value for the streams is used, e.g. for the CPU this is [CPU_THROUGHPUT_AUTO](supported_plugins/CPU.md). You can spot the actual value behind "AUTO" for your machine in the application output. Notice that the "AUTO" number is not necessarily optimal, so it is generally recommended to play either with the benchmark_app's "-nstreams" as described above, or via [new Workbench tool](@ref workbench_docs_Workbench_DG_Introduction). This allows you to simplify the app-logic, as you don't need to combine multiple inputs into a batch to achieve good CPU performance. Instead, it is possible to keep a separate infer request per camera or another source of input and process the requests in parallel using Async API. - -## Kernels Tuning for GPU - -GPU backend comes with a feature, that allows models tuning, so the workload is configured to fit better into hardware. - -Tuning is time consuming process, which internally execute every layer several (or even hundreds) times to find most performant configuration. - -This configuration is saved into json-formatted file, whose name can be passed as plugin param to network. GPU backend will process this data to configure kernels for the best performance. - -For more details about Kernels Tuning and How-To please refer to [GPU Kernels Tuning](GPU_Kernels_Tuning.md). diff --git a/docs/IE_DG/supported_plugins/CL_DNN.md b/docs/IE_DG/supported_plugins/GPU.md similarity index 62% rename from docs/IE_DG/supported_plugins/CL_DNN.md rename to docs/IE_DG/supported_plugins/GPU.md index 0216ae71d0d..cc12be98a12 100644 --- a/docs/IE_DG/supported_plugins/CL_DNN.md +++ b/docs/IE_DG/supported_plugins/GPU.md @@ -1,4 +1,4 @@ -GPU Plugin {#openvino_docs_IE_DG_supported_plugins_CL_DNN} +GPU Plugin {#openvino_docs_IE_DG_supported_plugins_GPU} ======= The GPU plugin uses the Intel® Compute Library for Deep Neural Networks (clDNN) to infer deep neural networks. @@ -89,13 +89,10 @@ Some layers are executed during the load time, not during the inference. One of The following layers are not accelerated on the GPU and executed on the host CPU instead: * Proposal -* SimplerNMS +* NonMaxSuppression * PriorBox * DetectionOutput -## Known Layers Limitations -* ROIPooling is supported for 'max' value of 'method' attribute. 
- ## Supported Configuration Parameters The plugin supports the configuration parameters listed below. @@ -107,31 +104,21 @@ When specifying key values as raw strings (that is, when using Python API), omit | `KEY_CACHE_DIR` | `""` | `""` | Specifies a directory where compiled OCL binaries can be cached. First model loading generates the cache, and all subsequent LoadNetwork calls use precompiled kernels which significantly improves load time. If empty - caching is disabled | | `KEY_PERF_COUNT` | `YES` / `NO` | `NO` | Collect performance counters during inference | | `KEY_CONFIG_FILE` | `" [ ...]"` | `""` | Load custom layer configuration files | -| `KEY_DUMP_KERNELS` | `YES` / `NO` | `NO` | Dump the final kernels used for custom layers | -| `KEY_TUNING_MODE` | `TUNING_DISABLED`
`TUNING_CREATE`
`TUNING_USE_EXISTING` | `TUNING_DISABLED` | Disable inference kernel tuning
Create tuning file (expect much longer runtime)
Use an existing tuning file | -| `KEY_TUNING_FILE` | `""` | `""` | Tuning file to create / use | -| `KEY_CLDNN_PLUGIN_PRIORITY` | `<0-3>` | `0` | OpenCL queue priority (before usage, make sure your OpenCL driver supports appropriate extension)
Higher value means higher priority for clDNN OpenCL queue. 0 disables the setting. | -| `KEY_CLDNN_PLUGIN_THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)
Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. | -| `KEY_CLDNN_GRAPH_DUMPS_DIR` | `""` | `""` | clDNN graph optimizer stages dump output directory (in GraphViz format) | -| `KEY_CLDNN_SOURCES_DUMPS_DIR` | `""` | `""` | Final optimized clDNN OpenCL sources dump output directory | -| `KEY_GPU_THROUGHPUT_STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies a number of GPU "execution" streams for the throughput mode (upper bound for a number of inference requests that can be executed simultaneously).
This option is can be used to decrease GPU stall time by providing more effective load from several streams. Increasing the number of streams usually is more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on CPU. If CPU load increases, it can be regulated by setting an appropriate `KEY_CLDNN_PLUGIN_THROTTLE` option value (see above). If your target system has relatively weak CPU, keep throttling low.
The default value is 1, which implies latency-oriented behavior.
`KEY_GPU_THROUGHPUT_AUTO` creates bare minimum of streams to improve the performance; this is the most portable option if you are not sure how many resources your target machine has (and what would be the optimal number of streams).
A positive integer value creates the requested number of streams. | +| `KEY_GPU_PLUGIN_PRIORITY` | `<0-3>` | `0` | OpenCL queue priority (before usage, make sure your OpenCL driver supports appropriate extension)
Higher value means higher priority for OpenCL queue. 0 disables the setting. | +| `KEY_GPU_PLUGIN_THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)
Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. | +| `KEY_CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS` | `YES` / `NO` | `YES` | Allows using FP16+INT8 mixed precision mode, so non-quantized parts of a model will be executed in FP16 precision for FP16 IR. Does not affect quantized FP32 IRs | +| `KEY_GPU_NV12_TWO_INPUTS` | `YES` / `NO` | `NO` | Controls preprocessing logic for NV12 input. If it's set to YES, then the device graph will expect the user to set a biplanar NV12 blob as input, which will be directly passed to the device execution graph. Otherwise, preprocessing via GAPI is used to convert NV12->BGR, so the GPU graph has to expect a single input | +| `KEY_GPU_THROUGHPUT_STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies a number of GPU "execution" streams for the throughput mode (upper bound for a number of inference requests that can be executed simultaneously).
This option can be used to decrease GPU stall time by providing more effective load from several streams. Increasing the number of streams is usually more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on the CPU. If CPU load increases, it can be regulated by setting an appropriate `KEY_GPU_PLUGIN_THROTTLE` option value (see above). If your target system has a relatively weak CPU, keep throttling low.
The default value is 1, which implies latency-oriented behavior.
`KEY_GPU_THROUGHPUT_AUTO` creates bare minimum of streams to improve the performance; this is the most portable option if you are not sure how many resources your target machine has (and what would be the optimal number of streams).
A positive integer value creates the requested number of streams. | | `KEY_EXCLUSIVE_ASYNC_REQUESTS` | `YES` / `NO` | `NO` | Forces async requests (also from different executable networks) to execute serially.| -| `KEY_CLDNN_MAX_NUM_THREADS` | `integer value` | `maximum # of HW threads available in host environment` | Specifies the number of CPU threads that can be used for clDNN engine, e.g, JIT compilation of clDNN kernels or clDNN cpu kernel processing. The default value is set as the number of maximum available threads in host environment to minimize the time for LoadNetwork, where the clDNN kernel build time occupies a large portion. Note that if the specified value is larger than the maximum available # of threads or less than zero, it is set as maximum available # of threads. It can be specified with a smaller number than the available HW threads according to the usage scenario, e.g., when the user wants to assign more CPU threads while clDNN plugin is running. Note that setting this value with lower number will affect not only the network loading time but also the cpu layers of clDNN networks that are optimized with multi-threading. | -| `KEY_CLDNN_ENABLE_LOOP_UNROLLING` | `YES` / `NO` | `YES` | Enables recurrent layers such as TensorIterator or Loop with fixed iteration count to be unrolled. It is turned on by default. Turning this key on will achieve better inference performance for loops with not too many iteration counts (less than 16, as a rule of thumb). Turning this key off will achieve better performance for both graph loading time and inference time with many iteration counts (greater than 16). Note that turning this key on will increase the graph loading time in proportion to the iteration counts. Thus, this key should be turned off if graph loading time is considered to be most important target to optimize. | - -## Note on Debug Capabilities of the GPU Plugin - -Inference Engine GPU plugin provides possibility to dump the user custom OpenCL™ kernels to a file to allow you to properly debug compilation issues in your custom kernels. - -The application can use the SetConfig() function with the key PluginConfigParams::KEY_DUMP_KERNELS and value: PluginConfigParams::YES. Then during network loading, all custom layers will print their OpenCL kernels with the JIT instrumentation added by the plugin. -The kernels will be stored in the working directory under files named the following way: clDNN_program0.cl, clDNN_program1.cl. - -This option is disabled by default. Additionally, the application can call the SetConfig() function with the key PluginConfigParams::KEY_DUMP_KERNELS and value: PluginConfigParams::NO before network loading. - -How to verify that this option is disabled: -1. Delete all clDNN_program*.cl files from the current directory -2. Run your application to load a network -3. Examine the working directory for the presence of any kernel file (for example, clDNN_program0.cl) +| `KEY_GPU_MAX_NUM_THREADS` | `integer value` | `maximum # of HW threads available in host environment` | Specifies the number of CPU threads that can be used for GPU engine, e.g, JIT compilation of GPU kernels or cpu kernel processing within GPU plugin. The default value is set as the number of maximum available threads in host environment to minimize the time for LoadNetwork, where the GPU kernel build time occupies a large portion. Note that if the specified value is larger than the maximum available # of threads or less than zero, it is set as maximum available # of threads. 
It can be specified with a smaller number than the available HW threads according to the usage scenario, e.g., when the user wants to assign more CPU threads while the GPU plugin is running. Note that setting this value to a lower number will affect not only the network loading time but also the CPU layers of GPU networks that are optimized with multi-threading. | +| `KEY_GPU_ENABLE_LOOP_UNROLLING` | `YES` / `NO` | `YES` | Enables recurrent layers such as TensorIterator or Loop with fixed iteration count to be unrolled. It is turned on by default. Turning this key on will achieve better inference performance for loops with not too many iteration counts (less than 16, as a rule of thumb). Turning this key off will achieve better performance for both graph loading time and inference time with many iteration counts (greater than 16). Note that turning this key on will increase the graph loading time in proportion to the iteration counts. Thus, this key should be turned off if graph loading time is considered the most important target to optimize. | +| `KEY_CLDNN_PLUGIN_PRIORITY` | `<0-3>` | `0` | OpenCL queue priority (before usage, make sure your OpenCL driver supports appropriate extension)
Higher value means higher priority for OpenCL queue. 0 disables the setting. **Deprecated**. Please use KEY_GPU_PLUGIN_PRIORITY | +| `KEY_CLDNN_PLUGIN_THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)
Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. **Deprecated**. Please use KEY_GPU_PLUGIN_THROTTLE | +| `KEY_CLDNN_GRAPH_DUMPS_DIR` | `""` | `""` | clDNN graph optimizer stages dump output directory (in GraphViz format) **Deprecated**. Will be removed in the next release | +| `KEY_CLDNN_SOURCES_DUMPS_DIR` | `""` | `""` | Final optimized clDNN OpenCL sources dump output directory. **Deprecated**. Will be removed in the next release | +| `KEY_DUMP_KERNELS` | `YES` / `NO` | `NO` | Dump the final kernels used for custom layers. **Deprecated**. Will be removed in the next release | +| `KEY_TUNING_MODE` | `TUNING_DISABLED`
`TUNING_CREATE`
`TUNING_USE_EXISTING` | `TUNING_DISABLED` | Disable inference kernel tuning
Create tuning file (expect much longer runtime)
Use an existing tuning file. **Deprecated**. Will be removed in the next release | +| `KEY_TUNING_FILE` | `""` | `""` | Tuning file to create / use. **Deprecated**. Will be removed in the next release | ## GPU Context and Video Memory Sharing RemoteBlob API diff --git a/docs/IE_DG/supported_plugins/Supported_Devices.md b/docs/IE_DG/supported_plugins/Supported_Devices.md index ed8cabec076..e1140ae4b74 100644 --- a/docs/IE_DG/supported_plugins/Supported_Devices.md +++ b/docs/IE_DG/supported_plugins/Supported_Devices.md @@ -9,11 +9,11 @@ The Inference Engine provides unique capabilities to infer deep learning models | Plugin | Device types | |------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------| -|[GPU plugin](CL_DNN.md) |Intel® Processor Graphics, including Intel® HD Graphics and Intel® Iris® Graphics | +|[GPU plugin](GPU.md) |Intel® Processor Graphics, including Intel® HD Graphics and Intel® Iris® Graphics | |[CPU plugin](CPU.md) |Intel® Xeon® with Intel® Advanced Vector Extensions 2 (Intel® AVX2), Intel® Advanced Vector Extensions 512 (Intel® AVX-512), and AVX512_BF16, Intel® Core™ Processors with Intel® AVX2, Intel® Atom® Processors with Intel® Streaming SIMD Extensions (Intel® SSE) | |[VPU plugins](VPU.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs | |[GNA plugin](GNA.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Speech Enabling Developer Kit, Amazon Alexa* Premium Far-Field Developer Kit, Intel® Pentium® Silver J5005 Processor, Intel® Pentium® Silver N5000 Processor, Intel® Celeron® J4005 Processor, Intel® Celeron® J4105 Processor, Intel® Celeron® Processor N4100, Intel® Celeron® Processor N4000, Intel® Core™ i3-8121U Processor, Intel® Core™ i7-1065G7 Processor, Intel® Core™ i7-1060G7 Processor, Intel® Core™ i5-1035G4 Processor, Intel® Core™ i5-1035G7 Processor, Intel® Core™ i5-1035G1 Processor, Intel® Core™ i5-1030G7 Processor, Intel® Core™ i5-1030G4 Processor, Intel® Core™ i3-1005G1 Processor, Intel® Core™ i3-1000G1 Processor, Intel® Core™ i3-1000G4 Processor| -|[Multi-Device plugin](MULTI.md) |Multi-Device plugin enables simultaneous inference of the same network on several Intel® devices in parallel | +|[Multi-Device plugin](MULTI.md) |Multi-Device plugin enables simultaneous inference of the same network on several Intel® devices in parallel | |[Heterogeneous plugin](HETERO.md) |Heterogeneous plugin enables automatic inference splitting between several Intel® devices (for example if a device doesn't [support certain layers](#supported-layers)). | Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/). @@ -60,7 +60,7 @@ For example, the CHW value at index (c,h,w) is physically located at index (c\*H |GNA plugin |Supported |Supported |Not supported |
\* - currently, only limited set of topologies might benefit from enabling I8 model on GPU
For [Multi-Device](MULTI.md) and [Heterogeneous](HETERO.md) execution -the supported models formats depends on the actual underlying devices. _Generally, FP16 is preferable as it is most ubiquitous and performant_. +the supported models formats depends on the actual underlying devices. _Generally, FP16 is preferable as it is most ubiquitous and performant_. ### Supported Input Precision @@ -73,7 +73,7 @@ the supported models formats depends on the actual underlying devices. _Generall
\* - Supported via `SetBlob` only, `GetBlob` returns FP32
For [Multi-Device](MULTI.md) and [Heterogeneous](HETERO.md) execution -the supported input precision depends on the actual underlying devices. _Generally, U8 is preferable as it is most ubiquitous_. +the supported input precision depends on the actual underlying devices. _Generally, U8 is preferable as it is most ubiquitous_. ### Supported Output Precision @@ -84,7 +84,7 @@ the supported input precision depends on the actual underlying devices. _Genera |VPU plugins |Supported |Supported | |GNA plugin |Supported |Not supported | For [Multi-Device](MULTI.md) and [Heterogeneous](HETERO.md) execution -the supported output precision depends on the actual underlying devices. _Generally, FP32 is preferable as it is most ubiquitous_. +the supported output precision depends on the actual underlying devices. _Generally, FP32 is preferable as it is most ubiquitous_. ### Supported Input Layout diff --git a/docs/IE_PLUGIN_DG/ExecutableNetwork.md b/docs/IE_PLUGIN_DG/ExecutableNetwork.md index c5bfd889857..ae82b05e4ed 100644 --- a/docs/IE_PLUGIN_DG/ExecutableNetwork.md +++ b/docs/IE_PLUGIN_DG/ExecutableNetwork.md @@ -49,20 +49,15 @@ The function accepts a const shared pointer to `ngraph::Function` object and per This constructor creates a backend specific graph by importing from a stream object: -> **NOTE**: The export of backend specific graph is done in the `ExportImpl` method, and data formats must be the same for both import and export. +> **NOTE**: The export of backend specific graph is done in the `Export` method, and data formats must be the same for both import and export. @snippet src/template_executable_network.cpp executable_network:ctor_import_stream -### `ExportImpl()` - -**Implementation details:** -Base InferenceEngine::ExecutableNetworkThreadSafeDefault class implements the public InferenceEngine::ExecutableNetworkThreadSafeDefault::Export method as following: -- Writes `_plugin->GetName()` to the `model` stream. -- Calls the `ExportImpl` method defined in a derived class to dump a backend specific graph. +### `Export()` The implementation of the method should write all data to the `model` stream, which is required to import a backend specific graph later in the `Plugin::Import` method: -@snippet src/template_executable_network.cpp executable_network:export_impl +@snippet src/template_executable_network.cpp executable_network:export ### `CreateInferRequest()` diff --git a/docs/IE_PLUGIN_DG/Plugin.md b/docs/IE_PLUGIN_DG/Plugin.md index cadc8660fd3..6003eb691fc 100644 --- a/docs/IE_PLUGIN_DG/Plugin.md +++ b/docs/IE_PLUGIN_DG/Plugin.md @@ -159,21 +159,13 @@ The snippet below provides an example of the implementation for `GetMetric`: > **NOTE**: If an unsupported metric key is passed to the function, it must throw an exception. -### `ImportNetworkImpl()` +### `ImportNetwork()` The importing network mechanism allows to import a previously exported backend specific graph and wrap it using an [ExecutableNetwork](@ref executable_network) object. This functionality is useful if backend specific graph compilation takes significant time and/or cannot be done on a target host device due to other reasons. -**Implementation details:** The base plugin class InferenceEngine::IInferencePlugin implements InferenceEngine::IInferencePlugin::ImportNetwork -as follows: exports a device type (InferenceEngine::IInferencePlugin::_pluginName) and then calls `ImportNetworkImpl`, -which is implemented in a derived class. 
-If a plugin cannot use the base implementation InferenceEngine::IInferencePlugin::ImportNetwork, it can override base -implementation and define an output blob structure up to its needs. This -can be useful if a plugin exports a blob in a special format for integration with other frameworks -where a common Inference Engine header from a base class implementation is not appropriate. - During export of backend specific graph using `ExecutableNetwork::Export`, a plugin may export any type of information it needs to import a compiled graph properly and check its correctness. For example, the export information may include: diff --git a/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md b/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md index f9aef04a0a9..bb599cf93b5 100644 --- a/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md +++ b/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md @@ -627,4 +627,16 @@ It means that you trying to convert the topology which contains '_contrib_box_nm }); -\endhtmlonly \ No newline at end of file +\endhtmlonly + +#### 103. What does the message "ModelOptimizer is not able to parse *.caffemodel" mean? + +If a '*.caffemodel' file exists and it is correct, the error possibly occured due to the use of Python protobuf implementation. In some cases, it shows error message during model parsing, for example: "'utf-8' codec can't decode byte 0xe0 in position 4: invalid continuation byte in field: mo_caffe.SpatialTransformerParameter.transform_type". You can either use Python 3.6/3.7 or build 'cpp' implementation of protobuf yourself for your version of Python. For the complete instructions about building `protobuf` from sources, see the appropriate section in [Converting a Model to Intermediate Representation](Config_Model_Optimizer.md). + +#### 104. What does the message "SyntaxError: 'yield' inside list comprehension" during MxNet\* model conversion mean? + +The issue "SyntaxError: 'yield' inside list comprehension" might occur during converting MXNet\* models (mobilefacedet-v1-mxnet, brain-tumor-segmentation-0001) on Windows* platform with Python* 3.8 environment. This issue is caused by API changes for `yield expression` in Python 3.8. +The following workarounds are suggested to resolve this issue: +1. Use Python 3.6/3.7 to convert MXNet\* models on Windows +2. Update MXNet: pip install mxnet=1.7.0.post2 +Note that you might have conflicts between previously installed PyPI dependencies. \ No newline at end of file diff --git a/docs/doxygen/ie_docs.xml b/docs/doxygen/ie_docs.xml index f287487913d..503f9f38986 100644 --- a/docs/doxygen/ie_docs.xml +++ b/docs/doxygen/ie_docs.xml @@ -88,6 +88,7 @@ limitations under the License. + @@ -100,6 +101,8 @@ limitations under the License. + + @@ -293,7 +296,6 @@ limitations under the License. - @@ -303,7 +305,7 @@ limitations under the License. - + diff --git a/docs/install_guides/installing-openvino-apt.md b/docs/install_guides/installing-openvino-apt.md index 66518696991..1bd734bd856 100644 --- a/docs/install_guides/installing-openvino-apt.md +++ b/docs/install_guides/installing-openvino-apt.md @@ -2,7 +2,7 @@ This guide provides installation steps for Intel® Distribution of OpenVINO™ toolkit for Linux* distributed through the APT repository. -> **IMPORTANT**: By downloading and using this container and the included software, you agree to the terms and conditions of the [software license agreements](https://software.intel.com/en-us/license/eula-for-intel-software-development-products). 
Please, review the content inside the `/licensing` folder for more details. +> **IMPORTANT**: By downloading and using this container and the included software, you agree to the terms and conditions of the [software license agreements](https://software.intel.com/content/dam/develop/external/us/en/documents/intel-openvino-license-agreements.pdf). Please, review the content inside the `/licensing` folder for more details. > **NOTE**: Intel® Graphics Compute Runtime for OpenCL™ is not a part of OpenVINO™ APT distribution. You can install it from the [Intel® Graphics Compute Runtime for OpenCL™ GitHub repo](https://github.com/intel/compute-runtime). diff --git a/docs/install_guides/installing-openvino-windows.md b/docs/install_guides/installing-openvino-windows.md index 1a1a31a07c6..054950292b6 100644 --- a/docs/install_guides/installing-openvino-windows.md +++ b/docs/install_guides/installing-openvino-windows.md @@ -248,8 +248,8 @@ Or proceed to the Get Started to get started with run > **NOTE**: These steps are required only if you want to use an Intel® integrated GPU. -If your applications offload computation to **Intel® Integrated Graphics**, you must have the latest version of Intel Graphics Driver for Windows installed for your hardware. -[Download and install a higher version](http://downloadcenter.intel.com/product/80939/Graphics-Drivers). +If your applications offload computation to **Intel® Integrated Graphics**, you must have the Intel Graphics Driver for Windows installed for your hardware. +[Download and install the recommended version](https://downloadcenter.intel.com/download/30079/Intel-Graphics-Windows-10-DCH-Drivers). To check if you have this driver installed: @@ -265,8 +265,6 @@ To check if you have this driver installed: ![](../img/DeviceDriverVersion.PNG) -> **NOTE**: To use the **Intel® Iris® Xe MAX Graphics**, see the [Drivers & Software](https://downloadcenter.intel.com/download/29993/Intel-Iris-Xe-MAX-Dedicated-Graphics-Drivers?product=80939) page for driver downloads and installation instructions. - You are done updating your device driver and are ready to use your GPU. Proceed to the Get Started to get started with running code samples and demo applications. ### Optional: Additional Installation Steps for the Intel® Vision Accelerator Design with Intel® Movidius™ VPUs diff --git a/docs/install_guides/installing-openvino-yum.md b/docs/install_guides/installing-openvino-yum.md index 27e464d1b84..c326cb93a0f 100644 --- a/docs/install_guides/installing-openvino-yum.md +++ b/docs/install_guides/installing-openvino-yum.md @@ -2,7 +2,7 @@ This guide provides installation steps for the Intel® Distribution of OpenVINO™ toolkit for Linux* distributed through the YUM repository. -> **IMPORTANT**: By downloading and using this container and the included software, you agree to the terms and conditions of the [software license agreements](https://software.intel.com/en-us/license/eula-for-intel-software-development-products). Please, review the content inside the `/licensing` folder for more details. +> **IMPORTANT**: By downloading and using this container and the included software, you agree to the terms and conditions of the [software license agreements](https://software.intel.com/content/dam/develop/external/us/en/documents/intel-openvino-license-agreements.pdf). Please, review the content inside the `/licensing` folder for more details. > **NOTE**: Intel® Graphics Compute Runtime for OpenCL™ is not a part of OpenVINO™ YUM distribution. 
You can install it from the [Intel® Graphics Compute Runtime for OpenCL™ GitHub repo](https://github.com/intel/compute-runtime). diff --git a/docs/model_server/README.md b/docs/model_server/README.md index ae5d03914ab..e6c7144f3cb 100644 --- a/docs/model_server/README.md +++ b/docs/model_server/README.md @@ -1,29 +1,29 @@ # OpenVINO™ Model Server {#openvino_docs_ovms} -OpenVINO™ Model Server (OVMS) is a scalable, high-performance solution for serving machine learning models optimized for Intel® architectures. -The server provides an inference service via gRPC or REST API - making it easy to deploy new algorithms and AI experiments using the same -architecture as [TensorFlow* Serving](https://github.com/tensorflow/serving) for any models trained in a framework that is supported -by [OpenVINO](https://software.intel.com/en-us/openvino-toolkit). +OpenVINO™ Model Server (OVMS) is a scalable, high-performance solution for serving machine learning models optimized for Intel® architectures. +The server provides an inference service via gRPC or REST API - making it easy to deploy new algorithms and AI experiments using the same +architecture as [TensorFlow* Serving](https://github.com/tensorflow/serving) for any models trained in a framework that is supported +by [OpenVINO](https://software.intel.com/en-us/openvino-toolkit). The server implements gRPC and REST API framework with data serialization and deserialization using TensorFlow Serving API, and OpenVINO™ as the inference execution provider. Model repositories may reside on a locally accessible file system (for example, NFS), Google Cloud Storage\* (GCS), Amazon S3\*, MinIO\*, or Azure Blob Storage\*. - + OVMS is now implemented in C++ and provides much higher scalability compared to its predecessor in the Python version. You can take advantage of all the power of Xeon® CPU capabilities or AI accelerators and expose it over the network interface. Read the [release notes](https://github.com/openvinotoolkit/model_server/releases) to find out what's new in the C++ version. Review the [Architecture Concept](https://github.com/openvinotoolkit/model_server/blob/main/docs/architecture.md) document for more details. -A few key features: +A few key features: - Support for multiple frameworks. Serve models trained in popular formats such as Caffe\*, TensorFlow\*, MXNet\*, and ONNX*. - Deploy new [model versions](https://github.com/openvinotoolkit/model_server/blob/main/docs/docker_container.md#model-version-policy) without changing client code. -- Support for AI accelerators including [Intel Movidius Myriad VPUs](../IE_DG/supported_plugins/VPU), -[GPU](../IE_DG/supported_plugins/CL_DNN), and [HDDL](../IE_DG/supported_plugins/HDDL). +- Support for AI accelerators including [Intel Movidius Myriad VPUs](../IE_DG/supported_plugins/VPU.md), +[GPU](../IE_DG/supported_plugins/GPU.md), and [HDDL](../IE_DG/supported_plugins/HDDL.md). - The server can be enabled both on [Bare Metal Hosts](https://github.com/openvinotoolkit/model_server/blob/main/docs/host.md) or in [Docker* containers](https://github.com/openvinotoolkit/model_server/blob/main/docs/docker_container.md). -- [Kubernetes deployments](https://github.com/openvinotoolkit/model_server/blob/main/deploy). The server can be deployed in a Kubernetes cluster allowing the inference service to scale horizontally and ensure high availability. -- [Model reshaping](https://github.com/openvinotoolkit/model_server/blob/main/docs/docker_container.md#model-reshaping). The server supports reshaping models in runtime. 
+- [Kubernetes deployments](https://github.com/openvinotoolkit/model_server/blob/main/deploy). The server can be deployed in a Kubernetes cluster allowing the inference service to scale horizontally and ensure high availability. +- [Model reshaping](https://github.com/openvinotoolkit/model_server/blob/main/docs/docker_container.md#model-reshaping). The server supports reshaping models in runtime. - [Model ensemble](https://github.com/openvinotoolkit/model_server/blob/main/docs/ensemble_scheduler.md) (preview). Connect multiple models to deploy complex processing solutions and reduce overhead of sending data back and forth. > **NOTE**: OVMS has been tested on CentOS\* and Ubuntu\*. Publicly released [Docker images](https://hub.docker.com/r/openvino/model_server) are based on CentOS. @@ -68,30 +68,30 @@ For more detailed guides on using the Model Server in various scenarios, visit t ## API Documentation -### GRPC +### GRPC -OpenVINO™ Model Server gRPC API is documented in the proto buffer files in [tensorflow_serving_api](https://github.com/tensorflow/serving/tree/r2.2/tensorflow_serving/apis). +OpenVINO™ Model Server gRPC API is documented in the proto buffer files in [tensorflow_serving_api](https://github.com/tensorflow/serving/tree/r2.2/tensorflow_serving/apis). -> **NOTE:** The implementations for `Predict`, `GetModelMetadata`, and `GetModelStatus` function calls are currently available. +> **NOTE:** The implementations for `Predict`, `GetModelMetadata`, and `GetModelStatus` function calls are currently available. > These are the most generic function calls and should address most of the usage scenarios. -[Predict proto](https://github.com/tensorflow/serving/blob/r2.2/tensorflow_serving/apis/predict.proto) defines two message specifications: `PredictRequest` and `PredictResponse` used while calling Prediction endpoint. -* `PredictRequest` specifies information about the model spec, that is name and version, and a map of input data serialized via +[Predict proto](https://github.com/tensorflow/serving/blob/r2.2/tensorflow_serving/apis/predict.proto) defines two message specifications: `PredictRequest` and `PredictResponse` used while calling Prediction endpoint. +* `PredictRequest` specifies information about the model spec, that is name and version, and a map of input data serialized via [TensorProto](https://github.com/tensorflow/tensorflow/blob/r2.2/tensorflow/core/framework/tensor.proto) to a string format. -* `PredictResponse` includes a map of outputs serialized by +* `PredictResponse` includes a map of outputs serialized by [TensorProto](https://github.com/tensorflow/tensorflow/blob/r2.2/tensorflow/core/framework/tensor.proto) and information about the used model spec. - + [Get Model Metadata proto](https://github.com/tensorflow/serving/blob/r2.2/tensorflow_serving/apis/get_model_metadata.proto) defines three message definitions used while calling Metadata endpoint: `SignatureDefMap`, `GetModelMetadataRequest`, `GetModelMetadataResponse`. A function call `GetModelMetadata` accepts model spec information as input and returns Signature Definition content in the format similar to TensorFlow Serving. [Get Model Status proto](https://github.com/tensorflow/serving/blob/r2.2/tensorflow_serving/apis/get_model_status.proto) defines three message definitions used while calling Status endpoint: - `GetModelStatusRequest`, `ModelVersionStatus`, `GetModelStatusResponse` that report all exposed versions including their state in their lifecycle. 
+ `GetModelStatusRequest`, `ModelVersionStatus`, `GetModelStatusResponse` that report all exposed versions including their state in their lifecycle. Refer to the [example client code](https://github.com/openvinotoolkit/model_server/blob/main/example_client) to learn how to use this API and submit the requests using the gRPC interface. -Using the gRPC interface is recommended for optimal performance due to its faster implementation of input data deserialization. It enables you to achieve lower latency, especially with larger input messages like images. +Using the gRPC interface is recommended for optimal performance due to its faster implementation of input data deserialization. It enables you to achieve lower latency, especially with larger input messages like images. ### REST @@ -99,9 +99,9 @@ OpenVINO™ Model Server RESTful API follows the documentation from the [Ten Both row and column format of the requests are implemented. -> **NOTE**: Just like with gRPC, only the implementations for `Predict`, `GetModelMetadata`, and `GetModelStatus` function calls are currently available. +> **NOTE**: Just like with gRPC, only the implementations for `Predict`, `GetModelMetadata`, and `GetModelStatus` function calls are currently available. -Only the numerical data types are supported. +Only the numerical data types are supported. Review the exemplary clients below to find out more how to connect and run inference requests. @@ -110,9 +110,9 @@ REST API is recommended when the primary goal is in reducing the number of clien ## Known Limitations -* Currently, `Predict`, `GetModelMetadata`, and `GetModelStatus` calls are implemented using the TensorFlow Serving API. +* Currently, `Predict`, `GetModelMetadata`, and `GetModelStatus` calls are implemented using the TensorFlow Serving API. * `Classify`, `Regress`, and `MultiInference` are not included. -* `Output_filter` is not effective in the `Predict` call. All outputs defined in the model are returned to the clients. +* `Output_filter` is not effective in the `Predict` call. All outputs defined in the model are returned to the clients. ## OpenVINO Model Server Contribution Policy diff --git a/docs/ops/arithmetic/Log_1.md b/docs/ops/arithmetic/Log_1.md index 6f33b002b69..f1314919821 100644 --- a/docs/ops/arithmetic/Log_1.md +++ b/docs/ops/arithmetic/Log_1.md @@ -6,28 +6,28 @@ **Short description**: *Log* performs element-wise natural logarithm operation with given tensor. +**Detailed description**: *Log* does the following with the input tensor *a*: + +\f[ +a_{i} = log(a_{i}) +\f] + **Attributes**: No attributes available. **Inputs** -* **1**: An tensor of type T. **Required.** +* **1**: An tensor of type T and arbitrary shape. **Required.** **Outputs** -* **1**: The result of element-wise log operation. A tensor of type T. +* **1**: The result of element-wise log operation. A tensor of type T and the same shape as input. **Types** * *T*: any numeric type. -*Log* does the following with the input tensor *a*: - -\f[ -a_{i} = log(a_{i}) -\f] - **Examples** *Example 1* diff --git a/docs/ops/opset8.md b/docs/ops/opset8.md new file mode 100644 index 00000000000..8f43927b5ec --- /dev/null +++ b/docs/ops/opset8.md @@ -0,0 +1,169 @@ +# Operation Set `opset8` Specification {#openvino_docs_ops_opset8} + +This specification document describes the `opset8` operation set supported in OpenVINO™. +Support for each particular operation from the list below depends on the capabilities of an inference plugin +and may vary among different hardware platforms and devices. 
Examples of operation instances are provided as IR V10 xml +snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding nGraph operation classes +declared in `namespace opset8`. + + +## Table of Contents + +* [Abs](arithmetic/Abs_1.md) +* [Acos](arithmetic/Acos_1.md) +* [Acosh](arithmetic/Acosh_3.md) +* [AdaptiveAvgPool](pooling/AdaptiveAvgPool_8.md) +* [AdaptiveMaxPool](pooling/AdaptiveMaxPool_8.md) +* [Add](arithmetic/Add_1.md) +* [Asin](arithmetic/Asin_1.md) +* [Asinh](arithmetic/Asinh_3.md) +* [Assign](infrastructure/Assign_3.md) +* [Atan](arithmetic/Atan_1.md) +* [Atanh](arithmetic/Atanh_3.md) +* [AvgPool](pooling/AvgPool_1.md) +* [BatchNormInference](normalization/BatchNormInference_5.md) +* [BatchToSpace](movement/BatchToSpace_2.md) +* [BinaryConvolution](convolution/BinaryConvolution_1.md) +* [Broadcast](movement/Broadcast_3.md) +* [Bucketize](condition/Bucketize_3.md) +* [CTCGreedyDecoder](sequence/CTCGreedyDecoder_1.md) +* [CTCGreedyDecoderSeqLen](sequence/CTCGreedyDecoderSeqLen_6.md) +* [CTCLoss](sequence/CTCLoss_4.md) +* [Ceiling](arithmetic/Ceiling_1.md) +* [Clamp](activation/Clamp_1.md) +* [Concat](movement/Concat_1.md) +* [Constant](infrastructure/Constant_1.md) +* [Convert](type/Convert_1.md) +* [ConvertLike](type/ConvertLike_1.md) +* [Convolution](convolution/Convolution_1.md) +* [ConvolutionBackpropData](convolution/ConvolutionBackpropData_1.md) +* [Cos](arithmetic/Cos_1.md) +* [Cosh](arithmetic/Cosh_1.md) +* [CumSum](arithmetic/CumSum_3.md) +* [DeformableConvolution](convolution/DeformableConvolution_1.md) +* [DeformablePSROIPooling](detection/DeformablePSROIPooling_1.md) +* [DepthToSpace](movement/DepthToSpace_1.md) +* [DetectionOutput](detection/DetectionOutput_1.md) +* [DFT](signals/DFT_7.md) +* [Divide](arithmetic/Divide_1.md) +* [Einsum](matrix/Einsum_7.md) +* [Elu](activation/Elu_1.md) +* [EmbeddingBagOffsetsSum](sparse/EmbeddingBagOffsetsSum_3.md) +* [EmbeddingBagPackedSum](sparse/EmbeddingBagPackedSum_3.md) +* [EmbeddingSegmentsSum](sparse/EmbeddingSegmentsSum_3.md) +* [Equal](comparison/Equal_1.md) +* [Erf](arithmetic/Erf_1.md) +* [Exp](activation/Exp_1.md) +* [ExperimentalDetectronDetectionOutput_6](detection/ExperimentalDetectronDetectionOutput_6.md) +* [ExperimentalDetectronGenerateProposalsSingleImage_6](detection/ExperimentalDetectronGenerateProposalsSingleImage_6.md) +* [ExperimentalDetectronPriorGridGenerator_6](detection/ExperimentalDetectronPriorGridGenerator_6.md) +* [ExperimentalDetectronROIFeatureExtractor_6](detection/ExperimentalDetectronROIFeatureExtractor_6.md) +* [ExperimentalDetectronTopKROIs_6](sort/ExperimentalDetectronTopKROIs_6.md) +* [ExtractImagePatches](movement/ExtractImagePatches_3.md) +* [FakeQuantize](quantization/FakeQuantize_1.md) +* [Floor](arithmetic/Floor_1.md) +* [FloorMod](arithmetic/FloorMod_1.md) +* [Gather](movement/Gather_7.md) +* [GatherElements](movement/GatherElements_6.md) +* [GatherND_5](movement/GatherND_5.md) +* [GatherTree](movement/GatherTree_1.md) +* [Gelu](activation/GELU_7.md) +* [Greater](comparison/Greater_1.md) +* [GreaterEqual](comparison/GreaterEqual_1.md) +* [GRN](normalization/GRN_1.md) +* [GroupConvolution](convolution/GroupConvolution_1.md) +* [GroupConvolutionBackpropData](convolution/GroupConvolutionBackpropData_1.md) +* [GRUCell](sequence/GRUCell_3.md) +* [GRUSequence](sequence/GRUSequence_5.md) +* [HardSigmoid](activation/HardSigmoid_1.md) +* [HSigmoid](activation/HSigmoid_5.md) +* [HSwish](activation/HSwish_4.md) +* [IDFT](signals/IDFT_7.md) +* 
[Interpolate](image/Interpolate_4.md) +* [Less](comparison/Less_1.md) +* [LessEqual](comparison/LessEqual_1.md) +* [Log](arithmetic/Log_1.md) +* [LogicalAnd](logical/LogicalAnd_1.md) +* [LogicalNot](logical/LogicalNot_1.md) +* [LogicalOr](logical/LogicalOr_1.md) +* [LogicalXor](logical/LogicalXor_1.md) +* [LogSoftmax](activation/LogSoftmax_5.md) +* [Loop](infrastructure/Loop_5.md) +* [LRN](normalization/LRN_1.md) +* [LSTMCell](sequence/LSTMCell_1.md) +* [LSTMSequence](sequence/LSTMSequence_1.md) +* [MatMul](matrix/MatMul_1.md) +* [MaxPool](pooling/MaxPool_1.md) +* [Maximum](arithmetic/Maximum_1.md) +* [Minimum](arithmetic/Minimum_1.md) +* [Mish](activation/Mish_4.md) +* [Mod](arithmetic/Mod_1.md) +* [MVN](normalization/MVN_6.md) +* [Multiply](arithmetic/Multiply_1.md) +* [Negative](arithmetic/Negative_1.md) +* [NonMaxSuppression](sort/NonMaxSuppression_5.md) +* [NonZero](condition/NonZero_3.md) +* [NormalizeL2](normalization/NormalizeL2_1.md) +* [NotEqual](comparison/NotEqual_1.md) +* [OneHot](sequence/OneHot_1.md) +* [Pad](movement/Pad_1.md) +* [Parameter](infrastructure/Parameter_1.md) +* [Power](arithmetic/Power_1.md) +* [PReLU](activation/PReLU_1.md) +* [PriorBoxClustered](detection/PriorBoxClustered_1.md) +* [PriorBox](detection/PriorBox_1.md) +* [Proposal](detection/Proposal_4.md) +* [PSROIPooling](detection/PSROIPooling_1.md) +* [Range](generation/Range_4.md) +* [ReLU](activation/ReLU_1.md) +* [ReadValue](infrastructure/ReadValue_3.md) +* [ReduceL1](reduction/ReduceL1_4.md) +* [ReduceL2](reduction/ReduceL2_4.md) +* [ReduceLogicalAnd](reduction/ReduceLogicalAnd_1.md) +* [ReduceLogicalOr](reduction/ReduceLogicalOr_1.md) +* [ReduceMax](reduction/ReduceMax_1.md) +* [ReduceMean](reduction/ReduceMean_1.md) +* [ReduceMin](reduction/ReduceMin_1.md) +* [ReduceProd](reduction/ReduceProd_1.md) +* [ReduceSum](reduction/ReduceSum_1.md) +* [RegionYolo](detection/RegionYolo_1.md) +* [ReorgYolo](detection/ReorgYolo_1.md) +* [Reshape](shape/Reshape_1.md) +* [Result](infrastructure/Result_1.md) +* [ReverseSequence](movement/ReverseSequence_1.md) +* [RNNCell](sequence/RNNCell_3.md) +* [RNNSequence](sequence/RNNSequence_5.md) +* [ROIAlign](detection/ROIAlign_3.md) +* [ROIPooling](detection/ROIPooling_1.md) +* [Roll](movement/Roll_7.md) +* [Round](arithmetic/Round_5.md) +* [ScatterElementsUpdate](movement/ScatterElementsUpdate_3.md) +* [ScatterNDUpdate](movement/ScatterNDUpdate_3.md) +* [ScatterUpdate](movement/ScatterUpdate_3.md) +* [Select](condition/Select_1.md) +* [Selu](activation/Selu_1.md) +* [ShapeOf](shape/ShapeOf_3.md) +* [ShuffleChannels](movement/ShuffleChannels_1.md) +* [Sigmoid](activation/Sigmoid_1.md) +* [Sign](arithmetic/Sign_1.md) +* [Sin](arithmetic/Sin_1.md) +* [Sinh](arithmetic/Sinh_1.md) +* [SoftMax](activation/SoftMax_1.md) +* [SoftPlus](activation/SoftPlus_4.md) +* [SpaceToBatch](movement/SpaceToBatch_2.md) +* [SpaceToDepth](movement/SpaceToDepth_1.md) +* [Split](movement/Split_1.md) +* [Sqrt](arithmetic/Sqrt_1.md) +* [SquaredDifference](arithmetic/SquaredDifference_1.md) +* [Squeeze](shape/Squeeze_1.md) +* [StridedSlice](movement/StridedSlice_1.md) +* [Subtract](arithmetic/Subtract_1.md) +* [Swish](activation/Swish_4.md) +* [Tan](arithmetic/Tan_1.md) +* [Tanh](arithmetic/Tanh_1.md) +* [TensorIterator](infrastructure/TensorIterator_1.md) +* [Tile](movement/Tile_1.md) +* [TopK](sort/TopK_3.md) +* [Transpose](movement/Transpose_1.md) +* [Unsqueeze](shape/Unsqueeze_1.md) +* [VariadicSplit](movement/VariadicSplit_1.md) diff --git a/docs/ops/pooling/AdaptiveAvgPool_8.md 
b/docs/ops/pooling/AdaptiveAvgPool_8.md new file mode 100644 index 00000000000..beb2ec30492 --- /dev/null +++ b/docs/ops/pooling/AdaptiveAvgPool_8.md @@ -0,0 +1,70 @@ +## AdaptiveAvgPool {#openvino_docs_ops_pooling_AdaptiveAvgPool_8} + +**Versioned name**: *AdaptiveAvgPool-8* + +**Category**: *Pooling* + +**Short description**: Applies average pooling with adaptive kernel size over the input. + +**Detailed description**: This operation calculates the output based on the first input and `output_size` determined by the second input. +The kernel dimensions are calculated using the following formulae for the `NCDHW` input case: + +\f[ +\begin{array}{lcl} +d_{start} &=& floor(i*D_{in}/D_{out})\\ +d_{end} &=& ceil((i+1)*D_{in}/D_{out})\\ +h_{start} &=& floor(j*H_{in}/H_{out})\\ +h_{end} &=& ceil((j+1)*H_{in}/H_{out})\\ +w_{start} &=& floor(k*W_{in}/W_{out})\\ +w_{end} &=& ceil((k+1)*W_{in}/W_{out}) +\end{array} +\f] + +The output is calculated with the following formula: + +\f[ +Output(i,j,k) = \frac{Input[d_{start}:d_{end}, h_{start}:h_{end}, w_{start}:w_{end}]}{(d_{end}-d_{start})*(h_{end}-h_{start})*(w_{end}-w_{start})} +\f] + +**Inputs**: + +* **1**: 3D, 4D, or 5D input tensor of shape `[N, C, H]`, `[N, C, H, W]` or `[N, C, D, H, W]` and type *T*. Required. +* **2**: 1D tensor describing output shape for spatial dimensions. Can be `[H_out]` for 3D input, `[H_out, W_out]` for 4D input, `[D_out, H_out, W_out]` for 5D input and of type *T_SHAPE*. Required. + +**Outputs**: + +* **1**: Output of type *T* and shape `[N, C, H_out]`, `[N, C, H_out, W_out]` or `[N, C, D_out, H_out, W_out]`. + +**Types** + +* *T*: floating-point type. +* *T_SHAPE*: `int32` or `int64`. + +**Examples** + +```xml + + + + + 1 + 3 + 32 + 32 + + + + + 2 + + + + + 1 + 3 + 16 + 16 + + + +``` diff --git a/docs/ops/pooling/AdaptiveMaxPool_8.md b/docs/ops/pooling/AdaptiveMaxPool_8.md new file mode 100644 index 00000000000..d7ad9a42412 --- /dev/null +++ b/docs/ops/pooling/AdaptiveMaxPool_8.md @@ -0,0 +1,87 @@ +## AdaptiveMaxPool {#openvino_docs_ops_pooling_AdaptiveMaxPool_8} + +**Versioned name**: *AdaptiveMaxPool-8* + +**Category**: *Pooling* + +**Short description**: Applies max pooling with adaptive kernel size over the input. + +**Detailed description**: This operation calculates the output based on the first input and `output_size` determined by the second input. +The kernel dimensions are calculated using the following formulae for the `NCDHW` input case: + +\f[ +\begin{array}{lcl} +d_{start} &=& floor(i*D_{in}/D_{out})\\ +d_{end} &=& ceil((i+1)*D_{in}/D_{out})\\ +h_{start} &=& floor(j*H_{in}/H_{out})\\ +h_{end} &=& ceil((j+1)*H_{in}/H_{out})\\ +w_{start} &=& floor(k*W_{in}/W_{out})\\ +w_{end} &=& ceil((k+1)*W_{in}/W_{out}) +\end{array} +\f] + +The output is calculated following this formula: + +\f[ +Output(i,j,k) = max(Input[d_{start}:d_{end}, h_{start}:h_{end}, w_{start}:w_{end}]) +\f] + +**Attributes**: + +* *index_element_type* + + * **Description**: the type of the second output containing indices + * **Range of values**: "i64" or "i32" + * **Type**: string + * **Default value**: "i64" + * **Required**: *No* + +**Inputs**: + +* **1**: 3D, 4D, or 5D input tensor of shape `[N, C, H]`, `[N, C, H, W]` or `[N, C, D, H, W]` and type *T*. Required. +* **2**: 1D tensor describing output shape for spatial dimensions. Can be `[H_out]` for 3D input, `[H_out, W_out]` for 4D input, `[D_out, H_out, W_out]` for 5D input and of type *T_SHAPE*. Required. 
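For illustration only (not part of the operation specification), the floor/ceil window formulae given in the detailed descriptions above translate directly into code: AdaptiveAvgPool averages over each computed window, while AdaptiveMaxPool takes the maximum and records its flattened index. The sketch below prints the `[start, end)` window for each output position along a single spatial axis; the 32 to 16 sizes mirror the example shapes used on this page.

```cpp
#include <cmath>
#include <cstdio>

// Illustrative sketch: compute the adaptive pooling window [start, end)
// for each output index along one spatial axis, following the
// floor/ceil formulae from the detailed descriptions above.
int main() {
    const int in_size = 32;   // e.g. H_in of the first input
    const int out_size = 16;  // e.g. H_out requested via the second input
    for (int i = 0; i < out_size; ++i) {
        const int start = static_cast<int>(std::floor(1.0 * i * in_size / out_size));
        const int end   = static_cast<int>(std::ceil(1.0 * (i + 1) * in_size / out_size));
        std::printf("output %d <- input window [%d, %d)\n", i, start, end);
    }
    return 0;
}
```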
+ +**Outputs**: + +* **1**: Output of type *T* and shape `[N, C, H_out]`, `[N, C, H_out, W_out]` or `[N, C, D_out, H_out, W_out]`. +* **2**: Output of type specified by *index_element_type* and same shape as the first output containing indices of elements in the first output. The values of indices are computed as if input was flatten 1-D tensor, so the values are in the range `[0, N * C * H * W * D)`. + +**Types** + +* *T*: floating-point type. +* *T_SHAPE*: `int32` or `int64`. + +**Examples** + +```xml + + + + + 1 + 3 + 32 + 32 + + + + + 2 + + + + + 1 + 3 + 16 + 16 + + + 1 + 3 + 16 + 16 + + + +``` diff --git a/docs/optimization_guide/dldt_optimization_guide.md b/docs/optimization_guide/dldt_optimization_guide.md index e70c0365a41..9ece7fec93a 100644 --- a/docs/optimization_guide/dldt_optimization_guide.md +++ b/docs/optimization_guide/dldt_optimization_guide.md @@ -2,13 +2,13 @@ ## Introduction -The purpose of this document is to give you performance-related insights to every step of the network deployment process. +The purpose of this document is to give you performance-related insights to every step of the network deployment process. For information on the general workflow, refer to the documentation in See Also. For an example Inference Engine API snippet, see Request-Based API and “GetBlob” Idiom. ### Deep Learning Inference Engine Overview -Deep Learning Inference Engine is a part of Intel® Deep Learning Deployment Toolkit (Intel® DL Deployment Toolkit) and OpenVINO™ toolkit. Inference Engine facilitates deployment of deep learning solutions by delivering a unified, device-agnostic API. +Deep Learning Inference Engine is a part of Intel® Deep Learning Deployment Toolkit (Intel® DL Deployment Toolkit) and OpenVINO™ toolkit. Inference Engine facilitates deployment of deep learning solutions by delivering a unified, device-agnostic API. Below, there are the three main steps of the deployment process: @@ -50,7 +50,7 @@ When evaluating performance of your model with the Inference Engine, you must me ### Latency vs. Throughput -In the asynchronous case (see Request-Based API and “GetBlob” Idiom), the performance of an individual infer request is usually of less concern. Instead, you typically execute multiple requests asynchronously and measure the throughput in images per second by dividing the number of images that were processed by the processing time. +In the asynchronous case (see Request-Based API and “GetBlob” Idiom), the performance of an individual infer request is usually of less concern. Instead, you typically execute multiple requests asynchronously and measure the throughput in images per second by dividing the number of images that were processed by the processing time. In contrast, for the latency-oriented tasks, the time to a single frame is more important. Refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample, which allows latency vs. throughput measuring. @@ -114,23 +114,23 @@ The resulting IR precision, for instance, `FP16` or `FP32`, directly affects per ## Multi-Device Execution OpenVINO™ toolkit supports automatic multi-device execution, please see [MULTI-Device plugin description](../IE_DG/supported_plugins/MULTI.md). 
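A minimal C++ sketch of the multi-device pattern covered by the recommendations that follow (device ordering and querying the optimal number of requests from the loaded network) is shown here; the model path is a placeholder and the example assumes GPU is the faster device.

```cpp
#include <inference_engine.hpp>
#include <vector>

int main() {
    using namespace InferenceEngine;
    Core ie;
    // Placeholder model path; list the fastest device first, as recommended below.
    CNNNetwork network = ie.ReadNetwork("model.xml");
    ExecutableNetwork exec = ie.LoadNetwork(network, "MULTI:GPU,CPU");
    // Query the optimal number of infer requests from the loaded network
    // instead of hard-coding it in the application.
    auto nireq = exec.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
    std::vector<InferRequest> requests;
    for (unsigned int i = 0; i < nireq; ++i) {
        requests.push_back(exec.CreateInferRequest());
    }
    // Requests would then be started asynchronously (StartAsync/Wait) in creation order.
    return 0;
}
```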
-In the next chapter you can find the device-specific tips, while this section covers few recommendations +In the next chapter you can find the device-specific tips, while this section covers few recommendations for the multi-device execution: -- MULTI usually performs best when the fastest device is specified first in the list of the devices. - This is particularly important when the parallelism is not sufficient +- MULTI usually performs best when the fastest device is specified first in the list of the devices. + This is particularly important when the parallelism is not sufficient (e.g. the number of request in the flight is not enough to saturate all devices). -- It is highly recommended to query the optimal number of inference requests directly from the instance of the ExecutionNetwork - (resulted from the LoadNetwork call with the specific multi-device configuration as a parameter). -Please refer to the code of the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample for details. -- Notice that for example CPU+GPU execution performs better with certain knobs +- It is highly recommended to query the optimal number of inference requests directly from the instance of the ExecutionNetwork + (resulted from the LoadNetwork call with the specific multi-device configuration as a parameter). +Please refer to the code of the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample for details. +- Notice that for example CPU+GPU execution performs better with certain knobs which you can find in the code of the same [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample. - One specific example is disabling GPU driver polling, which in turn requires multiple GPU streams (which is already a default for the GPU) to amortize slower + One specific example is disabling GPU driver polling, which in turn requires multiple GPU streams (which is already a default for the GPU) to amortize slower inference completion from the device to the host. -- Multi-device logic always attempts to save on the (e.g. inputs) data copies between device-agnostic, user-facing inference requests - and device-specific 'worker' requests that are being actually scheduled behind the scene. - To facilitate the copy savings, it is recommended to start the requests in the order that they were created +- Multi-device logic always attempts to save on the (e.g. inputs) data copies between device-agnostic, user-facing inference requests + and device-specific 'worker' requests that are being actually scheduled behind the scene. + To facilitate the copy savings, it is recommended to start the requests in the order that they were created (with ExecutableNetwork's CreateInferRequest). - + ## Device-Specific Optimizations @@ -171,7 +171,7 @@ Notice that on a multi-socket machine, the bare minimum of streams for a latency In addition, you can play with the batch size to find the throughput sweet spot. -If your application is hard or impossible to change in accordance with the multiple-requests logic, consider the "multiple-instance" trick to improve the throughput: +If your application is hard or impossible to change in accordance with the multiple-requests logic, consider the "multiple-instance" trick to improve the throughput: - For multi-socket execution, it is recommended to set [`KEY_CPU_THREADS_NUM`](../IE_DG/supported_plugins/CPU.md) to the number of cores per socket, and run as many instances of the application as you have sockets. 
- Similarly, for extremely lightweight networks (running faster than 1ms) and/or many-core machines (16+ cores), try limiting the number of CPU inference threads to just `#‍phys` cores and further, while trying to saturate the machine with running multiple instances of the application. @@ -186,15 +186,15 @@ Inference Engine relies on the [Compute Library for Deep Neural Networks (clDNN) - If your application is simultaneously using the inference on the CPU or otherwise loads the host heavily, make sure that the OpenCL driver threads do not starve. You can use [CPU configuration options](../IE_DG/supported_plugins/CPU.md) to limit number of inference threads for the CPU plugin. - In the GPU-only scenario, a GPU driver might occupy a CPU core with spin-looped polling for completion. If the _CPU_ utilization is a concern, consider the `KEY_CLDND_PLUGIN_THROTTLE` configuration option. -> **NOTE**: See the [Benchmark App Sample](../../inference-engine/samples/benchmark_app/README.md) code for a usage example. -Notice that while disabling the polling, this option might reduce the GPU performance, so usually this option is used with multiple [GPU streams](../IE_DG/supported_plugins/CL_DNN.md). +> **NOTE**: See the [Benchmark App Sample](../../inference-engine/samples/benchmark_app/README.md) code for a usage example. +Notice that while disabling the polling, this option might reduce the GPU performance, so usually this option is used with multiple [GPU streams](../IE_DG/supported_plugins/GPU.md). ### Intel® Movidius™ Myriad™ X Visual Processing Unit and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs Since Intel® Movidius™ Myriad™ X Visual Processing Unit (Intel® Movidius™ Myriad™ 2 VPU) communicates with the host over USB, minimum four infer requests in flight are recommended to hide the data transfer costs. See Request-Based API and “GetBlob” Idiom and [Benchmark App Sample](../../inference-engine/samples/benchmark_app/README.md) for more information. -Intel® Vision Accelerator Design with Intel® Movidius™ VPUs requires to keep at least 32 inference requests in flight to fully saturate the device. +Intel® Vision Accelerator Design with Intel® Movidius™ VPUs requires to keep at least 32 inference requests in flight to fully saturate the device. ### FPGA @@ -274,7 +274,7 @@ The following tips are provided to give general guidance on optimizing execution - Generally, GPU performance is better on heavy kernels (like Convolutions) and large inputs. So if the network inference time is already too small (~1ms of execution time), using the GPU would unlikely give a boost. -- A typical strategy to start with is to test the CPU-only and GPU-only scenarios first (with samples this is plain `-d CPU` or `-d GPU`). If there are specific kernels that are not supported by the GPU, the best option to try is the `HETERO:GPU,CPU` that automatically applies default splitting (based on the plugins layers support). Then, you can play with the manual affinity settings (for example, to further minimize the number of subgraphs). +- A typical strategy to start with is to test the CPU-only and GPU-only scenarios first (with samples this is plain `-d CPU` or `-d GPU`). If there are specific kernels that are not supported by the GPU, the best option to try is the `HETERO:GPU,CPU` that automatically applies default splitting (based on the plugins layers support). Then, you can play with the manual affinity settings (for example, to further minimize the number of subgraphs). 
- The general affinity “rule of thumb” is to keep computationally-intensive kernels on the accelerator, and "glue" (or helper) kernels on the CPU. Notice that this includes the granularity considerations. For example, running some (custom) activation on the CPU would result in too many conversions. @@ -337,7 +337,7 @@ For inference on the CPU there are multiple threads binding options, see If you are building an app-level pipeline with third-party components like GStreamer*, the general guidance for NUMA machines is as follows: - Whenever possible, use at least one instance of the pipeline per NUMA node: - - Pin the _entire_ pipeline instance to the specific NUMA node at the outer-most level (for example, use Kubernetes* and/or `numactl` command with proper settings before actual GStreamer commands). + - Pin the _entire_ pipeline instance to the specific NUMA node at the outer-most level (for example, use Kubernetes* and/or `numactl` command with proper settings before actual GStreamer commands). - Disable any individual pinning by the pipeline components (e.g. set [CPU_BIND_THREADS to 'NO'](../IE_DG/supported_plugins/CPU.md)). - Limit each instance with respect to number of inference threads. Use [CPU_THREADS_NUM](../IE_DG/supported_plugins/CPU.md) or or other means (e.g. virtualization, Kubernetes*, etc), to avoid oversubscription. - If pinning instancing/pinning of the entire pipeline is not possible or desirable, relax the inference threads pinning to just 'NUMA'. @@ -416,7 +416,7 @@ If your application simultaneously executes multiple infer requests: - For FPGA and GPU, the actual work is serialized by a plugin and/or a driver anyway. -- Finally, for any VPU flavor, using multiple requests is a must for achieving good throughput. +- Finally, for any VPU flavor, using multiple requests is a must for achieving good throughput. In the Inference Engine, there is no notion of requests priorities. It is left to the user side (for example, not queuing the low priority infer request, until another higher priority is waiting). Notice that it would require additional logic to synchronize between executable networks (queues) in your application code. @@ -470,12 +470,12 @@ Example of Inference Engine calls: Notice that `Task_runNOThrow` is an Async API wrapper and it is executed in a different thread and triggers the Intel MKL-DNN execution: ![](../img/vtune_timeline.png) - + - In the Intel VTune Amplifier **Top-down view**, grouped by the **Task Domain**. Notice the `Task_runNoThrow` and `MKLDNN _INFER` that are bracketing the actual Intel MKL-DNN kernels execution: - + ![](../img/vtune_topdown_view.jpg) - + Similarly, you can use any GPU analysis in the Intel VTune Amplifier and get general correlation with Inference Engine API as well as the execution breakdown for OpenCL kernels. Just like with regular native application, further drill down in the counters is possible, however, this is mostly useful for optimizing custom kernels. Finally, with the Intel VTune Amplifier, the profiling is not limited to your user-level code (see the [corresponding section in the Intel® VTune™ Amplifier User's Guide](https://software.intel.com/en-us/vtune-amplifier-help-analyze-performance)). @@ -513,12 +513,12 @@ Since FPGA execution does not separate individual kernels, only bulk execution/d ``` subgraph1: 1. input preprocessing (mean data/FPGA):EXECUTED layerType: preprocessing realTime: 129 cpu: 129 -subgraph1: 2. input transfer to DDR:EXECUTED layerType: realTime: 201 cpu: 0 -subgraph1: 3. 
FPGA execute time:EXECUTED layerType: realTime: 3808 cpu: 0 subgraph1: 4. output transfer from DDR:EXECUTED layerType: realTime: 55 cpu: 0 -subgraph1: 5. FPGA output postprocessing:EXECUTED layerType: realTime: 7 cpu: 7 -subgraph1: 6. softmax/copy: EXECUTED layerType: realTime: 2 cpu: 2 -subgraph2: out_prob: NOT_RUN layerType: Output realTime: 0 cpu: 0 -subgraph2: prob: EXECUTED layerType: SoftMax realTime: 10 cpu: 10 +subgraph1: 2. input transfer to DDR:EXECUTED layerType: realTime: 201 cpu: 0 +subgraph1: 3. FPGA execute time:EXECUTED layerType: realTime: 3808 cpu: 0 subgraph1: 4. output transfer from DDR:EXECUTED layerType: realTime: 55 cpu: 0 +subgraph1: 5. FPGA output postprocessing:EXECUTED layerType: realTime: 7 cpu: 7 +subgraph1: 6. softmax/copy: EXECUTED layerType: realTime: 2 cpu: 2 +subgraph2: out_prob: NOT_RUN layerType: Output realTime: 0 cpu: 0 +subgraph2: prob: EXECUTED layerType: SoftMax realTime: 10 cpu: 10 Total time: 4212 microseconds ``` diff --git a/docs/snippets/GPU_Kernel.cpp b/docs/snippets/GPU_Kernel.cpp index 5f849eb6a6a..8b21a79dfe2 100644 --- a/docs/snippets/GPU_Kernel.cpp +++ b/docs/snippets/GPU_Kernel.cpp @@ -1,5 +1,4 @@ #include -#include "cldnn/cldnn_config.hpp" int main() { using namespace InferenceEngine; @@ -9,9 +8,5 @@ InferenceEngine::Core core; core.SetConfig({ { InferenceEngine::PluginConfigParams::KEY_CONFIG_FILE, "" } }, "GPU"); //! [part0] -//! [part1] -core.SetConfig({ { PluginConfigParams::KEY_DUMP_KERNELS, PluginConfigParams::YES } }, "GPU"); -//! [part1] - return 0; } diff --git a/docs/snippets/GPU_Kernels_Tuning.cpp b/docs/snippets/GPU_Kernels_Tuning.cpp deleted file mode 100644 index 25daeec5e2a..00000000000 --- a/docs/snippets/GPU_Kernels_Tuning.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include -#include "cldnn/cldnn_config.hpp" - -int main() { -using namespace InferenceEngine; -//! [part0] -Core ie; - ie.SetConfig({{ CONFIG_KEY(TUNING_MODE), CONFIG_VALUE(TUNING_CREATE) }}, "GPU"); - ie.SetConfig({{ CONFIG_KEY(TUNING_FILE), "/path/to/tuning/file.json" }}, "GPU"); - // Further LoadNetwork calls will use the specified tuning parameters -//! 
[part0] - -return 0; -} diff --git a/docs/snippets/GPU_RemoteBlob_API2.cpp b/docs/snippets/GPU_RemoteBlob_API2.cpp index 1bb00c17e03..13597ae4561 100644 --- a/docs/snippets/GPU_RemoteBlob_API2.cpp +++ b/docs/snippets/GPU_RemoteBlob_API2.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include int main() { @@ -28,7 +28,7 @@ auto shared_va_context = gpu::make_shared_context(ie, "GPU", disp); // compile network within a shared context ExecutableNetwork executable_network = ie.LoadNetwork(network, shared_va_context, - { { CLDNNConfigParams::KEY_CLDNN_NV12_TWO_INPUTS, + { { GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS, PluginConfigParams::YES } }); diff --git a/docs/snippets/InferenceEngine_network_with_state_infer.cpp b/docs/snippets/InferenceEngine_network_with_state_infer.cpp index 81a3070ba3b..7af9c076931 100644 --- a/docs/snippets/InferenceEngine_network_with_state_infer.cpp +++ b/docs/snippets/InferenceEngine_network_with_state_infer.cpp @@ -64,7 +64,13 @@ int main(int argc, char *argv[]) { inferRequest.Infer(); // check states auto states = inferRequest.QueryState(); + if (states.empty()) { + throw std::runtime_error("Queried states are empty"); + } auto mstate = as(states[0].GetState()); + if (mstate == nullptr) { + throw std::runtime_error("Can't cast state to MemoryBlob"); + } auto state_buf = mstate->rmap(); float * state =state_buf.as(); std::cout << state[0] << "\n"; diff --git a/docs/template_plugin/src/template_executable_network.cpp b/docs/template_plugin/src/template_executable_network.cpp index e46bd63e5a0..4aba4622e50 100644 --- a/docs/template_plugin/src/template_executable_network.cpp +++ b/docs/template_plugin/src/template_executable_network.cpp @@ -175,9 +175,9 @@ InferenceEngine::Parameter TemplatePlugin::ExecutableNetwork::GetMetric(const st } // ! [executable_network:get_metric] -// ! [executable_network:export_impl] -void TemplatePlugin::ExecutableNetwork::ExportImpl(std::ostream& modelStream) { - OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "ExecutableNetwork::ExportImpl"); +// ! [executable_network:export] +void TemplatePlugin::ExecutableNetwork::Export(std::ostream& modelStream) { + OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "ExecutableNetwork::Export"); // Note: custom ngraph extensions are not supported std::map custom_opsets; @@ -198,4 +198,4 @@ void TemplatePlugin::ExecutableNetwork::ExportImpl(std::ostream& modelStream) { // TODO: implement network precision, layout, preprocessing info serialization } -// ! [executable_network:export_impl] +// ! 
[executable_network:export] diff --git a/docs/template_plugin/src/template_executable_network.hpp b/docs/template_plugin/src/template_executable_network.hpp index ca3bca11ba8..a68df02f958 100644 --- a/docs/template_plugin/src/template_executable_network.hpp +++ b/docs/template_plugin/src/template_executable_network.hpp @@ -30,7 +30,7 @@ public: // Methods from a base class ExecutableNetworkThreadSafeDefault - void ExportImpl(std::ostream& model) override; + void Export(std::ostream& model) override; InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs) override; InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override; diff --git a/docs/template_plugin/src/template_plugin.cpp b/docs/template_plugin/src/template_plugin.cpp index 87a509c8a77..a0f7a30ee17 100644 --- a/docs/template_plugin/src/template_plugin.cpp +++ b/docs/template_plugin/src/template_plugin.cpp @@ -95,14 +95,14 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(cons } // ! [plugin:load_exe_network_impl] -// ! [plugin:import_network_impl] -InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetworkImpl(std::istream& modelStream, const std::map& config) { - OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::ImportNetworkImpl"); +// ! [plugin:import_network] +InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(std::istream& modelStream, const std::map& config) { + OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::ImportNetwork"); auto fullConfig = Configuration {config, _cfg}; return std::make_shared(modelStream, fullConfig, std::static_pointer_cast(shared_from_this())); } -// ! [plugin:import_network_impl] +// ! [plugin:import_network] // ! 
[plugin:query_network] InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::CNNNetwork& network, const ConfigMap& config) const { diff --git a/docs/template_plugin/src/template_plugin.hpp b/docs/template_plugin/src/template_plugin.hpp index ef2b506d497..71c37410ea7 100644 --- a/docs/template_plugin/src/template_plugin.hpp +++ b/docs/template_plugin/src/template_plugin.hpp @@ -28,7 +28,7 @@ public: void AddExtension(const std::shared_ptr& extension) override; InferenceEngine::Parameter GetConfig(const std::string& name, const std::map& options) const override; InferenceEngine::Parameter GetMetric(const std::string& name, const std::map& options) const override; - InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetworkImpl(std::istream& model, const std::map& config) override; + InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetwork(std::istream& model, const std::map& config) override; private: friend class ExecutableNetwork; diff --git a/inference-engine/cmake/dependencies.cmake b/inference-engine/cmake/dependencies.cmake index 4ce1ef31365..b270c46f2da 100644 --- a/inference-engine/cmake/dependencies.cmake +++ b/inference-engine/cmake/dependencies.cmake @@ -295,25 +295,25 @@ if (ENABLE_SPEECH_DEMO) if(DEFINED IE_PATH_TO_DEPS) if (WIN32 AND X86_64) RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_WIN "speech_demo_1.0.0.755_windows.zip" + ARCHIVE_WIN "speech_demo_1.0.0.774_windows.zip" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.755" - SHA256 "58adef14b8a749f70fa83888614cee34b941956e6e958e445e3f48885b3c20a0") + TARGET_PATH "${TEMP}/speech_demo_1.0.0.774" + SHA256 "67b25170be5e89a4f0e90e8b39623b60c9a15b965c30329385e295fcd2edc856") debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) elseif (LINUX AND X86_64) if (LINUX_OS_NAME STREQUAL "CentOS 7" OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9") RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_LIN "speech_demo_1.0.0.755_centos.tgz" + ARCHIVE_LIN "speech_demo_1.0.0.774_centos.tgz" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.755" - SHA256 "716201e377714ac50f3909c445d36d47a089de50a557d8ef65232de040671188") + TARGET_PATH "${TEMP}/speech_demo_1.0.0.774" + SHA256 "5ec3b7be9ae05376aefae5bd5fd4a39b12c274e82817fd3218120b8e8fc8ff5a") debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) else() RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_LIN "speech_demo_1.0.0.755_linux.tgz" + ARCHIVE_LIN "speech_demo_1.0.0.774_linux.tgz" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.755" - SHA256 "7714b8776ec0183ed73eed6d3d965ee6d5c15d2dc49ee5ae118cc368c89c7a9d") + TARGET_PATH "${TEMP}/speech_demo_1.0.0.774" + SHA256 "f0bbd0a6218b0365e7cfb1f860b34e4ace7e0d47dd60b369cdea8a480329810f") debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) endif() else() diff --git a/inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md b/inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md index c33d67103c6..f0701f963ae 100644 --- a/inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md +++ b/inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md @@ -1,7 +1,8 @@ # nGraph Function Creation Python* Sample {#openvino_inference_engine_ie_bridges_python_sample_ngraph_function_creation_sample_README} -This sample demonstrates how to 
execute an inference using [nGraph function feature](../../../../../docs/nGraph_DG/build_function.md) to create a network that uses weights from LeNet classification network. So you don't need an XML file, the model will be created from the source code on the fly. -In addition to regular images, the sample also supports single-channel ubyte images as an input. +This sample demonstrates how to execute an inference using [nGraph function feature](../../../../../docs/nGraph_DG/build_function.md) to create a network that uses weights from LeNet classification network, which is known to work well on digit classification tasks. So you don't need an XML file, the model will be created from the source code on the fly. + +In addition to regular grayscale images with a digit, the sample also supports single-channel `ubyte` images as an input. The following Inference Engine Python API is used in the application: @@ -14,6 +15,9 @@ Basic Inference Engine API is covered by [Hello Classification Python* Sample](. | Options | Values | | :------------------------- | :---------------------------------------------------------------------- | +| Validated Models | LeNet (image classification network) | +| Model Format | Network weights file (\*.bin) | +| Validated images | The sample uses OpenCV\* to [read input grayscale image](https://docs.opencv.org/master/d4/da8/group__imgcodecs.html#ga288b8b3da0892bd651fce07b3bbd3a56) (\*.bmp, \*.png) or single-channel `ubyte` image | | Supported devices | [All](../../../../../docs/IE_DG/supported_plugins/Supported_Devices.md) | | Other language realization | [C++](../../../../samples/ngraph_function_creation_sample) | @@ -72,7 +76,7 @@ To run the sample, you need specify a model weights and image: You can do inference of an image using a pre-trained model on a GPU using the following command: ```sh -python ngraph_function_creation_sample.py -m /lenet.bin -i /3.bmp -d GPU +python ngraph_function_creation_sample.py -m /lenet.bin -i /3.png -d GPU ``` ## Sample Output @@ -84,10 +88,10 @@ The sample application logs each step in a standard output stream and outputs to [ INFO ] Loading the network using ngraph function with weights from /lenet.bin [ INFO ] Configuring input and output blobs [ INFO ] Loading the model to the plugin -[ WARNING ] /3.bmp is inverted to white over black -[ WARNING ] /3.bmp is resized from (100, 100) to (28, 28) +[ WARNING ] /3.png is inverted to white over black +[ WARNING ] /3.png is resized from (351, 353) to (28, 28) [ INFO ] Starting inference in synchronous mode -[ INFO ] Image path: /3.bmp +[ INFO ] Image path: /3.png [ INFO ] Top 10 results: [ INFO ] classid probability [ INFO ] ------------------- diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt index 1f623fb4833..17b8bf5b9b5 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt @@ -77,4 +77,5 @@ install(PROGRAMS __init__.py DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/inference_engine COMPONENT ${PYTHON_VERSION}) -add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) \ No newline at end of file +add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME} + EXCLUDE_PATTERNS ".*\\.cxx;.*\\.pxd;.*\\.pyx") diff --git
a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd index 5d942f93050..efb389259d3 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 from .cimport ie_api_impl_defs as C -from .ie_api_impl_defs cimport CBlob, CTensorDesc, InputInfo, CPreProcessChannel, CPreProcessInfo, CExecutableNetwork +from .ie_api_impl_defs cimport CBlob, CTensorDesc, InputInfo, CPreProcessChannel, CPreProcessInfo, CExecutableNetwork, CVariableState import os diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt index c6315336ba2..27c9e7bf898 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt @@ -42,7 +42,8 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") target_compile_options(${TARGET_NAME} PRIVATE "-Wno-error=register") endif() -add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) +add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME} + EXCLUDE_PATTERNS ".*\\.cxx;.*\\.pxd;.*\\.pyx") # perform copy add_custom_command(TARGET ${TARGET_NAME} diff --git a/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt index 504125d9823..8367f941d9f 100644 --- a/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt @@ -48,4 +48,5 @@ add_custom_command(TARGET ${TARGET_NAME} COMMAND ${CMAKE_COMMAND} -E copy ${PYTHON_BRIDGE_SRC_ROOT}/src/openvino/test_utils/__init__.py ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/__init__.py ) -add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) \ No newline at end of file +add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME} + EXCLUDE_PATTERNS ".*\\.cxx;.*\\.pxd;.*\\.pyx") \ No newline at end of file diff --git a/inference-engine/ie_bridges/python/tests/test_InferRequest.py b/inference-engine/ie_bridges/python/tests/test_InferRequest.py index 6928944139d..af79c0ff155 100644 --- a/inference-engine/ie_bridges/python/tests/test_InferRequest.py +++ b/inference-engine/ie_bridges/python/tests/test_InferRequest.py @@ -16,6 +16,20 @@ test_net_xml, test_net_bin = model_path(is_myriad) path_to_img = image_path() +def create_function_with_memory(input_shape, data_type): + import ngraph as ng + from ngraph.impl import Function, Type + + input_data = ng.parameter(input_shape, name="input_data", dtype=data_type) + rv = ng.read_value(input_data, "var_id_667") + add = ng.add(rv, input_data, name="MemoryAdd") + node = ng.assign(add, "var_id_667") + res = ng.result(add, "res") + func = Function(results=[res], sinks=[node], parameters=[input_data], name="name") + caps = Function.to_capsule(func) + return caps + + def read_image(): import cv2 n, c, h, w = (1, 3, 32, 32) @@ -525,28 +539,56 @@ def test_resize_algorithm_work(device): assert np.allclose(res_1, res_2, atol=1e-2, rtol=1e-2) -# issue 56653 -@pytest.mark.skip(reason="Test will enable when nGraph Python API allows to create network with memory") -def 
test_query_state(device): - import ngraph as ng - from ngraph.impl import Function - input_data = ng.parameter([5, 7], name="input_data", dtype=np.float32) - rv = ng.read_value(input_data, "var_id_667") - #a = ng.add(rv, input_data) - node = ng.assign(rv, "var_id_667") - res = ng.result(rv, "res") - func = Function([res], sinks=[node], parameters=[input_data], name='test') - caps = Function.to_capsule(func) +@pytest.mark.parametrize("mode", ["set_init_memory_state", "reset_memory_state", "normal"]) +@pytest.mark.parametrize("data_type", ["FP32", "FP16", "I32"]) +@pytest.mark.parametrize("input_shape", [[10], [10, 10], [10, 10, 10], [2, 10, 10, 10]]) +@pytest.mark.skipif(os.environ.get("TEST_DEVICE", "CPU") != "CPU", + reason=f"Can't run test on device {os.environ.get('TEST_DEVICE', 'CPU')}, " + "Memory layers fully supported only on CPU") +def test_query_state_write_buffer(device, input_shape, data_type, mode): + ie_core = ie.IECore() + if device == "CPU": + if ie_core.get_metric(device, "FULL_DEVICE_NAME") == "arm_compute::NEON": + pytest.skip("Can't run on ARM plugin") - net = ie.IENetwork(caps) + layout = ["C", "HW", "CHW", "NCHW"] + np_data_type = {"FP32": np.float32, "FP16": np.float16, "I32": np.int32} + + from openvino.inference_engine import TensorDesc, Blob + + net = ie.IENetwork(create_function_with_memory(input_shape, np_data_type[data_type])) ie_core = ie.IECore() exec_net = ie_core.load_network(network=net, device_name=device, num_requests=1) request = exec_net.requests[0] mem_states = request.query_state() mem_state = mem_states[0] - with pytest.raises(ValueError) as e: - ones_arr = np.ones(shape=(1, 800), dtype=np.float32) - mem_state.state.buffer[:] = ones_arr - assert "assignment destination is read-only" in str(e.value) - assert mem_state.name == 'id_1' - assert mem_state.state.tensor_desc.precision == 'FP32' + + assert mem_state.name == 'var_id_667' + # todo: Uncomment after fix 45611, + # CPU plugin returns outputs and memory state in FP32 in case of FP16 original precision + #assert mem_state.state.tensor_desc.precision == data_type + + for i in range(1, 10): + if mode == "set_init_memory_state": + # create initial value + const_init = 5 + init_array = np.full(input_shape, const_init, dtype=np_data_type[mem_state.state.tensor_desc.precision]) + tensor_desc = TensorDesc(mem_state.state.tensor_desc.precision, input_shape, layout[len(input_shape) - 1]) + blob = Blob(tensor_desc, init_array) + mem_state.state = blob + + res = exec_net.infer({"input_data": np.full(input_shape, 1, dtype=np_data_type[data_type])}) + expected_res = np.full(input_shape, 1 + const_init, dtype=np_data_type[data_type]) + elif mode == "reset_memory_state": + # reset initial state of ReadValue to zero + mem_state.reset() + res = exec_net.infer({"input_data": np.full(input_shape, 1, dtype=np_data_type[data_type])}) + + # always ones + expected_res = np.full(input_shape, 1, dtype=np_data_type[data_type]) + else: + res = exec_net.infer({"input_data": np.full(input_shape, 1, dtype=np_data_type[data_type])}) + expected_res = np.full(input_shape, i, dtype=np_data_type[data_type]) + + assert np.allclose(res['MemoryAdd'], expected_res, atol=1e-6), \ + "Expected values: {} \n Actual values: {} \n".format(expected_res, res) diff --git a/inference-engine/include/cldnn/cldnn_config.hpp b/inference-engine/include/cldnn/cldnn_config.hpp index cbc2aef0242..3e5dc4cfb12 100644 --- a/inference-engine/include/cldnn/cldnn_config.hpp +++ b/inference-engine/include/cldnn/cldnn_config.hpp @@ -11,47 +11,11 @@ #pragma once 
#include "ie_plugin_config.hpp" +#include "ie_api.h" +#include "gpu/gpu_config.hpp" namespace InferenceEngine { -namespace Metrics { - -/** - * @def GPU_METRIC_KEY(name) - * @brief shortcut for defining GPU plugin metrics - */ -#define GPU_METRIC_KEY(name) METRIC_KEY(GPU_##name) -#define DECLARE_GPU_METRIC_KEY(name, ...) DECLARE_METRIC_KEY(GPU_##name, __VA_ARGS__) - -/** - * @def DECLARE_GPU_METRIC_VALUE(name) - * @brief shortcut for defining gpu metric values - */ -#define DECLARE_GPU_METRIC_VALUE(name) DECLARE_METRIC_VALUE(GPU_##name) - -/** - * @brief Metric which defines size of memory in bytes available for the device. For iGPU it returns host memory size, for dGPU - dedicated gpu memory size - */ -DECLARE_GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE, uint64_t); - -/** - * @brief Metric to get microarchitecture identifier in major.minor.revision format - */ -DECLARE_GPU_METRIC_KEY(UARCH_VERSION, std::string); - -/** - * @brief Metric to get count of execution units for current GPU - */ -DECLARE_GPU_METRIC_KEY(EXECUTION_UNITS_COUNT, int); - -/** - * @brief Possible return value for OPTIMIZATION_CAPABILITIES metric - * - "HW_MATMUL" - Defines if device has hardware block for matrix multiplication - */ -DECLARE_GPU_METRIC_VALUE(HW_MATMUL); - -} // namespace Metrics - /** * @brief GPU plugin configuration */ @@ -70,6 +34,7 @@ namespace CLDNNConfigParams { * this option should be used with an unsigned integer value (1 is lowest priority) * 0 means no priority hint is set and default queue is created. */ +INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::GPUConfigParams::GPU_PLUGIN_PRIORITY instead") DECLARE_CLDNN_CONFIG_KEY(PLUGIN_PRIORITY); /** @@ -78,22 +43,26 @@ DECLARE_CLDNN_CONFIG_KEY(PLUGIN_PRIORITY); * chapter 9.19. This option should be used with an unsigned integer value (1 is lowest energy consumption) * 0 means no throttle hint is set and default queue created. */ +INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::GPUConfigParams::GPU_PLUGIN_THROTTLE instead") DECLARE_CLDNN_CONFIG_KEY(PLUGIN_THROTTLE); /** * @brief This key controls clDNN memory pool optimization. * Turned off by default. */ +INFERENCE_ENGINE_DEPRECATED("The config key will be removed") DECLARE_CLDNN_CONFIG_KEY(MEM_POOL); /** * @brief This key defines the directory name to which clDNN graph visualization will be dumped. */ +INFERENCE_ENGINE_DEPRECATED("The config key will be removed") DECLARE_CLDNN_CONFIG_KEY(GRAPH_DUMPS_DIR); /** * @brief This key defines the directory name to which full program sources will be dumped. */ +INFERENCE_ENGINE_DEPRECATED("The config key will be removed") DECLARE_CLDNN_CONFIG_KEY(SOURCES_DUMPS_DIR); /** @@ -108,43 +77,19 @@ DECLARE_CLDNN_CONFIG_KEY(ENABLE_FP16_FOR_QUANTIZED_MODELS); * @brief This key should be set to correctly handle NV12 input without pre-processing. * Turned off by default. */ +INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::GPUConfigParams::GPU_NV12_TWO_INPUTS instead") DECLARE_CLDNN_CONFIG_KEY(NV12_TWO_INPUTS); -/** - * @brief This key sets the max number of host threads that can be used by GPU plugin on model loading. - * Default value is maximum number of threads available in the environment. - */ -DECLARE_CLDNN_CONFIG_KEY(MAX_NUM_THREADS); - -/** - * @brief Turning on this key enables to unroll recurrent layers such as TensorIterator or Loop with fixed iteration count. - * This key is turned on by default. Turning this key on will achieve better inference performance for loops with not too many iteration counts (less than 16, as a rule of thumb). 
- * Turning this key off will achieve better performance for both graph loading time and inference time with many iteration counts (greater than 16). - * Note that turning this key on will increase the graph loading time in proportion to the iteration counts. - * Thus, this key should be turned off if graph loading time is considered to be most important target to optimize.*/ -DECLARE_CLDNN_CONFIG_KEY(ENABLE_LOOP_UNROLLING); - } // namespace CLDNNConfigParams namespace PluginConfigParams { -/** - * @brief Optimize GPU plugin execution to maximize throughput. - * - * It is passed to Core::SetConfig(), this option should be used with values: - * - KEY_GPU_THROUGHPUT_AUTO creates bare minimum of streams that might improve performance in some cases, - * this option allows to enable throttle hint for opencl queue thus reduce CPU load without significant performance - * drop - * - a positive integer value creates the requested number of streams - */ -DECLARE_CONFIG_VALUE(GPU_THROUGHPUT_AUTO); -DECLARE_CONFIG_KEY(GPU_THROUGHPUT_STREAMS); - /** * @brief This key enables dumping of the kernels used by the plugin for custom layers. * * This option should be used with values: PluginConfigParams::YES or PluginConfigParams::NO (default) */ +INFERENCE_ENGINE_DEPRECATED("The config key will be removed") DECLARE_CONFIG_KEY(DUMP_KERNELS); /** @@ -159,17 +104,24 @@ DECLARE_CONFIG_KEY(DUMP_KERNELS); * * For values TUNING_CREATE and TUNING_RETUNE the file will be created if it does not exist. */ +INFERENCE_ENGINE_DEPRECATED("The config key will be removed") DECLARE_CONFIG_KEY(TUNING_MODE); +INFERENCE_ENGINE_DEPRECATED("The config value will be removed") DECLARE_CONFIG_VALUE(TUNING_CREATE); +INFERENCE_ENGINE_DEPRECATED("The config value will be removed") DECLARE_CONFIG_VALUE(TUNING_USE_EXISTING); +INFERENCE_ENGINE_DEPRECATED("The config value will be removed") DECLARE_CONFIG_VALUE(TUNING_DISABLED); +INFERENCE_ENGINE_DEPRECATED("The config value will be removed") DECLARE_CONFIG_VALUE(TUNING_UPDATE); +INFERENCE_ENGINE_DEPRECATED("The config value will be removed") DECLARE_CONFIG_VALUE(TUNING_RETUNE); /** * @brief This key defines the tuning data filename to be created/used */ +INFERENCE_ENGINE_DEPRECATED("The config key will be removed") DECLARE_CONFIG_KEY(TUNING_FILE); } // namespace PluginConfigParams diff --git a/inference-engine/include/gpu/gpu_config.hpp b/inference-engine/include/gpu/gpu_config.hpp new file mode 100644 index 00000000000..96f8754ac86 --- /dev/null +++ b/inference-engine/include/gpu/gpu_config.hpp @@ -0,0 +1,120 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief A header for advanced hardware related properties for GPU plugin + * To use in SetConfig() method of plugins + * + * @file gpu_config.hpp + */ +#pragma once + +#include "ie_plugin_config.hpp" + +namespace InferenceEngine { + +namespace Metrics { + +/** + * @def GPU_METRIC_KEY(name) + * @brief shortcut for defining GPU plugin metrics + */ +#define GPU_METRIC_KEY(name) METRIC_KEY(GPU_##name) +#define DECLARE_GPU_METRIC_KEY(name, ...) DECLARE_METRIC_KEY(GPU_##name, __VA_ARGS__) + +/** + * @def DECLARE_GPU_METRIC_VALUE(name) + * @brief shortcut for defining gpu metric values + */ +#define DECLARE_GPU_METRIC_VALUE(name) DECLARE_METRIC_VALUE(GPU_##name) + +/** + * @brief Metric which defines size of memory in bytes available for the device. 
For iGPU it returns host memory size, for dGPU - dedicated gpu memory size + */ +DECLARE_GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE, uint64_t); + +/** + * @brief Metric to get microarchitecture identifier in major.minor.revision format + */ +DECLARE_GPU_METRIC_KEY(UARCH_VERSION, std::string); + +/** + * @brief Metric to get count of execution units for current GPU + */ +DECLARE_GPU_METRIC_KEY(EXECUTION_UNITS_COUNT, int); + +/** + * @brief Possible return value for OPTIMIZATION_CAPABILITIES metric + * - "HW_MATMUL" - Defines if device has hardware block for matrix multiplication + */ +DECLARE_GPU_METRIC_VALUE(HW_MATMUL); + +} // namespace Metrics + +/** + * @brief GPU plugin configuration + */ +namespace GPUConfigParams { + +/** + * @brief shortcut for defining configuration keys + */ +#define GPU_CONFIG_KEY(name) InferenceEngine::GPUConfigParams::_CONFIG_KEY(GPU_##name) +#define DECLARE_GPU_CONFIG_KEY(name) DECLARE_CONFIG_KEY(GPU_##name) +#define DECLARE_GPU_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(GPU_##name) + +/** + * @brief This key instructs the GPU plugin to use the OpenCL queue priority hint + * as defined in https://www.khronos.org/registry/OpenCL/specs/opencl-2.1-extensions.pdf + * this option should be used with an unsigned integer value (1 is lowest priority) + * 0 means no priority hint is set and default queue is created. + */ +DECLARE_GPU_CONFIG_KEY(PLUGIN_PRIORITY); + +/** + * @brief This key instructs the GPU plugin to use throttle hints the OpenCL queue throttle hint + * as defined in https://www.khronos.org/registry/OpenCL/specs/opencl-2.1-extensions.pdf, + * chapter 9.19. This option should be used with an unsigned integer value (1 is lowest energy consumption) + * 0 means no throttle hint is set and default queue created. + */ +DECLARE_GPU_CONFIG_KEY(PLUGIN_THROTTLE); + +/** + * @brief This key should be set to correctly handle NV12 input without pre-processing. + * Turned off by default. + */ +DECLARE_GPU_CONFIG_KEY(NV12_TWO_INPUTS); + +/** + * @brief This key sets the max number of host threads that can be used by GPU plugin on model loading. + * Default value is maximum number of threads available in the environment. + */ +DECLARE_GPU_CONFIG_KEY(MAX_NUM_THREADS); + +/** + * @brief Turning on this key enables to unroll recurrent layers such as TensorIterator or Loop with fixed iteration count. + * This key is turned on by default. Turning this key on will achieve better inference performance for loops with not too many iteration counts (less than 16, as a rule of thumb). + * Turning this key off will achieve better performance for both graph loading time and inference time with many iteration counts (greater than 16). + * Note that turning this key on will increase the graph loading time in proportion to the iteration counts. + * Thus, this key should be turned off if graph loading time is considered to be most important target to optimize.*/ +DECLARE_GPU_CONFIG_KEY(ENABLE_LOOP_UNROLLING); + +} // namespace GPUConfigParams + +namespace PluginConfigParams { + +/** + * @brief Optimize GPU plugin execution to maximize throughput. 
+ * + * It is passed to Core::SetConfig(), this option should be used with values: + * - KEY_GPU_THROUGHPUT_AUTO creates bare minimum of streams that might improve performance in some cases, + * this option allows to enable throttle hint for opencl queue thus reduce CPU load without significant performance + * drop + * - a positive integer value creates the requested number of streams + */ +DECLARE_CONFIG_VALUE(GPU_THROUGHPUT_AUTO); +DECLARE_CONFIG_KEY(GPU_THROUGHPUT_STREAMS); +} // namespace PluginConfigParams + +} // namespace InferenceEngine diff --git a/inference-engine/include/ie_core.hpp b/inference-engine/include/ie_core.hpp index e87f8c65719..96f8d6b58af 100644 --- a/inference-engine/include/ie_core.hpp +++ b/inference-engine/include/ie_core.hpp @@ -174,9 +174,18 @@ public: * operation* * @return An executable network reference */ - ExecutableNetwork ImportNetwork(std::istream& networkModel, const std::string& deviceName = {}, + ExecutableNetwork ImportNetwork(std::istream& networkModel, const std::string& deviceName, const std::map& config = {}); + /** + * @deprecated Use Core::ImportNetwork with explicit device name + * @brief Creates an executable network from a previously exported network + * @param networkModel network model stream + * @return An executable network reference + */ + INFERENCE_ENGINE_DEPRECATED("Use Core::ImportNetwork with explicit device name") + ExecutableNetwork ImportNetwork(std::istream& networkModel); + /** * @brief Creates an executable network from a previously exported network within a specified * remote context. diff --git a/inference-engine/include/ie_version.hpp b/inference-engine/include/ie_version.hpp index 13215d0b68d..10e649a09d3 100644 --- a/inference-engine/include/ie_version.hpp +++ b/inference-engine/include/ie_version.hpp @@ -20,8 +20,8 @@ * @brief Defines Inference Engine patch version */ -#define IE_VERSION_MAJOR 2021 -#define IE_VERSION_MINOR 4 +#define IE_VERSION_MAJOR 2022 +#define IE_VERSION_MINOR 1 #define IE_VERSION_PATCH 0 #include "ie_api.h" diff --git a/inference-engine/samples/benchmark_app/inputs_filling.cpp b/inference-engine/samples/benchmark_app/inputs_filling.cpp index e12f7656f17..ef8a045279a 100644 --- a/inference-engine/samples/benchmark_app/inputs_filling.cpp +++ b/inference-engine/samples/benchmark_app/inputs_filling.cpp @@ -39,6 +39,7 @@ std::vector filterFilesByExtensions(const std::vector& return filtered; } +template void fillBlobImage(Blob::Ptr& inputBlob, const std::vector& filePaths, const size_t& batchSize, const benchmark_app::InputInfo& app_info, const size_t& requestId, const size_t& inputId, const size_t& inputSize) { MemoryBlob::Ptr minput = as(inputBlob); @@ -50,7 +51,7 @@ void fillBlobImage(Blob::Ptr& inputBlob, const std::vector& filePat // locked memory holder should be alive all time while access to its buffer // happens auto minputHolder = minput->wmap(); - auto inputBlobData = minputHolder.as(); + auto inputBlobData = minputHolder.as(); /** Collect images data ptrs **/ std::vector> vreader; @@ -90,7 +91,7 @@ void fillBlobImage(Blob::Ptr& inputBlob, const std::vector& filePat size_t offset = imageId * numChannels * width * height + (((app_info.layout == "NCHW") || (app_info.layout == "CHW")) ? 
(ch * width * height + h * width + w) : (h * width * numChannels + w * numChannels + ch)); - inputBlobData[offset] = vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]; + inputBlobData[offset] = static_cast(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]); } } } @@ -142,7 +143,7 @@ using uniformDistribution = typename std::conditional::value, std::uniform_int_distribution, void>::type>::type; template -void fillBlobRandom(Blob::Ptr& inputBlob, T rand_min = std::numeric_limits::min(), T rand_max = std::numeric_limits::max()) { +void fillBlobRandom(Blob::Ptr& inputBlob, T rand_min = std::numeric_limits::min(), T rand_max = std::numeric_limits::max()) { MemoryBlob::Ptr minput = as(inputBlob); if (!minput) { IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in " @@ -270,7 +271,19 @@ void fillBlobs(const std::vector& inputFiles, const size_t& batchSi if (app_info.isImage()) { if (!imageFiles.empty()) { // Fill with Images - fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + if (precision == InferenceEngine::Precision::FP32) { + fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + } else if (precision == InferenceEngine::Precision::FP16) { + fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + } else if (precision == InferenceEngine::Precision::I32) { + fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + } else if (precision == InferenceEngine::Precision::I64) { + fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + } else if (precision == InferenceEngine::Precision::U8) { + fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + } else { + IE_THROW() << "Input precision is not supported for " << item.first; + } continue; } } else { diff --git a/inference-engine/samples/benchmark_app/main.cpp b/inference-engine/samples/benchmark_app/main.cpp index 849dc05ad33..cd7ddc641dc 100644 --- a/inference-engine/samples/benchmark_app/main.cpp +++ b/inference-engine/samples/benchmark_app/main.cpp @@ -4,8 +4,8 @@ #include #include -#include #include +#include #include #include #include @@ -282,7 +282,7 @@ int main(int argc, char* argv[]) { << "which releases another CPU thread (that is otherwise " "used by the GPU driver for active polling)" << slog::endl; - device_config[CLDNN_CONFIG_KEY(PLUGIN_THROTTLE)] = "1"; + device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1"; } } else if (device == "MYRIAD") { device_config[CONFIG_KEY(LOG_LEVEL)] = CONFIG_VALUE(LOG_WARNING); diff --git a/inference-engine/samples/hello_query_device/README.md b/inference-engine/samples/hello_query_device/README.md index a185147f8ec..059077c48ad 100644 --- a/inference-engine/samples/hello_query_device/README.md +++ b/inference-engine/samples/hello_query_device/README.md @@ -63,20 +63,20 @@ Available devices: SUPPORTED_METRICS : [ AVAILABLE_DEVICES SUPPORTED_METRICS FULL_DEVICE_NAME OPTIMIZATION_CAPABILITIES SUPPORTED_CONFIG_KEYS RANGE_FOR_ASYNC_INFER_REQUESTS RANGE_FOR_STREAMS ] FULL_DEVICE_NAME : Intel(R) UHD Graphics 620 (iGPU) OPTIMIZATION_CAPABILITIES : [ FP32 BIN FP16 ] - SUPPORTED_CONFIG_KEYS : [ CACHE_DIR CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS CLDNN_GRAPH_DUMPS_DIR CLDNN_MAX_NUM_THREADS CLDNN_MEM_POOL CLDNN_NV12_TWO_INPUTS CLDNN_PLUGIN_PRIORITY 
CLDNN_PLUGIN_THROTTLE CLDNN_SOURCES_DUMPS_DIR CLDNN_ENABLE_LOOP_UNROLLING CONFIG_FILE DEVICE_ID DUMP_KERNELS DYN_BATCH_ENABLED EXCLUSIVE_ASYNC_REQUESTS GPU_THROUGHPUT_STREAMS PERF_COUNT TUNING_FILE TUNING_MODE ]
+ SUPPORTED_CONFIG_KEYS : [ CACHE_DIR CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS CLDNN_GRAPH_DUMPS_DIR GPU_MAX_NUM_THREADS CLDNN_MEM_POOL CLDNN_NV12_TWO_INPUTS CLDNN_PLUGIN_PRIORITY CLDNN_PLUGIN_THROTTLE CLDNN_SOURCES_DUMPS_DIR GPU_ENABLE_LOOP_UNROLLING CONFIG_FILE DEVICE_ID DUMP_KERNELS DYN_BATCH_ENABLED EXCLUSIVE_ASYNC_REQUESTS GPU_THROUGHPUT_STREAMS PERF_COUNT TUNING_FILE TUNING_MODE ]
 RANGE_FOR_ASYNC_INFER_REQUESTS : { 1, 2, 1 }
 RANGE_FOR_STREAMS : { 1, 2 }
 Default values for device configuration keys:
 CACHE_DIR : ""
 CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS : YES
 CLDNN_GRAPH_DUMPS_DIR : ""
- CLDNN_MAX_NUM_THREADS : 8
 CLDNN_MEM_POOL : YES
 CLDNN_NV12_TWO_INPUTS : NO
 CLDNN_PLUGIN_PRIORITY : 0
 CLDNN_PLUGIN_THROTTLE : 0
 CLDNN_SOURCES_DUMPS_DIR : ""
- CLDNN_ENABLE_LOOP_UNROLLING : YES
+ GPU_MAX_NUM_THREADS : 8
+ GPU_ENABLE_LOOP_UNROLLING : YES
 CONFIG_FILE : ""
 DEVICE_ID : ""
 DUMP_KERNELS : NO
diff --git a/inference-engine/samples/ngraph_function_creation_sample/README.md b/inference-engine/samples/ngraph_function_creation_sample/README.md
index 1410241c3a5..9f7b4f8d433 100644
--- a/inference-engine/samples/ngraph_function_creation_sample/README.md
+++ b/inference-engine/samples/ngraph_function_creation_sample/README.md
@@ -1,6 +1,6 @@
 # nGraph Function Creation C++ Sample {#openvino_inference_engine_samples_ngraph_function_creation_sample_README}
 
-This sample demonstrates how to execute an synchronous inference using [nGraph function feature](../../../docs/nGraph_DG/build_function.md) to create a network, which uses weights from LeNet classification network.
+This sample demonstrates how to execute a synchronous inference using the [nGraph function feature](../../../docs/nGraph_DG/build_function.md) to create a network that uses weights from the LeNet classification network, which is known to work well on digit classification tasks.
 
 The sample supports only single-channel `ubyte` images as an input.
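
The hunks above introduce GPU_* configuration keys in place of the clDNN-specific ones (CLDNN_PLUGIN_THROTTLE, CLDNN_MAX_NUM_THREADS, CLDNN_ENABLE_LOOP_UNROLLING, ...), and benchmark_app and the hello_query_device README are updated to the new spellings. A minimal sketch of how an application would pass the renamed keys, assuming the 2021-era InferenceEngine::Core API, that the new keys are reachable through the <gpu/gpu_config.hpp> header, and placeholder values for the model path, throttle level, and thread count:

    #include <map>
    #include <string>

    #include <inference_engine.hpp>
    #include <gpu/gpu_config.hpp>   // assumed public location of the new GPU_* keys

    int main() {
        InferenceEngine::Core core;

        // Per-device options, applied before the network is compiled for "GPU".
        std::map<std::string, std::string> gpu_config = {
            // OpenCL queue throttle hint: "1" = lowest energy consumption, "0" = no hint.
            {GPU_CONFIG_KEY(PLUGIN_THROTTLE), "1"},
            // Cap the host threads used while building the graph (placeholder value).
            {GPU_CONFIG_KEY(MAX_NUM_THREADS), "4"},
            // Favor graph loading time over inference time for loops with many iterations.
            {GPU_CONFIG_KEY(ENABLE_LOOP_UNROLLING), InferenceEngine::PluginConfigParams::NO},
        };
        core.SetConfig(gpu_config, "GPU");

        // Placeholder model path; the streams key/value come from PluginConfigParams.
        auto network = core.ReadNetwork("model.xml");
        auto exec_network = core.LoadNetwork(network, "GPU",
            {{CONFIG_KEY(GPU_THROUGHPUT_STREAMS), CONFIG_VALUE(GPU_THROUGHPUT_AUTO)}});
        (void)exec_network;
        return 0;
    }

As the Config::UpdateFromMap changes later in this patch show, the queue priority, throttle, and NV12 keys still accept the legacy CLDNN_* spellings for backward compatibility, while the thread-count and loop-unrolling options respond only to the new GPU_* names.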
diff --git a/inference-engine/samples/speech_sample/fileutils.cpp b/inference-engine/samples/speech_sample/fileutils.cpp index f3211a21a4b..102cca25297 100644 --- a/inference-engine/samples/speech_sample/fileutils.cpp +++ b/inference-engine/samples/speech_sample/fileutils.cpp @@ -108,15 +108,18 @@ void NumpyFile::GetFileInfo(const char* fileName, uint32_t numArrayToFindSize, u cnpy::npz_t my_npz1 = cnpy::npz_load(fileName); auto it = my_npz1.begin(); std::advance(it, numArrayToFindSize); + if (it != my_npz1.end()) { + numArrays = my_npz1.size(); + cnpy::NpyArray my_npy = it->second; + numMemoryBytes = my_npy.data_holder->size(); - numArrays = my_npz1.size(); - cnpy::NpyArray my_npy = it->second; - numMemoryBytes = my_npy.data_holder->size(); - - if (ptrNumArrays != NULL) - *ptrNumArrays = numArrays; - if (ptrNumMemoryBytes != NULL) - *ptrNumMemoryBytes = numMemoryBytes; + if (ptrNumArrays != NULL) + *ptrNumArrays = numArrays; + if (ptrNumMemoryBytes != NULL) + *ptrNumMemoryBytes = numMemoryBytes; + } else { + throw std::runtime_error(std::string("Failed to get info %s GetFileInfo()!\n") + fileName); + } } void NumpyFile::LoadFile(const char* fileName, uint32_t arrayIndex, std::string& ptrName, std::vector& memory, uint32_t* ptrNumRows, @@ -124,16 +127,20 @@ void NumpyFile::LoadFile(const char* fileName, uint32_t arrayIndex, std::string& cnpy::npz_t my_npz1 = cnpy::npz_load(fileName); auto it = my_npz1.begin(); std::advance(it, arrayIndex); - ptrName = it->first; - cnpy::NpyArray my_npy = it->second; - *ptrNumRows = my_npy.shape[0]; - *ptrNumColumns = my_npy.shape[1]; + if (it != my_npz1.end()) { + ptrName = it->first; + cnpy::NpyArray my_npy = it->second; + *ptrNumRows = my_npy.shape[0]; + *ptrNumColumns = my_npy.shape[1]; - for (size_t i = 0; i < my_npy.data_holder->size(); i++) { - memory.at(i) = my_npy.data_holder->at(i); + for (size_t i = 0; i < my_npy.data_holder->size(); i++) { + memory.at(i) = my_npy.data_holder->at(i); + } + + *ptrNumBytesPerElement = sizeof(float); + } else { + throw std::runtime_error(std::string("Failed to open %s for reading in LoadFile()!\n") + fileName); } - - *ptrNumBytesPerElement = sizeof(float); } void NumpyFile::SaveFile(const char* fileName, bool shouldAppend, std::string name, void* ptrMemory, uint32_t numRows, uint32_t numColumns) { diff --git a/inference-engine/src/auto_plugin/auto_exec_network.cpp b/inference-engine/src/auto_plugin/auto_exec_network.cpp index 353196a88d4..49b0963c04d 100644 --- a/inference-engine/src/auto_plugin/auto_exec_network.cpp +++ b/inference-engine/src/auto_plugin/auto_exec_network.cpp @@ -3,10 +3,8 @@ // #include -#include #include #include -#include #include "ie_metric_helpers.hpp" #include "auto_exec_network.hpp" @@ -15,8 +13,8 @@ namespace AutoPlugin { using namespace InferenceEngine; -AutoExecutableNetwork::AutoExecutableNetwork(const SoExecutableNetworkInternal& network) : - _network(network) { +AutoExecutableNetwork::AutoExecutableNetwork(const SoExecutableNetworkInternal& network, bool enablePerfCount) : + _network(network), _enablePerfCount(enablePerfCount) { } AutoExecutableNetwork::~AutoExecutableNetwork() = default; @@ -24,7 +22,7 @@ AutoExecutableNetwork::~AutoExecutableNetwork() = default; InferenceEngine::IInferRequestInternal::Ptr AutoExecutableNetwork::CreateInferRequestImpl(InputsDataMap networkInputs, OutputsDataMap networkOutputs) { SoIInferRequestInternal inferRequest = {_network, _network->CreateInferRequest()}; - return std::make_shared(_networkInputs, _networkOutputs, inferRequest); + return 
std::make_shared(_networkInputs, _networkOutputs, inferRequest, _enablePerfCount); } void AutoExecutableNetwork::Export(std::ostream& networkModel) { diff --git a/inference-engine/src/auto_plugin/auto_exec_network.hpp b/inference-engine/src/auto_plugin/auto_exec_network.hpp index a39478b19a7..e29970711eb 100644 --- a/inference-engine/src/auto_plugin/auto_exec_network.hpp +++ b/inference-engine/src/auto_plugin/auto_exec_network.hpp @@ -19,16 +19,11 @@ namespace AutoPlugin { using DeviceName = std::string; -struct DeviceInformation { - DeviceName deviceName; - std::map config; -}; - class AutoExecutableNetwork : public InferenceEngine::IExecutableNetworkInternal { public: using Ptr = std::shared_ptr; - explicit AutoExecutableNetwork(const InferenceEngine::SoExecutableNetworkInternal& network); + explicit AutoExecutableNetwork(const InferenceEngine::SoExecutableNetworkInternal& network, bool enablePerfCount); void Export(std::ostream& networkModel) override; InferenceEngine::RemoteContext::Ptr GetContext() const override; @@ -43,6 +38,7 @@ public: private: InferenceEngine::SoExecutableNetworkInternal _network; + bool _enablePerfCount; }; } // namespace AutoPlugin diff --git a/inference-engine/src/auto_plugin/auto_infer_request.cpp b/inference-engine/src/auto_plugin/auto_infer_request.cpp index f0777409830..46d60318715 100644 --- a/inference-engine/src/auto_plugin/auto_infer_request.cpp +++ b/inference-engine/src/auto_plugin/auto_infer_request.cpp @@ -11,13 +11,23 @@ namespace AutoPlugin { AutoInferRequest::AutoInferRequest(const InputsDataMap& networkInputs, const OutputsDataMap& networkOutputs, - const SoIInferRequestInternal& inferRequest) + const SoIInferRequestInternal& inferRequest, + bool enablePerfCount) : IInferRequestInternal(networkInputs, networkOutputs) - , _inferRequest(inferRequest) { + , _inferRequest(inferRequest) + , _enablePerfCount(enablePerfCount) { } std::map AutoInferRequest::GetPerformanceCounts() const { - return _inferRequest->GetPerformanceCounts(); + if (_enablePerfCount) { + try { + return _inferRequest->GetPerformanceCounts(); + } catch (...) 
{ + return {}; + } + } else { + return {}; + } } void AutoInferRequest::InferImpl() { diff --git a/inference-engine/src/auto_plugin/auto_infer_request.hpp b/inference-engine/src/auto_plugin/auto_infer_request.hpp index 1ccaf0093b2..c97b2fa5aed 100644 --- a/inference-engine/src/auto_plugin/auto_infer_request.hpp +++ b/inference-engine/src/auto_plugin/auto_infer_request.hpp @@ -24,7 +24,8 @@ public: using Ptr = std::shared_ptr; explicit AutoInferRequest(const InferenceEngine::InputsDataMap& networkInputs, const InferenceEngine::OutputsDataMap& networkOutputs, - const InferenceEngine::SoIInferRequestInternal& inferRequest); + const InferenceEngine::SoIInferRequestInternal& inferRequest, + bool enablePerfCount); std::map GetPerformanceCounts() const override; void InferImpl() override; void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) override; @@ -37,6 +38,7 @@ public: private: InferenceEngine::SoIInferRequestInternal _inferRequest; + bool _enablePerfCount; }; } // namespace AutoPlugin diff --git a/inference-engine/src/auto_plugin/auto_plugin.cpp b/inference-engine/src/auto_plugin/auto_plugin.cpp index 1fc20063575..274fa9d224f 100644 --- a/inference-engine/src/auto_plugin/auto_plugin.cpp +++ b/inference-engine/src/auto_plugin/auto_plugin.cpp @@ -75,11 +75,11 @@ IE::QueryNetworkResult AutoInferencePlugin::QueryNetwork(const IE::CNNNetwork& n } auto fullConfig = mergeConfigs(_config, config); - auto metaDevices = GetDeviceChoice(fullConfig); + auto metaDevices = GetDeviceList(fullConfig); std::unordered_set supportedLayers; for (auto&& value : metaDevices) { try { - auto deviceQr = GetCore()->QueryNetwork(network, value.deviceName, value.config); + auto deviceQr = GetCore()->QueryNetwork(network, value, {}); std::unordered_set deviceSupportedLayers; for (auto &&layerQr : deviceQr.supportedLayersMap) { deviceSupportedLayers.emplace(layerQr.first); @@ -111,7 +111,19 @@ IE::Parameter AutoInferencePlugin::GetConfig(const std::string& name, void AutoInferencePlugin::SetConfig(const ConfigType& config) { for (auto && kvp : config) { - _config[kvp.first] = kvp.second; + if (kvp.first.find("AUTO_") == 0) { + _config[kvp.first] = kvp.second; + } else if (kvp.first == IE::PluginConfigParams::KEY_PERF_COUNT) { + if (kvp.second == IE::PluginConfigParams::YES || + kvp.second == IE::PluginConfigParams::NO) { + _config[kvp.first] = kvp.second; + } else { + IE_THROW() << "Unsupported config value: " << kvp.second + << " for key: " << kvp.first; + } + } else { + IE_THROW() << "Unsupported config key: " << kvp.first; + } } } @@ -128,7 +140,10 @@ IE::Parameter AutoInferencePlugin::GetMetric(const std::string& name, std::string device_name = {"Inference Engine AUTO device"}; IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, device_name); } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) { - std::vector configKeys; + std::vector configKeys = { + IE::KEY_AUTO_DEVICE_LIST, + IE::PluginConfigParams::KEY_PERF_COUNT + }; IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys); } else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) { std::vector capabilities = GetOptimizationCapabilities(options); @@ -139,42 +154,21 @@ IE::Parameter AutoInferencePlugin::GetMetric(const std::string& name, } //////////////////////////////////// private & protected functions /////////////////// -std::vector AutoInferencePlugin::GetDeviceChoice(const ConfigType& config) const { - std::vector metaDevices; - std::vector availableDevices; +std::vector AutoInferencePlugin::GetDeviceList(const ConfigType& config) const { + 
std::vector deviceList; auto deviceListConfig = config.find(IE::KEY_AUTO_DEVICE_LIST); if (deviceListConfig == config.end()) { - availableDevices = GetCore()->GetAvailableDevices(); + deviceList = GetCore()->GetAvailableDevices(); } else { - availableDevices = IE::DeviceIDParser::getHeteroDevices(deviceListConfig->second); + deviceList = IE::DeviceIDParser::getHeteroDevices(deviceListConfig->second); } - auto getDeviceConfig = [&] (const DeviceName & deviceWithID) { - IE::DeviceIDParser deviceParser(deviceWithID); - std::string deviceName = deviceParser.getDeviceName(); - ConfigType tconfig = config; - - // set device ID if any - std::string deviceIDLocal = deviceParser.getDeviceID(); - if (!deviceIDLocal.empty()) { - tconfig[IE::PluginConfigParams::KEY_DEVICE_ID] = deviceIDLocal; - } - - return GetSupportedConfig(tconfig, deviceName); - }; - - for (auto && d : availableDevices) { - if (d != _pluginName) { - metaDevices.push_back({ d, getDeviceConfig(d)}); - } - } - - if (metaDevices.empty()) { + if (deviceList.empty()) { IE_THROW() << "Please, check environment due to no supported devices can be used"; } - return metaDevices; + return deviceList; } std::vector AutoInferencePlugin::GetOptimizationCapabilities(const std::map & options) const { @@ -215,7 +209,21 @@ ConfigType AutoInferencePlugin::GetSupportedConfig(const ConfigType& config, return supportedConfig; } -DeviceInformation AutoInferencePlugin::SelectDevice(const std::vector& metaDevices, const std::string& networkPrecision) { +void AutoInferencePlugin::CheckConfig(const ConfigType& config) { + std::vector supportedConfigKeys = GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS), {}); + for (auto&& c : config) { + auto itKey = std::find(supportedConfigKeys.begin(), supportedConfigKeys.end(), c.first); + if (supportedConfigKeys.end() == itKey) { + // CVS-57233 + if (c.first.find("AUTO_") == 0) { + continue; + } + IE_THROW() << "AUTO plugin doesn't support config key " << c.first; + } + } +} + +DeviceName AutoInferencePlugin::SelectDevice(const std::vector& metaDevices, const std::string& networkPrecision) { if (metaDevices.empty()) { IE_THROW(NotFound) << "No available device to select in AUTO plugin"; } @@ -223,15 +231,15 @@ DeviceInformation AutoInferencePlugin::SelectDevice(const std::vector CPU; - std::vector GPU; + std::vector CPU; + std::vector GPU; for (auto& item : metaDevices) { - if (item.deviceName.find("CPU") == 0) { + if (item.find("CPU") == 0) { CPU.push_back(item); continue; } - if (item.deviceName.find("GPU") == 0) { + if (item.find("GPU") == 0) { GPU.push_back(item); continue; } @@ -242,10 +250,10 @@ DeviceInformation AutoInferencePlugin::SelectDevice(const std::vector GPU.1 > GPU.0 > GPU, so we always choose the GPU[0] as best device - std::sort(GPU.begin(), GPU.end(), [](const DeviceInformation& a, const DeviceInformation& b)->bool{return b.deviceName < a.deviceName;}); + std::sort(GPU.begin(), GPU.end(), [](const DeviceName& a, const DeviceName& b)->bool{return b < a;}); for (auto&& item : GPU) { - std::vector capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); auto res = std::find(capability.begin(), capability.end(), networkPrecision); if (res != capability.end()) { return item; diff --git a/inference-engine/src/auto_plugin/auto_plugin.hpp b/inference-engine/src/auto_plugin/auto_plugin.hpp index af42e9f0ef7..858ee2143fd 100644 --- a/inference-engine/src/auto_plugin/auto_plugin.hpp +++ 
b/inference-engine/src/auto_plugin/auto_plugin.hpp @@ -30,10 +30,11 @@ public: void SetConfig(const ConfigType& config) override; private: - std::vector GetDeviceChoice(const ConfigType& config) const; + std::vector GetDeviceList(const ConfigType& config) const; std::vector GetOptimizationCapabilities(const std::map& options) const; - DeviceInformation SelectDevice(const std::vector& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32)); - ConfigType GetSupportedConfig(const ConfigType& config, const AutoPlugin::DeviceName & deviceName) const; + DeviceName SelectDevice(const std::vector& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32)); + ConfigType GetSupportedConfig(const ConfigType& config, const DeviceName & deviceName) const; + void CheckConfig(const ConfigType& config); static ConfigType mergeConfigs(ConfigType config, const ConfigType& local); template @@ -41,18 +42,21 @@ private: if (GetCore() == nullptr) { IE_THROW() << "Please, work with AUTO device via InferencEngine::Core object"; } + + CheckConfig(config); + auto fullConfig = mergeConfigs(_config, config); - auto metaDevices = GetDeviceChoice(fullConfig); - DeviceInformation selectedDevice; + auto metaDevices = GetDeviceList(fullConfig); + DeviceName selectedDevice; IE::SoExecutableNetworkInternal executableNetwork; while (!metaDevices.empty()) { selectedDevice = SelectDevice(metaDevices, networkPrecision); try { - executableNetwork = GetCore()->LoadNetwork(param, selectedDevice.deviceName, selectedDevice.config); + executableNetwork = GetCore()->LoadNetwork(param, selectedDevice, {}); break; } catch (...) { auto eraseDevice = std::find_if(metaDevices.begin(), metaDevices.end(), - [=](const DeviceInformation& d)->bool{return d.deviceName == selectedDevice.deviceName;}); + [=](const DeviceName& d)->bool{return d == selectedDevice;}); if (eraseDevice == metaDevices.end()) { IE_THROW() << "Didn't find the selected device name"; } @@ -63,7 +67,10 @@ private: if (!executableNetwork) { IE_THROW() << "Failed to load network by AUTO plugin"; } - auto impl = std::make_shared(executableNetwork); + + bool enablePerfCount = fullConfig.find(IE::PluginConfigParams::KEY_PERF_COUNT) != fullConfig.end(); + + auto impl = std::make_shared(executableNetwork, enablePerfCount); if (std::is_same::value) { SetExeNetworkInfo(impl, executableNetwork->GetInputsInfo(), diff --git a/inference-engine/src/cldnn_engine/cldnn_config.cpp b/inference-engine/src/cldnn_engine/cldnn_config.cpp index ff5d9693522..3de19bdff87 100644 --- a/inference-engine/src/cldnn_engine/cldnn_config.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_config.cpp @@ -5,6 +5,7 @@ #include #include +#include #include "cldnn_config.h" #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" #include "ie_api.h" @@ -39,6 +40,7 @@ static void createDirectory(std::string _path) { } } +IE_SUPPRESS_DEPRECATED_START void Config::UpdateFromMap(const std::map& configMap) { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Config::UpdateFromMap"); for (auto& kvp : configMap) { @@ -69,7 +71,8 @@ void Config::UpdateFromMap(const std::map& configMap) } else { IE_THROW(NotFound) << "Unsupported property value by plugin: " << val; } - } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY) == 0) { + } else if (key.compare(GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY) == 0 || + key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY) == 0) { std::stringstream ss(val); uint32_t uVal(0); ss >> uVal; @@ -93,7 +96,8 @@ void Config::UpdateFromMap(const 
std::map& configMap) IE_THROW(ParameterMismatch) << "Unsupported queue priority value: " << uVal; } - } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE) == 0) { + } else if (key.compare(GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE) == 0 || + key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE) == 0) { std::stringstream ss(val); uint32_t uVal(0); ss >> uVal; @@ -205,7 +209,8 @@ void Config::UpdateFromMap(const std::map& configMap) } else { IE_THROW(NotFound) << "Unsupported property value by plugin: " << val; } - } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_NV12_TWO_INPUTS) == 0) { + } else if (key.compare(GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS) == 0 || + key.compare(CLDNNConfigParams::KEY_CLDNN_NV12_TWO_INPUTS) == 0) { if (val.compare(PluginConfigParams::YES) == 0) { nv12_two_inputs = true; } else if (val.compare(PluginConfigParams::NO) == 0) { @@ -221,7 +226,7 @@ void Config::UpdateFromMap(const std::map& configMap) } else { IE_THROW(NotFound) << "Unsupported KEY_CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS flag value: " << val; } - } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_MAX_NUM_THREADS) == 0) { + } else if (key.compare(GPUConfigParams::KEY_GPU_MAX_NUM_THREADS) == 0) { int max_threads = std::max(1, static_cast(std::thread::hardware_concurrency())); try { int val_i = std::stoi(val); @@ -231,17 +236,17 @@ void Config::UpdateFromMap(const std::map& configMap) n_threads = val_i; } } catch (const std::exception&) { - IE_THROW() << "Wrong value for property key " << CLDNNConfigParams::KEY_CLDNN_MAX_NUM_THREADS << ": " << val + IE_THROW() << "Wrong value for property key " << GPUConfigParams::KEY_GPU_MAX_NUM_THREADS << ": " << val << "\nSpecify the number of threads use for build as an integer." << "\nOut of range value will be set as a default value, maximum concurrent threads."; } - } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_ENABLE_LOOP_UNROLLING) == 0) { + } else if (key.compare(GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING) == 0) { if (val.compare(PluginConfigParams::YES) == 0) { enable_loop_unrolling = true; } else if (val.compare(PluginConfigParams::NO) == 0) { enable_loop_unrolling = false; } else { - IE_THROW(ParameterMismatch) << "Unsupported KEY_CLDNN_ENABLE_LOOP_UNROLLING flag value: " << val; + IE_THROW(ParameterMismatch) << "Unsupported KEY_GPU_ENABLE_LOOP_UNROLLING flag value: " << val; } } else { IE_THROW(NotFound) << "Unsupported property key by plugin: " << key; @@ -297,6 +302,7 @@ void Config::adjustKeyMapValues() { default: break; } key_config_map[CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY] = qp; + key_config_map[GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY] = qp; } { std::string qt = "0"; @@ -307,6 +313,7 @@ void Config::adjustKeyMapValues() { default: break; } key_config_map[CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE] = qt; + key_config_map[GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE] = qt; } { std::string tm = PluginConfigParams::TUNING_DISABLED; @@ -328,11 +335,13 @@ void Config::adjustKeyMapValues() { key_config_map[PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS] = std::to_string(throughput_streams); key_config_map[PluginConfigParams::KEY_DEVICE_ID] = device_id; key_config_map[PluginConfigParams::KEY_CONFIG_FILE] = ""; - key_config_map[CLDNNConfigParams::KEY_CLDNN_MAX_NUM_THREADS] = std::to_string(n_threads); + key_config_map[GPUConfigParams::KEY_GPU_MAX_NUM_THREADS] = std::to_string(n_threads); if (enable_loop_unrolling) - key_config_map[CLDNNConfigParams::KEY_CLDNN_ENABLE_LOOP_UNROLLING] = PluginConfigParams::YES; + 
key_config_map[GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING] = PluginConfigParams::YES; else - key_config_map[CLDNNConfigParams::KEY_CLDNN_ENABLE_LOOP_UNROLLING] = PluginConfigParams::NO; + key_config_map[GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING] = PluginConfigParams::NO; } +IE_SUPPRESS_DEPRECATED_END + } // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp index 86b9f2e4b95..171919a8077 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp @@ -79,7 +79,7 @@ #include "cldnn_executable_network.h" #include "cldnn_custom_layer.h" #include "cldnn_itt.h" -#include "cldnn/cldnn_config.hpp" +#include "gpu/gpu_config.hpp" #ifdef __linux__ # include diff --git a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp index c2289fa9fb0..5191da35c2e 100644 --- a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp @@ -16,7 +16,6 @@ #include "cldnn_itt.h" #include -#include #include "cldnn_infer_request.h" #include #include "cldnn_async_infer_request.h" diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.cpp b/inference-engine/src/cldnn_engine/cldnn_graph.cpp index 04d40c9815d..1f835d8ac2c 100644 --- a/inference-engine/src/cldnn_engine/cldnn_graph.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_graph.cpp @@ -16,7 +16,6 @@ #include "cldnn_graph.h" #include "simple_math.h" #include -#include #include "cldnn_infer_request.h" #include #include diff --git a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp index 9b0eccaea59..3a283cae895 100644 --- a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp +++ b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp @@ -10,13 +10,18 @@ namespace GNAPluginNS { namespace GNALimitations { +constexpr uint32_t bufferMaxSize = 65528; + constexpr uint32_t convMinFiltersNum = 4; constexpr uint32_t convMaxFiltersNum = 65532; constexpr uint32_t convFiltersNumDivider = 4; +constexpr uint32_t convFilterMaxSize = 768; constexpr uint32_t convEachKernelByteAlignment = 16; constexpr uint32_t noOfInputsDivisor = 8; constexpr uint32_t noOfInputsLowPrecDivisor = 16; +constexpr uint32_t affineMaxBatchSize = 8; + namespace Cnn2D { struct RangeLimit { uint32_t min; diff --git a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp index 3c1fdaac0e7..11f13a7a9ac 100644 --- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp +++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp @@ -370,14 +370,8 @@ class ScaleFactorPerLayer { auto minOutValue = quantizedParams->_dst_quant.GetMinValues().front(); auto maxOutValue = quantizedParams->_dst_quant.GetMaxValues().front(); auto absMax = std::max(std::abs(minOutValue), std::abs(maxOutValue)); - auto absMin = std::min(std::abs(minOutValue), std::abs(maxOutValue)); result = (quantizedParams->_dst_quant.GetLevels() - 1) / (maxOutValue - minOutValue); - if (0 && fp32eq(absMin, 0.0f) && !fp32eq(absMax, 0.0f)) { - result = (quantizedParams->_dst_quant.GetLevels() - 1) / (2 * absMax); - } - // - //result = MAX_VAL_2B_FEAT / absMax; if (std::isinf(result) || fp32eq(absMax, 0.0f)) { result = max_activation_scale_factor; } @@ -401,6 +395,7 @@ class 
ScaleFactorPerLayer { (layer.isIdentity() || layer.isFakeQuantize()) && LayerInfo(prevLayer).isWeightableIdentity()) { auto prevLayerQuant = InferenceEngine::getInjectedData(*prevLayer); if (!fp32eq(prevLayerQuant->_src_quant.GetScale(), 1.0f) && + prevLayerQuant->_src_quant.IsStatsSet() && (prevLayer2 == nullptr || LayerInfo(prevLayer2).has8BOr16BOutput())) { result = prevLayerQuant->_src_quant.GetScale(); usePrevScaleFactor = true; diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp index 23685b4734f..bf44e437af0 100644 --- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp @@ -158,25 +158,27 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) THROW_GNA_LAYER_EXCEPTION(layer) << " outData["<< i << "]" << " connected by " << j <<" connection doesnt connect to functional layer"; } - auto dataOutput = outFunctionalLayer.first->insData[outFunctionalLayer.second].lock(); + for (int idx : outFunctionalLayer.second) { + auto dataOutput = outFunctionalLayer.first->insData[idx].lock(); - padding = std::max(padding, LayerInfo(outFunctionalLayer.first).paddingSize()) - * dataOutput->getPrecision().size(); - output_layer_size = - InferenceEngine::details::product(begin(dataOutput->getDims()), - end(dataOutput->getDims())) * dataOutput->getPrecision().size(); + padding = std::max(padding, LayerInfo(outFunctionalLayer.first).paddingSize()) + * dataOutput->getPrecision().size(); + output_layer_size = + InferenceEngine::details::product(begin(dataOutput->getDims()), + end(dataOutput->getDims())) * dataOutput->getPrecision().size(); - if (LayerInfo(outFunctionalLayer.first).isAffineFilter()) { - size_t aligned64_offset = outFunctionalLayer.first->GetParamAsInt("offset"); - layerInfoItem.splitOutputLayers.emplace_back( - outFunctionalLayer.first, - outFunctionalLayer.second, - aligned64_offset * dataOutput->getPrecision().size(), - output_layer_size); - } else { - layerInfoItem.splitOutputLayers.emplace_back( - outFunctionalLayer.first, outFunctionalLayer.second, split_size, output_layer_size); - } + if (LayerInfo(outFunctionalLayer.first).isAffineFilter()) { + size_t aligned64_offset = outFunctionalLayer.first->GetParamAsInt("offset"); + layerInfoItem.splitOutputLayers.emplace_back( + outFunctionalLayer.first, + idx, + aligned64_offset * dataOutput->getPrecision().size(), + output_layer_size); + } else { + layerInfoItem.splitOutputLayers.emplace_back( + outFunctionalLayer.first, idx, split_size, output_layer_size); + } + } } // in case of unconnected split - we need properly increment size diff --git a/inference-engine/src/gna_plugin/gna_graph_tools.hpp b/inference-engine/src/gna_plugin/gna_graph_tools.hpp index e9cf70790ac..51701268209 100644 --- a/inference-engine/src/gna_plugin/gna_graph_tools.hpp +++ b/inference-engine/src/gna_plugin/gna_graph_tools.hpp @@ -155,14 +155,14 @@ inline InferenceEngine::CNNLayerPtr CNNNetPrevLayerSkipCertain(Layer layer, int */ template -inline std::pair CNNNetCheckNextLayerSkipCertain(Layer layer, int oidx, int iidx, bool bOnlyCheck, +inline std::pair> CNNNetCheckNextLayerSkipCertain(Layer layer, int oidx, int iidx, bool bOnlyCheck, const std::function &shouldSkip) { if (oidx >= layer->outData.size()) { - if (bOnlyCheck) return {nullptr, 0}; + if (bOnlyCheck) return {nullptr, {}}; THROW_GNA_LAYER_EXCEPTION(layer) << " no next output layer for outdata: " << oidx; } if 
(getInputTo(layer->outData[oidx]).empty() || iidx >= getInputTo(layer->outData[oidx]).size()) { - if (bOnlyCheck) return {nullptr, 0}; + if (bOnlyCheck) return {nullptr, {}}; THROW_GNA_LAYER_EXCEPTION(layer) << " no next output layer for outdata: " << oidx << " and inputTo index: " << iidx; } @@ -174,12 +174,12 @@ inline std::pair CNNNetCheckNextLayerSkipCer while (shouldSkip(outLayer->second)) { if (outLayer->second->outData.size() <= new_oidx) { - if (bOnlyCheck) return { nullptr, 0 }; + if (bOnlyCheck) return { nullptr, {} }; THROW_GNA_LAYER_EXCEPTION(outLayer->second) << " no next output layer for outdata: " << new_oidx; } if (getInputTo(outLayer->second->outData[new_oidx]).size() <= new_iidx) { - if (bOnlyCheck) return { nullptr, 0 }; + if (bOnlyCheck) return { nullptr, {} }; THROW_GNA_LAYER_EXCEPTION(outLayer->second) << " no next output layer for outdata: " << new_oidx << " and inputTo index: " << new_iidx; } @@ -188,11 +188,7 @@ inline std::pair CNNNetCheckNextLayerSkipCer } auto insDataIdx = CNNLayerFindInsDataIdxes(layer->outData[new_oidx], outLayer->second); - if (insDataIdx.size() != 1) { - if (bOnlyCheck) return { nullptr, 0 }; - THROW_GNA_LAYER_EXCEPTION(layer) << " has multiple connection to " << new_oidx << " outData"; - } - return { outLayer->second, insDataIdx.front() }; + return { outLayer->second, insDataIdx }; } /** @@ -256,7 +252,7 @@ inline std::pair CNNNetCheckNextLayerSkipCer /// @brief alias for strict checkNextLayer (false) template -inline std::pair CNNNetGetNextLayerSkipCertain(Layer layer, int oidx, int iidx, +inline std::pair> CNNNetGetNextLayerSkipCertain(Layer layer, int oidx, int iidx, const std::function &shouldSkip) { return CNNNetCheckNextLayerSkipCertain(layer, oidx, iidx, false, shouldSkip); } diff --git a/inference-engine/src/gna_plugin/gna_groups.hpp b/inference-engine/src/gna_plugin/gna_groups.hpp index 21abe5d0124..2449338821c 100644 --- a/inference-engine/src/gna_plugin/gna_groups.hpp +++ b/inference-engine/src/gna_plugin/gna_groups.hpp @@ -46,14 +46,10 @@ inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input * @param layer */ inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) { - if (GNAPluginNS::LayerInfo(layer).isPower()) + if (GNAPluginNS::LayerInfo(layer).isPower() || GNAPluginNS::LayerInfo(layer).isCopy()) return true; - if (!GNAPluginNS::LayerInfo(layer).isScaleShift()) - return false; - - // Don't reshape user-defined ScaleShift layers - if (layer->name.rfind("SyntheticScaleShift", 0) == std::string::npos) + if (!GNAPluginNS::LayerInfo(layer).isSyntheticScaleShift()) return false; // Don't reshape the first dnn layer since it breaks groups recognition @@ -61,8 +57,7 @@ inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) { return LayerInfo(ptr).isNonValuesChangable(); }); IE_ASSERT(prevLayer != nullptr); - if (LayerInfo(prevLayer).isInput()) - return false; + if (LayerInfo(prevLayer).isInput()) return false; // Don't reshape diagonallayers with bias connection return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput(); diff --git a/inference-engine/src/gna_plugin/gna_model_serial.cpp b/inference-engine/src/gna_plugin/gna_model_serial.cpp index fdb99d7f273..e32ded8a9e3 100644 --- a/inference-engine/src/gna_plugin/gna_model_serial.cpp +++ b/inference-engine/src/gna_plugin/gna_model_serial.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #endif @@ -133,10 +134,11 @@ GNAPluginNS::HeaderLatest::ModelHeader 
GNAModelSerial::ReadHeader(std::istream & } case 5: case 6: + case 7: readNBytes(&header, sizeof(HeaderLatest::ModelHeader), is); break; default: - THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 4 and is: " << header.version.minor; + THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 7 and is: " << header.version.minor; } break; default: @@ -154,6 +156,40 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream & return header; } +GNAPluginNS::HeaderLatest::RuntimeEndPoint GNAModelSerial::ReadEndPoint(std::istream &is) { + is.exceptions(std::istream::failbit); + + HeaderLatest::RuntimeEndPoint endPoint; + switch (modelHeader.version.major) { + case 2: + switch (modelHeader.version.minor) { + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + { + Header2dot6::RuntimeEndPoint tempEndPoint2dot6; + readBits(tempEndPoint2dot6, is); + endPoint = HeaderLatest::RuntimeEndPoint(tempEndPoint2dot6, modelHeader.nGroup); + break; + } + case 7: + readNBytes(&endPoint, sizeof(HeaderLatest::RuntimeEndPoint), is); + break; + default: + THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 7 and is: " << modelHeader.version.minor; + } + break; + default: + THROW_GNA_EXCEPTION << "Imported file unsupported. Import for files with major version equal to: " + << modelHeader.version.major << " is not implemented"; + } + + return endPoint; +} + #define offsetFromBase(field)\ getOffsetFromBase(field, #field) @@ -324,18 +360,6 @@ void GNAModelSerial::Import(void *basePointer, is.read(reinterpret_cast(basePointer), gnaGraphSize); } - -uint32_t guessGrouping(Gna2Model const& model) { - if (model.NumberOfOperations == 0 || - model.Operations == nullptr || - model.Operations[0].Operands == nullptr || - model.Operations[0].NumberOfOperands == 0 || - model.Operations[0].Operands[0]->Shape.NumberOfDimensions < 2) { - THROW_GNA_EXCEPTION << "Can not guess grouping"; - } - return (std::min)(model.Operations[0].Operands[0]->Shape.Dimensions[0], model.Operations[0].Operands[0]->Shape.Dimensions[1]); -} - void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const { os.exceptions(std::ostream::failbit); @@ -366,6 +390,9 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea out.descriptor_offset = offsetFromBase(ep.descriptor_ptr); out.scaleFactor = ep.scaleFactor; out.element_size = ep.element_size; + out.shape = ep.shape; + out.layout = ep.layout; + out.precision = ep.precision; out.orientation = ep.orientation; return out; }; @@ -381,7 +408,7 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea header.headerSize = sizeof(HeaderLatest::ModelHeader); header.gnaMemSize = gnaGraphSize; header.layersCount = layers.size(); - header.nGroup = guessGrouping(*gna2Model); + header.nGroup = 1; // just to support the old models header.nInputs = inputs.size(); header.nOutputs = outputs.size(); header.nTransposeInputs = transposeInputsInfo.size(); @@ -796,13 +823,22 @@ std::vector GNAModelSerial::serializeOutputs(cons std::size_t outputIndex = 0; for (auto const &output : outputsDataMap) { auto outputName = output.first; - auto inputDims = output.second->getTensorDesc().getDims(); - uint32_t elementsCount = static_cast(InferenceEngine::details::product(inputDims.begin(), inputDims.end())); - + auto outputDims = output.second->getTensorDesc().getDims(); + 
HeaderLatest::RuntimeEndPoint::Shape outputShape; + outputShape.NumberOfDimensions = outputDims.size(); + for (size_t i=0; i < outputShape.NumberOfDimensions; ++i) { + outputShape.Dimensions[i] = static_cast(outputDims[i]); + } + uint32_t elementsCount = static_cast(InferenceEngine::details::product(outputDims.begin(), outputDims.end())); + InferenceEngine::Layout outputLayout = output.second->getLayout(); + InferenceEngine::Precision::ePrecision outputPrecision = InferenceEngine::Precision::FP32; HeaderLatest::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor, outputsDesc[outputIndex].ptrs[0], outputsDesc[outputIndex].num_bytes_per_element, elementsCount, + outputShape, + outputLayout, + outputPrecision, outputsDesc[outputIndex].orientation); endPoints.push_back(endPoint); outputIndex++; @@ -818,18 +854,26 @@ std::vector GNAModelSerial::serializeInputs(const for (auto const& input : inputsDataMap) { auto inputName = input.first; auto inputDims = input.second->getTensorDesc().getDims(); - + HeaderLatest::RuntimeEndPoint::Shape inputShape; + inputShape.NumberOfDimensions = inputDims.size(); + for (size_t i=0; i < inputShape.NumberOfDimensions; ++i) { + inputShape.Dimensions[i] = static_cast(inputDims[i]); + } double scaleFactor = inputDesc->getScaleFactor(inputIndex); std::vector descriptor_ptr = inputDesc->getPtrInputsGlobal(inputName); IE_ASSERT(descriptor_ptr.size() > 0); uint32_t element_size = 2u; uint32_t elementsCount = static_cast(InferenceEngine::details::product(inputDims.begin(), inputDims.end())); intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName); - + InferenceEngine::Layout inputLayout = input.second->getLayout(); + InferenceEngine::Precision::ePrecision inputPrecision = InferenceEngine::Precision::FP32; HeaderLatest::RuntimeEndPoint endPoint(scaleFactor, descriptor_ptr[0], element_size, elementsCount, + inputShape, + inputLayout, + inputPrecision, orientation); endPoints.push_back(endPoint); inputIndex++; @@ -846,20 +890,24 @@ void GNAModelSerial::ImportInputs(std::istream &is, for (uint32_t inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) { const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3) ? 
inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex)); - HeaderLatest::RuntimeEndPoint input; - is.read(reinterpret_cast(&input), sizeof(input)); + + HeaderLatest::RuntimeEndPoint input = ReadEndPoint(is); inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast(reinterpret_cast (basePtr) + input.descriptor_offset)); inputsDesc->orientation_in[name] = input.orientation; inputsDesc->bytes_allocated_for_input[name] = input.element_size * input.elements_count; - auto inputDims = InferenceEngine::SizeVector({modelHeader.nGroup, input.elements_count / modelHeader.nGroup}); - + auto inputDims = InferenceEngine::SizeVector(); + for (auto i = 0; i < input.shape.NumberOfDimensions; ++i) { + inputDims.push_back(input.shape.Dimensions[i]); + } + InferenceEngine::Layout inputLayout = static_cast(input.layout); + InferenceEngine::Precision inputPresicion = InferenceEngine::Precision(static_cast(input.precision)); dataMap[name] = std::make_shared(); dataMap[name]->setInputData(std::make_shared(name, InferenceEngine::TensorDesc( - InferenceEngine::Precision::FP32, + inputPresicion, inputDims, - InferenceEngine::Layout::NC))); + inputLayout))); inputsDesc->inputScaleFactors.push_back(input.scaleFactor); } } @@ -875,8 +923,8 @@ void GNAModelSerial::ImportOutputs(std::istream &is, for (uint32_t outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) { const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3) ? outputNames.at(outputIndex) : std::string("output" + std::to_string(outputIndex)); - HeaderLatest::RuntimeEndPoint output; - is.read(reinterpret_cast(&output), sizeof(output)); + + HeaderLatest::RuntimeEndPoint output = ReadEndPoint(is); OutputDesc description; description.ptrs.push_back(reinterpret_cast(reinterpret_cast (basePtr) + output.descriptor_offset)); description.orientation = kDnnInterleavedOrientation; @@ -884,12 +932,17 @@ void GNAModelSerial::ImportOutputs(std::istream &is, description.num_bytes_per_element = output.element_size; description.scale_factor = output.scaleFactor; - auto outputDims = InferenceEngine::SizeVector({modelHeader.nGroup, output.elements_count / modelHeader.nGroup}); + auto outputDims = InferenceEngine::SizeVector(); + for (auto i = 0; i < output.shape.NumberOfDimensions; ++i) { + outputDims.push_back(output.shape.Dimensions[i]); + } + InferenceEngine::Layout outputLayout = static_cast(output.layout); + InferenceEngine::Precision outputPresicion = InferenceEngine::Precision(static_cast(output.precision)); dataMap[name] = std::make_shared(name, InferenceEngine::TensorDesc( - InferenceEngine::Precision::FP32, + outputPresicion, outputDims, - InferenceEngine::Layout::NC)); + outputLayout)); desc.at(outputIndex) = description; } } diff --git a/inference-engine/src/gna_plugin/gna_model_serial.hpp b/inference-engine/src/gna_plugin/gna_model_serial.hpp index d756a23f9fc..f5310d826c4 100644 --- a/inference-engine/src/gna_plugin/gna_model_serial.hpp +++ b/inference-engine/src/gna_plugin/gna_model_serial.hpp @@ -138,6 +138,8 @@ private: */ static GNAPluginNS::HeaderLatest::ModelHeader ReadHeader(std::istream &is); + GNAPluginNS::HeaderLatest::RuntimeEndPoint ReadEndPoint(std::istream &is); + /** * @brief Import model from FS into preallocated buffer, * buffers for pLayers, and pStructs are allocated here and required manual deallocation using mm_free diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index e76eafa6d53..f49d543def1 100644 --- 
a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -54,12 +54,17 @@ #include #include #include +#include #include "transformations/remove_extra_reshapes.hpp" #include "transformations/insert_transpose_after_convolution_or_pooling.hpp" #include "transformations/insert_transpose_before_matmul.hpp" #include "transformations/reorder_activation_and_pooling.hpp" #include "transformations/swap_input_matmul_gna.hpp" +#include "transformations/convert_matmul_to_pointwise_convolution.hpp" +#include "transformations/split_convolution_with_large_buffer_size.hpp" + +#include #if GNA_LIB_VER == 2 #include @@ -667,6 +672,15 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { // WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass manager.register_pass(); manager.register_pass(); + // TODO enable this transformation for networks with convolutions + if (!ngraph::op::util::has_op_with_type(graph)) { + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + } + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); @@ -735,6 +749,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { passes->registerPass(); passes->registerPass(); + passes->registerPass(); passes->registerPass(); passes->registerPass(); passes->registerPass(); @@ -753,7 +768,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { passes->registerPass(); passes->registerPass(); - passes->registerPass(); passes->registerPass(); passes->registerPass(); #if GNA_LIB_VER == 2 @@ -1465,7 +1479,11 @@ static InferenceEngine::Layout GetLayoutForDims(const InferenceEngine::SizeVecto Blob::Ptr GNAPlugin::GetOutputBlob(const std::string& name, InferenceEngine::Precision precision) { // need to have intermediate blob for interleave conversion InferenceEngine::Blob::Ptr outputBlob; - auto outputDims = outputsDataMap[name]->getTensorDesc().getDims(); + auto outputDataIt = outputsDataMap.find(name); + if (outputDataIt == std::end(outputsDataMap)) { + THROW_GNA_EXCEPTION << "Output " << name << " isn't found"; + } + auto outputDims = outputDataIt->second->getTensorDesc().getDims(); outputBlob = make_blob_with_precision(TensorDesc(precision, outputDims, GetLayoutForDims(outputDims))); outputBlob->allocate(); return outputBlob; @@ -1475,7 +1493,11 @@ Blob::Ptr GNAPlugin::GetInputBlob(const std::string& name, InferenceEngine::Prec InferenceEngine::Blob::Ptr inputBlob; // need to have intermediate blob for interleave conversion // TODO: NCHW format support is experimental = c++ MO did insert reshape, while TF mo - not - auto inputDims = inputsDataMap[name]->getTensorDesc().getDims(); + auto inputDataIt = inputsDataMap.find(name); + if (inputDataIt == std::end(inputsDataMap)) { + THROW_GNA_EXCEPTION << "Input " << name << " isn't found"; + } + auto inputDims = inputDataIt->second->getTensorDesc().getDims(); inputBlob = make_blob_with_precision(TensorDesc(precision, inputDims, GetLayoutForDims(inputDims))); inputBlob->allocate(); return inputBlob; diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index 4d3b71b9622..b8962cebd36 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -86,7 +86,7 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer, }); 
IE_ASSERT(inputLayer != nullptr); size_t weightsSize = (LayerInfo(prevLayer).has32BOutput() || LayerInfo(inputLayer).isInput()) ? - weightsSize = nextLayer->outData[0]->getDims().back() : + nextLayer->outData[0]->getDims().back() : Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1]; std::vector weightsValues(weightsSize, fillValue); IE_ASSERT(diagLayer != nullptr); @@ -314,6 +314,7 @@ void HandleMultipleActivationsForTheLayerPass::run() { LayerInfo info(inputTo.second); if (info.isActivation()) { + if (odata->getDims().empty()) continue; if (!activations.empty() && odata->getDims()[0] != 1) { THROW_GNA_EXCEPTION << "Unsupported batch size " << odata->getDims()[0] << " for diagonal layer insertion"; @@ -741,12 +742,17 @@ void RemovePermutationsNHWCToNCHWPass::run() { IE_ASSERT(!input_to.empty()); auto current_layer = input_to.begin()->second; setNHWCOrder(current_layer->input()); - while (current_layer != pattern_end) { - setNHWCOrder(current_layer->outData[0]); - input_to = getInputTo(current_layer->outData[0]); - IE_ASSERT(!input_to.empty()); - current_layer = input_to.begin()->second; - } + std::function propogateNHWCOrderRecursive = + [pattern_end, &propogateNHWCOrderRecursive, &setNHWCOrder](CNNLayerPtr current_layer) { + if (current_layer == pattern_end) return; + for (size_t i = 0; i < current_layer->outData.size(); ++i) { + setNHWCOrder(current_layer->outData[i]); + auto input_to = getInputTo(current_layer->outData[i]); + IE_ASSERT(!input_to.empty()); + propogateNHWCOrderRecursive(input_to.begin()->second); + } + }; + propogateNHWCOrderRecursive(current_layer); if (LayerInfo(pattern_start).isPermute() && !getInputTo(pattern_start->outData.front()).empty()) { auto layer_before_permute = CNNNetPrevLayer(pattern_start); @@ -1447,21 +1453,19 @@ void EltwiseSplitOverChannelsPass::run() { THROW_GNA_LAYER_EXCEPTION(l) << "number of outputs expected to be 1"; } auto oData = l->outData.front(); + auto out_width = GetDataDimSize(oData, DataDimName::W); auto totalElementsForOutput = details::product(oData->getDims().begin(), oData->getDims().end()); auto maxAffineElements = getPassManager()->getPolicy().GNAAffineDiagonalPolicy.limitedTo; if (totalElementsForOutput <= maxAffineElements) { continue; } - // TODO: for now lets put split of 2 elements as restrictions auto totalSplits = 1 + totalElementsForOutput / maxAffineElements; - if (totalSplits > 2) { - THROW_GNA_LAYER_EXCEPTION(l) << "split layer over output channels on more than 2 layers unsupported"; - } pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n"; auto quantized = InferenceEngine::getInjectedData(l); + bool sameInputs = l->insData[0].lock() == l->insData[1].lock(); std::vector splitLayers(2); for (size_t kThEltwiseInput = 0; kThEltwiseInput != 2; kThEltwiseInput++) { // create split layer @@ -1472,31 +1476,38 @@ void EltwiseSplitOverChannelsPass::run() { split->insData.push_back(l->insData[kThEltwiseInput]); auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc(); - // need to split this desc - if (inputDesc.getLayout() != Layout::NC) { - THROW_GNA_LAYER_EXCEPTION(l) - << "cannot split over channel: input " << std::to_string(kThEltwiseInput) - << " layout need to be NC"; - } // create split layer outputs - for (size_t i = 0;; i++) { - auto elements_num = std::min(totalElementsForOutput - i * maxAffineElements, + size_t usedElements = 0; + for (size_t i = 0; i < totalSplits; i++) { + SizeVector newDims; + size_t elements_num = std::min(totalElementsForOutput - 
usedElements, static_cast(maxAffineElements)); + if (inputDesc.getDims().size() == 2) { + newDims = SizeVector{1, elements_num}; + } else { + elements_num = elements_num - elements_num % out_width; + newDims = SizeVector{1, elements_num / out_width, out_width}; + } - SizeVector newDims = {1, elements_num}; auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout()); auto data = std::make_shared(l->name + "/" + std::to_string(kThEltwiseInput) + "/1", newDesc); getCreatorLayer(data) = split; split->outData.push_back(data); - if (elements_num != maxAffineElements) { + usedElements += elements_num; + if (usedElements == totalElementsForOutput) { break; } } // replacing connection X->eltwise to X->split auto oData = CNNLayerFindOutData(l, kThEltwiseInput); oData.second->second = split; + + if (sameInputs) { + splitLayers[1] = splitLayers[0]; + break; + } } // create concatlayer @@ -1507,8 +1518,6 @@ void EltwiseSplitOverChannelsPass::run() { concat->outData.push_back(masterEltwise->outData.front()); getCreatorLayer(masterEltwise->outData.front()) = concat; - - // create new eltwise layers - here 2 hardcode for (size_t k = 0; k != totalSplits; k++) { auto eltwiseRaw = std::make_shared( LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32}); @@ -1517,7 +1526,6 @@ void EltwiseSplitOverChannelsPass::run() { eltwiseRaw->coeff = masterEltwise->coeff; auto eltwise = quantized ? InferenceEngine::injectData(eltwiseRaw) : eltwiseRaw; - eltwise->insData.push_back(splitLayers[0]->outData[k]); eltwise->insData.push_back(splitLayers[1]->outData[k]); getInputTo(splitLayers[0]->outData[k])[eltwise->name] = eltwise; @@ -1529,6 +1537,15 @@ void EltwiseSplitOverChannelsPass::run() { auto data = std::make_shared(l->name + "/elwise/out/" + std::to_string(k), newDesc); getCreatorLayer(data) = eltwise; eltwise->outData.push_back(data); + if (quantized) { + auto eltwiseQuant = InferenceEngine::getInjectedData(eltwise); + if (quantized->_src_quant.IsStatsSet()) { + eltwiseQuant->_src_quant.CopyStats(quantized->_src_quant); + } + if (quantized->_dst_quant.IsStatsSet()) { + eltwiseQuant->_dst_quant.CopyStats(quantized->_dst_quant); + } + } getInputTo(data)[concat->name] = concat; concat->insData.push_back(data); } @@ -1919,13 +1936,20 @@ void FuseFQIntoWeightsPass::run() { } GNAFakeQuantizeLayer gnaFakeQuantizeLayer(fqLayer); - size_t layers_connected_to_fq_count = getInputTo(fqLayer->outData[0]).size(); + auto inputTo = getInputTo(fqLayer->outData[0]); + size_t layers_connected_to_fq_count = inputTo.size(); + auto layerBeforeWeightable = fqLayer; + while (layers_connected_to_fq_count == 1 && LayerInfo(inputTo.begin()->second).isNonFunctional()) { + layerBeforeWeightable = inputTo.begin()->second; + inputTo = getInputTo(layerBeforeWeightable->outData[0]); + layers_connected_to_fq_count = inputTo.size(); + } for (int index = 0; index < layers_connected_to_fq_count; index++) { - auto weightableLayer = CNNNetGetNextLayerSkipCertain(fqLayer, 0, index, isNonFunctional).first; + auto weightableLayer = CNNNetGetNextLayerSkipCertain(layerBeforeWeightable, 0, index, isNonFunctional).first; if (!LayerInfo(weightableLayer).isWeightable()) { continue; } - if (weightableLayer->insData.size() != 3) { + if (weightableLayer->insData.size() < 2) { continue; } @@ -1942,7 +1966,8 @@ void FuseFQIntoWeightsPass::run() { pass_trace() << "found " << LAYER_NAME(fqLayer) << " that will be converted to weights of " << LAYER_NAME(weightableLayer) << "\n"; - auto biases = 
LayerUtils::getParamFromInputAsBlob(weightableLayer, biasesIdx); + auto biases = weightableLayer->insData.size() == 3 ? + LayerUtils::getParamFromInputAsBlob(weightableLayer, biasesIdx) : nullptr; auto quantizedWeights = gnaFakeQuantizeLayer.getConstInputData(); // 1. broke existing connections - by detaching fq subgraph from rest of graph @@ -2149,8 +2174,11 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { } GNAFakeQuantizeLayer fqLayer(l); auto prevLayer = CNNNetPrevLayerSkipCertain(*fqLayer, 0, donotSkip); - if (prevLayer->outData.size() != 1) { - THROW_GNA_LAYER_EXCEPTION(prevLayer) << " fake quantize input that connected to something else not supported"; + auto prevDataIt = std::find_if(std::begin(prevLayer->outData), std::end(prevLayer->outData), [l](DataPtr data) { + return getInputTo(data).find(l->name) != std::end(getInputTo(data)); + }); + if (prevDataIt == std::end(prevLayer->outData)) { + THROW_GNA_LAYER_EXCEPTION(fqLayer) << "Invalid connection between " << prevLayer->name << " and " << l->name; } auto inputRange = fqLayer.getInputRange(); @@ -2181,8 +2209,18 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { quantParamsPrevLayer->_dst_quant.SetMinValues({ outputRange.first[0] }, false); quantParamsPrevLayer->_dst_quant.SetMaxValues({ outputRange.second[0] }, false); + // Propogate destination statistics to multiply layer if it's set for the next sum/sub layer (is considered as bias) + if (LayerInfo(prevLayer).isEltwiseSum() || LayerInfo(prevLayer).isEltwiseSub()) { + auto eltwPrevLayer = CNNNetPrevLayerSkipCertain(prevLayer, 0, donotSkip); + auto constLayer = CNNNetPrevLayerSkipCertain(prevLayer, 1, donotSkip); + if (LayerInfo(eltwPrevLayer).isEltwise() && LayerInfo(constLayer).isConst()) { + auto quantParamsEltwLayer = InferenceEngine::getInjectedData(eltwPrevLayer); + quantParamsEltwLayer->_dst_quant.CopyStats(quantParamsPrevLayer->_dst_quant); + } + } + auto fqQauntParams = InferenceEngine::getInjectedData(l); - fqQauntParams->_dst_quant.SetLevels(fqLevels); + fqQauntParams->_dst_quant.SetLevels(UINT16_MAX); fqQauntParams->_dst_quant.SetMinValues({ inputRange.first[0] }, true); fqQauntParams->_dst_quant.SetMaxValues({ inputRange.second[0] }, true); fqQauntParams->_dst_quant.SetMinValues({ outputRange.first[0] }, false); @@ -2198,7 +2236,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { // FQ Layer is fused only when previous layer is const, memory or activation layer // or a next layer is activation layer. 
bool isFQFuseAllowed = allowFQFuse(l); - auto prevData = prevLayer->outData.front(); + auto prevData = *prevDataIt; // Find all output layers connected to FQ auto nextLayers = CNNNetGetAllNextLayersSkipCertain(*fqLayer, -1, donotSkip); @@ -2207,7 +2245,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { } if (isFQFuseAllowed) { - getInputTo(prevLayer->outData.front()).clear(); + getInputTo(prevData).clear(); } // Connect all next layers after FQ to the layer that is before FQ @@ -2222,7 +2260,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { for (int insDataIdx : insDatas) { nextLayers[i]->insData[insDataIdx] = prevData; } - getInputTo(prevLayer->outData.front())[nextLayers[i]->name] = nextLayers[i]; + getInputTo(prevData)[nextLayers[i]->name] = nextLayers[i]; } propagateStatistics(quantParamsPrevLayer, nextLayers[i]); diff --git a/inference-engine/src/gna_plugin/serial/headers/2dot7/gna_model_header.hpp b/inference-engine/src/gna_plugin/serial/headers/2dot7/gna_model_header.hpp new file mode 100644 index 00000000000..14badf3adcf --- /dev/null +++ b/inference-engine/src/gna_plugin/serial/headers/2dot7/gna_model_header.hpp @@ -0,0 +1,197 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "backend/dnn_types.h" +#include "serial/headers/2dot4/gna_model_header.hpp" +#include "serial/headers/2dot6/gna_model_header.hpp" +#include "serial/headers/latest/gna_model_header.hpp" +#include "gna_data_types.hpp" + +#pragma pack(push, 1) + +namespace GNAPluginNS { +namespace Header2dot7 { + +/** + Maximal number of supported shape dimensions. + */ +#define GNA_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS 8 + +/** + * @brief Header version 2.7 + */ +struct ModelHeader { + /** + *@brief MagicNumber – GNAM in ascii table, equals to hex 0x474e414d + */ + char gnam[4] = {}; + /** + * @brief if header size is not equal to sizeof ModelHeader - some reserved data is appended at the end of the header + * usually it is an indicator that the model version differs from the one the current export function produces + */ + uint32_t headerSize = 0u; + struct Version { + /** + * @details Version of format Major – unsigned int, ex: 0x0001 + * every change in the header or in the layers definition should be reflected in version change + * for backward compatibility new parsers can read old versions of model with certain restrictions + */ + uint16_t major = 2u; + /** + * @details Version of Format Minor – unsigned int, corresponding to build revision for example + * changes in the minor version do not affect the model layout + */ + uint32_t minor = 7u; + } version; + /** + * @brief Memory required to be allocated using GNAAlloc() + */ + uint64_t gnaMemSize = 0ull; + /** + * @brief Number of GNA Layers + */ + uint64_t layersCount = 0ull; + /** + * @brief Grouping level + * This is a deprecated field, used for old models only (<=2.6) + */ + uint32_t nGroup = 0u; + + /** + * Convolution related settings - they affect the input transformation + */ + uint32_t nRotateRows = 0u; + uint32_t nRotateColumns = 0u; + bool doRotateInput = false; + + uint32_t nInputs = 0u; + uint32_t nOutputs = 0u; + + /** + * Convolution related settings - they affect the output transformation + */ + uint32_t nRotateOutputRows = 0u; + uint32_t nRotateOutputColumns = 0u; + bool doRotateOutput = false; + + uint32_t nTransposeInputs = 0u; + uint32_t nTransposeOutputs = 0u; + + /** + * Reserved Data might be here + */ + ModelHeader() = default;
ModelHeader(GNAPluginNS::Header2dot1::ModelHeader const &old) { + gnaMemSize = old.gnaMemSize; + layersCount = old.layersCount; + nGroup = old.nGroup; + nRotateRows = old.nRotateRows; + nRotateColumns = old.nRotateColumns; + nInputs = old.nInputs; + nOutputs = old.nOutputs; + version.minor = old.version.minor; + } + ModelHeader(GNAPluginNS::Header2dot4::ModelHeader const &old) { + gnaMemSize = old.gnaMemSize; + layersCount = old.layersCount; + nGroup = old.nGroup; + nRotateRows = old.nRotateRows; + nRotateColumns = old.nRotateColumns; + nInputs = old.nInputs; + nOutputs = old.nOutputs; + nRotateOutputRows = old.nRotateOutputRows; + nRotateOutputColumns = old.nRotateOutputColumns; + doRotateOutput = old.doRotateOutput; + version.minor = old.version.minor; + } +}; +#pragma pack(pop) + +/* + * In runtime the endpoint is mostly the same as in the serial version, except for the descriptor field + */ +struct RuntimeEndPoint { + /** + * if the scale factor is different than the one passed into infer, the network might need to be requantized + */ + float scaleFactor = 0; + /** + * Pointer descriptor + */ + void* descriptor_ptr = nullptr; + /** + * Endpoint resolution in bytes. + */ + uint32_t element_size = 0; + /** + * Number of elements + */ + uint32_t elements_count = 0; + /** + * Offset in bytes of pointer descriptor + */ + uint64_t descriptor_offset = 0ull; + /** + Shape specifying dimension values. + */ + struct Shape { + /** + Number of dimensions or rank or order. + */ + uint32_t NumberOfDimensions = 0; + /** + array specifying value of each dimension. + Set all zeros for scalars. + */ + uint32_t Dimensions[GNA_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS] = {0}; + } shape; + /** + * Blob layout + */ + uint8_t layout = InferenceEngine::Layout::NC; + /** + * Blob precision + */ + uint8_t precision = InferenceEngine::Precision::FP32; + + intel_dnn_orientation_t orientation = kDnnUnknownOrientation; + + RuntimeEndPoint() = default; + RuntimeEndPoint(const GNAPluginNS::Header2dot6::RuntimeEndPoint &old, uint32_t ngroup) { + scaleFactor = old.scaleFactor; + descriptor_ptr = old.descriptor_ptr; + element_size = old.element_size; + elements_count = old.elements_count; + orientation = old.orientation; + layout = InferenceEngine::Layout::NC; + precision = InferenceEngine::Precision::FP32; + descriptor_offset = old.descriptor_offset; + InferenceEngine::SizeVector dims = {ngroup, elements_count / ngroup}; + shape.NumberOfDimensions = static_cast(dims.size()); + for (auto i = 0; i < dims.size(); i++) { + shape.Dimensions[i] = dims[i]; + } + } + RuntimeEndPoint(double scaleFactor, + void* descriptor_ptr, + uint32_t element_size, + uint32_t elements_count, + Shape shape, + uint8_t layout, + uint8_t precision, + intel_dnn_orientation_t orientation) : scaleFactor(scaleFactor), + descriptor_ptr(descriptor_ptr), + element_size(element_size), + elements_count(elements_count), + shape(shape), + layout(layout), + precision(precision), + orientation(orientation) { } +}; +} // namespace Header2dot7 +} // namespace GNAPluginNS diff --git a/inference-engine/src/gna_plugin/serial/headers/latest/gna_model_header.hpp b/inference-engine/src/gna_plugin/serial/headers/latest/gna_model_header.hpp index 89292ab88af..7ec27b2caed 100644 --- a/inference-engine/src/gna_plugin/serial/headers/latest/gna_model_header.hpp +++ b/inference-engine/src/gna_plugin/serial/headers/latest/gna_model_header.hpp @@ -4,11 +4,11 @@ #pragma once -#include "serial/headers/2dot6/gna_model_header.hpp" +#include "serial/headers/2dot7/gna_model_header.hpp" namespace GNAPluginNS { namespace
HeaderLatest { -using ModelHeader = GNAPluginNS::Header2dot6::ModelHeader; -using RuntimeEndPoint = GNAPluginNS::Header2dot6::RuntimeEndPoint; +using ModelHeader = GNAPluginNS::Header2dot7::ModelHeader; +using RuntimeEndPoint = GNAPluginNS::Header2dot7::RuntimeEndPoint; } } diff --git a/inference-engine/src/gna_plugin/transformations/convert_matmul_to_pointwise_convolution.cpp b/inference-engine/src/gna_plugin/transformations/convert_matmul_to_pointwise_convolution.cpp new file mode 100644 index 00000000000..da7e6279624 --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/convert_matmul_to_pointwise_convolution.cpp @@ -0,0 +1,180 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/convert_matmul_to_pointwise_convolution.hpp" + +#include +#include +#include + +#include "layers/gna_permute.hpp" +#include "backend/gna_limitations.hpp" + +using namespace GNAPluginNS; + +NGRAPH_RTTI_DEFINITION(ConvertMatmulToPointWiseConvolution, "ConvertMatmulToPointWiseConvolution", 0); +NGRAPH_RTTI_DEFINITION(ConvertMatmulWithBiasToPointWiseConvolution, "ConvertMatmulWithBiasToPointWiseConvolution", 0); +NGRAPH_RTTI_DEFINITION(ConvertMatmulWithFqToPointWiseConvolution, "ConvertMatmulWithFqToPointWiseConvolution", 0); + +static std::tuple VerifyAndGetConvParams(std::shared_ptr matmul_node) { + auto input1_shape = matmul_node->get_input_shape(0); + auto input2_shape = matmul_node->get_input_shape(1); + auto output_shape = matmul_node->get_output_shape(0); + if (input1_shape.size() == 3 && input1_shape.front() == 1) { + input1_shape.erase(std::begin(input1_shape)); + } + + if (input1_shape.size() != 2 || input2_shape.size() != 2 || output_shape.size() < 2) { + return std::make_tuple(false, 0, 0, 0); + } + + // Check if MatMul or corresponding pointwise convolution are supported by GNA + const uint32_t width = input1_shape.front(); + const uint32_t in_channels = input2_shape.back(); + const uint32_t out_channels = input2_shape.front(); + if (input1_shape.front() <= GNALimitations::affineMaxBatchSize || + out_channels % GNALimitations::convFiltersNumDivider != 0 || + out_channels > GNALimitations::convMaxFiltersNum || + in_channels > GNALimitations::convFilterMaxSize) { + return std::make_tuple(false, 0, 0, 0); + } + + return std::make_tuple(true, width, in_channels, out_channels); +} + +static bool Convert(std::shared_ptr matmul_node, + std::shared_ptr add, + std::shared_ptr bias, + std::shared_ptr fq) { + bool supported; + uint32_t width, in_channels, out_channels; + std::tie(supported, width, in_channels, out_channels) = VerifyAndGetConvParams(matmul_node); + if (!supported) return false; + + auto input_node = matmul_node->input_value(0).get_node_shared_ptr(); + auto weights_node = matmul_node->input_value(1).get_node_shared_ptr(); + auto base_name = matmul_node->get_friendly_name(); + + auto reshape_const_before = std::make_shared(ngraph::element::Type_t::i64, + ngraph::Shape{4}, + ngraph::Shape{1, 1, width, in_channels}); + auto reshape_before = std::make_shared(input_node, reshape_const_before, false); + reshape_before->set_friendly_name(base_name + "/reshape_in"); + + auto transpose_before = std::make_shared(reshape_before, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, + GetPermuteOrder(InferenceEngine::Layout::NHWC, InferenceEngine::Layout::NCHW))); + transpose_before->set_friendly_name(base_name + "/transpose_in"); + + auto weights_reshape_const = std::make_shared(ngraph::element::Type_t::i64, + 
ngraph::Shape{4}, ngraph::Shape{out_channels, in_channels, 1, 1}); + auto weights_reshaped = std::make_shared(weights_node, weights_reshape_const, false); + + std::shared_ptr conv_node = std::make_shared(transpose_before, weights_reshaped, + ngraph::Strides{1, 1}, ngraph::CoordinateDiff{0, 0}, ngraph::CoordinateDiff{0, 0}, + ngraph::Strides{1, 1}, ngraph::op::PadType::VALID); + conv_node->set_friendly_name(base_name + "/conv"); + + std::shared_ptr root_node = matmul_node; + if (bias != nullptr) { + conv_node = std::make_shared(conv_node, bias); + root_node = add; + } + + if (fq != nullptr) { + conv_node = fq->clone_with_new_inputs({conv_node, fq->input_value(1), fq->input_value(2), + fq->input_value(3), fq->input_value(4)}); + root_node = fq; + } + + auto transpose_after = std::make_shared(conv_node, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, + GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC))); + transpose_after->set_friendly_name(base_name + "/transpose_out"); + + auto output_shape = matmul_node->get_output_shape(0); + output_shape[output_shape.size() - 1] = out_channels; + output_shape[output_shape.size() - 2] = width; + auto reshape_const_after = std::make_shared(ngraph::element::Type_t::i64, + ngraph::Shape{output_shape.size()}, + output_shape); + auto reshape_after = std::make_shared(transpose_after, reshape_const_after, false); + reshape_after->set_friendly_name(base_name); + + ngraph::replace_node(root_node, reshape_after); + return true; +} + +ConvertMatmulToPointWiseConvolution::ConvertMatmulToPointWiseConvolution() { + auto const_input = ngraph::pattern::wrap_type(); + auto const_fq = ngraph::pattern::wrap_type({const_input, + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type()}); + auto second_input = std::make_shared(ngraph::OutputVector{const_input, const_fq}); + auto matmul = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), second_input}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + return Convert(pattern_map.at(matmul).get_node_shared_ptr(), nullptr, nullptr, nullptr); + }; + + auto m = std::make_shared(matmul, "ConvertMatmulToPointWiseConvolution"); + this->register_matcher(m, callback); +} + +ConvertMatmulWithBiasToPointWiseConvolution::ConvertMatmulWithBiasToPointWiseConvolution() { + auto const_input = ngraph::pattern::wrap_type(); + auto const_fq = ngraph::pattern::wrap_type({const_input, + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type()}); + auto second_input = std::make_shared(ngraph::OutputVector{const_input, const_fq}); + auto matmul = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), second_input}); + auto bias = ngraph::pattern::wrap_type(); + auto add = ngraph::pattern::wrap_type({matmul, bias}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + return Convert(pattern_map.at(matmul).get_node_shared_ptr(), pattern_map.at(add).get_node_shared_ptr(), + pattern_map.at(bias).get_node_shared_ptr(), nullptr); + }; + + auto m = std::make_shared(add, "ConvertMatmulWithBiasToPointWiseConvolution"); + this->register_matcher(m, callback); +} + +ConvertMatmulWithFqToPointWiseConvolution::ConvertMatmulWithFqToPointWiseConvolution() { + auto const_input = 
ngraph::pattern::wrap_type(); + auto const_fq = ngraph::pattern::wrap_type({const_input, + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type()}); + auto second_input = std::make_shared(ngraph::OutputVector{const_input, const_fq}); + auto matmul = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), second_input}); + auto bias = ngraph::pattern::wrap_type(); + auto add = ngraph::pattern::wrap_type({matmul, bias}); + auto matmul_out = std::make_shared(ngraph::OutputVector{add, matmul}); + auto out_fq = ngraph::pattern::wrap_type({matmul_out, + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type()}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + auto add_it = pattern_map.find(add); + auto add_node = (add_it == std::end(pattern_map) ? nullptr : add_it->second.get_node_shared_ptr()); + auto bias_it = pattern_map.find(bias); + auto bias_node = (bias_it == std::end(pattern_map) ? nullptr : bias_it->second.get_node_shared_ptr()); + return Convert(pattern_map.at(matmul).get_node_shared_ptr(), add_node, bias_node, + pattern_map.at(out_fq).get_node_shared_ptr()); + }; + + auto m = std::make_shared(out_fq, "ConvertMatmulWithFqToPointWiseConvolution"); + this->register_matcher(m, callback); +} \ No newline at end of file diff --git a/inference-engine/src/gna_plugin/transformations/convert_matmul_to_pointwise_convolution.hpp b/inference-engine/src/gna_plugin/transformations/convert_matmul_to_pointwise_convolution.hpp new file mode 100644 index 00000000000..999b529194d --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/convert_matmul_to_pointwise_convolution.hpp @@ -0,0 +1,71 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace GNAPluginNS { + +/** + * @brief Convert a MatMul with batch size unsupported by GNA to a point-wise convolution with NHWC layout + * with transposes around it: + * Transpose (NHWC -> NCHW) + * | + * Matmul Convolution in NHWC layout + * Input1: [A, B] B > 8 -------> Input: [1, 1, A, B] + * Input2: [B, C] Kernel: [C, B, 1, 1] + * Output: [A, C] Output: [1, 1, A, C] + * | + * Transpose (NCHW -> NHWC) + */ +class ConvertMatmulToPointWiseConvolution : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertMatmulToPointWiseConvolution(); +}; + +/** + * @brief Convert a MatMul with batch size unsupported by GNA to a point-wise convolution with NHWC layout + * with transposes around it, moved add with bias before the last transpose: + * Transpose (NHWC -> NCHW) + * | + * Matmul Convolution in NHWC layout + * Input1: [A, B] B > 8 -------> Input: [1, 1, A, B] + * Input2: [B, C] Kernel: [C, B, 1, 1] + * Output: [A, C] Output: [1, 1, A, C] + * | | + * Add (const) Add (const) + * | + * Transpose (NCHW -> NHWC) + */ +class ConvertMatmulWithBiasToPointWiseConvolution : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertMatmulWithBiasToPointWiseConvolution(); +}; + +/** + * @brief Convert a MatMul with batch size unsupported by GNA to a point-wise convolution with NHWC layout + * with transposes around it, moved add with bias and/or fake quantize before the last transpose: + * Transpose (NHWC -> NCHW) + * | + * Matmul Convolution in NHWC layout + * Input1: [A, B] B > 8 -------> Input: [1, 1, A, B] + * Input2: [B, C]
Kernel: [C, B, 1, 1] + * Output: [A, C] Output: [1, 1, A, C] + * | | + * Add (const) Add (const) + * | | + * FakeQuantize FakeQuantize + * | + * Transpose (NCHW -> NHWC) + */ +class ConvertMatmulWithFqToPointWiseConvolution : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertMatmulWithFqToPointWiseConvolution(); +}; + +} // namespace GNAPluginNS \ No newline at end of file diff --git a/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp b/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp new file mode 100644 index 00000000000..a9d79c831ab --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp @@ -0,0 +1,131 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/split_convolution_with_large_buffer_size.hpp" + +#include + +#include +#include +#include + +#include "backend/gna_limitations.hpp" + +using namespace GNAPluginNS; + +NGRAPH_RTTI_DEFINITION(SplitConvolution, "SplitConvolution", 0); +NGRAPH_RTTI_DEFINITION(SplitConvolutionWithBias, "SplitConvolutionWithBias", 0); +NGRAPH_RTTI_DEFINITION(SplitConvolutionWithFq, "SplitConvolutionWithFq", 0); + +static std::vector GetConvSplitSizes(std::shared_ptr conv) { + uint32_t width = conv->get_input_shape(0).back(); + uint32_t in_channels = conv->get_input_shape(0).at(1); + uint32_t usedWidth = 0; + std::vector split_sizes; + uint32_t width_max_size = GNALimitations::bufferMaxSize / in_channels; + width_max_size = width_max_size - width_max_size % 64; + while (usedWidth < width) { + uint32_t width_part = std::min(width - usedWidth, width_max_size); + split_sizes.push_back(width_part); + usedWidth += width_part; + } + IE_ASSERT(usedWidth == width); + return split_sizes; +} + +static bool Convert(std::shared_ptr conv, + std::shared_ptr add, + std::shared_ptr bias, + std::shared_ptr fq) { + auto input_size = std::accumulate(std::begin(conv->get_input_shape(0)), + std::end(conv->get_input_shape(0)), 1, std::multiplies()); + if (input_size <= GNALimitations::bufferMaxSize) { + return false; + } + + auto split_sizes = GetConvSplitSizes(conv); + IE_ASSERT(split_sizes.size() > 1); + + /* TODO check if it's NHWC convolution wrapped with transposes or all input dimensions except of width == 1, + otherwise this split axis isn't supported */ + const int64_t width_axis = conv->get_input_shape(0).size() - 1; + auto split_node = std::make_shared(conv->input_value(0), + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector{width_axis}), + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes.size()}), split_sizes)); + split_node->set_friendly_name(conv->get_friendly_name() + "/split"); + ngraph::OutputVector convOutputs; + std::shared_ptr root_node = fq ? fq : (add ?
add : conv); + for (int i = 0; i < split_sizes.size(); ++i) { + std::shared_ptr output = conv->clone_with_new_inputs({split_node->output(i), conv->input_value(1)}); + output->set_friendly_name(conv->get_friendly_name() + "_" + std::to_string(i)); + if (bias) { + output = std::make_shared(output, bias); + } + + if (fq) { + output = fq->clone_with_new_inputs({output, fq->input_value(1), fq->input_value(2), + fq->input_value(3), fq->input_value(4)}); + } + convOutputs.push_back(output); + } + + auto concat = std::make_shared(convOutputs, width_axis); + concat->set_friendly_name(conv->get_friendly_name()); + ngraph::replace_node(root_node, concat); + return true; +} + +SplitConvolution::SplitConvolution() { + auto conv = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), + ngraph::pattern::any_input()}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + return Convert(pattern_map.at(conv).get_node_shared_ptr(), nullptr, nullptr, nullptr); + }; + + auto m = std::make_shared(conv, "SplitConvolution"); + this->register_matcher(m, callback); +} + +SplitConvolutionWithBias::SplitConvolutionWithBias() { + auto conv = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), + ngraph::pattern::any_input()}); + auto bias = ngraph::pattern::wrap_type(); + auto add = ngraph::pattern::wrap_type({conv, bias}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + return Convert(pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(add).get_node_shared_ptr(), + pattern_map.at(bias).get_node_shared_ptr(), nullptr); + }; + + auto m = std::make_shared(add, "SplitConvolutionWithBias"); + this->register_matcher(m, callback); +} + +SplitConvolutionWithFq::SplitConvolutionWithFq() { + auto conv = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), + ngraph::pattern::any_input()}); + auto bias = ngraph::pattern::wrap_type(); + auto add = ngraph::pattern::wrap_type({conv, bias}); + auto conv_output = std::make_shared(ngraph::OutputVector{conv, add}); + auto out_fq = ngraph::pattern::wrap_type({conv_output, + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type()}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + auto add_it = pattern_map.find(add); + auto add_node = (add_it == std::end(pattern_map) ? nullptr : add_it->second.get_node_shared_ptr()); + auto bias_it = pattern_map.find(bias); + auto bias_node = (bias_it == std::end(pattern_map) ? 
nullptr : bias_it->second.get_node_shared_ptr()); + return Convert(pattern_map.at(conv).get_node_shared_ptr(), add_node, bias_node, pattern_map.at(out_fq).get_node_shared_ptr()); + }; + + auto m = std::make_shared(out_fq, "SplitConvolutionWithFq"); + this->register_matcher(m, callback); +} \ No newline at end of file diff --git a/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.hpp b/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.hpp new file mode 100644 index 00000000000..8667f4273bf --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace GNAPluginNS { + +// @brief Splits convolution with large input buffer +class SplitConvolution : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + SplitConvolution(); +}; + +// @brief Splits convolution with large input buffer, move add with bias to each convolution before concat +class SplitConvolutionWithBias : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + SplitConvolutionWithBias(); +}; + +/* @brief Splits convolution with large input buffer, + * move add with bias and/or fake quantize to each convolution before concat + */ +class SplitConvolutionWithFq : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + SplitConvolutionWithFq(); +}; + +} // namespace GNAPluginNS \ No newline at end of file diff --git a/inference-engine/src/hetero_plugin/hetero_executable_network.cpp b/inference-engine/src/hetero_plugin/hetero_executable_network.cpp index 58fb35111af..994ba866f7a 100644 --- a/inference-engine/src/hetero_plugin/hetero_executable_network.cpp +++ b/inference-engine/src/hetero_plugin/hetero_executable_network.cpp @@ -312,6 +312,7 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(const InferenceEngine::CNNNetwo struct Subgraph { ngraph::ResultVector _results; ngraph::ParameterVector _parameters; + ngraph::SinkVector _sinks; std::string _affinity; }; std::unordered_map subgraphs; @@ -325,6 +326,9 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(const InferenceEngine::CNNNetwo } else if (ngraph::op::is_parameter(node)) { subgraph._parameters.emplace_back( std::dynamic_pointer_cast(node->shared_from_this())); + } else if (ngraph::op::is_sink(node)) { + subgraph._sinks.emplace_back( + std::dynamic_pointer_cast(node->shared_from_this())); } auto itAffinity = affinities.find(node); if (itAffinity != affinities.end()) { @@ -373,7 +377,7 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(const InferenceEngine::CNNNetwo for (auto&& subgraph : orderedSubgraphs) { _networks[id]._device = subgraph._affinity; subFunctions[id] = - std::make_shared(subgraph._results, subgraph._parameters, + std::make_shared(subgraph._results, subgraph._sinks, subgraph._parameters, _name + '_' + std::to_string(id)); _networks[id]._clonedNetwork = CNNNetwork{subFunctions[id]}; // update of pre-processing info @@ -550,7 +554,7 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(std::istream& this->SetPointerToPlugin(_heteroPlugin->shared_from_this()); } -void HeteroExecutableNetwork::ExportImpl(std::ostream& heteroModel) { +void HeteroExecutableNetwork::Export(std::ostream& heteroModel) { pugi::xml_document doc; auto heteroNode = doc.append_child("hetero"); heteroNode.append_attribute("name").set_value(_name.c_str()); diff --git 
a/inference-engine/src/hetero_plugin/hetero_executable_network.hpp b/inference-engine/src/hetero_plugin/hetero_executable_network.hpp index 85fc8d9c19c..59574ca2ce7 100644 --- a/inference-engine/src/hetero_plugin/hetero_executable_network.hpp +++ b/inference-engine/src/hetero_plugin/hetero_executable_network.hpp @@ -56,7 +56,7 @@ public: InferenceEngine::Parameter GetMetric(const std::string &name) const override; - void ExportImpl(std::ostream& modelFile) override; + void Export(std::ostream& modelFile) override; private: void InitCNNImpl(const InferenceEngine::CNNNetwork& network); diff --git a/inference-engine/src/hetero_plugin/hetero_plugin.cpp b/inference-engine/src/hetero_plugin/hetero_plugin.cpp index 1d8647716af..09986b1e48e 100644 --- a/inference-engine/src/hetero_plugin/hetero_plugin.cpp +++ b/inference-engine/src/hetero_plugin/hetero_plugin.cpp @@ -57,13 +57,8 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::LoadExeNetworkImpl(cons return std::make_shared(network, mergeConfigs(_config, config), this); } -InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetworkImpl(std::istream& heteroModel, const Configs& config) { - if (GetCore() == nullptr) { - IE_THROW() << "Please, work with HETERO device via InferencEngine::Core object"; - } - - return std::make_shared(heteroModel, - mergeConfigs(_config, config), this); +InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork(std::istream& heteroModel, const std::map& config) { + return std::make_shared(heteroModel, mergeConfigs(_config, config), this); } Engine::Configs Engine::GetSupportedConfig(const Engine::Configs& config, const std::string & deviceName) const { diff --git a/inference-engine/src/hetero_plugin/hetero_plugin.hpp b/inference-engine/src/hetero_plugin/hetero_plugin.hpp index 2b5a93b829b..fbc602116d1 100644 --- a/inference-engine/src/hetero_plugin/hetero_plugin.hpp +++ b/inference-engine/src/hetero_plugin/hetero_plugin.hpp @@ -37,10 +37,11 @@ public: InferenceEngine::Parameter GetConfig(const std::string& name, const std::map & options) const override; - InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetworkImpl(std::istream& heteroModel, const Configs& config) override; + InferenceEngine::IExecutableNetworkInternal::Ptr + ImportNetwork(std::istream& heteroModel, const std::map& config) override; DeviceMetaInformationMap GetDevicePlugins(const std::string& targetFallback, - const Configs & localConfig) const; + const Configs & localConfig) const; private: Configs GetSupportedConfig(const Configs& config, const std::string & deviceName) const; diff --git a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iexecutable_network_internal.cpp b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iexecutable_network_internal.cpp index bf3086551c1..6b5bb34c970 100644 --- a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iexecutable_network_internal.cpp +++ b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iexecutable_network_internal.cpp @@ -49,19 +49,17 @@ std::shared_ptr IExecutableNetworkInternal::CreateInferRe } void IExecutableNetworkInternal::Export(const std::string& modelFileName) { - // we need to write to stringstream first - // because in case of exception in ExportImpl the file is not created - std::stringstream strm; - ExportImpl(strm); - std::ofstream(modelFileName.c_str()) << strm.rdbuf(); + std::ofstream modelFile(modelFileName, std::ios::out | std::ios::binary); + + if (modelFile.is_open()) { + 
Export(modelFile); + } else { + IE_THROW() << "The " << modelFileName << " file can not be opened for Export"; + } } void IExecutableNetworkInternal::Export(std::ostream& networkModel) { - std::stringstream strm; - strm.write(exportMagic.data(), exportMagic.size()); - strm << _plugin->GetName() << std::endl; - ExportImpl(strm); - networkModel << strm.rdbuf(); + IE_THROW(NotImplemented); } CNNNetwork IExecutableNetworkInternal::GetExecGraphInfo() { @@ -97,7 +95,4 @@ std::shared_ptr IExecutableNetworkInternal::CreateInferRe IE_THROW(NotImplemented); } -void IExecutableNetworkInternal::ExportImpl(std::ostream&) { - IE_THROW(NotImplemented); -} } // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iplugin_internal.cpp b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iplugin_internal.cpp index 5637701754e..88599aa78b3 100644 --- a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iplugin_internal.cpp +++ b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iplugin_internal.cpp @@ -16,24 +16,12 @@ #include #include +#include #include #include #include namespace InferenceEngine { -namespace { -void parsePluginName(std::istream& networkModel) { - ExportMagic magic = {}; - auto currentPos = networkModel.tellg(); - networkModel.read(magic.data(), magic.size()); - auto exportedWithName = (exportMagic == magic); - if (exportedWithName) { - networkModel.ignore(std::numeric_limits::max(), '\n'); - } else { - networkModel.seekg(currentPos, networkModel.beg); - } -} -} // namespace PreProcessInfo copyPreProcess(const PreProcessInfo& from) { PreProcessInfo to = from; @@ -170,22 +158,26 @@ RemoteContext::Ptr IInferencePlugin::GetDefaultContext(const ParamMap&) { IE_THROW(NotImplemented); } -std::shared_ptr IInferencePlugin::ImportNetwork(const std::string&, - const std::map&) { - IE_THROW(NotImplemented); +std::shared_ptr IInferencePlugin::ImportNetwork(const std::string& modelFileName, + const std::map& config) { + std::ifstream blobFile(modelFileName, std::ios::binary); + + if (!blobFile.is_open()) { + IE_THROW(NetworkNotRead); + } + + return ImportNetwork(blobFile, config); } std::shared_ptr IInferencePlugin::ImportNetwork(std::istream& networkModel, const std::map& config) { - parsePluginName(networkModel); - return ImportNetworkImpl(networkModel, config); + IE_THROW(NotImplemented); } std::shared_ptr IInferencePlugin::ImportNetwork(std::istream& networkModel, const std::shared_ptr& context, const std::map& config) { - parsePluginName(networkModel); - return ImportNetworkImpl(networkModel, context, config); + IE_THROW(NotImplemented); } void IInferencePlugin::SetCore(ICore* core) { @@ -213,17 +205,6 @@ std::shared_ptr IInferencePlugin::LoadExeNetworkImpl IE_THROW(NotImplemented); } -std::shared_ptr IInferencePlugin::ImportNetworkImpl(std::istream&, - const std::map&) { - IE_THROW(NotImplemented); -} - -std::shared_ptr IInferencePlugin::ImportNetworkImpl(std::istream&, - const std::shared_ptr&, - const std::map&) { - IE_THROW(NotImplemented); -} - void IInferencePlugin::SetExeNetworkInfo(const std::shared_ptr& exeNetwork, const ConstInputsDataMap& inputs, const ConstOutputsDataMap& outputs) { diff --git a/inference-engine/src/inference_engine/ie_core.cpp b/inference-engine/src/inference_engine/ie_core.cpp index 28563a29b62..63814215037 100644 --- a/inference-engine/src/inference_engine/ie_core.cpp +++ b/inference-engine/src/inference_engine/ie_core.cpp @@ -395,6 +395,7 @@ public: 
opsetNames.insert("opset4"); opsetNames.insert("opset5"); opsetNames.insert("opset6"); + opsetNames.insert("opset7"); } ~Impl() override = default; @@ -566,18 +567,6 @@ public: SoExecutableNetworkInternal ImportNetwork(std::istream& networkModel, const std::string& deviceName, const std::map& config) override { auto parsed = parseDeviceNameIntoConfig(deviceName, config); - - if (parsed._deviceName.empty()) { - ExportMagic magic = {}; - auto currentPos = networkModel.tellg(); - networkModel.read(magic.data(), magic.size()); - auto exportedWithName = (exportMagic == magic); - if (exportedWithName) { - std::getline(networkModel, parsed._deviceName); - } - networkModel.seekg(currentPos, networkModel.beg); - } - return GetCPPPluginByName(parsed._deviceName).ImportNetwork(networkModel, parsed._config); } @@ -1022,18 +1011,6 @@ void Core::AddExtension(const IExtensionPtr& extension) { ExecutableNetwork Core::ImportNetwork(const std::string& modelFileName, const std::string& deviceName, const std::map& config) { OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::ImportNetwork"); - - // TODO: remove once NotImplemented exception is deprecated and not used - if (deviceName.find("HETERO") == 0) { - IE_THROW() << "HETERO device does not support ImportNetwork"; - } - if (deviceName.find("MULTI") == 0) { - IE_THROW() << "MULTI device does not support ImportNetwork"; - } - if (deviceName.find("AUTO") == 0) { - IE_THROW() << "AUTO device does not support ImportNetwork"; - } - auto parsed = parseDeviceNameIntoConfig(deviceName, config); auto exec = _impl->GetCPPPluginByName(parsed._deviceName).ImportNetwork(modelFileName, parsed._config); return { exec, exec }; @@ -1041,10 +1018,33 @@ ExecutableNetwork Core::ImportNetwork(const std::string& modelFileName, const st ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, const std::string& deviceName, const std::map& config) { + OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::ImportNetwork"); auto exec = _impl->ImportNetwork(networkModel, deviceName, config); return { exec, exec }; } +ExecutableNetwork Core::ImportNetwork(std::istream& networkModel) { + OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::ImportNetwork"); + + using ExportMagic = std::array; + constexpr static const ExportMagic exportMagic = {{0x1, 0xE, 0xE, 0x1}}; + + std::string deviceName; + ExportMagic magic = {}; + auto currentPos = networkModel.tellg(); + networkModel.read(magic.data(), magic.size()); + if (exportMagic == magic) { + std::getline(networkModel, deviceName); + } else { + IE_THROW() << "Passed compiled stream does not contain device name. " + "Please, provide device name manually"; + } + networkModel.seekg(currentPos, networkModel.beg); + + auto exec = _impl->GetCPPPluginByName(deviceName).ImportNetwork(networkModel, {}); + return { exec, exec }; +} + ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, const RemoteContext::Ptr& context, const std::map& config) { @@ -1124,8 +1124,8 @@ Parameter Core::GetConfig(const std::string& deviceName, const std::string& name IE_THROW() << "You can only GetConfig of the AUTO itself (without devices). 
" "GetConfig is also possible for the individual devices before creating the AUTO on top."; - } - } + } + } auto parsed = parseDeviceNameIntoConfig(deviceName); diff --git a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp index 08b54640ee6..7d92c77219d 100644 --- a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp +++ b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp @@ -244,6 +244,9 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr& layer) { LayerParams params = {layer->get_friendly_name(), "TensorIterator", details::convertPrecision(layer->get_output_element_type(0))}; auto res = std::make_shared(params); + if (res == nullptr) { + IE_THROW() << "Can't create TensorIterator"; + } res->body = body; // Port map: outputs diff --git a/inference-engine/src/legacy_api/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.cpp b/inference-engine/src/legacy_api/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.cpp index a3ec122b9c7..cab07f54a76 100644 --- a/inference-engine/src/legacy_api/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.cpp +++ b/inference-engine/src/legacy_api/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.cpp @@ -9,7 +9,6 @@ #include #include - #include #include @@ -137,7 +136,6 @@ ngraph::pass::ConvertStridedSliceToCropMatcher::ConvertStridedSliceToCropMatcher lb = std::min(static_cast(input_shape[input_shape_idx]), lb); ub = std::min(static_cast(input_shape[input_shape_idx]), ub); - offset.emplace_back(lb); // set default value for stride or use given value int64_t stride = 1; @@ -153,6 +151,7 @@ ngraph::pass::ConvertStridedSliceToCropMatcher::ConvertStridedSliceToCropMatcher ub = -1; lb = std::min(lb, static_cast(input_shape[input_shape_idx]) - 1); + offset.emplace_back(lb); lb -= 1; // we always get 1st element, so we need decrease range if (ub <= lb) dimension = (ub - lb) / stride + 1; @@ -160,12 +159,16 @@ ngraph::pass::ConvertStridedSliceToCropMatcher::ConvertStridedSliceToCropMatcher // apply masks if (begin_mask.count(axis)) lb = 0; - if (end_mask.count(axis)) + offset.emplace_back(lb); + + if (end_mask.count(axis)) { ub = static_cast(input_shape[input_shape_idx]); + } lb += 1; // we always get 1st element, so we need decrease range - if (ub >= lb) + if (ub >= lb) { dimension = (ub - lb) / stride + 1; + } } dim.emplace_back(dimension); diff --git a/inference-engine/src/low_precision_transformations/src/concat.cpp b/inference-engine/src/low_precision_transformations/src/concat.cpp index 4988e29b1e2..f6d860ed172 100644 --- a/inference-engine/src/low_precision_transformations/src/concat.cpp +++ b/inference-engine/src/low_precision_transformations/src/concat.cpp @@ -43,19 +43,21 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat return false; } - // precisions can be different + // Concat operations precision is defined: + // 1. consumers after Concat + // 2. 
FakeQuantize precisions without zero point ngraph::Node& quantizationLayer = *subgraph.quantizationLayers[0]; std::shared_ptr fq = ngraph::as_type_ptr(quantizationLayer.shared_from_this()); if (!NetworkHelper::isQuantizeSupported(fq)) { return false; } - - std::vector concatParentsChildrensPrecisions = precisionsOnActivations; - fillAvailablePrecisions(subgraph.quantizationLayers[0], concatParentsChildrensPrecisions); - if (concatParentsChildrensPrecisions.empty()) { + DataPrecision dataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false); + if (dataPrecision.precision == ngraph::element::undefined) { return false; } + std::vector concatChildrenPrecisions = precisionsOnActivations; + for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) { fq = ngraph::as_type_ptr(subgraph.quantizationLayers[i]); if (fq == nullptr) { @@ -72,20 +74,28 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat if (quantizationDetails.inputHighValues.size() != 1ul) { return false; } - std::vector fqChildrensPrecisions = precisionsOnActivations; - fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrensPrecisions); - concatParentsChildrensPrecisions = NetworkHelper::precisionIntersection(concatParentsChildrensPrecisions, fqChildrensPrecisions); - if (concatParentsChildrensPrecisions.empty()) { + // define concatenation operation consumers precisions + std::vector fqChildrenPrecisions = precisionsOnActivations; + fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrenPrecisions); + concatChildrenPrecisions = NetworkHelper::precisionIntersection(concatChildrenPrecisions, fqChildrenPrecisions); + if (concatChildrenPrecisions.empty()) { return false; } + + // define FakeQuantize precisions without zero point + const DataPrecision dataPrecision2 = getDataPrecision(subgraph.quantizationLayers[i]->shared_from_this(), quantizationDetails, false); + if (dataPrecision2.precision == ngraph::element::undefined) { + return false; + } + + if (dataPrecision.precision != dataPrecision2.precision) { + dataPrecision = dataPrecision.precision.is_signed() ? 
dataPrecision : dataPrecision2; + } } - DataPrecision dataPrecision; - if (std::find(concatParentsChildrensPrecisions.begin(), concatParentsChildrensPrecisions.end(), element::i8) != concatParentsChildrensPrecisions.end()) { - dataPrecision = DataPrecision(element::i8); - } else { - dataPrecision = DataPrecision(concatParentsChildrensPrecisions[0]); + if (std::find(concatChildrenPrecisions.begin(), concatChildrenPrecisions.end(), dataPrecision.precision) == concatChildrenPrecisions.end()) { + dataPrecision = DataPrecision(concatChildrenPrecisions[0]); } std::vector quantizationLayersDetails; diff --git a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp index dc81d51cd71..e36c2b5aa74 100644 --- a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp +++ b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp @@ -64,14 +64,23 @@ bool ConcatMultiChannelsTransformation::transform(TransformationContext& context DataPrecision dataPrecision; { + std::vector concatChildrenPrecisions = precisionsOnActivations; for (auto quantizationLayer : subgraph.quantizationLayers) { std::shared_ptr fq = ngraph::as_type_ptr(quantizationLayer->shared_from_this()); if (!NetworkHelper::isQuantizeSupported(fq)) { return false; } - const DataPrecision tmp = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false); + // define concatenation operation consumers precisions + std::vector fqChildrenPrecisions = precisionsOnActivations; + fillAvailablePrecisions(quantizationLayer, fqChildrenPrecisions); + concatChildrenPrecisions = NetworkHelper::precisionIntersection(concatChildrenPrecisions, fqChildrenPrecisions); + if (concatChildrenPrecisions.empty()) { + return false; + } + // define FakeQuantize precisions without zero point + const DataPrecision tmp = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false); if (dataPrecision.precision == ngraph::element::undefined) { dataPrecision = tmp; continue; @@ -81,6 +90,10 @@ bool ConcatMultiChannelsTransformation::transform(TransformationContext& context dataPrecision = tmp; } } + + if (std::find(concatChildrenPrecisions.begin(), concatChildrenPrecisions.end(), dataPrecision.precision) == concatChildrenPrecisions.end()) { + dataPrecision = DataPrecision(concatChildrenPrecisions[0]); + } } for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) { diff --git a/inference-engine/src/mkldnn_plugin/config.h b/inference-engine/src/mkldnn_plugin/config.h index 01eb0e23c5e..54336d58495 100644 --- a/inference-engine/src/mkldnn_plugin/config.h +++ b/inference-engine/src/mkldnn_plugin/config.h @@ -4,9 +4,11 @@ #pragma once +#include +#include "utils/debug_capabilities.h" + #include #include -#include namespace MKLDNNPlugin { @@ -35,6 +37,10 @@ struct Config { bool manualEnforceBF16 = false; #endif +#ifdef CPU_DEBUG_CAPS + DebugCaps::Config debugCaps; +#endif + void readProperties(const std::map &config); void updateProperties(); std::map _config; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp index b5ff60efed0..1415dc1ae95 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp @@ -32,7 +32,7 @@ bool MKLDNNEdge::isUseExternalMemory() const { return externalMemoryPtr; } -bool MKLDNNEdge::isDropped() { +bool MKLDNNEdge::isDropped() const { bool not_in_parent = true; 
bool not_in_child = true; @@ -124,6 +124,10 @@ void MKLDNNEdge::reuse(MKLDNNMemoryPtr ptr) { status = Status::Allocated; } +const InferenceEngine::TensorDesc& MKLDNNEdge::getInputDescRO() const { + return inputDesc; +} + InferenceEngine::TensorDesc MKLDNNEdge::getInputDesc() { if (inputDesc.getLayout() == InferenceEngine::Layout::ANY) { inputDesc = getSpecifiedInputDesc({}); @@ -131,6 +135,10 @@ InferenceEngine::TensorDesc MKLDNNEdge::getInputDesc() { return inputDesc; } +const InferenceEngine::TensorDesc& MKLDNNEdge::getOutputDescRO() const { + return outputDesc; +} + InferenceEngine::TensorDesc MKLDNNEdge::getOutputDesc() { if (outputDesc.getLayout() == InferenceEngine::Layout::ANY) { outputDesc = getSpecifiedOutputDesc({}); @@ -145,11 +153,11 @@ InferenceEngine::TensorDesc MKLDNNEdge::getDesc() { return getInputDesc(); } -int MKLDNNEdge::getInputNum() { +int MKLDNNEdge::getInputNum() const { return parent_port; } -int MKLDNNEdge::getOutputNum() { +int MKLDNNEdge::getOutputNum() const { return child_port; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.h b/inference-engine/src/mkldnn_plugin/mkldnn_edge.h index c9884caf56e..63e2a16414d 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.h @@ -61,11 +61,11 @@ public: MKLDNNMemoryPtr& getMemoryPtr(); bool needReorder(); - bool isDropped(); + bool isDropped() const; bool isUseExternalMemory() const; - int getInputNum(); - int getOutputNum(); + int getInputNum() const; + int getOutputNum() const; void setChildPort(const size_t port) { child_port = port; } @@ -73,10 +73,12 @@ public: MKLDNNEdgePtr getSharedEdge() const; MKLDNNEdgePtr getSharedEdge(std::nothrow_t) const; + const InferenceEngine::TensorDesc& getInputDescRO() const; + const InferenceEngine::TensorDesc& getOutputDescRO() const; + private: std::string name(); -private: std::weak_ptr parent; std::weak_ptr child; int parent_port; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index b92afb8a9f0..efc99bddb84 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -78,7 +78,10 @@ void MKLDNNGraph::CreateGraph(NET &net, const MKLDNNExtensionManager::Ptr& extMg Replicate(net, extMgr); InitGraph(); + status = Ready; + + ENABLE_CPU_DEBUG_CAP(serialize(*this)); } template void MKLDNNGraph::CreateGraph(const std::shared_ptr&, @@ -344,10 +347,6 @@ void MKLDNNGraph::InitGraph() { graphNode->cleanup(); } #endif - -#if !defined(NDEBUG) && defined(PRINT_GRAPH_INFO) - printGraphInfo(); -#endif ExecuteConstantNodesOnly(); } @@ -809,7 +808,7 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) { mkldnn::stream stream(eng); - ENABLE_CPU_DEBUG_CAP(NodeDumper nd(infer_count)); + ENABLE_CPU_DEBUG_CAP(NodeDumper nd(config.debugCaps, infer_count)); for (int i = 0; i < graphNodes.size(); i++) { if (request != nullptr) { @@ -954,6 +953,10 @@ void MKLDNNGraph::setConfig(const Config &cfg) { config = cfg; } +const Config& MKLDNNGraph::getConfig() const { + return config; +} + void MKLDNNGraph::setProperty(const std::map& properties) { config.readProperties(properties); } @@ -1217,21 +1220,3 @@ void MKLDNNGraph::EnforceBF16() { InferenceEngine::CNNNetwork MKLDNNGraph::dump() const { return dump_graph_as_ie_ngraph_net(*this); } - -void MKLDNNGraph::printGraphInfo() const { - for (auto &graphNode : graphNodes) { - std::cout << "name: " << graphNode->getName() << " [ "; - if 
(graphNode->parentEdges.size() > 0) { - auto prnt_out_desc = graphNode->parentEdges[0].lock()->getOutputDesc(); - std::cout << "in: " << prnt_out_desc.getPrecision().name() - << "/l=" << prnt_out_desc.getLayout() - << "; "; - } - if (graphNode->childEdges.size() > 0) { - auto chld_in_desc = graphNode->childEdges[0].lock()->getInputDesc(); - std::cout << "out: " << chld_in_desc.getPrecision().name() - << "/l=" << chld_in_desc.getLayout(); - } - std::cout << " ]" << std::endl; - } -} diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h index c3fcb0d5c9c..1b54f71e88c 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h @@ -39,6 +39,8 @@ public: } void setConfig(const Config &cfg); + const Config& getConfig() const; + void setProperty(const std::map &properties); Config getProperty() const; @@ -59,6 +61,10 @@ public: void Infer(MKLDNNInferRequest* request = nullptr, int batch = -1); + const std::vector& GetNodes() const { + return graphNodes; + } + std::vector& GetNodes() { return graphNodes; } @@ -219,7 +225,6 @@ protected: private: void EnforceBF16(); - void printGraphInfo() const; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp index 14d2f6a28ae..ac4bfff6b6d 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp @@ -5,9 +5,11 @@ #include "mkldnn_graph_dumper.h" #include #include "exec_graph_info.hpp" +#include "ie_common.h" #include "mkldnn_debug.h" #include #include "ngraph/ngraph.hpp" +#include "utils/debug_capabilities.h" #include #include @@ -18,6 +20,9 @@ using namespace InferenceEngine; namespace MKLDNNPlugin { +void serializeToCout(const MKLDNNGraph &graph); +void serializeToXML(const MKLDNNGraph &graph, const std::string& path); + namespace { std::map extract_node_metadata(const MKLDNNNodePtr &node) { @@ -207,4 +212,46 @@ InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph return net; } +#ifdef CPU_DEBUG_CAPS +void serialize(const MKLDNNGraph &graph) { + const std::string& path = graph.getConfig().debugCaps.execGraphPath; + + if (path.empty()) + return; + + if (path == "cout") + serializeToCout(graph); + else if (!path.compare(path.size() - 4, 4, ".xml")) + serializeToXML(graph, path); + else + IE_THROW() << "Unknown serialize format. Should be either 'cout' or '*.xml'. 
Got " << path; +} + +void serializeToXML(const MKLDNNGraph &graph, const std::string& path) { + if (path.empty()) + return; + + graph.dump().serialize(path); +} + +void serializeToCout(const MKLDNNGraph &graph) { + for (const auto& node : graph.GetNodes()) { + std::cout << "name: " << node->getName() << " [ "; + if (!node->getParentEdges().empty()) { + const auto& parentEdge = *(node->getParentEdges()[0].lock()); + const auto& prnt_out_desc = parentEdge.getOutputDescRO(); + std::cout << "in: " << prnt_out_desc.getPrecision().name() + << "/l=" << prnt_out_desc.getLayout() + << "; "; + } + if (!node->getChildEdges().empty()) { + const auto& childEdge = *(node->getChildEdges()[0].lock()); + const auto& chld_in_desc = childEdge.getInputDescRO(); + std::cout << "out: " << chld_in_desc.getPrecision().name() + << "/l=" << chld_in_desc.getLayout(); + } + std::cout << " ]" << std::endl; + } +} +#endif } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h index d954695baaa..597568224f3 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h @@ -6,11 +6,14 @@ #include "cpp/ie_cnn_network.h" #include "mkldnn_graph.h" +#include "utils/debug_capabilities.h" #include namespace MKLDNNPlugin { InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph); - +#ifdef CPU_DEBUG_CAPS +void serialize(const MKLDNNGraph &graph); +#endif // CPU_DEBUG_CAPS } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fc_fusion.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fc_fusion.cpp index 09d3e7e0554..b850bd98ae2 100644 --- a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fc_fusion.cpp +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fc_fusion.cpp @@ -22,7 +22,11 @@ MKLDNNPlugin::ReshapeFullyConnectedFusion::ReshapeFullyConnectedFusion() { ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher &m) { auto fc = std::dynamic_pointer_cast(m.get_match_root()); + if (!fc) + return false; auto reshape = std::dynamic_pointer_cast(fc->get_input_node_shared_ptr(0)); + if (!reshape) + return false; // Check that Reshape reshapes 4D tensor to 2D or input shape = output shape auto shape_in = reshape->input_value(0).get_shape(); @@ -67,6 +71,8 @@ MKLDNNPlugin::ReshapeFullyConnectedFusion::ReshapeFullyConnectedFusion() { fc->input_value(2), outShape, fc->output(0).get_element_type()); + } else { + return false; } new_ops.push_back(new_fc); new_fc->set_friendly_name(fc->get_friendly_name()); diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.cpp index 999d1b958d8..f140f44e74e 100644 --- a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.cpp +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.cpp @@ -60,6 +60,8 @@ MKLDNNPlugin::ReshapeFullyConnected::ReshapeFullyConnected() { fc->input_value(2), output_shape_new, fc->get_output_type()); + } else { + return false; } new_ops.push_back(fc_new); diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_prelu.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_prelu.cpp index 0cc1a33cbc3..69fd75ea57a 100644 --- 
a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_prelu.cpp +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_prelu.cpp @@ -20,8 +20,16 @@ MKLDNNPlugin::ReshapePRelu::ReshapePRelu() { if (!prelu || ngraph::shape_size(prelu->get_input_shape(1)) == 1 || prelu->get_input_shape(1).size() != 1) { return false; } - ngraph::Shape new_shape(prelu->input_value(0).get_shape().size(), 1); - new_shape[new_shape.size() > 1 ? 1 : 0] = prelu->input_value(1).get_shape()[0]; + const auto prelu_shape = prelu->input_value(0).get_shape(); + const auto slope_shape = prelu->input_value(1).get_shape(); + ngraph::Shape new_shape(prelu_shape.size(), 1); + const auto slope_dim = slope_shape[0]; + const auto channel_dim_idx = prelu_shape.size() > 1 ? 1 : 0; + if (slope_dim != prelu_shape[channel_dim_idx]) { + return false; + } + new_shape[channel_dim_idx] = slope_dim; + auto slope = ngraph::op::util::reshapeTo(prelu->input_value(1), new_shape); auto new_prelu = std::make_shared(prelu->input(0).get_source_output(), slope); new_prelu->set_friendly_name(prelu->get_friendly_name()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp index d226dd73890..678922f3a4b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp @@ -42,6 +42,8 @@ MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, co addOriginalInputPrecision(inPrc); outDims.emplace_back(dims); addOriginalOutputPrecision(outPrc); + + errorPrefix = "Convert node with name '" + getName() + "'"; } void MKLDNNConvertNode::getSupportedDescriptors() { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp index 2f97bbd2f85..38bebcd5271 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp @@ -58,6 +58,8 @@ MKLDNNDepthToSpaceNode::MKLDNNDepthToSpaceNode(const std::shared_ptr(std::pow(blockSize, nSpatialDims)); } else { IE_THROW(NotImplemented) << errorMessage; } @@ -74,14 +76,13 @@ void MKLDNNDepthToSpaceNode::getSupportedDescriptors() { if (srcDims.size() != dstDims.size()) THROW_ERROR << "has incorrect number of input/output dimensions"; - size_t nSpatialDims = srcDims.size() - 2; - blockStep = static_cast(std::pow(blockSize, nSpatialDims)); if (srcDims[1] % blockStep) THROW_ERROR << "has block_size parameter which is incompatible with input tensor channels dimension size"; if (srcDims[1] / blockStep != dstDims[1]) THROW_ERROR << "has incompatible input/output channels"; + size_t nSpatialDims = srcDims.size() - 2; for (size_t i = 0; i < nSpatialDims; ++i) { if (srcDims[i + 2] * blockSize != dstDims[i + 2]) THROW_ERROR << "has incompatible spatial dims"; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp index 21fb93728c1..b9ef511d010 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp @@ -49,7 +49,7 @@ MKLDNNDFTNode::MKLDNNDFTNode(const std::shared_ptr& op, const mkld /* Data */ inputShape = inDims[DATA_INDEX].ToSizeVector(); - if (inputShape.size() < 1) { + if (inputShape.size() < 2) { IE_THROW() << layerErrorPrefix << " has invalid 'data' 
input tensor with rank: " << inputShape.size(); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.h index 30d1fda9e95..bc19866768d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.h @@ -32,7 +32,7 @@ private: size_t dataTypeSize_; int strideAxDst_; int dstAxDim_; - int strideAx1Diff_; + int strideAx1Diff_ = 0; std::string errorPrefix_; template diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp index 07cc72247a5..1b70de9f0f8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp @@ -86,7 +86,7 @@ MKLDNNStridedSliceNode::MKLDNNStridedSliceNode(const std::shared_ptrisConstant() && node->getType() == Input; + return node->getType() == Input && node->isConstant(); }; params.parametersAreConstant = isConstantNode(getParentEdgesAtPort(BEGIN_ID)[0]->getParent()) && @@ -138,7 +138,11 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() { if (params.parametersAreConstant) { auto fillingInParameters = [&](std::vector ¶meter, const size_t type, const size_t size, const int value) { - auto blob = std::dynamic_pointer_cast(getParentEdgesAtPort(type)[0]->getParent())->getMemoryPtr(); + const auto constNode = std::dynamic_pointer_cast(getParentEdgesAtPort(type)[0]->getParent()); + if (!constNode) { + THROW_ERROR << "can't cast node on " << type << " port to MKLDNNInputNode"; + } + auto blob = constNode->getMemoryPtr(); if (blob->GetDataType() != mkldnn::memory::data_type::s32) THROW_ERROR << "supports only parameters input with precision I32"; const int *ptr = static_cast(blob->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp index c9a53c79e07..d1d80e1b7cb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp @@ -136,6 +136,9 @@ public: void execute(mkldnn::stream strm, int n_iter) override { auto mem = mem_holder_dst; auto data_ptr = static_cast(mem.get_data_handle()); + if (data_ptr == nullptr) { + IE_THROW() << "TensorIterator node has not allocated memory for IterCountPortHelper"; + } *data_ptr = n_iter; } }; @@ -150,6 +153,9 @@ public: int getStatus() override { auto data_ptr = static_cast(mem_holder.get_data_handle()); + if (data_ptr == nullptr) { + IE_THROW() << "TensorIterator node has not allocated memory for asBoolCheck"; + } return *data_ptr == static_cast(0) ? 
0 : 1; } }; @@ -164,6 +170,9 @@ public: int getStatus() override { auto data_ptr = static_cast(mem_holder.get_data_handle()); + if (data_ptr == nullptr) { + IE_THROW() << "TensorIterator node has not allocated memory for asIntCheck"; + } return *data_ptr; } }; @@ -283,6 +292,9 @@ MKLDNNTensorIteratorNode::MKLDNNTensorIteratorNode(const std::shared_ptr(ngraphOp); + if (tiOp == nullptr) { + IE_THROW() << "Can't cast TensorIterator node with name: " << getName() << " to ngraph::op::util::SubGraphOp"; + } const std::shared_ptr body = tiOp->get_function(); sub_graph.CreateGraph(body, ext_mng, weightCache); diff --git a/inference-engine/src/mkldnn_plugin/utils/README.md b/inference-engine/src/mkldnn_plugin/utils/README.md index af50c8d5015..d3b98f1cb48 100644 --- a/inference-engine/src/mkldnn_plugin/utils/README.md +++ b/inference-engine/src/mkldnn_plugin/utils/README.md @@ -71,3 +71,22 @@ Example: ```sh OV_CPU_BLOB_DUMP_NODE_NAME=".+" binary ... ``` + +## Graph serialization +The functionality allows to serialize execution graph using environment variable: +```sh + OV_CPU_EXEC_GRAPH_PATH= binary ... +``` + +Possible serialization options: +* cout + + Serialize to console output +* \.xml + + Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app +* \.dot + + TBD. Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools. + + diff --git a/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h b/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h index 64af835064d..be6e7a830c2 100644 --- a/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h +++ b/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h @@ -4,7 +4,44 @@ #pragma once #ifdef CPU_DEBUG_CAPS -# define ENABLE_CPU_DEBUG_CAP(_x) _x; -#else -# define ENABLE_CPU_DEBUG_CAP(_x) -#endif + +#include +#include +#include + +#define ENABLE_CPU_DEBUG_CAP(_x) _x; + +namespace MKLDNNPlugin { +namespace DebugCaps { + +class Config { +public: + Config() { + readParam(blobDumpDir, "OV_CPU_BLOB_DUMP_DIR"); + readParam(blobDumpFormat, "OV_CPU_BLOB_DUMP_FORMAT"); + readParam(blobDumpNodeExecId, "OV_CPU_BLOB_DUMP_NODE_EXEC_ID"); + readParam(blobDumpNodeType, "OV_CPU_BLOB_DUMP_NODE_TYPE"); + readParam(blobDumpNodeName, "OV_CPU_BLOB_DUMP_NODE_NAME"); + readParam(execGraphPath, "OV_CPU_EXEC_GRAPH_PATH"); + } + + std::string blobDumpDir; + std::string blobDumpFormat; + std::string blobDumpNodeExecId; + std::string blobDumpNodeType; + std::string blobDumpNodeName; + std::string execGraphPath; + +private: + void readParam(std::string& param, const char* envVar) { + if (const char* envValue = std::getenv(envVar)) + param = envValue; + } +}; + +} // namespace DebugCaps +} // namespace MKLDNNPlugin + +#else // !CPU_DEBUG_CAPS +#define ENABLE_CPU_DEBUG_CAP(_x) +#endif // CPU_DEBUG_CAPS diff --git a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp index f69551159dc..9f3af44a66a 100644 --- a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp +++ b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp @@ -6,9 +6,10 @@ #include "node_dumper.h" #include "mkldnn_node.h" -#include "utils/blob_dump.h" - #include "ie_common.h" +#include "utils/blob_dump.h" +#include "utils/debug_capabilities.h" + #include #include #include @@ -18,27 +19,24 @@ using namespace InferenceEngine; namespace MKLDNNPlugin { -NodeDumper::NodeDumper(int _count): - count(_count), dumpFormat(DUMP_FORMAT::BIN) { - const char* dumpDirEnv = 
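// Editor's note: a minimal usage sketch, not part of this patch. It assumes the
// DebugCaps::Config and NodeDumper interfaces introduced above (utils/debug_capabilities.h
// and utils/node_dumper.h) and illustrates that blob dumping is now driven by a single
// Config object filled from the OV_CPU_* environment variables instead of scattered
// std::getenv calls; `nodes` is a hypothetical list of executable-graph nodes.
#ifdef CPU_DEBUG_CAPS
static void dumpGraphBlobs(const std::vector<MKLDNNPlugin::MKLDNNNodePtr>& nodes) {
    MKLDNNPlugin::DebugCaps::Config config;      // reads OV_CPU_BLOB_DUMP_* / OV_CPU_EXEC_GRAPH_PATH once
    MKLDNNPlugin::NodeDumper dumper(config, 0);  // dump dir, format and filters come from config
    for (const auto& node : nodes) {
        dumper.dumpInputBlobs(node);
        // ... the node would be executed here ...
        dumper.dumpOutputBlobs(node);
    }
}
#endif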
std::getenv("OV_CPU_BLOB_DUMP_DIR"); - if (dumpDirEnv) - dumpDirName = dumpDirEnv; +NodeDumper::NodeDumper(const DebugCaps::Config& config, const int _count) + : dumpFormat(DUMP_FORMAT::BIN) + , dumpDirName("mkldnn_dump") + , count(_count) { + if (!config.blobDumpDir.empty()) + dumpDirName = config.blobDumpDir; - const char* dumpFormatEnv = std::getenv("OV_CPU_BLOB_DUMP_FORMAT"); - if (dumpFormatEnv) - dumpFormat = parseDumpFormat(dumpFormatEnv); + if (!config.blobDumpFormat.empty()) + dumpFormat = parseDumpFormat(config.blobDumpFormat); - const char* filter = std::getenv("OV_CPU_BLOB_DUMP_NODE_EXEC_ID"); - if (filter) - dumpFilters[FILTER::BY_EXEC_ID] = filter; + if (!config.blobDumpNodeExecId.empty()) + dumpFilters[FILTER::BY_EXEC_ID] = config.blobDumpNodeExecId; - filter = std::getenv("OV_CPU_BLOB_DUMP_NODE_TYPE"); - if (filter) - dumpFilters[FILTER::BY_TYPE] = filter; + if (!config.blobDumpNodeType.empty()) + dumpFilters[FILTER::BY_TYPE] = config.blobDumpNodeType; - filter = std::getenv("OV_CPU_BLOB_DUMP_NODE_NAME"); - if (filter) - dumpFilters[FILTER::BY_NAME] = filter; + if (!config.blobDumpNodeName.empty()) + dumpFilters[FILTER::BY_NAME] = config.blobDumpNodeName; } void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const { diff --git a/inference-engine/src/mkldnn_plugin/utils/node_dumper.h b/inference-engine/src/mkldnn_plugin/utils/node_dumper.h index 7dd1ac1f0c6..0580bee4731 100644 --- a/inference-engine/src/mkldnn_plugin/utils/node_dumper.h +++ b/inference-engine/src/mkldnn_plugin/utils/node_dumper.h @@ -6,6 +6,7 @@ #include "mkldnn_node.h" #include "utils/blob_dump.h" +#include "utils/debug_capabilities.h" #include #include @@ -22,7 +23,7 @@ namespace MKLDNNPlugin { */ class NodeDumper { public: - NodeDumper(int _count); + NodeDumper(const DebugCaps::Config& config, const int _count); void dumpInputBlobs(const MKLDNNNodePtr &node) const; void dumpOutputBlobs(const MKLDNNNodePtr &node) const; @@ -41,11 +42,9 @@ private: void formatNodeName(std::string& name) const; DUMP_FORMAT dumpFormat; - + std::string dumpDirName; int count; - std::string dumpDirName = "mkldnn_dump"; - enum FILTER { BY_EXEC_ID, BY_TYPE, diff --git a/inference-engine/src/offline_transformations/include/mask_attribute.hpp b/inference-engine/src/offline_transformations/include/mask_attribute.hpp index 70cce141567..48c5b4ee9f0 100644 --- a/inference-engine/src/offline_transformations/include/mask_attribute.hpp +++ b/inference-engine/src/offline_transformations/include/mask_attribute.hpp @@ -54,10 +54,90 @@ public: }); } + std::vector get_not_empty_dims() { + std::vector not_empty_dims; + for (size_t i = 0; i < this->size(); i++) { + if (!this->at(i).empty()) + not_empty_dims.push_back(i); + } + return not_empty_dims; + } + bool is_shape_like() const { return m_is_shape_like; } void set_shape_like(bool flag) { m_is_shape_like = flag; } + void copy_value_from_mask(Mask *const mask) { + auto cur_mask_iter = begin(); + auto mask_iter = mask->begin(); + while (cur_mask_iter != end() && mask_iter != mask->end()) { + *cur_mask_iter = *mask_iter; + + cur_mask_iter++; + mask_iter++; + } + } + + void copy_value_from_mask_reversed(Mask *const mask) { + auto cur_mask_iter = rbegin(); + auto mask_iter = mask->rbegin(); + while (cur_mask_iter != rend() && mask_iter != mask->rend()) { + *cur_mask_iter = *mask_iter; + + cur_mask_iter++; + mask_iter++; + } + } + + Mask::Ptr intersect_masks_reversed(Mask *const mask) { + auto result_mask = std::make_shared(std::max(size(), mask->size())); + auto result_iter = 
result_mask->rbegin(); + auto mask_1_iter = rbegin(); + auto mask_2_iter = mask->rbegin(); + + while (mask_1_iter != rend() && + mask_2_iter != mask->rend()) { + // Merge mask dimension values for both masks + // Example: (MaskValue[1,2,3,4], MaskValue[2,3]) -> MaskValue[2,3] + for (const auto & value : *mask_1_iter) { + if (mask_2_iter->count(value)) { + result_iter->insert(value); + } + } + + result_iter++; + mask_1_iter++; + mask_2_iter++; + } + return result_mask; + } + + Mask::Ptr union_masks_reversed(Mask *const mask) { + auto result_mask = std::make_shared(std::max(size(), mask->size())); + auto result_iter = result_mask->rbegin(); + auto mask_1_iter = rbegin(); + auto mask_2_iter = mask->rbegin(); + + while (mask_1_iter != rend() && + mask_2_iter != mask->rend()) { + // Union mask dimension values for both masks + // Example: (MaskValue[1,2,3,4], MaskValue[2, 5]) -> MaskValue[1, 2, 3, 4, 5] + for (const auto & value : *mask_1_iter) { + result_iter->insert(value); + } + for (const auto & value : *mask_2_iter) { + if (!result_iter->count(value)) { + result_iter->insert(value); + } + } + + result_iter++; + mask_1_iter++; + mask_2_iter++; + } + return result_mask; + } + void add_callback(const std::function & receive_callback, Mask::Ptr mask) { m_callbacks[mask.get()] = receive_callback; m_dependencies.push_back(mask.get()); diff --git a/inference-engine/src/offline_transformations/include/pruning.hpp b/inference-engine/src/offline_transformations/include/pruning.hpp index f398ab1713f..7831ee14246 100644 --- a/inference-engine/src/offline_transformations/include/pruning.hpp +++ b/inference-engine/src/offline_transformations/include/pruning.hpp @@ -14,6 +14,7 @@ namespace ngraph { namespace pass { class InitConstMask; +class InitMasks; class PropagateMasks; class ShrinkWeights; @@ -22,6 +23,16 @@ class Pruning; } // namespace pass } // namespace ngraph +/** + * @ingroup ie_transformation_common_api + * @brief Initialising masks for pruned operations + */ +class ngraph::pass::InitMasks : public ngraph::pass::GraphRewrite { +public: + NGRAPH_RTTI_DECLARATION; + InitMasks(); +}; + /** * @ingroup ie_transformation_common_api * @brief Check Constant operation values by given dimensions and set diff --git a/inference-engine/src/offline_transformations/src/pruning/init_const_mask.cpp b/inference-engine/src/offline_transformations/src/pruning/init_const_mask.cpp index 73929487b80..01e9520082b 100644 --- a/inference-engine/src/offline_transformations/src/pruning/init_const_mask.cpp +++ b/inference-engine/src/offline_transformations/src/pruning/init_const_mask.cpp @@ -17,7 +17,7 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::InitConstMask, "InitConstMask", 0); ngraph::pass::InitConstMask::InitConstMask(const ngraph::AxisSet & dims, const std::function & condition) { auto constant = pattern::wrap_type( - pattern::type_matches_any({element::f16, element::f32, element::f64})); + pattern::type_matches_any({element::i8, element::u8, element::f16, element::f32, element::f64})); matcher_pass_callback callback = [=](pattern::Matcher& m) { auto const_node = std::dynamic_pointer_cast(m.get_match_root()); diff --git a/inference-engine/src/offline_transformations/src/pruning/init_masks.cpp b/inference-engine/src/offline_transformations/src/pruning/init_masks.cpp new file mode 100644 index 00000000000..2b4394b6c32 --- /dev/null +++ b/inference-engine/src/offline_transformations/src/pruning/init_masks.cpp @@ -0,0 +1,64 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + 
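// Editor's note: illustrative sketch only, not part of this patch. It assumes the Mask
// type behaves as the vector-of-index-sets defined in mask_attribute.hpp above. Reversed
// merging aligns masks from the last dimension backwards, so masks of different rank
// still line up on the trailing (channel) dimension.
static void mask_merge_example() {
    ngraph::Mask a(2), b(1);
    a.at(1) = {1, 2, 3, 4};                        // prunable channels of a rank-2 tensor
    b.at(0) = {2, 3};                              // prunable channels of a rank-1 tensor
    auto common = a.intersect_masks_reversed(&b);  // -> [{}, {2, 3}]       (channels prunable in both)
    auto all    = a.union_masks_reversed(&b);      // -> [{}, {1, 2, 3, 4}] (channels prunable in either)
    (void)common; (void)all;
}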
+#include "pruning.hpp" +#include "mask_attribute.hpp" + +#include +#include +#include + +NGRAPH_RTTI_DEFINITION(ngraph::pass::InitMasks, "InitMasks", 0); + +namespace ngraph { +namespace pass { +namespace init_masks { + +class InitConvMask; + +} // namespace init_masks +} // namespace pass +} // namespace ngraph + +class ngraph::pass::init_masks::InitConvMask : public MatcherPass { +public: + InitConvMask() { + auto input = pattern::any_input(); + auto weights = pattern::any_input(); + auto conv = pattern::wrap_type({input, weights}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto & pattern_map = m.get_pattern_value_map(); + const auto & m_output = pattern_map.at(conv); + + // Initializing weights mask: + // 1. Looking for Const node with weights + NodeVector weights_calculation_nodes; + auto cur_node = m_output.get_node()->get_input_node_shared_ptr(1); + + while (!ngraph::is_type(cur_node) && cur_node->inputs().size()) { + weights_calculation_nodes.push_back(cur_node); + cur_node = cur_node->get_input_node_shared_ptr(0); + } + if (!ngraph::is_type(cur_node)) { + NGRAPH_DEBUG << "Can't find Constant weights for Convolution: " << + m_output.get_node()->get_friendly_name() << std::endl; + return false; + } + + // 2. Init mask for Const node + InitConstMask({0}/* check only output channels dim */).apply(cur_node); + return true; + }; + + auto m = std::make_shared(conv, "ConvolutionInitMask"); + register_matcher(m, callback); + } +}; + + +ngraph::pass::InitMasks::InitMasks() { + add_matcher(); +} + diff --git a/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp b/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp index ac7a8e8b685..424b6ae9583 100644 --- a/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp +++ b/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp @@ -7,7 +7,9 @@ #include #include +#include #include +#include NGRAPH_RTTI_DEFINITION(ngraph::pass::PropagateMasks, "PropagateMasks", 0); @@ -20,11 +22,23 @@ class GroupConvolution; class Elementwise; class PassThrough; class StopPropagation; +class FakeQuantize; +class Concat; +class Reshape; } // namespace mask_propagation } // namespace pass } // namespace ngraph +ngraph::Shape broadcast_shape_to_rank(ngraph::Shape shape_to_broadcast, int64_t dst_rank) { + auto initial_rank = static_cast(shape_to_broadcast.size()); + auto num_of_broadcased_dims = dst_rank - initial_rank; + std::vector dims(num_of_broadcased_dims, 1); + dims.insert(dims.end(), shape_to_broadcast.begin(), shape_to_broadcast.end()); + auto new_shape = ngraph::Shape(dims); + return new_shape; +} + class ngraph::pass::mask_propagation::Convolution : public MatcherPass { public: Convolution() { @@ -38,12 +52,15 @@ public: const auto & m_output = pattern_map.at(conv); const auto & m_input = pattern_map.at(input); - // In case if weights are Constant we initialize Mask - InitConstMask({0}/* check only output channel */).apply(m_weights.get_node_shared_ptr()); - auto weights_mask = getMask(m_weights); - // If weights are not a Constant and we didn't set Mask value before we will get nullptr - if (!weights_mask) return false; + + // Nullptr in weights-mask means that mask for this node wasn't initialized earlier. + // Weights mask for convolution should be initialized in the InitMasks pass (and propagate after it). + // If mask isn't initialized - this weights (and hence all convolution) can't be pruned for some reason. 
+ if (!weights_mask) { + NGRAPH_DEBUG << "No weights mask for " << m_output.get_node()->get_friendly_name() << "\n"; + return false; + } auto weights_mask_row = weights_mask.get(); if (auto input_mask = getMask(m_input)) { @@ -119,9 +136,15 @@ public: auto weights_mask = getMask(m_weights); if (!weights_mask) { - // TODO: only if weights are constant - weights_mask = std::make_shared(weights_shape.size()); - setMask(m_weights, weights_mask); + // Setting mask only if weights are constant + if (ngraph::is_type(m_output.get_node_shared_ptr())) { + weights_mask = std::make_shared(weights_shape.size()); + setMask(m_weights, weights_mask); + } else { + NGRAPH_DEBUG << "GroupConvolution: No weights mask and weights aren't constant for " << + *m_output.get_node() << "\n"; + return false; + } } auto weights_mask_row = weights_mask.get(); @@ -169,13 +192,85 @@ public: } }; +class ngraph::pass::mask_propagation::Reshape : public MatcherPass { +public: + Reshape() { + auto input = pattern::any_input(pattern::has_static_shape()); + auto shape = pattern::any_input(); + // Working only for Reshapes on Group Convolution weights + auto reshape = pattern::wrap_type({input, shape}, pattern::consumers_count(1)); + auto gconv = pattern::wrap_type({pattern::any_input(), reshape}, + pattern::has_static_shape()); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto & pattern_map = m.get_pattern_value_map(); + const auto & m_shape = pattern_map.at(shape); + const auto & m_output = pattern_map.at(reshape); + const auto & m_input = pattern_map.at(input); + + auto shape_val = m_shape.get_node_shared_ptr(); + + // In Depthwise Convolutions Reshape on weights just add additional dimension for output channels count + // (1 in case of the depthwise) of kernel. + // Example: Reshape from [G, 1 (I), X, Y, Z] -> [G, 1 (O), 1 (I), X, Y, Z], where G - group numbers, + // X, Y, Z - spartial dimensions (can be only X or X, Y), I, O - number of input/output channels of kernel. + + // Checking that matched Reshape meets this conditions (add 1-d dim on 1 position of shape constant) + auto inp_shape = m_input.get_shape(); + auto out_shape = m_output.get_shape(); + inp_shape.insert(inp_shape.begin() + 1, 1); + if (inp_shape != out_shape) { + return false; + } + + auto input_mask = getMask(m_input); + if (!input_mask) { + return false; + } + auto input_mask_row = input_mask.get(); + auto output_mask = std::make_shared(m_output.get_partial_shape().rank().get_length()); + auto output_mask_row = output_mask.get(); + + // Depthwise Convolution pruned only by input channels (== groups) -> + // Propagating mask from Group (0) dim in Reshape input to Group (0) dim in Reshape output and back + input_mask->add_callback([output_mask_row](Mask::Ptr cur_mask) -> bool { + cur_mask->at(0) = output_mask_row->at(0); + return true; + }, output_mask); + output_mask->add_callback([input_mask_row](Mask::Ptr cur_mask) -> bool { + cur_mask->at(0) = input_mask_row->at(0); + return true; + }, input_mask); + input_mask->apply_callback(output_mask); + + // To allow pruning on weights (allow reshape input Group (0) dim changing) replace Reshape Shape constant + // [G, 1, 1, X, Y, Z] by [-1, 1, 1, X, Y, Z]. 
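// Editor's note: worked example, not part of this patch. For a depthwise GroupConvolution
// with 32 groups and 3x3 kernels the matched Reshape is
//   [32, 1, 3, 3] -> [32, 1, 1, 3, 3]   (the O=1 dim is inserted at position 1),
// which is exactly the inp_shape.insert(begin() + 1, 1) check above. The shape Constant
// {32, 1, 1, 3, 3} is then rewritten to {-1, 1, 1, 3, 3} below, so the group dimension is
// inferred at runtime and stays free to shrink once channels are pruned.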
+ auto old_shape_const = std::dynamic_pointer_cast(m_shape.get_node_shared_ptr()); + auto shape_value = old_shape_const.get()->cast_vector(); + shape_value[0] = -1; + auto new_const = opset6::Constant::create(old_shape_const->get_element_type(), + old_shape_const->get_shape(), shape_value); + new_const->set_friendly_name(old_shape_const->get_friendly_name()); + ngraph::copy_runtime_info(old_shape_const, new_const); + ngraph::replace_node(old_shape_const, new_const); + + setMask(m_output, output_mask); + return true; + }; + + auto m = std::make_shared(reshape, "ReshapeMaskPropagation"); + register_matcher(m, callback); + } +}; + class ngraph::pass::mask_propagation::Elementwise : public MatcherPass { public: Elementwise() { auto input = pattern::any_input(); auto weights = pattern::any_input(); - auto eltwise = pattern::wrap_type({input, weights}, - pattern::has_static_rank()); + auto eltwise = pattern::wrap_type({input, weights}, pattern::has_static_rank()); + // TODO: add Div, Power support ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { const auto & pattern_map = m.get_pattern_value_map(); @@ -183,82 +278,275 @@ public: const auto & m_output = pattern_map.at(eltwise); const auto & m_input = pattern_map.at(input); - // TODO: implement check that compares input shape ranks + // Case when input masks should be united instead of intersection + bool union_eltwise_type = ngraph::is_type(m_output.get_node_shared_ptr()); + const auto & input_rank = m_input.get_partial_shape().rank().get_length(); const auto & weights_rank = m_weights.get_partial_shape().rank().get_length(); + // Here assuming that masks can be propagated only through 3/4 dimensional tensors + // (since channel dim is necessary) if (weights_rank < 3 || input_rank < 3) return false; - // In case if one of the inputs is constant - // TODO: need to find channel dimension instead of hardcoded zero - const size_t & channel_dim = (input_rank == weights_rank ? 
1 : 0); - InitConstMask({channel_dim}).apply(m_input.get_node_shared_ptr()); - InitConstMask({channel_dim}).apply(m_weights.get_node_shared_ptr()); + // In case if first of the inputs is constant + InitConstMask({0, 1/* potential output channel dim */}).apply(m_input.get_node_shared_ptr()); + auto input_mask = getMask(m_input); + if (!input_mask) { + NGRAPH_DEBUG << "No input mask for: " << m_output.get_node()->get_friendly_name() << std::endl; + return false; + } + + InitConstMask({0, 1}).apply(m_weights.get_node_shared_ptr()); auto weights_mask = getMask(m_weights); - auto input_mask = getMask(m_input); - - if (!weights_mask || !input_mask) { - NGRAPH_DEBUG << "No mask for: " << m_output.get_node()->get_friendly_name() << std::endl; + if (!weights_mask) { + NGRAPH_DEBUG << "No weights mask for: " << m_output.get_node()->get_friendly_name() << std::endl; return false; } auto input_mask_row = input_mask.get(); auto weights_mask_row = weights_mask.get(); - // Merge masks from two inputs + // Merging masks from two inputs auto output_mask = std::make_shared(m_output.get_partial_shape().rank().get_length()); auto output_mask_row = output_mask.get(); - auto out_mask_callback = [input_mask_row, weights_mask_row](Mask::Ptr cur_mask) -> bool { - auto omask_iter = cur_mask->rbegin(); - auto imask_iter = input_mask_row->rbegin(); - auto wmask_iter = weights_mask_row->rbegin(); - - for (auto & item : *cur_mask) { - item.clear(); - } - - while (imask_iter != input_mask_row->rend() && - wmask_iter != weights_mask_row->rend()) { - // Merge mask dimension values for both masks - // Example: (MaskValue[1,2,3,4], MaskValue[2,3]) -> MaskValue[2,3] - for (const auto & value : *imask_iter) { - if (wmask_iter->count(value)) { - omask_iter->insert(value); - } - } - - omask_iter++; - imask_iter++; - wmask_iter++; + auto out_mask_callback = [input_mask_row, weights_mask_row, union_eltwise_type](Mask::Ptr cur_mask) -> bool { + Mask::Ptr result_mask; + if (union_eltwise_type) { + result_mask = input_mask_row->union_masks_reversed(weights_mask_row); + } else { + result_mask = input_mask_row->intersect_masks_reversed(weights_mask_row); } + cur_mask->copy_value_from_mask_reversed(result_mask.get()); return true; }; output_mask->add_callback(out_mask_callback, input_mask); - output_mask->add_callback(out_mask_callback, weights_mask); - auto callback = [output_mask_row](Mask::Ptr cur_mask) -> bool { - auto omask_iter = output_mask_row->rbegin(); - auto cmask_iter = cur_mask->rbegin(); - while (omask_iter != output_mask_row->rend() && - cmask_iter != cur_mask->rend()) { - // TODO: check - *cmask_iter = *omask_iter; - - omask_iter++; - cmask_iter++; - } + input_mask->add_callback([weights_mask_row](Mask::Ptr cur_mask) -> bool { + cur_mask->copy_value_from_mask_reversed(weights_mask_row); return true; - }; - input_mask->add_callback(callback, output_mask); - weights_mask->add_callback(callback, output_mask); + }, weights_mask); + input_mask->add_callback([output_mask_row](Mask::Ptr cur_mask) -> bool { + cur_mask->copy_value_from_mask_reversed(output_mask_row); + return true; + }, output_mask); + weights_mask->add_callback([input_mask_row](Mask::Ptr cur_mask) -> bool { + cur_mask->copy_value_from_mask_reversed(input_mask_row); + return true; + }, input_mask); - // Init output mask output_mask->apply_callback(input_mask); + weights_mask->apply_callback(input_mask); + setMask(m_output, output_mask); return true; }; - auto m = std::make_shared(eltwise, "EltwiseMaskPropagation"); + auto m = std::make_shared(eltwise, 
"ElementwiseMaskPropagation"); + register_matcher(m, callback); + } +}; + +class ngraph::pass::mask_propagation::FakeQuantize : public MatcherPass{ +public: + FakeQuantize(){ + auto input = pattern::any_input(pattern::has_static_shape()); + auto input_low = pattern::any_input(pattern::has_static_shape()); + auto input_high = pattern::any_input(pattern::has_static_shape()); + auto output_low = pattern::any_input(pattern::has_static_shape()); + auto output_high = pattern::any_input(pattern::has_static_shape()); + auto fake_quantize = pattern::wrap_type({input, input_low, input_high, output_low, + output_high}); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto & pattern_map = m.get_pattern_value_map(); + const auto & m_input = pattern_map.at(input); + const auto & m_input_low = pattern_map.at(input_low); + const auto & m_input_high = pattern_map.at(input_high); + const auto & m_output_low = pattern_map.at(output_low); + const auto & m_output_high = pattern_map.at(output_high); + const auto & m_output = pattern_map.at(fake_quantize); + + auto input_mask = getMask(m_input); + + // Input mask is the only source of pruning in FQ + if (!input_mask) { + NGRAPH_DEBUG << "FakeQuantize: No input mask for " << *m_output.get_node() << "\n"; + return false; + } + + auto input_mask_row = input_mask.get(); + + // Propagate input mask to output mask and in the opposite direction + auto output_mask = std::make_shared(m_output.get_partial_shape().rank().get_length()); + auto output_mask_row = output_mask.get(); + + // Output mask is equal to input mask + auto output_mask_callback = [input_mask_row](Mask::Ptr cur_mask) -> bool { + cur_mask->copy_value_from_mask(input_mask_row); + return true; + }; + + auto input_mask_callback = [output_mask_row](Mask::Ptr cur_mask) -> bool { + cur_mask->copy_value_from_mask(output_mask_row); + return true; + }; + + output_mask->add_callback(output_mask_callback, input_mask); + input_mask->add_callback(input_mask_callback, output_mask); + + // Calculate output mask + output_mask->apply_callback(input_mask); + setMask(m_output, output_mask); + + auto input_low_size = shape_size(m_input_low.get_shape()); + auto input_high_size = shape_size(m_input_high.get_shape()); + auto output_low_size = shape_size(m_output_low.get_shape()); + auto output_high_size = shape_size(m_output_high.get_shape()); + + // In the per-tensor case FQ params shouldn't be pruned + if (input_low_size == 1 && output_low_size == 1 && input_high_size == 1 && output_high_size == 1) { + return true; + } + + // If input/output ranges in FQ should be broadcasted to input shape -> broadcast this consant values + // for the convenience of working with the masks + NodeVector fq_params_nodes{m_input_low.get_node_shared_ptr(), + m_input_high.get_node_shared_ptr(), + m_output_low.get_node_shared_ptr(), + m_output_high.get_node_shared_ptr()}; + auto fq_node = std::dynamic_pointer_cast(m_output.get_node_shared_ptr()); + size_t idx = 0; + if (fq_node->get_auto_broadcast() != ngraph::op::AutoBroadcastType::NONE) { + for (auto const_node : fq_params_nodes) { + auto new_shape = broadcast_shape_to_rank(const_node->get_shape(), + m_input.get_partial_shape().rank().get_length()); + auto const_copy = const_node->clone_with_new_inputs(const_node->input_values()); + auto new_const = std::dynamic_pointer_cast(const_copy); + new_const->set_data_shape(new_shape); + new_const->validate_and_infer_types(); + new_const->set_friendly_name(const_node->get_friendly_name()); + 
ngraph::copy_runtime_info(const_node, new_const); + ngraph::replace_node(const_node, new_const); + fq_params_nodes[idx++] = new_const; + } + } + + auto fq_params_mask_callback = [input_mask_row](Mask::Ptr cur_mask) -> bool { + cur_mask->at(1/* fq params have same shapes as input */) = input_mask_row->at(1 /* channel dim in data */); + return true; + }; + + for (auto fq_param : fq_params_nodes) { + auto mask = std::make_shared(fq_param->get_shape().size()); + mask->add_callback(fq_params_mask_callback, input_mask); + input_mask->add_callback([mask](Mask::Ptr cur_mask) -> bool { + return true; + }, mask); + mask->apply_callback(input_mask); + setMask(fq_param->output(0), mask); + } + + return true; + }; + + auto m = std::make_shared(fake_quantize, "FakeQuantizeMaskPropagation"); + register_matcher(m, callback); + } +}; + +class ngraph::pass::mask_propagation::Concat : public MatcherPass{ +public: + Concat() { + auto concat = pattern::wrap_type(pattern::has_static_shape()); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto & pattern_map = m.get_pattern_value_map(); + const auto & m_output = pattern_map.at(concat); + auto concat_ptr = std::dynamic_pointer_cast(m_output.get_node_shared_ptr()); + auto axis = concat_ptr->get_concatenation_axis(); + + auto inputs = concat_ptr->inputs(); + std::map input_masks; + std::map input_masks_row; + std::vector input_sizes; + + size_t first_input_idx = 0; + Mask::Ptr first_input_mask; + bool first_initialized = false; + for (size_t i=0; i < inputs.size(); i++) { + auto input = inputs[i]; + auto input_mask = getMask(input.get_source_output()); + if (input_mask) { + input_masks[i] = input_mask; + input_masks_row[i] = input_mask.get(); + + if (!first_initialized) { + first_input_idx = i; + first_input_mask = input_mask; + first_initialized = true; + } + } + input_sizes.push_back(input.get_shape().at(axis)); + } + + if (!first_initialized) { + return false; + } + + auto output_mask = std::make_shared(m_output.get_partial_shape().rank().get_length()); + auto output_mask_row = output_mask.get(); + + auto out_mask_callback = [input_masks_row, input_sizes, axis](Mask::Ptr cur_mask) -> bool { + int64_t cur_size = 0; + cur_mask->at(axis).clear(); + + for (size_t i=0; i < input_sizes.size(); ++i) { + if (input_masks_row.count(i)) { + for (auto idx : input_masks_row.at(i)->at(axis)) { + cur_mask->at(axis).insert(idx + cur_size); + } + } + cur_size += input_sizes[i]; + } + return true; + }; + + auto create_input_mask_callback_for_idx = [output_mask_row, input_sizes, axis](size_t input_idx){ + auto input_mask_callback = [output_mask_row, input_sizes, axis, input_idx](Mask::Ptr cur_mask) -> bool { + cur_mask->clean_dim_values(); + uint64_t min_val = 0; + for (size_t i = 0; i < input_idx; i++) { + min_val += input_sizes[i]; + } + uint64_t max_val = min_val + input_sizes[input_idx]; + for (auto idx : output_mask_row->at(axis)) { + if (idx < max_val && idx >= min_val) { + cur_mask->at(axis).insert(idx - min_val); + } + } + return true; + }; + return input_mask_callback; + }; + output_mask->add_callback(out_mask_callback, first_input_mask); + + for (size_t i=0; i < inputs.size(); ++i) { + if (input_masks.count(i) && i != first_input_idx) { + auto input_mask = input_masks.at(i); + input_mask->add_callback(create_input_mask_callback_for_idx(i), + first_input_mask); + first_input_mask->add_callback([](Mask::Ptr cur_mask) -> bool { + return true; + }, input_mask); + } + } + 
first_input_mask->add_callback(create_input_mask_callback_for_idx(first_input_idx), + output_mask); + output_mask->apply_callback(first_input_mask); + setMask(m_output, output_mask); + + return true; + }; + auto m = std::make_shared(concat, "ConcatMaskPropagation"); register_matcher(m, callback); } }; @@ -266,7 +554,9 @@ public: class ngraph::pass::mask_propagation::PassThrough : public MatcherPass { public: PassThrough() { - auto unary_op = pattern::wrap_type(); + auto unary_op = pattern::wrap_type(); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { const auto & pattern_map = m.get_pattern_value_map(); @@ -312,5 +602,8 @@ ngraph::pass::PropagateMasks::PropagateMasks() { add_matcher(); add_matcher(); add_matcher(); + add_matcher(); + add_matcher(); + add_matcher(); add_matcher(); } diff --git a/inference-engine/src/offline_transformations/src/pruning/pruning.cpp b/inference-engine/src/offline_transformations/src/pruning/pruning.cpp index 3159e3db7db..ad7f410b8f3 100644 --- a/inference-engine/src/offline_transformations/src/pruning/pruning.cpp +++ b/inference-engine/src/offline_transformations/src/pruning/pruning.cpp @@ -15,8 +15,13 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::Pruning, "Pruning", 0); bool ngraph::pass::Pruning::run_on_function(std::shared_ptr f) { Manager manager(get_pass_config()); + + // Initialize masks only for Convolutions/GroupConvolutions weights (needed to init mask in source Constant of + // weights-calculating subgraph). For other node types masks initialized in PropagateMasks pass. + manager.register_pass(); manager.register_pass(); + #ifdef NGRAPH_DEBUG_ENABLE // VisualizeTree modifier helps to print Masks and mark nodes with masks /* diff --git a/inference-engine/src/offline_transformations/src/pruning/shrink_weights.cpp b/inference-engine/src/offline_transformations/src/pruning/shrink_weights.cpp index 80c2abbb709..08e7ef152c0 100644 --- a/inference-engine/src/offline_transformations/src/pruning/shrink_weights.cpp +++ b/inference-engine/src/offline_transformations/src/pruning/shrink_weights.cpp @@ -54,6 +54,8 @@ bool ngraph::pass::ShrinkWeights::run_on_function(std::shared_ptrsize(); ++dim) { const auto &dim_size = mask->at(dim).size(); if (dim_size == 0) continue; + // Broadcastable 1-size dimension shouldn't be shrank with mask + if (const_node->get_shape().at(dim) == 1 && dim_size > 1) continue; // Convert dims that we want remove to dims that we need to keep std::vector dims_to_keep; diff --git a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iexecutable_network_internal.hpp b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iexecutable_network_internal.hpp index 58951410383..1f3eb681e4b 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iexecutable_network_internal.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iexecutable_network_internal.hpp @@ -140,14 +140,6 @@ protected: virtual std::shared_ptr CreateInferRequestImpl(InputsDataMap networkInputs, OutputsDataMap networkOutputs); - /** - * @brief Exports an internal hardware-dependent model to a stream. - * @note The function is called from IExecutableNetworkInternal::Export(std::ostream&), - * which performs common export first and calls this plugin-dependent implementation after. - * @param networkModel A stream to export network to. 
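// Editor's note: minimal sketch, not part of this patch, of the pruning pipeline ordering
// established above: weight masks are seeded first, then propagated through
// FakeQuantize/Concat/Reshape/eltwise nodes, and only then applied. `f` is a hypothetical
// std::shared_ptr<ngraph::Function>; pruning.hpp and ngraph's pass manager are assumed to
// be included.
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitMasks>();       // seed masks on Conv/GroupConv weight Constants
manager.register_pass<ngraph::pass::PropagateMasks>();  // push masks through the rest of the graph
manager.register_pass<ngraph::pass::ShrinkWeights>();   // drop the pruned channels
manager.run_passes(f);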
- */ - virtual void ExportImpl(std::ostream& networkModel); - InferenceEngine::InputsDataMap _networkInputs; //!< Holds information about network inputs info InferenceEngine::OutputsDataMap _networkOutputs; //!< Holds information about network outputs data diff --git a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iplugin_internal.hpp b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iplugin_internal.hpp index 22c2f7e1c08..56e3e1cf5f7 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iplugin_internal.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iplugin_internal.hpp @@ -286,29 +286,12 @@ protected: const std::map& config); /** - * @brief Creates an executable network from an previously exported network - * @note The function is called from - * IInferencePlugin::ImportNetwork(std::istream&, const RemoteContext::Ptr&, const std::map&) - * performs common steps first and calls this plugin-dependent implementation after. - * @param networkModel Reference to network model output stream - * @param config A string -> string map of parameters - * @return An Executable network + * @brief Set input and output information to executable network. This method is used to + * set addtional information to InferenceEngine::IExecutableNetworkInternal create by device plugin. + * @param exeNetwork An executable network object to set information to + * @param inputs An input information to set + * @param outputs An output information to set */ - virtual std::shared_ptr ImportNetworkImpl(std::istream& networkModel, - const std::map& config); - - /** - * @brief Imports network wit RemoteContext - * @param networkModel Reference to network model output stream - * @param context - a pointer to plugin context derived from RemoteContext class used to - * execute the network - * @param config A string -> string map of parameters - * @return An Executable network - */ - virtual std::shared_ptr ImportNetworkImpl(std::istream& networkModel, - const std::shared_ptr& context, - const std::map& config); - void SetExeNetworkInfo(const std::shared_ptr& exeNetwork, const ConstInputsDataMap& inputs, const ConstOutputsDataMap& outputs); diff --git a/inference-engine/src/plugin_api/ie_icore.hpp b/inference-engine/src/plugin_api/ie_icore.hpp index 70d7aaff3a5..fb4ac0b3423 100644 --- a/inference-engine/src/plugin_api/ie_icore.hpp +++ b/inference-engine/src/plugin_api/ie_icore.hpp @@ -141,18 +141,6 @@ public: virtual ~ICore() = default; }; -/** - * @brief Type of magic value - * @ingroup ie_dev_api_plugin_api - */ -using ExportMagic = std::array; - -/** - * @brief Magic number used by ie core to identify exported network with plugin name - * @ingroup ie_dev_api_plugin_api - */ -constexpr static const ExportMagic exportMagic = {{0x1, 0xE, 0xE, 0x1}}; - /** * @private */ diff --git a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp index d26132abf2e..892c2e6bae4 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp @@ -1011,8 +1011,6 @@ void V10Parser::parsePreProcess( if (!meanSegmentPrecision || meanSegmentPrecision == Precision::MIXED) IE_THROW() << "mean blob defined without specifying precision."; - InferenceEngine::PreProcessChannel::Ptr preProcessChannel; - int lastChanNo = -1; std::unordered_set idsForMeanImage; @@ -1022,7 +1020,6 @@ void V10Parser::parsePreProcess( IE_THROW() << "Pre-process channel id invalid: " 
<< chanNo; } lastChanNo = chanNo; - preProcessChannel = pp[chanNo]; auto meanNode = chan.child("mean"); if (!meanNode.empty()) { @@ -1038,13 +1035,15 @@ void V10Parser::parsePreProcess( << " extpecting " << width << " x " << height << " x " << meanSegmentPrecision.size(); } - preProcessChannel->meanData = make_blob_with_precision( + auto meanData = make_blob_with_precision( TensorDesc(meanSegmentPrecision, {height, width}, Layout::HW)); - preProcessChannel->meanData->allocate(); - auto lockedMem = preProcessChannel->meanData->buffer(); + meanData->allocate(); + auto lockedMem = meanData->buffer(); char* data = lockedMem.as(); uint8_t* src_data = weights->cbuffer().as() + offset; memcpy(data, src_data, size); + + pp.setMeanImageForChannel(meanData, chanNo); } } } diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/gather_normalize_negative_indices.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/gather_normalize_negative_indices.hpp new file mode 100644 index 00000000000..1ec1ffe628e --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/op_conversions/gather_normalize_negative_indices.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ngraph { +namespace pass { + + class TRANSFORMATIONS_API GatherNegativeConstIndicesNormalize; + +} // namespace pass +} // namespace ngraph + +/** + * @ingroup ie_transformation_common_api + * @brief GatherNegativeConstIndicesNormalize checks if indices value is negative scalar and + * normalizes it using ShapeOf->Add->Cast subgraph. + * We need to remove this transformation after adding support of negative indices in + * future version of Gather operation. 
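 * Editor's note: illustrative example, not part of this patch. For data of rank 4 whose
 * channel dimension is static, e.g. [?, 3, 224, 224], Gather(data, indices = Constant(-1),
 * axis = Constant(1)) is rewritten to
 *   shape = ShapeOf(data)            // {?, 3, 224, 224}
 *   dim   = Gather(shape, 1, 0)      // 3
 *   index = Add(dim, -1)             // 2
 *   Gather(data, index, axis = 1)
 * so the negative constant index becomes an equivalent non-negative value computed in the
 * graph.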
+ */ +class ngraph::pass::GatherNegativeConstIndicesNormalize : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + GatherNegativeConstIndicesNormalize(); +}; diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 79f1dee8882..4ab5cf1e80d 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -70,6 +70,7 @@ #include "transformations/op_conversions/log_softmax_decomposition.hpp" #include "transformations/op_conversions/mvn6_decomposition.hpp" #include "transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp" +#include "transformations/op_conversions/gather_normalize_negative_indices.hpp" #include #include @@ -157,6 +158,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptradd_matcher(); decomp->add_matcher(); decomp->add_matcher(); + decomp->add_matcher(); decomp->set_name("ngraph::pass::CommonDecompositions"); // CF is required after all decompositions diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_interpolate1_to_interpolate4.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_interpolate1_to_interpolate4.cpp index 94173079c62..36a58551a68 100644 --- a/inference-engine/src/transformations/src/transformations/op_conversions/convert_interpolate1_to_interpolate4.cpp +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_interpolate1_to_interpolate4.cpp @@ -68,14 +68,20 @@ ngraph::pass::ConvertInterpolate1ToInterpolate4::ConvertInterpolate1ToInterpolat return false; } attrsV4.shape_calculation_mode = ngraph::opset4::Interpolate::ShapeCalcMode::sizes; - attrsV4.nearest_mode = ngraph::opset4::Interpolate::NearestMode::round_prefer_floor; + attrsV4.nearest_mode = ngraph::opset4::Interpolate::NearestMode::simple; attrsV4.pads_begin = attrsV0.pads_begin; attrsV4.pads_end = attrsV0.pads_end; attrsV4.antialias = attrsV0.antialias; - attrsV4.coordinate_transformation_mode = ngraph::opset4::Interpolate::CoordinateTransformMode::half_pixel; + attrsV4.coordinate_transformation_mode = ngraph::opset4::Interpolate::CoordinateTransformMode::asymmetric; attrsV4.cube_coeff = -0.75f; if (attrsV0.align_corners) { attrsV4.coordinate_transformation_mode = ngraph::opset4::Interpolate::CoordinateTransformMode::align_corners; + } else if ((attrsV4.mode == ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx || + attrsV4.mode == ngraph::op::v4::Interpolate::InterpolateMode::linear) && + std::all_of(attrsV4.pads_begin.begin(), attrsV4.pads_begin.end(), [](size_t i){return i == 0;}) && + std::all_of(attrsV4.pads_end.begin(), attrsV4.pads_end.end(), [](size_t i){return i == 0;}) && + !(input_shape_rank - 2 == 2 && attrsV0.axes == AxisSet{2, 3})) { + attrsV4.coordinate_transformation_mode = ngraph::opset4::Interpolate::CoordinateTransformMode::half_pixel; } auto interpolateV4 = std::make_shared(interpolationV0->input_value(0), interpolationV0->input_value(1), diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/gather_normalize_negative_indices.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/gather_normalize_negative_indices.cpp new file mode 100644 
index 00000000000..86713451869 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/op_conversions/gather_normalize_negative_indices.cpp @@ -0,0 +1,77 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/gather_normalize_negative_indices.hpp" + +#include + +#include +#include +#include +#include "itt.hpp" + +NGRAPH_RTTI_DEFINITION(ngraph::pass::GatherNegativeConstIndicesNormalize, "GatherNegativeConstIndicesNormalize", 0); + +ngraph::pass::GatherNegativeConstIndicesNormalize::GatherNegativeConstIndicesNormalize() { + MATCHER_SCOPE(GatherNegativeConstIndicesNormalize); + auto data_input = ngraph::pattern::any_input(pattern::has_static_rank()); + auto axis_input = ngraph::pattern::wrap_type(); + auto indices_input = ngraph::pattern::wrap_type(); + auto gather_node = std::make_shared(data_input, indices_input, axis_input); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); + auto gather = std::dynamic_pointer_cast(pattern_to_output.at(gather_node).get_node_shared_ptr()); + auto data = pattern_to_output.at(data_input); + auto axis_constant = std::dynamic_pointer_cast(pattern_to_output.at(axis_input).get_node_shared_ptr()); + auto indices_constant = std::dynamic_pointer_cast(pattern_to_output.at(indices_input).get_node_shared_ptr()); + + if (!gather || !axis_constant || !indices_constant) { + return false; + } + + auto indices = indices_constant->cast_vector(); + if (indices.size() != 1 || indices[0] >= 0) { + return false; + } + + auto axis = axis_constant->cast_vector(); + if (axis.size() != 1) { + return false; + } + + auto axis_value = axis[0]; + + // normalize `axis` value if it is negative + if (axis_value < 0) { + axis_value = axis_value + data.get_partial_shape().rank().get_length(); + } + + if (data.get_partial_shape().rank().get_length() < axis_value) { + return false; + } + + // check `axis` dimension of data tensor is static + if (!data.get_partial_shape()[axis_value].is_static()) { + return false; + } + + auto input_type = indices_constant->get_element_type(); + auto shape_of = std::make_shared(data, input_type); + auto input_gather = std::make_shared(shape_of, + ngraph::opset7::Constant::create(input_type, Shape{}, {axis_value}), ngraph::opset7::Constant::create(input_type, Shape{}, {0})); + + auto add = std::make_shared(input_gather, indices_constant); + auto gather_new = gather_node->copy_with_new_inputs({data, add, axis_constant}); + gather_new->set_friendly_name(gather->get_friendly_name()); + + ngraph::copy_runtime_info(gather, {shape_of, input_gather, add, gather_new}); + ngraph::replace_node(gather, gather_new); + + return true; + }; + + auto m = std::make_shared(gather_node, matcher_name); + register_matcher(m, callback); +} diff --git a/inference-engine/src/vpu/graph_transformer/src/middleend/passes/weights_analysis.cpp b/inference-engine/src/vpu/graph_transformer/src/middleend/passes/weights_analysis.cpp index 213e06ee1f5..27e703ec4f5 100644 --- a/inference-engine/src/vpu/graph_transformer/src/middleend/passes/weights_analysis.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/middleend/passes/weights_analysis.cpp @@ -92,7 +92,7 @@ bool checkGrowingOutput(const Model& model) { return false; } - static const float SCALE_THRESHOLD = 0.125f; + static const float SCALE_THRESHOLD = 0.1f; for (const auto& stage : model->getStages()) { if (stage->type() != StageType::Power && @@ -248,14 
+248,13 @@ void PassImpl::run(const Model& model) { if (firstStage && shift < 4 && isGrowingOutput && weights->desc().dim(Dim::C) > 1) { normalVal = 5; } - shift = correctShift(shift, firstStage, stage->origLayer()->type); shift -= normalVal; } firstStage = false; scale = 1; - if (shift > scaleThreshold) { + if (shift >= scaleThreshold) { scale = static_cast(1ULL << static_cast(shift)); } diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h b/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h index 8a6c42c4e97..22824ee5ec1 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h +++ b/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h @@ -93,16 +93,6 @@ public: model.write(_graphBlob.data(), _graphBlob.size()); } - void Export(const std::string &modelFileName) override { - std::ofstream modelFile(modelFileName, std::ios::out | std::ios::binary); - - if (modelFile.is_open()) { - Export(modelFile); - } else { - IE_THROW() << "The " << modelFileName << " file can not be opened for export"; - } - } - ie::Parameter GetMetric(const std::string &name) const override; ie::CNNNetwork GetExecGraphInfo() override; diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp index f61b9fbf7fb..75e7ef395d9 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp +++ b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp @@ -151,20 +151,6 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork( return executableNetwork; } -InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork( - const std::string& modelFileName, - const std::map& config) { - VPU_PROFILE(ImportNetwork); - - std::ifstream blobFile(modelFileName, std::ios::binary); - - if (!blobFile.is_open()) { - IE_THROW(NetworkNotRead); - } - - return ImportNetwork(blobFile, config); -} - InferenceEngine::Parameter Engine::GetMetric(const std::string& name, const std::map & options) const { const auto mvnc = _mvnc; diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.h b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.h index 07349f637e2..9fb074b5ac1 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.h +++ b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.h @@ -37,10 +37,6 @@ public: using ie::IInferencePlugin::ImportNetwork; - ie::IExecutableNetworkInternal::Ptr ImportNetwork( - const std::string& modelFileName, - const std::map& config) override; - ie::IExecutableNetworkInternal::Ptr ImportNetwork( std::istream& model, const std::map& config) override; diff --git a/inference-engine/tests/functional/inference_engine/caching_test.cpp b/inference-engine/tests/functional/inference_engine/caching_test.cpp index dd19dd3815d..cad8bd4428e 100644 --- a/inference-engine/tests/functional/inference_engine/caching_test.cpp +++ b/inference-engine/tests/functional/inference_engine/caching_test.cpp @@ -43,17 +43,18 @@ enum class TestLoadType { EContext, EModelName }; + using TestParam = std::tuple; // GCC4.8 limitation: have to specify type of each element in list static const std::vector loadVariants = { - TestParam { TestLoadType::ECNN, std::string("ByCNNNetwork"), false }, - TestParam { TestLoadType::EContext, std::string("ByRemoteContext"), true }, - TestParam { TestLoadType::EModelName, std::string("ByModelName"), false }, + TestParam { TestLoadType::ECNN, std::string("ByCNNNetwork"), false }, + TestParam { 
TestLoadType::EContext, std::string("ByRemoteContext"), true }, + TestParam { TestLoadType::EModelName, std::string("ByModelName"), false }, }; static const std::vector cacheFolders { - std::string("testCache"), + std::string("testCache"), }; std::string getTestCaseName(const testing::TestParamInfo> &obj) { @@ -100,12 +101,12 @@ public: MOCK_CONST_METHOD0(OnLoadNetworkFromFile, void(void)); - MOCK_METHOD2(ImportNetworkImpl, std::shared_ptr(std::istream& networkModel, - const std::map& config)); + MOCK_METHOD2(ImportNetwork, IExecutableNetworkInternal::Ptr(std::istream& networkModel, + const std::map& config)); - MOCK_METHOD3(ImportNetworkImpl, std::shared_ptr(std::istream& networkModel, - const RemoteContext::Ptr& context, - const std::map& config)); + MOCK_METHOD3(ImportNetwork, IExecutableNetworkInternal::Ptr(std::istream& networkModel, + const RemoteContext::Ptr& context, + const std::map& config)); MOCK_CONST_METHOD2(QueryNetwork, QueryNetworkResult(const CNNNetwork& network, const std::map& config)); @@ -120,7 +121,7 @@ class MockExecutableNetwork : public IExecutableNetworkInternal { public: MockExecutableNetwork() {} - MOCK_METHOD1(ExportImpl, void(std::ostream& networkModel)); + MOCK_METHOD1(Export, void(std::ostream& networkModel)); MOCK_METHOD0(CreateInferRequest, IInferRequestInternal::Ptr()); MOCK_CONST_METHOD0(GetInputsInfo, ConstInputsDataMap()); MOCK_CONST_METHOD0(GetOutputsInfo, ConstOutputsDataMap()); @@ -130,10 +131,10 @@ public: MOCK_METHOD1(setNetworkInputs, void(const InputsDataMap& networkInputs)); MOCK_METHOD1(setNetworkOutputs, void(const OutputsDataMap& networkOutputs)); - void Export(std::ostream& networkModel) override { - std::lock_guard guard(m_pluginMutex); - IExecutableNetworkInternal::Export(networkModel); - } + // void Export(std::ostream& networkModel) override { + // std::lock_guard guard(m_pluginMutex); + // IExecutableNetworkInternal::Export(networkModel); + // } void SetPointerToPlugin(const IInferencePlugin::Ptr& plugin) override { std::lock_guard guard(m_pluginMutex); @@ -323,13 +324,13 @@ private: ON_CALL(plugin, GetMetric(METRIC_KEY(DEVICE_ARCHITECTURE), _)). WillByDefault(Return("mock")); - ON_CALL(plugin, ImportNetworkImpl(_, _, _)). + ON_CALL(plugin, ImportNetwork(_, _, _)). WillByDefault(Invoke([&](std::istream &istr, RemoteContext::Ptr, const std::map &) { return createMockIExecutableNet(); })); - ON_CALL(plugin, ImportNetworkImpl(_, _)). + ON_CALL(plugin, ImportNetwork(_, _)). WillByDefault(Invoke([&](std::istream &istr, const std::map &) { return createMockIExecutableNet(); })); @@ -403,9 +404,9 @@ TEST_P(CachingTest, TestLoad) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -415,9 +416,9 @@ TEST_P(CachingTest, TestLoad) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 
1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -426,43 +427,43 @@ TEST_P(CachingTest, TestLoad) { } TEST_P(CachingTest, TestLoadCustomImportExport) { - const int customNumber = 1234; + const char customData[] = {1, 2, 3, 4, 5}; EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(SUPPORTED_METRICS), _)).Times(AnyNumber()); EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(IMPORT_EXPORT_SUPPORT), _)).Times(AnyNumber()); EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(DEVICE_ARCHITECTURE), _)).Times(AnyNumber()); - ON_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)). + ON_CALL(*mockPlugin, ImportNetwork(_, _, _)). WillByDefault(Invoke([&](std::istream& s, RemoteContext::Ptr, const std::map &) { - int a; - s >> a; - EXPECT_EQ(customNumber, a); + char a[sizeof(customData)]; + s.read(a, sizeof(customData)); + EXPECT_EQ(memcmp(a, customData, sizeof(customData)), 0); auto mock = std::make_shared(); EXPECT_CALL(*mock, GetInputsInfo()).Times(AnyNumber()).WillRepeatedly(Return(ConstInputsDataMap{})); EXPECT_CALL(*mock, GetOutputsInfo()).Times(AnyNumber()).WillRepeatedly(Return(ConstOutputsDataMap{})); return mock; })); - ON_CALL(*mockPlugin, ImportNetworkImpl(_, _)). + ON_CALL(*mockPlugin, ImportNetwork(_, _)). WillByDefault(Invoke([&](std::istream &s, const std::map &) { - int a; - s >> a; - EXPECT_EQ(customNumber, a); + char a[sizeof(customData)]; + s.read(a, sizeof(customData)); + EXPECT_EQ(memcmp(a, customData, sizeof(customData)), 0); auto mock = std::make_shared(); EXPECT_CALL(*mock, GetInputsInfo()).Times(AnyNumber()).WillRepeatedly(Return(ConstInputsDataMap{})); EXPECT_CALL(*mock, GetOutputsInfo()).Times(AnyNumber()).WillRepeatedly(Return(ConstOutputsDataMap{})); return mock; })); - ON_CALL(*net, ExportImpl(_)).WillByDefault(Invoke([&] (std::ostream& s) { - s << customNumber; + ON_CALL(*net, Export(_)).WillByDefault(Invoke([&] (std::ostream& s) { + s.write(customData, sizeof(customData)); })); { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -472,9 +473,9 @@ TEST_P(CachingTest, TestLoadCustomImportExport) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 
1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -497,9 +498,9 @@ TEST_P(CachingTest, TestChangeLoadConfig) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunctionWithCfg(ie, {{CUSTOM_KEY, "0"}}); @@ -509,9 +510,9 @@ TEST_P(CachingTest, TestChangeLoadConfig) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunctionWithCfg(ie, {{CUSTOM_KEY, "1"}}); @@ -526,9 +527,9 @@ TEST_P(CachingTest, TestNoCacheEnabled) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { m_testFunction(ie); }); @@ -544,9 +545,9 @@ TEST_P(CachingTest, TestNoCacheSupported) { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, OnLoadNetworkFromFile()).Times(m_type == TestLoadType::EModelName ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -563,9 +564,9 @@ TEST_P(CachingTest, TestNoCacheMetricSupported) { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, OnLoadNetworkFromFile()).Times(m_type == TestLoadType::EModelName ? 
1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -652,8 +653,8 @@ TEST_P(CachingTest, TestNoCacheEnabled_cacheDirConfig) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); testLoad([&](Core &ie) { m_testFunction(ie); }); @@ -667,9 +668,9 @@ TEST_P(CachingTest, TestLoadChangeCacheDir) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -681,9 +682,9 @@ TEST_P(CachingTest, TestLoadChangeCacheDir) { MkDirGuard dir(newCacheDir); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), newCacheDir}}); m_testFunction(ie); @@ -698,9 +699,9 @@ TEST_P(CachingTest, TestClearCacheDir) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); ie.SetConfig({{CONFIG_KEY(CACHE_DIR), ""}}); @@ -716,9 +717,9 @@ TEST_P(CachingTest, TestChangeOtherConfig) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 
1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); ie.SetConfig({{"someKey", "someValue"}}); @@ -735,9 +736,9 @@ TEST_P(CachingTest, TestChangeCacheDirFailure) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -747,9 +748,9 @@ TEST_P(CachingTest, TestChangeCacheDirFailure) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); EXPECT_ANY_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir + "/" + longName}})); @@ -769,9 +770,9 @@ TEST_P(CachingTest, TestCacheDirCreateRecursive) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), newCacheDir3}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -797,9 +798,9 @@ TEST_P(CachingTest, TestDeviceArchitecture) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 
1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { deviceToLoad = "mock.0"; ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); @@ -810,9 +811,9 @@ TEST_P(CachingTest, TestDeviceArchitecture) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { deviceToLoad = "mock.1"; ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); @@ -822,9 +823,9 @@ TEST_P(CachingTest, TestDeviceArchitecture) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { deviceToLoad = "mock.50"; ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); @@ -835,9 +836,9 @@ TEST_P(CachingTest, TestDeviceArchitecture) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { deviceToLoad = "mock.51"; ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); @@ -856,9 +857,9 @@ TEST_P(CachingTest, TestNoDeviceArchitecture) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { deviceToLoad = "mock.0"; ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); @@ -869,9 +870,9 @@ TEST_P(CachingTest, TestNoDeviceArchitecture) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 
1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { deviceToLoad = "mock.50"; ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); @@ -887,9 +888,9 @@ TEST_P(CachingTest, TestThrowOnExport) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1).WillOnce(Throw(1)); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1).WillOnce(Throw(1)); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); EXPECT_ANY_THROW(m_testFunction(ie)); @@ -906,9 +907,9 @@ TEST_P(CachingTest, TestThrowOnImport) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -918,13 +919,13 @@ TEST_P(CachingTest, TestThrowOnImport) { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); if (m_remoteContext) { - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(1).WillOnce(Throw(1)); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(1).WillOnce(Throw(1)); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); } else { - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(1).WillOnce(Throw(1)); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(1).WillOnce(Throw(1)); } - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); EXPECT_NO_THROW(m_testFunction(ie)); @@ -933,9 +934,9 @@ TEST_P(CachingTest, TestThrowOnImport) { { // Step 3: same load, cache is re-created on export on step 2 and shall be successfully imported now EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 
1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); EXPECT_NO_THROW(m_testFunction(ie)); @@ -951,9 +952,9 @@ TEST_P(CachingTest, TestNetworkModified) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -974,9 +975,9 @@ TEST_P(CachingTest, TestNetworkModified) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -985,9 +986,9 @@ TEST_P(CachingTest, TestNetworkModified) { { // Step 3: same load, should be ok now EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -1003,9 +1004,9 @@ TEST_P(CachingTest, TestCacheFileCorrupted) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -1021,9 +1022,9 @@ TEST_P(CachingTest, TestCacheFileCorrupted) { { // Step 2. Cache is corrupted, will be silently removed EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 
1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -1032,9 +1033,9 @@ TEST_P(CachingTest, TestCacheFileCorrupted) { { // Step 3: same load, should be ok now due to re-creation of cache EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -1050,9 +1051,9 @@ TEST_P(CachingTest, TestCacheFileOldVersion) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -1083,9 +1084,9 @@ TEST_P(CachingTest, TestCacheFileOldVersion) { { // Step 2. Build number mismatch, cache will be silently removed EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -1094,9 +1095,9 @@ TEST_P(CachingTest, TestCacheFileOldVersion) { { // Step 3: same load, should be ok now due to re-creation of cache EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 
1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -1118,9 +1119,9 @@ TEST_P(CachingTest, LoadHetero_NoCacheMetric) { for (int i = 0; i < 2; i++) { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -1138,9 +1139,9 @@ TEST_P(CachingTest, LoadHetero_OneDevice) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -1152,9 +1153,9 @@ TEST_P(CachingTest, LoadHetero_OneDevice) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(1); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -1172,9 +1173,9 @@ TEST_P(CachingTest, LoadHetero_TargetFallbackFromCore) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); ie.SetConfig({{"TARGET_FALLBACK", "mock"}}, CommonTestUtils::DEVICE_HETERO); @@ -1187,9 +1188,9 @@ TEST_P(CachingTest, LoadHetero_TargetFallbackFromCore) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(1); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); ie.SetConfig({{"TARGET_FALLBACK", "mock"}}, CommonTestUtils::DEVICE_HETERO); @@ -1200,20 +1201,20 @@ TEST_P(CachingTest, LoadHetero_TargetFallbackFromCore) { 
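+// Note on the custom import/export payload used in the caching tests below: the
+// mocked Export now writes a small binary blob and the mocked ImportNetwork reads
+// it back with std::istream::read and checks it with memcmp, instead of streaming
+// an int through operator<< / operator>>. A minimal sketch of the round-trip,
+// assuming the same customData array declared in the test:
+//   s.write(customData, sizeof(customData));            // export side
+//   char a[sizeof(customData)]; s.read(a, sizeof(a));   // import side
+//   EXPECT_EQ(memcmp(a, customData, sizeof(a)), 0);     // byte-for-byte check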
TEST_P(CachingTest, LoadHetero_MultiArchs) { EXPECT_CALL(*mockPlugin, GetMetric(_, _)).Times(AnyNumber()); - int customNumber = 1234; - ON_CALL(*mockPlugin, ImportNetworkImpl(_, _)). + const char customData[] = {1, 2, 3, 4, 5}; + ON_CALL(*mockPlugin, ImportNetwork(_, _)). WillByDefault(Invoke([&](std::istream &s, const std::map &) { - int a; - s >> a; - EXPECT_EQ(customNumber, a); + char a[sizeof(customData)]; + s.read(a, sizeof(customData)); + EXPECT_EQ(memcmp(a, customData, sizeof(customData)), 0); auto mock = std::make_shared(); EXPECT_CALL(*mock, GetInputsInfo()).Times(AnyNumber()).WillRepeatedly(Return(ConstInputsDataMap{})); EXPECT_CALL(*mock, GetOutputsInfo()).Times(AnyNumber()).WillRepeatedly(Return(ConstOutputsDataMap{})); return mock; })); - ON_CALL(*net, ExportImpl(_)).WillByDefault(Invoke([&] (std::ostream& s) { - s << customNumber; + ON_CALL(*net, Export(_)).WillByDefault(Invoke([&] (std::ostream& s) { + s.write(customData, sizeof(customData)); })); EXPECT_CALL(*mockPlugin, QueryNetwork(_, _)).Times(AnyNumber()).WillRepeatedly( Invoke([&](const CNNNetwork &network, const std::map &config) { @@ -1249,9 +1250,9 @@ TEST_P(CachingTest, LoadHetero_MultiArchs) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(AtLeast(2)); // for .1 and for .51 - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(AtLeast(2)); // for .1 and for .51 + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(AtLeast(2)); // for .1 and for .51 testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -1264,9 +1265,9 @@ TEST_P(CachingTest, LoadHetero_MultiArchs) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(AtLeast(2)); // for .2 and for .52 - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(AtLeast(2)); // for .2 and for .52 + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -1276,9 +1277,9 @@ TEST_P(CachingTest, LoadHetero_MultiArchs) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(AtLeast(1)); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(AtLeast(1)); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(AtLeast(1)); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -1305,9 +1306,9 @@ TEST_P(CachingTest, LoadHetero_MultiArchs_TargetFallback_FromCore) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, 
ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); ie.SetConfig({{"TARGET_FALLBACK", "mock.1"}}, CommonTestUtils::DEVICE_HETERO); @@ -1318,9 +1319,9 @@ TEST_P(CachingTest, LoadHetero_MultiArchs_TargetFallback_FromCore) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(1); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{"TARGET_FALLBACK", "mock.1"}}, CommonTestUtils::DEVICE_HETERO); ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); @@ -1330,9 +1331,9 @@ TEST_P(CachingTest, LoadHetero_MultiArchs_TargetFallback_FromCore) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{"TARGET_FALLBACK", "mock.51"}}, CommonTestUtils::DEVICE_HETERO); ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); @@ -1366,9 +1367,9 @@ TEST_P(CachingTest, LoadMulti_race) { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(devCount - 1); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(devCount - 1); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), cacheDir}}); ASSERT_NO_THROW(m_testFunction(ie)); @@ -1394,9 +1395,9 @@ TEST_P(CachingTest, Load_threads) { MkDirGuard guard(cacheDir); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(THREADS_COUNT - 1); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(THREADS_COUNT - 1); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), cacheDir}}); std::vector threads; @@ -1443,12 +1444,12 @@ TEST_P(CachingTest, LoadMulti_Archs) { // Load network from file shall not be called for plugins with caching supported EXPECT_CALL(*mockPlugin, OnLoadNetworkFromFile()).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(TEST_DEVICE_MAX_COUNT - 2) + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, 
_)).Times(TEST_DEVICE_MAX_COUNT - 2) .WillRepeatedly(Invoke([&](std::istream &, const std::map &) { return createMockIExecutableNet(); })); - EXPECT_CALL(*net, ExportImpl(_)).Times(2); + EXPECT_CALL(*net, Export(_)).Times(2); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); ASSERT_NO_THROW(m_testFunction(ie)); @@ -1490,9 +1491,9 @@ TEST_P(CachingTest, LoadMulti_NoCachingOnDevice) { // Load network from file shall not be called by Multi plugin for devices with caching supported EXPECT_CALL(*mockPlugin, OnLoadNetworkFromFile()).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); ExecutableNetwork exeNet; diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_intermediate_precision_selection_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_intermediate_precision_selection_transformation.cpp new file mode 100644 index 00000000000..0d6b29d5fe5 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_intermediate_precision_selection_transformation.cpp @@ -0,0 +1,317 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "lpt_ngraph_functions/concat_function.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" +#include "simple_low_precision_transformer.hpp" + +using namespace testing; +using namespace ngraph; +using namespace ngraph::pass; + +namespace { + +class ConcatTransformationActualValues { +public: + ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantize1; + ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantize2; +}; + +inline std::ostream& operator<<(std::ostream& out, const ConcatTransformationActualValues& values) { + return out << "_" << values.fakeQuantize1 << "_" << values.fakeQuantize2; +} + +class ConcatTransformationResultValues { +public: + ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantize1; + ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantize2; + ngraph::element::Type precisionBeforeOp; + ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1; + ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2; + ngraph::element::Type precisionAfterOperation; + ngraph::builder::subgraph::DequantizationOperations dequantizationAfter1; + ngraph::builder::subgraph::DequantizationOperations dequantizationAfter2; +}; + +inline std::ostream& operator<<(std::ostream& out, const ConcatTransformationResultValues& values) { + return out << "_" << + values.fakeQuantize1 << "_" << + values.fakeQuantize2 << "_" << + values.dequantizationAfter1 << "_" << + values.dequantizationAfter2; +} + +class ConcatTransformationTestValues { +public: + ngraph::pass::low_precision::LayerTransformation::Params params; + bool multiChannels; + ConcatTransformationActualValues actual; + ConcatTransformationResultValues result; +}; + +inline std::ostream& 
operator<<(std::ostream& out, const ConcatTransformationTestValues& values) { + return out << "_" << values.multiChannels << "_" << values.actual << "_" << values.result; +} + +typedef std::tuple < + ngraph::element::Type, + ngraph::Shape, + ConcatTransformationTestValues +> ConcatTransformationParams; + +class ConcatWithIntermediatePrecisionSelectionTransformation : public LayerTransformation, public testing::WithParamInterface { +public: + void SetUp() override { + const ngraph::element::Type precision = std::get<0>(GetParam()); + const ngraph::Shape shape = std::get<1>(GetParam()); + ConcatTransformationTestValues testValues = std::get<2>(GetParam()); + + actualFunction = ngraph::builder::subgraph::ConcatFunction::getOriginalWithIntermediateAvgPool( + precision, + shape, + testValues.actual.fakeQuantize1, + testValues.actual.fakeQuantize2); + + SimpleLowPrecisionTransformer transform; + if (testValues.multiChannels) { + transform.addBranchSpecific(testValues.params); + } else { + transform.addBranchSpecific(testValues.params); + } + transform.add(testValues.params); + transform.add(testValues.params); + transform.transform(actualFunction); + + referenceFunction = ngraph::builder::subgraph::ConcatFunction::getReferenceWithIntermediateAvgPool( + precision, + shape, + testValues.result.fakeQuantize1, + testValues.result.fakeQuantize2, + testValues.result.precisionBeforeOp, + testValues.result.dequantizationBefore1, + testValues.result.dequantizationBefore2, + testValues.result.precisionAfterOperation, + testValues.result.dequantizationAfter1, + testValues.result.dequantizationAfter2); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + const ngraph::element::Type precision = std::get<0>(obj.param); + const ngraph::Shape shape = std::get<1>(obj.param); + const ConcatTransformationTestValues testValues = std::get<2>(obj.param); + + std::ostringstream result; + result << + LayerTransformation::getTestCaseNameByParams(precision, shape, testValues.params) << "_" << + (testValues.multiChannels ? 
"multiChannels_" : "notMultiChannels_") << + testValues.actual << "_" << + testValues.result << "_"; + return result.str(); + } +}; + +TEST_P(ConcatWithIntermediatePrecisionSelectionTransformation, CompareFunctions) { + actualFunction->validate_nodes_and_infer_types(); + auto res = compare_functions(referenceFunction, actualFunction, true, false, true); + ASSERT_TRUE(res.first) << res.second; +} + +const std::vector precisions = { + ngraph::element::f32, + // ngraph::element::f16 +}; + +const std::vector testValues = { + // Concat: FakeQuantize operations with signed intervals but consumer requires U8 + { + LayerTransformation::createParamsU8I8(), + false, + { + { 256ul, ngraph::Shape({}), {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, + { 256ul, ngraph::Shape({}), {-1.28f / 2.f}, {1.27f / 2.f}, {-1.28f / 2.f}, {1.27f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {-1.28f}, {1.27f}, {0.f}, {255.f} }, + { 256ul, ngraph::Shape({}), {-1.28f / 2.f}, {1.27f / 2.f}, {64.f}, {192.f} }, + ngraph::element::u8, + {{}, {}, {}}, + {{}, {}, {}}, + ngraph::element::u8, + { ngraph::element::f32, { 128.f }, { 0.01f } }, + { {}, { 128.f }, { 0.01f } } + } + }, + + // Concat: FakeQuantize operations with unsigned intervals but consumer requires I8 + { + LayerTransformation::createParamsI8I8(), + false, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, {2.55f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {-128.f}, {127.f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {-128.f}, { -0.f} }, + ngraph::element::i8, + {{}, {}, {}}, + {{}, {}, {}}, + ngraph::element::i8, + { ngraph::element::f32, { -128.f }, { 0.01f } }, + { {}, { -128.f }, { 0.01f } } + } + }, + + // ConcatMultichannel: FakeQuantize operations with signed intervals but consumer requires U8 + { + LayerTransformation::createParamsU8I8(), + true, + { + { 256ul, ngraph::Shape({}), {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, + { 256ul, ngraph::Shape({}), {-1.28f / 2.f}, {1.27f / 2.f}, {-1.28f / 2.f}, {1.27f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {-1.28f}, {1.27f}, {0.f}, {255.f} }, + { 256ul, ngraph::Shape({}), {-1.28f / 2.f}, {1.27f / 2.f}, {0.f}, { 255.f} }, + ngraph::element::u8, + {}, + {}, + ngraph::element::u8, + { ngraph::element::f32, { 128.f }, {{ 0.01f, 0.01f, 0.01f, 0.005f, 0.005f, 0.005f }} }, + { {}, { 128.f }, { 0.005f } } + } + }, + + // ConcatMultichannel: FakeQuantize operations with unsigned intervals but consumer requires I8 + { + LayerTransformation::createParamsI8I8(), + true, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, {2.55f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {-128.f}, {127.f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {-128.f}, { 127.f} }, + ngraph::element::i8, + {{}, {}, {}}, + {{}, {}, {}}, + ngraph::element::i8, + { ngraph::element::f32, { -128.f }, {{ 0.01f, 0.01f, 0.01f, 0.005f, 0.005f, 0.005f }} }, + { {}, { -128.f }, { 0.005f } } + } + }, + + // Concat: FakeQuantize operations with unsigned intervals, no consumer limitations: FQ were decomposed to U8 precision + { + LayerTransformation::createParamsU8I8AndI8(), + false, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, {2.55f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {255.f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, { 128.f} 
}, + ngraph::element::u8, + {{}, {}, {}}, + {{}, {}, {}}, + ngraph::element::u8, + { ngraph::element::f32, {}, { 0.01f } }, + { {}, {}, { 0.01f } } + } + }, + + // Concat: FakeQuantize operations with signed intervals, no consumer limitations: FQ were decomposed to I8 precision + { + LayerTransformation::createParamsU8I8AndI8(), + false, + { + { 256ul, ngraph::Shape({}), {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, + { 256ul, ngraph::Shape({}), {-1.28f / 2.f}, {1.27f / 2.f}, {-1.28f / 2.f}, {1.27f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {-1.28f}, {1.27f}, {-128.f}, {127.f} }, + { 256ul, ngraph::Shape({}), {-1.28f / 2.f}, {1.27f / 2.f}, {-64.f}, {64.f} }, + ngraph::element::i8, + {{}, {}, {}}, + {{}, {}, {}}, + ngraph::element::i8, + { ngraph::element::f32, {}, { 0.01f } }, + { {}, {}, { 0.01f } } + } + }, + + // ConcatMultichannel: FakeQuantize operations with unsigned intervals, no consumer limitations: FQ were decomposed to U8 precision + { + LayerTransformation::createParamsU8I8AndI8(), + true, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, {2.55f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {255.f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, {255.f} }, + ngraph::element::u8, + {{}, {}, {}}, + {{}, {}, {}}, + ngraph::element::u8, + { ngraph::element::f32, {}, {{ 0.01f, 0.01f, 0.01f, 0.005f, 0.005f, 0.005f }} }, + { {}, {}, { 0.005f } } + } + }, + + // ConcatMultichannel: FakeQuantize operations with signed intervals, no consumer limitations: FQ were decomposed to I8 precision + { + LayerTransformation::createParamsU8I8AndI8(), + true, + { + { 256ul, ngraph::Shape({}), {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, + { 256ul, ngraph::Shape({}), {-1.28f / 2.f}, {1.27f / 2.f}, {-1.28f / 2.f}, {1.27f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {-1.28f}, {1.27f}, {-128.f}, {127.f} }, + { 256ul, ngraph::Shape({}), {-1.28f / 2.f}, {1.27f / 2.f}, {-128.f}, {127.f} }, + ngraph::element::i8, + {{}, {}, {}}, + {{}, {}, {}}, + ngraph::element::i8, + { ngraph::element::f32, {}, {{ 0.01f, 0.01f, 0.01f, 0.005f, 0.005f, 0.005f }} }, + { {}, {}, { 0.005f } } + } + } +}; + +const std::vector shapes = { + { 1, 3, 9, 9 }, + { 4, 3, 9, 9 } +}; + +INSTANTIATE_TEST_CASE_P( + smoke_LPT, + ConcatWithIntermediatePrecisionSelectionTransformation, + ::testing::Combine( + ::testing::ValuesIn(precisions), + ::testing::ValuesIn(shapes), + ::testing::ValuesIn(testValues)), + ConcatWithIntermediatePrecisionSelectionTransformation::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/simple_low_precision_transformer.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/simple_low_precision_transformer.cpp index 8ee17c8e39b..3c48d56be5b 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/simple_low_precision_transformer.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/simple_low_precision_transformer.cpp @@ -49,19 +49,41 @@ bool SimpleLowPrecisionTransformer::isPrecisionPreserved(const std::shared_ptr& function) { + // initialization + for (auto it : branchSpecificTransformations) { + ngraph::pass::low_precision::LayerTransformationPtr transformation = it.second; + transformation->setParamsManager(this); + transformation->setLayerTransformationsManager(this); + } + + for (auto it : transformations) { + ngraph::pass::low_precision::LayerTransformationPtr 
transformation = it.second; + transformation->setParamsManager(this); + transformation->setLayerTransformationsManager(this); + } + + // transformation { ngraph::pass::low_precision::TypeRelaxedReplacer pass; pass.run_on_function(function); } ngraph::pass::low_precision::TransformationContext context(function); - GraphRewrite pass; - for (auto it : transformations) { - ngraph::pass::low_precision::LayerTransformationPtr transformation = it.second; - - transformation->setParamsManager(this); - transformation->setLayerTransformationsManager(this); - transformation->registerMatcherIn(pass, context); + { + GraphRewrite pass; + for (auto it : branchSpecificTransformations) { + ngraph::pass::low_precision::LayerTransformationPtr transformation = it.second; + transformation->registerMatcherIn(pass, context); + } + pass.run_on_function(function); + } + + { + GraphRewrite pass; + for (auto it : transformations) { + ngraph::pass::low_precision::LayerTransformationPtr transformation = it.second; + transformation->registerMatcherIn(pass, context); + } + pass.run_on_function(function); } - pass.run_on_function(function); } diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/simple_low_precision_transformer.hpp b/inference-engine/tests/functional/inference_engine/lp_transformations/simple_low_precision_transformer.hpp index b4bf3a9c978..c9582adf0f0 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/simple_low_precision_transformer.hpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/simple_low_precision_transformer.hpp @@ -28,9 +28,22 @@ public: bool isQuantized(const std::shared_ptr& layer) const noexcept override; bool isPrecisionPreserved(const std::shared_ptr& layer) const noexcept override; + template + ngraph::pass::low_precision::LayerTransformationPtr addBranchSpecific(const ngraph::pass::low_precision::LayerTransformation::Params& params) { + const std::string typeName = ngraph::pass::low_precision::LowPrecisionTransformations::getType(); + + const auto it = branchSpecificTransformations.find(typeName); + if (it != branchSpecificTransformations.end()) { + branchSpecificTransformations.erase(it); + } + + auto transformation = std::make_shared(params); + branchSpecificTransformations.emplace(typeName, transformation); + return transformation; + } + template ngraph::pass::low_precision::LayerTransformationPtr add(const ngraph::pass::low_precision::LayerTransformation::Params& params) { - // const std::string typeName = typeid(ngraph::op::TypeRelaxed).name(); const std::string typeName = ngraph::pass::low_precision::LowPrecisionTransformations::getType(); const auto it = transformations.find(typeName); @@ -46,5 +59,6 @@ public: void transform(std::shared_ptr& function); private: + std::map branchSpecificTransformations; std::map transformations; }; diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_interpolate1_to_interpolate4_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_interpolate1_to_interpolate4_test.cpp index 9468db9287d..12177f78cbc 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/convert_interpolate1_to_interpolate4_test.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/convert_interpolate1_to_interpolate4_test.cpp @@ -54,7 +54,7 @@ TEST(TransformationTests, ConvertInterpolate1ToInterpolate4) { auto interpolate4_attr = 
opset4::Interpolate::InterpolateAttrs(opset4::Interpolate::InterpolateMode::nearest, opset4::Interpolate::ShapeCalcMode::sizes, std::vector{0, 0, 0, 0}, std::vector{0, 0, 0, 0}, - opset4::Interpolate::CoordinateTransformMode::asymmetric, opset4::Interpolate::NearestMode::floor, + opset4::Interpolate::CoordinateTransformMode::asymmetric, opset4::Interpolate::NearestMode::simple, false, -0.75); auto interpolate4 = std::make_shared(data_node, out_shape_node, default_scales_node, axes_node, interpolate4_attr); @@ -62,7 +62,7 @@ TEST(TransformationTests, ConvertInterpolate1ToInterpolate4) { f_ref = std::make_shared(NodeVector{interpolate4}, ParameterVector{data_node}); } - auto res = compare_functions(f, f_ref); + auto res = compare_functions(f, f_ref, true, false, false, true, true); ASSERT_TRUE(res.first) << res.second; } @@ -97,16 +97,16 @@ TEST(TransformationTests, ConvertInterpolate1ToInterpolate4_1) { auto default_scales_node = opset1::Constant::create(ngraph::element::f32, Shape{2}, {4.0f / 3.0f, 4.0f / 3.0f}); auto axes_node = opset1::Constant::create(ngraph::element::i64, Shape{2}, {2, 3}); - auto interpolate4_attr = opset4::Interpolate::InterpolateAttrs(opset4::Interpolate::InterpolateMode::linear, + auto interpolate4_attr = opset4::Interpolate::InterpolateAttrs(opset4::Interpolate::InterpolateMode::linear_onnx, opset4::Interpolate::ShapeCalcMode::sizes, std::vector{0, 0, 0, 0}, std::vector{0, 0, 0, 0}, - opset4::Interpolate::CoordinateTransformMode::align_corners, opset4::Interpolate::NearestMode::floor, - false, -0.75); + opset4::Interpolate::CoordinateTransformMode::asymmetric, opset4::Interpolate::NearestMode::simple, + true, -0.75); auto interpolate4 = std::make_shared(data_node, out_shape_node, default_scales_node, axes_node, interpolate4_attr); f_ref = std::make_shared(NodeVector{interpolate4}, ParameterVector{data_node}); } - auto res = compare_functions(f, f_ref); + auto res = compare_functions(f, f_ref, true, false, false, true, true); ASSERT_TRUE(res.first) << res.second; } diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_strided_slice_to_crop_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_strided_slice_to_crop_test.cpp index be5560f367e..731d96bbd3a 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/convert_strided_slice_to_crop_test.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/convert_strided_slice_to_crop_test.cpp @@ -53,6 +53,7 @@ TEST(TransformationTests, ConvertStridedSliceToCropTests1) { manager.register_pass(); manager.register_pass(); manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); } @@ -238,4 +239,53 @@ TEST(TransformationTests, ConvertStridedSliceToCropNegative2) { auto res = compare_functions(f, f_ref); ASSERT_TRUE(res.first) << res.second; -} \ No newline at end of file +} + + +TEST(TransformationTests, ConvertStridedSliceToCropNoneZeroBeginValuesWithMask) { + // when begin_mask/end_mask are present begin/end values should not affect output shape + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto input = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 2, 4}); + auto slice_begin = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 3, 2, 1}); + auto slice_end = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 2}); + auto slice_stride = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1, 1, 1, 1}); + + std::vector 
begin_mask = {1, 0, 1, 1}; + std::vector end_mask = {1, 0, 1, 0}; + std::vector new_axis_mask = {0, 1, 0, 0}; + std::vector shrink_axis_mask = {0, 0, 0, 0}; + std::vector ellipsis_mask = {0, 0, 0, 0}; + + auto sslice = std::make_shared(input, slice_begin, slice_end, slice_stride, + begin_mask, end_mask, + new_axis_mask, shrink_axis_mask, ellipsis_mask); + sslice->set_friendly_name("strided_slice"); + + f = std::make_shared(ngraph::NodeVector{sslice}, ngraph::ParameterVector{input}); + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto input = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 2, 4}); + + std::vector axes = {0, 1, 2, 3}; + std::vector dim = {1, 1, 2, 2}; + std::vector offset = {0, 0, 0, 0}; + + auto reshape = ngraph::op::util::reshapeTo(input, {1, 1, 2, 4}); + reshape->set_friendly_name("strided_slice/Reshape_for_Crop"); + + auto crop = std::make_shared(reshape, axes, dim, offset); + crop->set_friendly_name("strided_slice"); + + f_ref = std::make_shared(ngraph::NodeVector{crop}, ngraph::ParameterVector{input}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} diff --git a/inference-engine/tests/functional/inference_engine/transformations/gather_normalize_negative_indices_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/gather_normalize_negative_indices_test.cpp new file mode 100644 index 00000000000..ec6c4204a9b --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/gather_normalize_negative_indices_test.cpp @@ -0,0 +1,306 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; + +TEST(TransformationTests, GatherNegativeIndicesNormalize) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 15, 128}); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1}); + + auto gather = std::make_shared(data, indices, axis, 0); + + f = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto indices_type = ngraph::element::i32; + + auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 15, 128}); + auto indices = ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1}); + + auto shape_of = std::make_shared(data, indices_type); + auto input_gather = std::make_shared(shape_of, + ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {1}), ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {0})); + auto add = std::make_shared(input_gather, indices); + auto gather = std::make_shared(data, add, axis); + + f_ref = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, GatherNegativeIndicesNormalize_neg_axis) { + 
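+    // Worked example of the normalization encoded by the reference graph above
+    // (a sketch of the arithmetic only; the exact pass implementation may differ):
+    //   normalized_index = ShapeOf(data)[axis] + negative_index
+    // In this variant data has shape {1, 15, 128} and axis = -2, which addresses
+    // the dimension of size 15, so index -1 is expected to resolve to
+    // 15 + (-1) = 14 at runtime; the Add node in the reference function below
+    // computes exactly this sum.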
std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 15, 128}); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-2}); + + auto gather = std::make_shared(data, indices, axis, 0); + + f = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto indices_type = ngraph::element::i32; + + auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 15, 128}); + auto indices = ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-2}); + + auto shape_of = std::make_shared(data, indices_type); + auto input_gather = std::make_shared(shape_of, + ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {1}), ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {0})); + auto add = std::make_shared(input_gather, indices); + auto gather = std::make_shared(data, add, axis); + + f_ref = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, GatherNegativeIndicesNormalize_dif_input_types) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 15, 128}); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1}); + + auto gather = std::make_shared(data, indices, axis, 0); + + f = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto indices_type = ngraph::element::i32; + + auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 15, 128}); + auto indices = ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1}); + + auto shape_of = std::make_shared(data, indices_type); + auto input_gather = std::make_shared(shape_of, + ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {1}), ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {0})); + auto add = std::make_shared(input_gather, indices); + auto gather = std::make_shared(data, add, axis); + + f_ref = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, GatherNegativeIndicesNormalize_static_axis_dim) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape{DYN, 15, DYN}); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1}); + + auto gather = std::make_shared(data, indices, axis, 0); + + f = std::make_shared(ngraph::NodeVector{gather}, 
ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto indices_type = ngraph::element::i32; + + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape{DYN, 15, DYN}); + auto indices = ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1}); + + auto shape_of = std::make_shared(data, indices_type); + auto input_gather = std::make_shared(shape_of, + ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {1}), ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {0})); + auto add = std::make_shared(input_gather, indices); + auto gather = std::make_shared(data, add, axis); + + f_ref = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, GatherNegativeIndicesNormalize_static_axis_dim_neg_axis) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape{DYN, 15, DYN}); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-2}); + + auto gather = std::make_shared(data, indices, axis, 0); + + f = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto indices_type = ngraph::element::i32; + + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape{DYN, 15, DYN}); + auto indices = ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-2}); + + auto shape_of = std::make_shared(data, indices_type); + auto input_gather = std::make_shared(shape_of, + ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {1}), ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {0})); + auto add = std::make_shared(input_gather, indices); + auto gather = std::make_shared(data, add, axis); + + f_ref = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, GatherNegativeIndicesNormalize_non_static_axis_dim) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape{DYN, DYN, DYN}); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1}); + + auto gather = std::make_shared(data, indices, axis, 0); + + f = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto indices_type = ngraph::element::i32; + + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape{DYN, DYN, DYN}); + auto indices = ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, 
{-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1}); + + auto gather = std::make_shared(data, indices, axis); + + f_ref = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, GatherNegativeIndicesNormalize_positive_ind) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{2, 3}); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {0}); + + auto gather = std::make_shared(data, indices, axis, 0); + + f = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{2, 3}); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {0}); + + auto gather = std::make_shared(data, indices, axis); + + f_ref = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, GatherNegativeIndicesNormalize_non_static_rank) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape::dynamic(ngraph::Rank::dynamic())); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {0}); + + auto gather = std::make_shared(data, indices, axis, 0); + + f = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape::dynamic()); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {0}); + + auto gather = std::make_shared(data, indices, axis); + + f_ref = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} diff --git a/inference-engine/tests/functional/inference_engine/transformations/pruning_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/pruning_test.cpp index 0f46a853cef..82c1fa6c9f2 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/pruning_test.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/pruning_test.cpp @@ -15,6 +15,7 @@ #include #include #include +#include using namespace testing; using namespace ngraph; @@ -67,6 +68,23 @@ TEST(TransformationTests, InitMasksOutputChannel) { compare_masks(*getMask(weights->output(0)), {{}, {1}, {}, {}}); } +// TODO: add test init masks with subgraph +TEST(TransformationTests, TestInitMasks) { + Shape weights_shape{6, 3, 3, 3}; + Shape 
input_shape{1, 3, 64, 64}; + auto input = std::make_shared(element::f32, input_shape); + auto weights = create_constant_with_zeros(weights_shape, {{1, 2, 3}, {}, {}, {}}); + auto conv = std::make_shared(input, weights, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto f = std::make_shared(NodeVector{conv}, ParameterVector{input}); + pass::Manager m; + m.register_pass(); + m.run_passes(f); + + compare_masks(*getMask(weights.get_node_shared_ptr()->output(0)), {{1, 2, 3}, {}, {}, {}}); +} + TEST(TransformationTests, InitMasksNegative) { Shape weights_shape{6, 3, 3, 3}; auto weights = opset5::Constant::create(element::f32, weights_shape, {0.5}); @@ -85,6 +103,7 @@ TEST(TransformationTests, PropagateMasksNegative) { auto f = std::make_shared(NodeVector{conv}, ParameterVector{input}); pass::Manager m; + m.register_pass(); m.register_pass(); m.run_passes(f); @@ -102,27 +121,35 @@ TEST(TransformationTests, PropagateMasksBasic) { CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); auto relu = std::make_shared(conv); + auto add_const = create_constant_with_zeros(Shape{1, 6, 1, 1}, {{}, {1, 2, 3, 4, 5}, {}, {}}); + auto add = std::make_shared(relu, add_const); + auto sub_const = create_constant_with_zeros(Shape{6, 1, 1}, {{1, 2, 3}, {}, {}}); - auto sub = std::make_shared(relu, sub_const); + auto sub = std::make_shared(add, sub_const); - auto mul_const = create_constant_with_zeros(Shape{6, 1, 1}, {{2}, {}, {}}); - auto mul = std::make_shared(sub, mul_const); + auto mul_const = create_constant_with_zeros(Shape{1, 6, 1, 1}, {{}, {4}, {}, {}}); + auto mul = std::make_shared(sub, mul_const); - auto weights2 = opset5::Constant::create(element::f32, weights_shape2, {0}); + auto weights2 = create_constant_with_zeros(weights_shape2, {{1, 2}, {1, 2, 3}, {}, {}}); auto conv2 = std::make_shared(mul, weights2, Strides(2, 1), CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); auto f = std::make_shared(NodeVector{conv2}, ParameterVector{input}); pass::Manager m; + m.register_pass(); m.register_pass(); m.run_passes(f); - compare_masks(*getMask(weights->output(0)), Mask({{2}, {}, {}, {}})); - compare_masks(*getMask(conv->output(0)), Mask({{}, {2}, {}, {}})); - compare_masks(*getMask(relu->output(0)), Mask({{}, {2}, {}, {}})); - compare_masks(*getMask(sub_const), Mask({{2}, {}, {}})); - compare_masks(*getMask(mul_const), Mask({{2}, {}, {}})); - compare_masks(*getMask(weights2->output(0)), Mask({{}, {2}, {}, {}})); + compare_masks(*getMask(weights->output(0)), Mask({{1, 2, 3, 4}, {}, {}, {}})); + compare_masks(*getMask(conv->output(0)), Mask({{}, {1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(relu->output(0)), Mask({{}, {1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(add_const), Mask({{}, {1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(sub_const), Mask({{1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(mul_const), Mask({{}, {1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(add->output(0)), Mask({{}, {1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(sub->output(0)), Mask({{}, {1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(mul->output(0)), Mask({{}, {1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(weights2.get_node_shared_ptr()->output(0)), Mask({{}, {1, 2, 3, 4}, {}, {}})); compare_masks(*getMask(conv2->output(0)), Mask({{}, {}, {}, {}})); } @@ -148,6 +175,7 @@ TEST(TransformationTests, PropagateMasksDynamicConvolution) { auto f = std::make_shared(NodeVector{conv2}, ParameterVector{input}); pass::Manager m; + m.register_pass(); m.register_pass(); 
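+ // The template arguments of register_pass<>() are not visible here. Judging by
+ // the mask checks in these tests, the pipeline presumably registers a
+ // mask-initialization pass (e.g. pass::InitMasks, the registration added by this
+ // change) followed by pass::PropagateMasks before run_passes() is invoked.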
m.run_passes(f); @@ -182,6 +210,7 @@ TEST(TransformationTests, PropagateMasksDynamicGroupConvolution) { auto f = std::make_shared(NodeVector{conv2}, ParameterVector{input}); pass::Manager m; + m.register_pass(); m.register_pass(); m.run_passes(f); } @@ -199,15 +228,16 @@ TEST(TransformationTests, PropagateMasksEmpty) { auto sub_const = create_constant_with_zeros(Shape{6, 1, 1}, {{1, 2, 3}, {}, {}}); auto sub = std::make_shared(relu, sub_const); - auto mul_const = create_constant_with_zeros(Shape{6, 1, 1}, {{1, 2}, {}, {}}); - auto mul = std::make_shared(sub, mul_const); + auto add_const = create_constant_with_zeros(Shape{6, 1, 1}, {{1, 2}, {}, {}}); + auto add = std::make_shared(sub, add_const); auto weights2 = opset5::Constant::create(element::f32, weights_shape2, {0}); - auto conv2 = std::make_shared(mul, weights2, Strides(2, 1), + auto conv2 = std::make_shared(add, weights2, Strides(2, 1), CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); auto f = std::make_shared(NodeVector{conv2}, ParameterVector{input}); pass::Manager m; + m.register_pass(); m.register_pass(); m.run_passes(f); @@ -215,11 +245,55 @@ TEST(TransformationTests, PropagateMasksEmpty) { compare_masks(*getMask(conv->output(0)), Mask({{}, {}, {}, {}})); compare_masks(*getMask(relu->output(0)), Mask({{}, {}, {}, {}})); compare_masks(*getMask(sub_const), Mask({{}, {}, {}})); - compare_masks(*getMask(mul_const), Mask({{}, {}, {}})); + compare_masks(*getMask(add_const), Mask({{}, {}, {}})); compare_masks(*getMask(weights2->output(0)), Mask({{}, {}, {}, {}})); compare_masks(*getMask(conv2->output(0)), Mask({{}, {}, {}, {}})); } +TEST(TransformationTests, PropagateMaskPassThrough) { + Shape input_shape{1, 3, 64, 64}; + Shape weights_shape{8, 3, 3, 3}; + Shape weight_shape2{3, 8, 3, 3}; + auto input = std::make_shared(element::f32, input_shape); + input->set_friendly_name("input"); + auto weights_const_1 = create_constant_with_zeros(weights_shape, {{1, 2, 3}, {}, {}, {}}); + weights_const_1.get_node_shared_ptr()->set_friendly_name("weights_1"); + + auto conv_1 = std::make_shared(input, weights_const_1, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + conv_1->set_friendly_name("conv_1"); + + // Adding a couple of PassThrough operations + auto relu = std::make_shared(conv_1); + relu->set_friendly_name("relu"); + + auto clamp = std::make_shared(relu, 0, 6); + clamp->set_friendly_name("clamp"); + + auto pads_begin = opset5::Constant::create(element::i32, Shape{4}, {0, 0, 1, 1}); + auto pads_end = opset5::Constant::create(element::i32, Shape{4}, {0, 0, 2, 2}); + auto pad = std::make_shared(clamp, pads_begin, pads_end, op::PadMode::CONSTANT); + auto max_pool = std::make_shared(pad, Strides{1, 1}, + Shape{0, 0}, Shape{1, 1}, Shape{4, 4}); + max_pool->set_friendly_name("max_pool"); + + auto weights2 = opset5::Constant::create(element::f32, weight_shape2, {0}); + auto conv2 = std::make_shared(max_pool, weights2, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + auto f = std::make_shared(NodeVector{conv2}, ParameterVector{input}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + + compare_masks(*getMask(weights_const_1.get_node_shared_ptr()->output(0)), Mask({{1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(conv_1->output(0)), Mask({{}, {1, 2, 3}, {}, {}})); + compare_masks(*getMask(relu->output(0)), Mask({{}, {1, 2, 3}, {}, {}})); + compare_masks(*getMask(clamp->output(0)), Mask({{}, {1, 2, 3}, {}, {}})); + 
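+ // A Mask lists the pruned (zeroed) channel indices per tensor dimension: conv_1's
+ // weights are masked on output-channel dimension 0 ({1, 2, 3}), so the same index
+ // set is expected on channel dimension 1 of every pass-through activation
+ // (ReLU, Clamp, Pad, MaxPool), which is what these checks assert.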
compare_masks(*getMask(max_pool->output(0)), Mask({{}, {1, 2, 3}, {}, {}})); +} + TEST(TransformationTests, PropagateMasksHardDependencies) { Shape input_shape{1, 3, 3, 3}; @@ -280,4 +354,344 @@ TEST(TransformationTests, PropagateMasksHardDependencies) { // compare_masks(*getMask(relu), Mask({{}, {0, 1, 2, 3, 4, 5}, {}, {}})); // compare_masks(*getMask(weights2), Mask({{}, {0, 1, 2, 3, 4, 5}, {}, {}})); // compare_masks(*getMask(conv2), Mask({{}, {}, {}, {}})); -} \ No newline at end of file +} + +TEST(TransformationTests, PropagateMasksQuantizedGroupConvolution) { + Shape input_shape{1, 3, 64, 64}; + Shape weights_shape{8, 3, 3, 3}; + Shape weights_group_shape{8, 1, 3, 3}; + Shape weight_shape2{3, 8, 3, 3}; + auto input = std::make_shared(element::f32, input_shape); + input->set_friendly_name("input"); + + auto weights1 = create_constant_with_zeros(weights_shape, {{0, 1, 2, 3}, {}, {}, {}}); + auto conv1 = std::make_shared(input, weights1, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + auto weights_group = opset5::Constant::create(element::i8, weights_group_shape, {0}); + weights_group->set_friendly_name("weights_group"); + + auto convert = std::make_shared(weights_group, element::f32); + convert->set_friendly_name("convert"); + + auto sub_const = create_constant_with_zeros(Shape{8, 1, 1, 1}, {{0, 1, 2, 3}, {}, {}, {}}); + + auto sub = std::make_shared(convert, sub_const); + sub->set_friendly_name("sub"); + + auto mul_const = create_constant_with_zeros(Shape{8, 1, 1, 1}, {{0, 1, 2, 3, 4}, {}, {}, {}}); + auto mul = std::make_shared(sub, mul_const); + mul->set_friendly_name("mul"); + + auto reshape = std::make_shared(mul, opset5::Constant::create(element::i64, Shape{5}, {8, 1, 1, 3, 3}), false); + + auto conv_group = std::make_shared(conv1, reshape, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto add_const = create_constant_with_zeros(Shape{1, 8, 1, 1}, {{}, {0, 1, 2, 3, 4}, {}, {}});; + auto add = std::make_shared(conv_group, add_const); + add->set_friendly_name("add"); + + auto weights_2 = opset5::Constant::create(element::f32, weight_shape2, {0}); + auto conv2 = std::make_shared(add, weights_2, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + auto f = std::make_shared(NodeVector{conv2}, ParameterVector{input}); + + pass::Manager m; + m.register_pass(); + m.run_passes(f); + + compare_masks(*getMask(weights1.get_node_shared_ptr()->output(0)), Mask({{0 , 1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(conv1->output(0)), Mask({{}, {0 , 1, 2, 3}, {}, {}})); + + compare_masks(*getMask(weights_group->output(0)), Mask({{0 , 1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(sub->output(0)), Mask({{0 , 1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(sub_const.get_node_shared_ptr()->output(0)), Mask({{0 , 1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(mul->output(0)), Mask({{0 , 1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(mul_const.get_node_shared_ptr()->output(0)), Mask({{0 , 1, 2, 3}, {}, {}, {}})); + + compare_masks(*getMask(reshape->output(0)), Mask({{0 , 1, 2, 3}, {}, {}, {}, {}})); + + compare_masks(*getMask(conv_group->output(0)), Mask({{}, {0 , 1, 2, 3}, {}, {}})); + + compare_masks(*getMask(conv2->output(0)), Mask({{}, {}, {}, {}})); + compare_masks(*getMask(weights_2->output(0)), Mask({{}, {0, 1, 2, 3}, {}, {}})); +} + +TEST(TransformationTests, PropagateMasksFakeQuantizePerTensor) { + Shape input_shape{1, 3, 64, 64}; + Shape weights_shape{8, 3, 3, 3}; + Shape weight_shape2{3, 
8, 3, 3}; + auto input = std::make_shared(element::f32, input_shape); + input->set_friendly_name("input"); + auto weights_1 = opset5::Constant::create(element::i8, weights_shape, {0}); + weights_1->set_friendly_name("weights_int8_const"); + + auto convert = std::make_shared(weights_1, element::f32); + convert->set_friendly_name("convert"); + + auto sub_const = create_constant_with_zeros(Shape{8, 1, 1, 1}, {{0, 1, 2, 3}, {}, {}, {}}); + + auto sub = std::make_shared(convert, sub_const); + sub->set_friendly_name("sub"); + + auto mul_const = create_constant_with_zeros(Shape{8, 1, 1, 1}, {{0, 1, 2, 3, 4}, {}, {}, {}}); + auto mul = std::make_shared(sub, mul_const); + mul->set_friendly_name("mul"); + + auto conv1 = std::make_shared(input, mul, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + conv1->set_friendly_name("conv1"); + + auto add_const = create_constant_with_zeros(Shape{1, 8, 1, 1}, {{}, {0, 1, 2, 3, 4}, {}, {}});; + auto add = std::make_shared(conv1, add_const); + add->set_friendly_name("add"); + + auto input_low = opset5::Constant::create(element::f32, Shape{1}, {0}); + auto input_high = opset5::Constant::create(element::f32, Shape{1, 1, 1, 1}, {20}); + auto output_low = opset5::Constant::create(element::f32, Shape{}, {1}); + auto output_high = opset5::Constant::create(element::f32, Shape{}, {10}); + auto fq = std::make_shared(add, input_low, input_high, output_low, output_high, 8); + + auto weights_2 = opset5::Constant::create(element::f32, weight_shape2, {0}); + auto conv2 = std::make_shared(fq, weights_2, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + auto f = std::make_shared(NodeVector{conv2}, ParameterVector{input}); + + pass::Manager m; + m.register_pass(); + m.run_passes(f); + + compare_masks(*getMask(weights_1->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + compare_masks(*getMask(sub_const.get_node_shared_ptr()->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + compare_masks(*getMask(sub->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + + compare_masks(*getMask(mul_const.get_node_shared_ptr()->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + compare_masks(*getMask(mul->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + + compare_masks(*getMask(conv1->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + + compare_masks(*getMask(add_const.get_node_shared_ptr()->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(add->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + + compare_masks(*getMask(fq->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + + compare_masks(*getMask(weights_2->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(conv2->output(0)), Mask({{}, {}, {}, {}})); +} + +TEST(TransformationTests, PropagateMasksFakeQuantizePerChannel) { + Shape input_shape{1, 3, 64, 64}; + Shape weights_shape{8, 3, 3, 3}; + Shape weight_shape2{3, 8, 3, 3}; + auto input = std::make_shared(element::f32, input_shape); + input->set_friendly_name("input"); + auto weights_1 = opset5::Constant::create(element::i8, weights_shape, {0}); + weights_1->set_friendly_name("weights_int8_const"); + + auto convert = std::make_shared(weights_1, element::f32); + convert->set_friendly_name("convert"); + + auto sub_const = create_constant_with_zeros(Shape{8, 1, 1, 1}, {{0, 1, 2, 3}, {}, {}, {}}); + + auto sub = std::make_shared(convert, sub_const); + sub->set_friendly_name("sub"); + + auto mul_const = create_constant_with_zeros(Shape{8, 1, 1, 1}, {{0, 1, 2, 3, 4}, {}, {}, 
{}}); + auto mul = std::make_shared(sub, mul_const); + mul->set_friendly_name("mul"); + + auto conv1 = std::make_shared(input, mul, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + conv1->set_friendly_name("conv1"); + + auto add_const = create_constant_with_zeros(Shape{1, 8, 1, 1}, {{}, {0, 1, 2, 3, 4}, {}, {}});; + auto add = std::make_shared(conv1, add_const); + add->set_friendly_name("add"); + + auto input_low = opset5::Constant::create(element::f32, Shape{1, 8, 1, 1}, {0}); + auto input_high = opset5::Constant::create(element::f32, Shape{1, 8, 1, 1}, {20}); + auto output_low = opset5::Constant::create(element::f32, Shape{8, 1, 1}, {1}); + auto output_high = opset5::Constant::create(element::f32, Shape{8, 1, 1}, {10}); + auto fq = std::make_shared(add, input_low, input_high, output_low, output_high, 8); + + auto weights_2 = opset5::Constant::create(element::f32, weight_shape2, {0}); + auto conv2 = std::make_shared(fq, weights_2, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + auto f = std::make_shared(NodeVector{conv2}, ParameterVector{input}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + + compare_masks(*getMask(weights_1->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + compare_masks(*getMask(sub_const.get_node_shared_ptr()->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + compare_masks(*getMask(sub->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + + compare_masks(*getMask(mul_const.get_node_shared_ptr()->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + compare_masks(*getMask(mul->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + + compare_masks(*getMask(conv1->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + + compare_masks(*getMask(add_const.get_node_shared_ptr()->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(add->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + + compare_masks(*getMask(fq->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + + compare_masks(*getMask(weights_2->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(conv2->output(0)), Mask({{}, {}, {}, {}})); + + compare_masks(*getMask(fq->input(1).get_source_output()), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(fq->input(2).get_source_output()), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(fq->input(3).get_source_output()), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(fq->input(4).get_source_output()), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); +} + +TEST(TransformationTests, TestConcatMaskPropagation) { + Shape input_shape{1, 3, 64, 64}; + Shape weights_shape1{8, 3, 3, 3}; + Shape weights_shape2{16, 3, 3, 3}; + Shape weights_shape3{8, 3, 3, 3}; + + Shape weight_shape_out_conv{3, 32, 3, 3}; + auto input = std::make_shared(element::f32, input_shape); + auto weights_1 = create_constant_with_zeros(weights_shape1, {{0, 1, 2, 3}, {}, {}, {}}); + auto conv1 = std::make_shared(input, weights_1, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto weights_2 = create_constant_with_zeros(weights_shape2, {{7, 8, 9, 10}, {}, {}, {}}); + auto conv2 = std::make_shared(input, weights_2, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto weights_3 = create_constant_with_zeros(weights_shape3, {{4, 5, 6, 7}, {}, {}, {}}); + auto conv3 = std::make_shared(input, weights_3, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), 
Strides(2, 1)); + + auto concat = std::make_shared(OutputVector{conv1->output(0), conv2->output(0), conv3->output(0)}, 1); + + auto weights_out_conv = create_constant_with_zeros(weight_shape_out_conv, {{}, {}, {}, {}}); + auto conv_out = std::make_shared(concat, weights_out_conv, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto f = std::make_shared(NodeVector{conv_out}, ParameterVector{input}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + + compare_masks(*getMask(weights_1.get_node_shared_ptr()->output(0)), Mask({{0, 1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(conv1->output(0)), Mask({{}, {0, 1, 2, 3}, {}, {}})); + + compare_masks(*getMask(weights_2.get_node_shared_ptr()->output(0)), Mask({{7, 8, 9, 10}, {}, {}, {}})); + compare_masks(*getMask(conv2->output(0)), Mask({{}, {7, 8, 9, 10}, {}, {}})); + + compare_masks(*getMask(weights_3.get_node_shared_ptr()->output(0)), Mask({{4, 5, 6, 7}, {}, {}, {}})); + compare_masks(*getMask(conv3->output(0)), Mask({{}, {4, 5, 6, 7}, {}, {}})); + + compare_masks(*getMask(concat->output(0)), Mask({{}, {0, 1, 2, 3, 15, 16, 17, 18, 28, 29, 30, 31}, {}, {}})); + compare_masks(*getMask(weights_out_conv.get_node_shared_ptr()->output(0)), Mask({{}, {0, 1, 2, 3, 15, 16, 17, 18, 28, 29, 30, 31}, {}, {}})); +} + + +TEST(TransformationTests, TestConcatMaskPropagationUp) { + Shape input_shape{1, 3, 64, 64}; + Shape weights_shape1{8, 3, 3, 3}; + Shape weights_shape2{16, 3, 3, 3}; + Shape weights_shape3{8, 3, 3, 3}; + + Shape weight_shape_out_conv{3, 32, 3, 3}; + auto input = std::make_shared(element::f32, input_shape); + auto weights_1 = create_constant_with_zeros(weights_shape1, {{0, 1, 2, 3, 4, 5}, {}, {}, {}}); + auto conv1 = std::make_shared(input, weights_1, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto weights_2 = create_constant_with_zeros(weights_shape2, {{7, 8, 9, 10}, {}, {}, {}}); + auto conv2 = std::make_shared(input, weights_2, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto weights_3 = create_constant_with_zeros(weights_shape3, {{2, 3, 4, 5, 6, 7}, {}, {}, {}}); + auto conv3 = std::make_shared(input, weights_3, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto concat = std::make_shared(OutputVector{conv1->output(0), conv2->output(0), conv3->output(0)}, 1); + + auto add_const = create_constant_with_zeros(Shape{1, 32, 1, 1}, {{}, {0, 1, 2, 3, 15, 16, 17, 18, 28, 29, 30, 31}, {}, {}}); + auto add = std::make_shared(concat, add_const); + + auto weights_out_conv = create_constant_with_zeros(weight_shape_out_conv, {{}, {}, {}, {}}); + auto conv_out = std::make_shared(add, weights_out_conv, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto f = std::make_shared(NodeVector{conv_out}, ParameterVector{input}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + + compare_masks(*getMask(weights_1.get_node_shared_ptr()->output(0)), Mask({{0, 1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(conv1->output(0)), Mask({{}, {0, 1, 2, 3}, {}, {}})); + + compare_masks(*getMask(weights_2.get_node_shared_ptr()->output(0)), Mask({{7, 8, 9, 10}, {}, {}, {}})); + compare_masks(*getMask(conv2->output(0)), Mask({{}, {7, 8, 9, 10}, {}, {}})); + + compare_masks(*getMask(weights_3.get_node_shared_ptr()->output(0)), Mask({{4, 5, 6, 7}, {}, {}, {}})); + compare_masks(*getMask(conv3->output(0)), Mask({{}, {4, 5, 6, 7}, {}, 
{}})); + + compare_masks(*getMask(add_const.get_node_shared_ptr()->output(0)), Mask({{}, {0, 1, 2, 3, 15, 16, 17, 18, 28, 29, 30, 31}, {}, {}})); + compare_masks(*getMask(add->output(0)), Mask({{}, {0, 1, 2, 3, 15, 16, 17, 18, 28, 29, 30, 31}, {}, {}})); + + + compare_masks(*getMask(concat->output(0)), Mask({{}, {0, 1, 2, 3, 15, 16, 17, 18, 28, 29, 30, 31}, {}, {}})); + compare_masks(*getMask(weights_out_conv.get_node_shared_ptr()->output(0)), Mask({{}, {0, 1, 2, 3, 15, 16, 17, 18, 28, 29, 30, 31}, {}, {}})); +} + + +TEST(TransformationTests, TestConcatMaskPropagationUpEmpty) { + Shape input_shape{1, 3, 64, 64}; + Shape weights_shape1{8, 3, 3, 3}; + Shape weights_shape2{16, 3, 3, 3}; + Shape weights_shape3{8, 3, 3, 3}; + + Shape weight_shape_out_conv{3, 32, 3, 3}; + auto input = std::make_shared(element::f32, input_shape); + auto weights_1 = create_constant_with_zeros(weights_shape1, {{0, 1, 2, 3, 4, 5}, {}, {}, {}}); + auto conv1 = std::make_shared(input, weights_1, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto weights_2 = create_constant_with_zeros(weights_shape2, {{7, 8, 9, 10}, {}, {}, {}}); + auto conv2 = std::make_shared(input, weights_2, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto weights_3 = create_constant_with_zeros(weights_shape3, {{2, 3, 4, 5, 6, 7}, {}, {}, {}}); + auto conv3 = std::make_shared(input, weights_3, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto concat = std::make_shared(OutputVector{conv1->output(0), conv2->output(0), conv3->output(0)}, 1); + + auto add_const = create_constant_with_zeros(Shape{1, 32, 1, 1}, {{}, {0, 1, 2, 3, 15, 16, 17, 18, 28, 29, 30, 31}, {}, {}}); + auto add = std::make_shared(concat, add_const); + + auto f = std::make_shared(NodeVector{add}, ParameterVector{input}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + + compare_masks(*getMask(weights_1.get_node_shared_ptr()->output(0)), Mask({{}, {}, {}, {}})); + compare_masks(*getMask(conv1->output(0)), Mask({{}, {}, {}, {}})); + + compare_masks(*getMask(weights_2.get_node_shared_ptr()->output(0)), Mask({{}, {}, {}, {}})); + compare_masks(*getMask(conv2->output(0)), Mask({{}, {}, {}, {}})); + + compare_masks(*getMask(weights_3.get_node_shared_ptr()->output(0)), Mask({{}, {}, {}, {}})); + compare_masks(*getMask(conv3->output(0)), Mask({{}, {}, {}, {}})); + + compare_masks(*getMask(add_const.get_node_shared_ptr()->output(0)), Mask({{}, {}, {}, {}})); + compare_masks(*getMask(add->output(0)), Mask({{}, {}, {}, {}})); + + + compare_masks(*getMask(concat->output(0)), Mask({{}, {}, {}, {}})); +} diff --git a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/README.md b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/README.md index 1f21dd1c07f..07b50de8409 100644 --- a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/README.md +++ b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/README.md @@ -17,6 +17,7 @@ Outcome of a build is a `subgrpahsDumper` binary located in building artifacts f ## Running The tool takes two command line parameters: * `--input_folders` - Required. Comma separated paths to the input folders with IRs +* `--local_cache` - Optional. Comma separated paths to the local cache folders with IRs. * `--output_folder` - Required. Path to the output folders where to serialize IRs * `--path_regex` - Optional. 
regular expression to be applied in input folders recursive discovery * `--constants_size_threshold` - Optional. Maximum size of constant in megabytes to be serialized. diff --git a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/include/gflag_config.hpp b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/include/gflag_config.hpp index c37e45c445b..13ad0007de7 100644 --- a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/include/gflag_config.hpp +++ b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/include/gflag_config.hpp @@ -9,6 +9,7 @@ static const char help_message[] = "Print a usage message."; static const char input_folders_message[] = "Required. Comma separated paths to the input folders with IRs"; +static const char local_cache_message[] = "Optional. Comma separated paths to the local cache folders with IRs"; static const char output_folder_message[] = "Required. Path to the output folders where to serialize IRs"; static const char path_regex_message[] = "Optional. regular expression to be applied in input " "folders recursive discovery"; @@ -21,7 +22,8 @@ static const char eliminate_dynamism_message[] = "Optional. If specified dynamic "and replaced by propagated upper bound values (if possible)"; DEFINE_bool(h, false, help_message); -DEFINE_string(input_folders, ".", input_folders_message); +DEFINE_string(input_folders, "", input_folders_message); +DEFINE_string(local_cache, ".", local_cache_message); DEFINE_string(output_folder, "output", output_folder_message); DEFINE_string(path_regex, ".*", output_folder_message); DEFINE_double(constants_size_threshold, 1., constants_size_threshold_message); @@ -37,6 +39,7 @@ static void showUsage() { std::cout << "\n"; std::cout << " -h " << help_message << "\n"; std::cout << " --input_folders \"\" " << input_folders_message << "\n"; + std::cout << " --local_cache \"\" " << local_cache_message << "\n"; std::cout << " --output_folder \"\" " << output_folder_message << "\n"; std::cout << " --path_regex \"\" " << path_regex_message << "\n"; std::cout << " --constants_size_threshold \"\" " << constants_size_threshold_message << "\n"; diff --git a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp index 0bd42bf169d..16f8f55a98b 100644 --- a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp +++ b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp @@ -19,17 +19,8 @@ #include #include -int main(int argc, char *argv[]) { - uint8_t ret_code = 0; - - gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); - if (FLAGS_h) { - showUsage(); - return 0; - } - SubgraphsDumper::ClonersMap::constant_size_threshold_mb = FLAGS_constants_size_threshold; +std::vector findModelsInDirs(const std::vector &dirs) { std::vector input_folder_content; - std::vector dirs = CommonTestUtils::splitStringByDelimiter(FLAGS_input_folders); for (const auto &dir : dirs) { if (!CommonTestUtils::directoryExists(dir)) { std::string msg = "Input directory (" + dir + ") doesn't not exist!"; throw std::runtime_error(msg); } @@ -51,9 +42,13 @@ int main(int argc, char *argv[]) { std::string msg = "Output directory (" + FLAGS_output_folder + ") doesn't not exist!"; throw std::runtime_error(msg); } + return models; +} +void cacheModels(std::unique_ptr &cache, + uint8_t& ret_code, + const std::vector& models) { auto ie = InferenceEngine::Core(); - auto cache 
= SubgraphsDumper::OPCache::make_cache(); time_t rawtime; struct tm *timeinfo; char buffer[20]; @@ -92,6 +87,27 @@ int main(int argc, char *argv[]) { } } } +} + + +int main(int argc, char *argv[]) { + uint8_t ret_code = 0; + + gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); + if (FLAGS_h) { + showUsage(); + return 0; + } + SubgraphsDumper::ClonersMap::constant_size_threshold_mb = FLAGS_constants_size_threshold; + + std::vector local_cache_dirs = CommonTestUtils::splitStringByDelimiter(FLAGS_local_cache); + std::vector dirs = CommonTestUtils::splitStringByDelimiter(FLAGS_input_folders); + auto cachedOps = findModelsInDirs(local_cache_dirs); + auto models = findModelsInDirs(dirs); + + auto cache = SubgraphsDumper::OPCache::make_cache(); + cacheModels(cache, ret_code, cachedOps); + cacheModels(cache, ret_code, models); cache->serialize_cached_ops(FLAGS_output_folder); return ret_code; diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/config.cpp index 8dab56fdafa..e3f0adb1cb6 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/config.cpp @@ -42,18 +42,7 @@ namespace { }; const std::vector> AutoConfigs = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_NUMA}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, "8"}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_BIND_THREAD, InferenceEngine::PluginConfigParams::NO}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_BIND_THREAD, InferenceEngine::PluginConfigParams::YES}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT, "10"}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}} }; INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, CorrectConfigTests, @@ -93,22 +82,14 @@ namespace { }; const std::vector> autoinconfigs = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, "OFF"}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_BIND_THREAD, "OFF"}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT, "NAN"}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, "OFF"}} }; const std::vector> multiconf = { {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}} }; - const std::vector> autoconf = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}} - }; - INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, 
CorrectConfigAPITests, ::testing::Combine( ::testing::ValuesIn(netPrecisions), @@ -127,7 +108,7 @@ namespace { ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(autoconf)), + ::testing::ValuesIn(AutoConfigs)), CorrectConfigAPITests::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, IncorrectConfigTests, @@ -144,13 +125,6 @@ namespace { ::testing::ValuesIn(multiinconfigs)), IncorrectConfigTests::getTestCaseName); - INSTANTIATE_TEST_CASE_P(smoke_Auto_BehaviorTests, IncorrectConfigTests, - ::testing::Combine( - ::testing::ValuesIn(netPrecisions), - ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(autoinconfigs)), - IncorrectConfigTests::getTestCaseName); - INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, IncorrectConfigAPITests, ::testing::Combine( ::testing::ValuesIn(netPrecisions), @@ -166,10 +140,10 @@ namespace { IncorrectConfigAPITests::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_Auto_BehaviorTests, IncorrectConfigAPITests, - ::testing::Combine( - ::testing::ValuesIn(netPrecisions), - ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(autoinconfigs)), - IncorrectConfigAPITests::getTestCaseName); + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_AUTO), + ::testing::ValuesIn(autoinconfigs)), + IncorrectConfigAPITests::getTestCaseName); } // namespace \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_config.cpp index 80914183c39..3db325234cf 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_config.cpp @@ -51,20 +51,7 @@ namespace { }; const std::vector> AutoConfigs = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, - InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, - InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_NUMA}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, "8"}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_BIND_THREAD, InferenceEngine::PluginConfigParams::NO}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_BIND_THREAD, InferenceEngine::PluginConfigParams::YES}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT, "10"}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}} }; INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, InferConfigTests, diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_input.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_input.cpp index 771794774a6..eb988c0043a 100644 --- 
a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_input.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_input.cpp @@ -26,9 +26,7 @@ namespace { }; const std::vector> autoConfigs = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}} }; INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, InferRequestInputTests, diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_output.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_output.cpp index 78be1513314..a77253f8900 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_output.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_output.cpp @@ -22,9 +22,7 @@ namespace { }; const std::vector> autoConfigs = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}} }; INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, InferRequestOutputTests, diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/perf_counters.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/perf_counters.cpp index 2e2aab976c6..8bf1a8f95ba 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/perf_counters.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/perf_counters.cpp @@ -14,10 +14,6 @@ namespace { {{ MULTI_CONFIG_KEY(DEVICE_PRIORITIES) , CommonTestUtils::DEVICE_CPU}} }; - const std::vector> Autoconfigs = { - {{ AUTO_CONFIG_KEY(DEVICE_LIST) , CommonTestUtils::DEVICE_CPU}} - }; - INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, PerfCountersTest, ::testing::Combine( ::testing::Values(InferenceEngine::Precision::FP32), @@ -32,11 +28,4 @@ namespace { ::testing::ValuesIn(Multiconfigs)), PerfCountersTest::getTestCaseName); - INSTANTIATE_TEST_CASE_P(smoke_Auto_BehaviorTests, PerfCountersTest, - ::testing::Combine( - ::testing::Values(InferenceEngine::Precision::FP32), - ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(Autoconfigs)), - PerfCountersTest::getTestCaseName); - } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/test_plugin.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/test_plugin.cpp index 95208f0a092..da76bc26498 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/test_plugin.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/test_plugin.cpp @@ -37,9 +37,7 @@ namespace { }; const std::vector> AutoConfigsInputOutput = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST, CommonTestUtils::DEVICE_CPU}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST, CommonTestUtils::DEVICE_CPU}, - 
{InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST, CommonTestUtils::DEVICE_CPU}} }; const std::vector> configsOutput = { diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/activation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/activation.cpp index 76607e55f76..6e762ed562c 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/activation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/activation.cpp @@ -69,8 +69,8 @@ const std::map>> intActivationTy }; const std::map>> activationParamTypes = { - {PReLu, {{-0.01f}}}, - {LeakyRelu, {{0.01f}}} + {PReLu, {{}}}, // Slope will be filled with increasing values from -10 to match slope input shape + {LeakyRelu, {{0.01f}}} }; std::map, std::vector>> basic = { @@ -81,16 +81,12 @@ std::map, std::vector>> basic = { std::map, std::vector>> preluBasic = { {{1, 50}, {{1}, {50}}}, {{1, 128}, {{1}, {128}}}, - {{20, 128}, {{128}}}, - {{1, 20, 128}, {{1}, {20}}}, - {{1, 20, 128, 128}, {{1}, {20}}}, - {{1, 20, 20, 128, 128}, {{1}, {20}}} - // according to spec second input for PRelu must be 1D and must be broadcastabe per channel - // at this moment these cases unsupported - // {{20, 128}, {{20}, {20, 128}}}, - // {{1, 20, 128}, {{128}, {20, 128}}}, - // {{1, 20, 128, 128}, {{128}, {128, 128}, {20, 128, 128}}}, - // {{1, 20, 20, 128, 128}, {{128}, {128, 128}, {20, 128, 128}, {20, 20, 128, 128}}}, + + // Broadcast check + {{3, 2}, {{1}, {2}, {3, 2}}}, + {{3, 2, 5}, {{1}, {2}, {5}, {2, 5}, {3, 1, 5}, {1, 2, 1}, {1, 1, 5}, {3, 1, 1}, {3, 2, 5}}}, + {{2, 1, 2}, {{2}, {2, 1, 1}}}, + {{3, 2, 5, 7}, {{1}, {7}, {2}, {5, 7}, {2, 5, 7}, {2, 1, 1}, {1, 2, 1, 1}, {3, 2, 1, 1}, {3, 2, 5, 7}}}, }; const auto basicCases = ::testing::Combine( @@ -127,11 +123,9 @@ const auto basicIntegerOperations = ::testing::Combine( ); INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic, ActivationLayerTest, basicCases, ActivationLayerTest::getTestCaseName); -INSTANTIATE_TEST_CASE_P(smoke_Integer_Activation_Basic, ActivationLayerTest, basicIntegerOperations, ActivationLayerTest::getTestCaseName); -INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic_Prelu, ActivationLayerTest, basicPreluCases, ActivationLayerTest::getTestCaseName); - -INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic, ActivationParamLayerTest, basicPreluCases, ActivationLayerTest::getTestCaseName); - INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic, ActivationDynamicLayerTest, basicCases, ActivationLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_Integer_Activation_Basic, ActivationLayerTest, basicIntegerOperations, ActivationLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic_Prelu_Const, ActivationLayerTest, basicPreluCases, ActivationLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic_Prelu_Param, ActivationParamLayerTest, basicPreluCases, ActivationLayerTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/memory.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/memory.cpp index 9ab20c3eda4..062ea0cad91 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/memory.cpp +++ 
b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/memory.cpp @@ -38,7 +38,7 @@ INSTANTIATE_TEST_CASE_P(smoke_MemoryTest, MemoryTest, ::testing::ValuesIn(iterationCount), ::testing::ValuesIn(inShapes), ::testing::ValuesIn(inputPrecisions), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(CommonTestUtils::DEVICE_CPU, "HETERO:CPU")), MemoryTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index f59f8d954e0..3d4678bf589 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -35,8 +35,6 @@ std::vector disabledTestPatterns() { R"(.*ReluShapeOfSubgraphTest.*)", // TODO: Issue: 34805 R"(.*ActivationLayerTest.*Ceiling.*)", - // TODO: Issue: 32032 - R"(.*ActivationParamLayerTest.*)", // TODO: Issue: 43314 R"(.*Broadcast.*mode=BIDIRECTIONAL.*inNPrec=BOOL.*)", // TODO: Issue 43417 sporadic issue, looks like an issue in test, reproducible only on Windows platform @@ -69,8 +67,12 @@ std::vector disabledTestPatterns() { // TODO: 55656 AUTO plugin and QueryNetwork R"(.*CoreThreading.*smoke_QueryNetwork.*targetDevice=AUTO_config.*)", + // Unsupported config KEY_ENFORCE_BF16 for AUTO plugin + R"(.*smoke_SetBlobOfKindAUTO.*SetBlobOfKindTest.CompareWithRefs.*)", // reference doesn't cover I8, U8 cases. Issue: 55842 R"(.*Gather7LayerTest.*netPRC=I8.*)", + // need to implement Export / Import + R"(.*IEClassImportExportTestP.*)" }; #ifdef __APPLE__ // TODO: Issue 55717 diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/pooling.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/pooling.cpp index 72dbe0d5e12..0df9c464c4f 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/pooling.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/pooling.cpp @@ -5,6 +5,7 @@ #include "ngraph_functions/builders.hpp" #include "test_utils/cpu_test_utils.hpp" #include "shared_test_classes/single_layer/pooling.hpp" +#include "test_utils/fusing_test_utils.hpp" using namespace InferenceEngine; using namespace CPUTestUtils; @@ -13,21 +14,24 @@ using namespace LayerTestsDefinitions; namespace CPULayerTestsDefinitions { typedef std::tuple< poolLayerTestParamsSet, - CPUSpecificParams + CPUSpecificParams, + fusingSpecificParams > poolLayerCpuTestParamsSet; class PoolingLayerCPUTest : public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase { + virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing { public: static std::string getTestCaseName(const testing::TestParamInfo& obj) { poolLayerTestParamsSet basicParamsSet; CPUSpecificParams cpuParams; - std::tie(basicParamsSet, cpuParams) = obj.param; + fusingSpecificParams fusingParams; + std::tie(basicParamsSet, cpuParams, fusingParams) = obj.param; std::ostringstream result; result << PoolingLayerTest::getTestCaseName(testing::TestParamInfo( basicParamsSet, 0)); result << CPUTestsBase::getTestCaseName(cpuParams); + result << CpuTestWithFusing::getTestCaseName(fusingParams); return result.str(); } @@ -36,7 +40,8 @@ protected: void SetUp() { poolLayerTestParamsSet basicParamsSet; CPUSpecificParams cpuParams; - std::tie(basicParamsSet, cpuParams) = 
this->GetParam(); + fusingSpecificParams fusingParams; + std::tie(basicParamsSet, cpuParams, fusingParams) = this->GetParam(); poolSpecificParams poolParams; std::vector inputShape; @@ -48,6 +53,7 @@ protected: } std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(postOpMgrPtr, fusedOps) = fusingParams; if (selectedType.empty()) { selectedType = getPrimitiveType(); @@ -133,7 +139,8 @@ INSTANTIATE_TEST_CASE_P(smoke_MaxPool_CPU_4D, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({1, 3, 64, 64})), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs))), + ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), + ::testing::Values(emptyFusingSpec)), PoolingLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_4D, PoolingLayerCPUTest, @@ -147,7 +154,8 @@ INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_4D, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({1, 4, 64, 64})), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs))), + ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), + ::testing::Values(emptyFusingSpec)), PoolingLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_4D_NotOptimized, PoolingLayerCPUTest, @@ -161,7 +169,8 @@ INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_4D_NotOptimized, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({1, 4, 64, 64})), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::Values(ref)), + ::testing::Values(ref), + ::testing::Values(emptyFusingSpec)), PoolingLayerCPUTest::getTestCaseName); const std::vector paramsMax5D = { @@ -200,7 +209,8 @@ INSTANTIATE_TEST_CASE_P(smoke_MaxPool_CPU_5D, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({1, 3, 16, 32, 32})), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs))), + ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), + ::testing::Values(emptyFusingSpec)), PoolingLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_5D, PoolingLayerCPUTest, @@ -214,7 +224,8 @@ INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_5D, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({1, 4, 32, 32, 32})), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs))), + ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), + ::testing::Values(emptyFusingSpec)), PoolingLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_5D_NotOptimized, PoolingLayerCPUTest, @@ -228,7 +239,58 @@ INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_5D_NotOptimized, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({1, 4, 16, 16, 16})), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::Values(ref)), + ::testing::Values(ref), + ::testing::Values(emptyFusingSpec)), + PoolingLayerCPUTest::getTestCaseName); + +/* === Fusing === */ + +const auto avx512_nhwc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx512"}, "jit_avx512"}; +const auto avx512_ndhwc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx512"}, "jit_avx512"}; + +const auto avx2_nhwc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx2"}, "jit_avx2"}; +const auto avx2_ndhwc = 
CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx2"}, "jit_avx2"}; + +const auto sse42_nhwc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_sse42"}, "jit_sse42"}; +const auto sse42_ndhwc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_sse42"}, "jit_sse42"}; + +const std::vector vecCpuConfigsFusing_4D = {sse42_nhwc, avx2_nhwc, avx512_nhwc}; +const std::vector vecCpuConfigsFusing_5D = {sse42_ndhwc, avx2_ndhwc, avx512_ndhwc}; + +std::vector fusingParamsSet { + emptyFusingSpec, + fusingFakeQuantizePerTensor, + fusingFakeQuantizePerChannel, +}; + +INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_4D_I8, PoolingLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(paramsAvg4D), + ::testing::Values(Precision::FP32), + ::testing::Values(Precision::I8), + ::testing::Values(Precision::FP32), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 4, 64, 64})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigsFusing_4D)), + ::testing::ValuesIn(fusingParamsSet)), + PoolingLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_5D_I8, PoolingLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(paramsAvg5D), + ::testing::Values(Precision::FP32), + ::testing::Values(Precision::I8), + ::testing::Values(Precision::FP32), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 4, 16, 16, 16})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigsFusing_5D)), + ::testing::ValuesIn(fusingParamsSet)), PoolingLayerCPUTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp index 0e5fc43d366..d55f2a98859 100644 --- a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp +++ b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp @@ -216,6 +216,13 @@ const auto fusingScaleShift = fusingSpecificParams{ std::make_shared(inpNode, constNode); }, "Add(PerChannel)"}}), {"Add"} }; +const auto fusingFakeQuantizePerTensor = fusingSpecificParams{ std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + auto localPrc = inpNode->get_element_type(); + ngraph::Shape newShape(inpNode->get_shape().size(), 1); + return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); + }, "FakeQuantize(PerTensor)"}}), {"FakeQuantize"} }; + const auto fusingFakeQuantizePerChannel = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ auto localPrc = inpNode->get_element_type(); diff --git a/inference-engine/tests/functional/plugin/gna/pass_tests/convert_matmul_to_pointwise_conv.cpp b/inference-engine/tests/functional/plugin/gna/pass_tests/convert_matmul_to_pointwise_conv.cpp new file mode 100644 index 00000000000..7e3d15174f3 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gna/pass_tests/convert_matmul_to_pointwise_conv.cpp @@ -0,0 +1,230 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include + +#include + +#include 
"common_test_utils/common_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" + +#include "ngraph_functions/pass/convert_prc.hpp" + +typedef std::tuple< + InferenceEngine::Precision, // Network Precision + std::string, // Target Device + std::map, // Configuration + std::vector // Input Shape +> convertMatmulToPointwiseConvParams; + +typedef std::tuple< + InferenceEngine::Precision, // Network Precision + std::string, // Target Device + std::map, // Configuration + std::vector, // Input Shape + std::pair // Input Min and Max +> convertMatmulToPointwiseConvWithFqParams; + +namespace LayerTestsDefinitions { + +class ConvertMatmulToPointwiseConv : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision netPrecision; + std::string targetDevice; + std::map configuration; + std::vector inputShape; + std::tie(netPrecision, targetDevice, configuration, inputShape) = obj.param; + + std::ostringstream result; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetDevice << "_"; + for (auto const& configItem : configuration) { + result << "_configItem=" << configItem.first << "_" << configItem.second; + } + result << "_inputShape=" << CommonTestUtils::vec2str(inputShape); + return result.str(); + } + + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const { + InferenceEngine::Blob::Ptr blob = make_blob_with_precision(info.getTensorDesc()); + blob->allocate(); + + auto* rawBlobDataPtr = blob->buffer().as(); + std::vector values = CommonTestUtils::generate_float_numbers(blob->size(), -0.2f, 0.2f); + for (size_t i = 0; i < blob->size(); i++) { + rawBlobDataPtr[i] = values[i]; + } + return blob; + } + +protected: + void SetUp() override { + InferenceEngine::Precision netPrecision; + std::vector inputShape; + std::tie(netPrecision, targetDevice, configuration, inputShape) = this->GetParam(); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + + size_t batch = inputShape[inputShape.size() - 2]; + size_t elemNum = inputShape[inputShape.size() - 1]; + std::vector weights = CommonTestUtils::generate_float_numbers(elemNum * elemNum, -0.1f, 0.1f); + auto weightsNode = std::make_shared(ngPrc, ngraph::Shape{elemNum, elemNum}, weights); + auto matmul = ngraph::builder::makeMatMul(params[0], weightsNode, false, true); + + auto bias = ngraph::builder::makeConstant(ngPrc, std::vector{1, batch, 1}, std::vector{1.0f}); + auto add = ngraph::builder::makeEltwise(matmul, bias, ngraph::helpers::EltwiseTypes::ADD); + + auto pattern = std::make_shared(ngraph::element::Type_t::i64, + ngraph::Shape{ inputShape.size() }, inputShape); + auto reshape = std::make_shared(matmul, pattern, false); + auto relu = std::make_shared(reshape); + + ngraph::ResultVector results{ std::make_shared(relu)}; + function = std::make_shared(results, params, "ConvertMatmulToPointwiseConv"); + } +}; + +class ConvertMatmulToPointwiseConvWithFq : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { + float inputDataMin = -10.0f; + float inputDataMax = 10.0f; + float inputDataResolution = 1.0f; + +public: + static std::string 
getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision netPrecision; + std::string targetDevice; + std::map configuration; + std::vector inputShape; + std::pair inputMinMax; + std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax) = obj.param; + + std::ostringstream result; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetDevice << "_"; + for (auto const& configItem : configuration) { + result << "_configItem=" << configItem.first << "_" << configItem.second; + } + result << "_inputShape=" << CommonTestUtils::vec2str(inputShape); + result << "_inputMinMax=(" << inputMinMax.first << ".." << inputMinMax.second << ")"; + return result.str(); + } + + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const { + return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax - inputDataMin, inputDataMin, + 1 / inputDataResolution); + } + +protected: + void SetUp() override { + InferenceEngine::Precision netPrecision; + std::vector inputShape; + std::pair inputMinMax; + std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax) = this->GetParam(); + std::tie(inputDataMin, inputDataMax) = inputMinMax; + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + + auto inputLowNode = ngraph::builder::makeConstant(ngPrc, std::vector{ 1 }, + std::vector{ inputDataMin }); + auto inputHighNode = ngraph::builder::makeConstant(ngPrc, std::vector{ 1 }, + std::vector{ inputDataMax }); + auto inputFQ = std::make_shared(params[0], + inputLowNode, inputHighNode, inputLowNode, inputHighNode, UINT16_MAX); + + size_t elemNum = inputShape[inputShape.size() - 1]; + + const float weightsMin = -0.2f; + const float weightsMax = 0.2f; + std::vector weights = CommonTestUtils::generate_float_numbers(elemNum * elemNum, weightsMin, weightsMax); + auto weightsNode = std::make_shared(ngPrc, ngraph::Shape{elemNum, elemNum}, weights); + auto weightsLowNode = ngraph::builder::makeConstant(ngPrc, std::vector{ 1 }, + std::vector{ weightsMin }); + auto weightsHighNode = ngraph::builder::makeConstant(ngPrc, std::vector{ 1 }, + std::vector{ weightsMax }); + auto weightsFQNode = std::make_shared(weightsNode, + weightsLowNode, weightsHighNode, weightsLowNode, weightsHighNode, UINT16_MAX); + auto matmul = ngraph::builder::makeMatMul(inputFQ, weightsFQNode, false, true); + + auto bias = ngraph::builder::makeConstant(ngPrc, std::vector{1, 1, 1}, std::vector{1.0f}); + auto add = ngraph::builder::makeEltwise(matmul, bias, ngraph::helpers::EltwiseTypes::ADD); + + auto outputLowNode = ngraph::builder::makeConstant(ngPrc, std::vector{ 1 }, + std::vector{ -inputDataMax * weightsMax * elemNum }); + auto outputHighNode = ngraph::builder::makeConstant(ngPrc, std::vector{ 1 }, + std::vector{ inputDataMax * weightsMax * elemNum }); + auto outputFQ = std::make_shared(add, + outputLowNode, outputHighNode, outputLowNode, outputHighNode, UINT16_MAX); + + auto pattern = std::make_shared(ngraph::element::Type_t::i64, + ngraph::Shape{ inputShape.size() }, inputShape); + auto reshape = std::make_shared(outputFQ, pattern, false); + + auto relu = std::make_shared(reshape); + + ngraph::ResultVector results{ std::make_shared(relu)}; + function = std::make_shared(results, params, "ConvertMatmulToPointwiseConv"); + } +}; + +TEST_P(ConvertMatmulToPointwiseConv, CompareWithRefImpl) { + Run(); +}; + 
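// Illustrative sketch (not part of the patch): the test above only checks numerical
// equivalence after the GNA plugin has, presumably, rewritten MatMul + Add as a 1x1
// ("pointwise") convolution. The helper below shows why such a rewrite preserves the
// result of MatMul(input[1, A, B], W[B, B], transpose_b = true); it is not the plugin's
// actual transformation code, and the helper name is hypothetical.
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>

static std::shared_ptr<ngraph::Function> buildPointwiseConvEquivalent(
        size_t A, size_t B, const std::vector<float>& weights /* B * B values, row-major [oc][ic] */) {
    using namespace ngraph;
    auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, A, B});

    // [1, A, B] -> [1, B, A]: the feature dimension becomes the channel axis.
    auto order = opset1::Constant::create(element::i64, Shape{3}, {0, 2, 1});
    auto transposed = std::make_shared<opset1::Transpose>(input, order);

    // [1, B, A] -> [1, B, 1, A]: NCHW with H = 1 and W = A, one "pixel" per input row.
    auto shape4d = opset1::Constant::create(
        element::i64, Shape{4}, std::vector<int64_t>{1, static_cast<int64_t>(B), 1, static_cast<int64_t>(A)});
    auto nchw = std::make_shared<opset1::Reshape>(transposed, shape4d, false);

    // W[B, B] becomes B kernels of shape [B, 1, 1]; for every row a,
    // out[oc][a] = sum_ic W[oc][ic] * in[ic][a], i.e. exactly the transposed MatMul.
    auto kernels = opset1::Constant::create(element::f32, Shape{B, B, 1, 1}, weights);
    auto conv = std::make_shared<opset1::Convolution>(
        nchw, kernels, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});

    return std::make_shared<Function>(NodeVector{conv}, ParameterVector{input});
}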
+TEST_P(ConvertMatmulToPointwiseConvWithFq, CompareWithRefImpl) { + Run(); +}; + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +const std::vector> configs = { + { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, + } +}; + +const std::vector> inputShape = { + {1, 64, 64}, + {1, 256, 128}, + {1, 512, 128} +}; + +const std::vector> fqStats = { + {-0.5, 0.5} +}; + +INSTANTIATE_TEST_CASE_P(smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulToPointwiseConv, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(configs), + ::testing::ValuesIn(inputShape)), + ConvertMatmulToPointwiseConv::getTestCaseName); + +// Issue 55662 +INSTANTIATE_TEST_CASE_P(DISABLED_smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulToPointwiseConvWithFq, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(configs), + ::testing::ValuesIn(inputShape), + ::testing::ValuesIn(fqStats)), + ConvertMatmulToPointwiseConvWithFq::getTestCaseName); + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/gna/pass_tests/eltwise_split_over_channels_pass.cpp b/inference-engine/tests/functional/plugin/gna/pass_tests/eltwise_split_over_channels_pass.cpp index 17da73dfc99..f4c6cc98d34 100644 --- a/inference-engine/tests/functional/plugin/gna/pass_tests/eltwise_split_over_channels_pass.cpp +++ b/inference-engine/tests/functional/plugin/gna/pass_tests/eltwise_split_over_channels_pass.cpp @@ -18,19 +18,21 @@ typedef std::tuple< InferenceEngine::Precision, // Network Precision std::string, // Target Device - std::map //Configuration + std::map, // Configuration + std::vector // Input Shape > EltwiseSplitOverChannelsPassParams; namespace LayerTestsDefinitions { class EltwiseSplitOverChannelsPassTest : public testing::WithParamInterface, - public LayerTestsUtils::LayerTestsCommon { + public LayerTestsUtils::LayerTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj) { InferenceEngine::Precision netPrecision; std::string targetDevice; std::map configuration; - std::tie(netPrecision, targetDevice, configuration) = obj.param; + std::vector inputShape; + std::tie(netPrecision, targetDevice, configuration, inputShape) = obj.param; std::ostringstream result; result << "netPRC=" << netPrecision.name() << "_"; @@ -38,20 +40,22 @@ public: for (auto const& configItem : configuration) { result << "_configItem=" << configItem.first << "_" << configItem.second; } + result << "_inputShape=" << CommonTestUtils::vec2str(inputShape); return result.str(); } protected: void SetUp() override { InferenceEngine::Precision netPrecision; - std::tie(netPrecision, targetDevice, configuration) = this->GetParam(); + std::vector inputShape; + std::tie(netPrecision, targetDevice, configuration, inputShape) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - auto params = ngraph::builder::makeParams(ngPrc, { {1, 67000} }); - auto const_mult2 = ngraph::builder::makeConstant(ngPrc, {1, 67000}, {-1.0f}); + auto params = ngraph::builder::makeParams(ngPrc, { inputShape }); + auto const_mult2 = ngraph::builder::makeConstant(ngPrc, inputShape, {-1.0f}); auto sum = ngraph::builder::makeEltwise(params[0], const_mult2, ngraph::helpers::EltwiseTypes::MULTIPLY); - function = std::make_shared(sum, params, "RemovePermutationPass"); + function = 
std::make_shared(sum, params, "EltwiseSplitOverChannelsPassTest"); } }; @@ -71,11 +75,17 @@ const std::vector> configs = { } }; +const std::vector> inputShape = { + {1, 67000}, + {1, 500000} +}; + INSTANTIATE_TEST_CASE_P(smoke_EltwiseSplitOverChennels, EltwiseSplitOverChannelsPassTest, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_GNA), - ::testing::ValuesIn(configs)), + ::testing::ValuesIn(configs), + ::testing::ValuesIn(inputShape)), EltwiseSplitOverChannelsPassTest::getTestCaseName); } // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp index cb4cc459a95..a59ad83eaed 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp @@ -60,8 +60,6 @@ std::vector disabledTestPatterns() { R"(.*ConstantResultSubgraphTest.*inPrc=(U8|I8|I32|U64|I64|BOOL).*)", // TODO: Issue 51528 R"(.*CachingSupport.*_(u8|i16)_.*)", - // TODO: Issue 51527 - R"(.*CachingSupport.*_batch2_.*)", // TODO: Issue 51525 R"(.*CachingSupport.*KSOFunction.*)", // TODO: Issue 57363 (Param -> Result subgraphs) diff --git a/inference-engine/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp b/inference-engine/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp index e6415688de2..6837c0b84c3 100644 --- a/inference-engine/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp +++ b/inference-engine/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp @@ -9,7 +9,7 @@ #include -#include +#include #include #include #include @@ -175,7 +175,7 @@ TEST_P(BatchedBlob_Test, canInputNV12) { /* XXX: is it correct to set KEY_CLDNN_NV12_TWO_INPUTS in case of remote blob? 
*/ auto exec_net_b = ie.LoadNetwork(net_remote, CommonTestUtils::DEVICE_GPU, - { { CLDNNConfigParams::KEY_CLDNN_NV12_TWO_INPUTS, PluginConfigParams::YES} }); + { { GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS, PluginConfigParams::YES} }); auto inf_req_remote = exec_net_b.CreateInferRequest(); auto cldnn_context = exec_net_b.GetContext(); cl_context ctx = std::dynamic_pointer_cast(cldnn_context)->get(); diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/config.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/config.cpp index 0b550c568b6..a8c039e4391 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/config.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/config.cpp @@ -4,6 +4,7 @@ #include "behavior/config.hpp" #include "cldnn/cldnn_config.hpp" +#include "gpu/gpu_config.hpp" using namespace BehaviorTestsDefinitions; namespace { @@ -12,6 +13,7 @@ namespace { InferenceEngine::Precision::FP16 }; + IE_SUPPRESS_DEPRECATED_START const std::vector> inconfigs = { {{InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, "OFF"}}, {{InferenceEngine::PluginConfigParams::KEY_PERF_COUNT, "ON"}}, @@ -46,6 +48,7 @@ namespace { {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_GPU}, {InferenceEngine::PluginConfigParams::KEY_DEVICE_ID, "DEVICE_UNKNOWN"}} }; + IE_SUPPRESS_DEPRECATED_END INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, IncorrectConfigTests, ::testing::Combine( @@ -73,6 +76,29 @@ namespace { {} }; + IE_SUPPRESS_DEPRECATED_START + const std::vector> conf_gpu = { + // Deprecated + {{InferenceEngine::CLDNNConfigParams::KEY_CLDNN_NV12_TWO_INPUTS, InferenceEngine::PluginConfigParams::YES}}, + {{InferenceEngine::CLDNNConfigParams::KEY_CLDNN_NV12_TWO_INPUTS, InferenceEngine::PluginConfigParams::NO}}, + {{InferenceEngine::CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE, "0"}}, + {{InferenceEngine::CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE, "1"}}, + {{InferenceEngine::CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY, "0"}}, + {{InferenceEngine::CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY, "1"}}, + + {{InferenceEngine::GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS, InferenceEngine::PluginConfigParams::YES}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS, InferenceEngine::PluginConfigParams::NO}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE, "0"}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE, "1"}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY, "0"}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY, "1"}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_MAX_NUM_THREADS, "1"}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_MAX_NUM_THREADS, "4"}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING, InferenceEngine::PluginConfigParams::YES}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING, InferenceEngine::PluginConfigParams::NO}}, + }; + IE_SUPPRESS_DEPRECATED_END + const std::vector> multiconf = { {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}} }; @@ -92,6 +118,13 @@ namespace { ::testing::ValuesIn(conf)), CorrectConfigAPITests::getTestCaseName); + INSTANTIATE_TEST_CASE_P(smoke_GPU_BehaviorTests, CorrectConfigAPITests, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::ValuesIn(conf_gpu)), + 
CorrectConfigAPITests::getTestCaseName); + INSTANTIATE_TEST_CASE_P(smoke_Multi_BehaviorTests, CorrectConfigAPITests, ::testing::Combine( ::testing::ValuesIn(netPrecisions), @@ -106,6 +139,13 @@ namespace { ::testing::ValuesIn(autoconf)), CorrectConfigAPITests::getTestCaseName); + INSTANTIATE_TEST_CASE_P(smoke_AutoCG_BehaviorTests, CorrectConfigAPITests, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_AUTO), + ::testing::ValuesIn(auto_cpu_gpu_conf)), + CorrectConfigAPITests::getTestCaseName); + INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, IncorrectConfigAPITests, ::testing::Combine( ::testing::ValuesIn(netPrecisions), @@ -124,15 +164,15 @@ namespace { ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(autoconf)), + ::testing::ValuesIn(autoinconfigs)), IncorrectConfigAPITests::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_AutoCG_BehaviorTests, IncorrectConfigAPITests, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(auto_cpu_gpu_conf)), + ::testing::ValuesIn(autoinconfigs)), IncorrectConfigAPITests::getTestCaseName); -} // namespace \ No newline at end of file +} // namespace diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_integration.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_integration.cpp index 3765c75864f..68b23831e47 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_integration.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_integration.cpp @@ -11,7 +11,7 @@ #endif #include "gpu/gpu_context_api_ocl.hpp" -#include "cldnn/cldnn_config.hpp" +#include "gpu/gpu_config.hpp" using namespace BehaviorTestsDefinitions; diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_input.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_input.cpp index a9a07450f70..59f4dd21677 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_input.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_input.cpp @@ -3,7 +3,7 @@ // #include "behavior/infer_request_input.hpp" -#include "cldnn/cldnn_config.hpp" +#include "gpu/gpu_config.hpp" using namespace BehaviorTestsDefinitions; namespace { @@ -26,9 +26,7 @@ namespace { }; const std::vector> autoConfigs = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_GPU}, - {InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, - InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_GPU}} }; const std::vector> auto_cpu_gpu_conf = { diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_output.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_output.cpp index 550572077c3..1135f6d9f7d 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_output.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_output.cpp @@ -3,7 +3,7 @@ // #include "behavior/infer_request_output.hpp" -#include "cldnn/cldnn_config.hpp" +#include 
"gpu/gpu_config.hpp" using namespace BehaviorTestsDefinitions; namespace { @@ -22,8 +22,7 @@ namespace { }; const std::vector> autoConfigs = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_GPU}, - {InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_GPU}} }; const std::vector> auto_cpu_gpu_conf = { diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/perf_counters.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/perf_counters.cpp index c5a5f695359..d8a89ef317d 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/perf_counters.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/perf_counters.cpp @@ -14,14 +14,6 @@ namespace { {{ MULTI_CONFIG_KEY(DEVICE_PRIORITIES) , CommonTestUtils::DEVICE_GPU}} }; - const std::vector> Autoconfigs = { - {{ AUTO_CONFIG_KEY(DEVICE_LIST) , CommonTestUtils::DEVICE_GPU}} - }; - - const std::vector> auto_cpu_gpu_conf = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , std::string(CommonTestUtils::DEVICE_CPU) + "," + CommonTestUtils::DEVICE_GPU}} - }; - INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, PerfCountersTest, ::testing::Combine( ::testing::Values(InferenceEngine::Precision::FP32), @@ -36,18 +28,4 @@ namespace { ::testing::ValuesIn(Multiconfigs)), PerfCountersTest::getTestCaseName); - INSTANTIATE_TEST_CASE_P(smoke_Auto_BehaviorTests, PerfCountersTest, - ::testing::Combine( - ::testing::Values(InferenceEngine::Precision::FP32), - ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(Autoconfigs)), - PerfCountersTest::getTestCaseName); - - INSTANTIATE_TEST_CASE_P(smoke_AutoCG_BehaviorTests, PerfCountersTest, - ::testing::Combine( - ::testing::Values(InferenceEngine::Precision::FP32), - ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(auto_cpu_gpu_conf)), - PerfCountersTest::getTestCaseName); - } // namespace diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/test_plugin.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/test_plugin.cpp index bc6507d7905..729bf57c64a 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/test_plugin.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/test_plugin.cpp @@ -3,7 +3,7 @@ // #include "behavior/test_plugin.hpp" -#include "cldnn/cldnn_config.hpp" +#include "gpu/gpu_config.hpp" using namespace BehaviorTestsDefinitions; namespace { @@ -28,7 +28,7 @@ namespace { }; const std::vector> auto_cpu_gpu_conf = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , std::string(CommonTestUtils::DEVICE_CPU) + "," + CommonTestUtils::DEVICE_GPU}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST , std::string(CommonTestUtils::DEVICE_CPU) + "," + CommonTestUtils::DEVICE_GPU}} }; const std::vector> configsInput = { @@ -42,18 +42,6 @@ namespace { {InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO}} }; - const std::vector> AutoConfigsInputOutput = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST, CommonTestUtils::DEVICE_GPU}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST, CommonTestUtils::DEVICE_GPU}, - {InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, 
InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO}} - }; - - const std::vector> AutoCGConfigsInputOutput = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST, std::string(CommonTestUtils::DEVICE_CPU) + "," + CommonTestUtils::DEVICE_GPU}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST, std::string(CommonTestUtils::DEVICE_CPU) + "," + CommonTestUtils::DEVICE_GPU}, - {InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO}} - }; - const std::vector> configsOutput = { {}, {{InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO}} @@ -77,14 +65,14 @@ namespace { ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(AutoConfigsInputOutput)), + ::testing::ValuesIn(AutoConfigs)), BehaviorTestOutput::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_AutoCG_BehaviorTests, BehaviorTestOutput, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(AutoCGConfigsInputOutput)), + ::testing::ValuesIn(auto_cpu_gpu_conf)), BehaviorTestOutput::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, BehaviorTests, @@ -133,14 +121,14 @@ namespace { ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(AutoConfigsInputOutput)), + ::testing::ValuesIn(AutoConfigs)), BehaviorTestInput::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_AutoCG_BehaviorTests, BehaviorTestInput, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(AutoCGConfigsInputOutput)), + ::testing::ValuesIn(auto_cpu_gpu_conf)), BehaviorTestInput::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/multi/gpu_remote_blob_tests.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/multi/gpu_remote_blob_tests.cpp index da308c032e2..4fffb2cad6e 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/multi/gpu_remote_blob_tests.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/multi/gpu_remote_blob_tests.cpp @@ -4,7 +4,7 @@ #include #include -#include "cldnn/cldnn_config.hpp" +#include "gpu/gpu_config.hpp" #include "multi/multi_remote_blob_tests.hpp" #include "common_test_utils/test_constants.hpp" diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp index 02889dd8dde..07bd2a26098 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp @@ -57,5 +57,7 @@ std::vector disabledTestPatterns() { R"(.*LSTMSequence.*CompareWithRefs.*mode=CONVERT_TO_TI_RAND_SEQ_LEN_PARAM_seq.*direction=bidirectional_clip=0.7_netPRC=FP32.*)", // TODO: Issue: 54194 R"(.*ActivationLayerTest.*SoftPlus.*)", + // need to implement Export / Import + R"(.*IEClassImportExportTestP.*)" }; } diff --git a/inference-engine/tests/functional/plugin/gpu/single_layer_tests/tensor_iterator.cpp b/inference-engine/tests/functional/plugin/gpu/single_layer_tests/tensor_iterator.cpp index aae2e0db8fa..bce1ef10691 100644 --- 
a/inference-engine/tests/functional/plugin/gpu/single_layer_tests/tensor_iterator.cpp +++ b/inference-engine/tests/functional/plugin/gpu/single_layer_tests/tensor_iterator.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include "common_test_utils/test_constants.hpp" #include "ie_api.h" @@ -289,8 +289,8 @@ namespace { InferenceEngine::Precision::FP16, }), // precision ::testing::ValuesIn(std::vector { - {CommonTestUtils::DEVICE_GPU, {{CLDNNConfigParams::KEY_CLDNN_ENABLE_LOOP_UNROLLING, PluginConfigParams::YES}}}, - {CommonTestUtils::DEVICE_GPU, {{CLDNNConfigParams::KEY_CLDNN_ENABLE_LOOP_UNROLLING, PluginConfigParams::NO}}} + {CommonTestUtils::DEVICE_GPU, {{GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING, PluginConfigParams::YES}}}, + {CommonTestUtils::DEVICE_GPU, {{GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING, PluginConfigParams::NO}}} })), // configuration TensorIteratorWithConfigTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/core_integration.cpp b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/core_integration.cpp index 8edb52954a8..2eaa15ec866 100644 --- a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/core_integration.cpp +++ b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/core_integration.cpp @@ -38,36 +38,7 @@ INSTANTIATE_TEST_CASE_P( // IEClassNetworkTestP tests, customized to add SKIP_IF_CURRENT_TEST_IS_DISABLED() // -using IEClassNetworkTestP_VPU = IEClassNetworkTestP; - -TEST_P(IEClassNetworkTestP_VPU, smoke_ImportNetworkNoThrowWithDeviceName) { - SKIP_IF_CURRENT_TEST_IS_DISABLED(); - Core ie; - std::stringstream strm; - ExecutableNetwork executableNetwork; - ASSERT_NO_THROW(executableNetwork = ie.LoadNetwork(actualNetwork, deviceName)); - ASSERT_NO_THROW(executableNetwork.Export(strm)); - ASSERT_NO_THROW(executableNetwork = ie.ImportNetwork(strm, deviceName)); - ASSERT_NO_THROW(executableNetwork.CreateInferRequest()); -} - -TEST_P(IEClassNetworkTestP_VPU, smoke_ExportUsingFileNameImportFromStreamNoThrowWithDeviceName) { - SKIP_IF_CURRENT_TEST_IS_DISABLED(); - Core ie; - ExecutableNetwork executableNetwork; - std::string fileName{"ExportedNetwork"}; - { - ASSERT_NO_THROW(executableNetwork = ie.LoadNetwork(actualNetwork, deviceName)); - ASSERT_NO_THROW(executableNetwork.Export(fileName)); - } - { - std::ifstream strm(fileName); - ASSERT_NO_THROW(executableNetwork = ie.ImportNetwork(strm, deviceName)); - } - ASSERT_NO_THROW(executableNetwork.CreateInferRequest()); -} - -using IEClassNetworkTestP_VPU_GetMetric = IEClassNetworkTestP_VPU; +using IEClassNetworkTestP_VPU_GetMetric = IEClassNetworkTestP; TEST_P(IEClassNetworkTestP_VPU_GetMetric, smoke_OptimizationCapabilitiesReturnsFP16) { Core ie; @@ -86,13 +57,13 @@ INSTANTIATE_TEST_CASE_P( ::testing::ValuesIn(devices)); INSTANTIATE_TEST_CASE_P( - smoke_IEClassImportExportTestP, IEClassNetworkTestP_VPU, + smoke_IEClassImportExportTestP, IEClassImportExportTestP, ::testing::Values(std::string(CommonTestUtils::DEVICE_MYRIAD), "HETERO:" + std::string(CommonTestUtils::DEVICE_MYRIAD))); #if defined(ENABLE_MKL_DNN) && ENABLE_MKL_DNN INSTANTIATE_TEST_CASE_P( - smoke_IEClassImportExportTestP_HETERO_CPU, IEClassNetworkTestP_VPU, + smoke_IEClassImportExportTestP_HETERO_CPU, IEClassImportExportTestP, ::testing::Values("HETERO:" + std::string(CommonTestUtils::DEVICE_MYRIAD) + ",CPU")); #endif diff --git 
a/inference-engine/tests/functional/plugin/shared/include/behavior/config.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/config.hpp index b9caf6edacf..e13fe679b2a 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/config.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/config.hpp @@ -57,8 +57,7 @@ namespace BehaviorTestsDefinitions { // Create CNNNetwork from ngrpah::Function InferenceEngine::CNNNetwork cnnNet(function); if (targetDevice.find(CommonTestUtils::DEVICE_MULTI) == std::string::npos && - targetDevice.find(CommonTestUtils::DEVICE_HETERO) == std::string::npos && - targetDevice.find(CommonTestUtils::DEVICE_AUTO) == std::string::npos) { + targetDevice.find(CommonTestUtils::DEVICE_HETERO) == std::string::npos) { ASSERT_NO_THROW(ie->GetMetric(targetDevice, METRIC_KEY(SUPPORTED_CONFIG_KEYS))); ASSERT_THROW(ie->SetConfig(configuration, targetDevice), InferenceEngine::Exception); @@ -73,8 +72,12 @@ namespace BehaviorTestsDefinitions { SKIP_IF_CURRENT_TEST_IS_DISABLED() // Create CNNNetwork from ngrpah::Function InferenceEngine::CNNNetwork cnnNet(function); - ASSERT_THROW(auto execNet = ie->LoadNetwork(cnnNet, targetDevice, configuration), - InferenceEngine::Exception); + if (targetDevice.find(CommonTestUtils::DEVICE_AUTO) != std::string::npos) { + GTEST_SKIP(); + } else { + ASSERT_THROW(auto execNet = ie->LoadNetwork(cnnNet, targetDevice, configuration), + InferenceEngine::Exception); + } } using IncorrectConfigAPITests = BehaviorTestsUtils::BehaviorTestsBasic; @@ -110,8 +113,10 @@ namespace BehaviorTestsDefinitions { ASSERT_NO_THROW(ie->SetConfig(config, targetDevice)); } // Load CNNNetwork to target plugins - auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); - execNet.CreateInferRequest(); + if (targetDevice.find(CommonTestUtils::DEVICE_AUTO) == std::string::npos) { + auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); + execNet.CreateInferRequest(); + } if ((targetDevice == CommonTestUtils::DEVICE_HDDL) || (targetDevice == CommonTestUtils::DEVICE_GNA)) { ASSERT_EQ(0u, InferenceEngine::ExecutorManager::getInstance()->getExecutorsNumber()); @@ -139,8 +144,10 @@ namespace BehaviorTestsDefinitions { ASSERT_NO_THROW(ie->SetConfig(config, targetDevice)); } // Load CNNNetwork to target plugins - auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); - execNet.CreateInferRequest(); + if (targetDevice.find(CommonTestUtils::DEVICE_AUTO) == std::string::npos) { + auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); + execNet.CreateInferRequest(); + } if ((targetDevice == CommonTestUtils::DEVICE_MYRIAD) || (targetDevice == CommonTestUtils::DEVICE_KEEMBAY)) { @@ -170,8 +177,10 @@ namespace BehaviorTestsDefinitions { ASSERT_NO_THROW(ie->SetConfig(config, targetDevice)); } // Load CNNNetwork to target plugins - auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); - execNet.CreateInferRequest(); + if (targetDevice.find(CommonTestUtils::DEVICE_AUTO) == std::string::npos) { + auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); + execNet.CreateInferRequest(); + } if ((targetDevice == CommonTestUtils::DEVICE_MYRIAD) || (targetDevice == CommonTestUtils::DEVICE_KEEMBAY)) { diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/core_integration.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/core_integration.hpp index 834db01006f..adcd0e525b4 100644 --- 
a/inference-engine/tests/functional/plugin/shared/include/behavior/core_integration.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/core_integration.hpp @@ -61,6 +61,7 @@ namespace BehaviorTestsDefinitions { } \ } + class IEClassBasicTestP : public ::testing::Test, public WithParamInterface > { protected: std::string deviceName; @@ -424,7 +425,16 @@ TEST_P(IEClassBasicTestP, ImportNetworkThrows) { if (deviceName == CommonTestUtils::DEVICE_CPU || deviceName == CommonTestUtils::DEVICE_GPU) { - ASSERT_THROW(ie.ImportNetwork("model", deviceName), NotImplemented); + ASSERT_THROW(ie.ImportNetwork("model", deviceName), NetworkNotRead); + + const std::string modelName = "compiled_blob.blob"; + { + std::ofstream file(modelName); + file << "content"; + } + + EXPECT_THROW(ie.ImportNetwork(modelName, deviceName), NotImplemented); + ASSERT_EQ(0, std::remove(modelName.c_str())); } } @@ -432,13 +442,13 @@ TEST(IEClassBasicTest, smoke_ImportNetworkHeteroThrows) { SKIP_IF_CURRENT_TEST_IS_DISABLED() Core ie; - ASSERT_THROW(ie.ImportNetwork("model", CommonTestUtils::DEVICE_HETERO), Exception); + ASSERT_THROW(ie.ImportNetwork("model", CommonTestUtils::DEVICE_HETERO), NetworkNotRead); } TEST(IEClassBasicTest, smoke_ImportNetworkMultiThrows) { SKIP_IF_CURRENT_TEST_IS_DISABLED() InferenceEngine::Core ie; - ASSERT_THROW(ie.ImportNetwork("model", CommonTestUtils::DEVICE_MULTI), Exception); + ASSERT_THROW(ie.ImportNetwork("model", CommonTestUtils::DEVICE_MULTI), NetworkNotRead); } TEST_P(IEClassBasicTestP, ImportNetworkWithNullContextThrows) { @@ -474,19 +484,18 @@ TEST_P(IEClassNetworkTestP, LoadNetworkActualHeteroDevice2NoThrow) { // // ImportExportNetwork // -TEST_P(IEClassImportExportTestP, smoke_ImportNetworkNoThrowIfNoDeviceName) { + +TEST_P(IEClassImportExportTestP, smoke_ImportNetworkThrowsIfNoDeviceName) { SKIP_IF_CURRENT_TEST_IS_DISABLED() Core ie; std::stringstream strm; ExecutableNetwork executableNetwork; ASSERT_NO_THROW(executableNetwork = ie.LoadNetwork(actualNetwork, deviceName)); - SKIP_IF_NOT_IMPLEMENTED(executableNetwork.Export(strm)); - if (!strm.str().empty()) { - SKIP_IF_NOT_IMPLEMENTED(executableNetwork = ie.ImportNetwork(strm)); - } - if (executableNetwork) { - ASSERT_NO_THROW(executableNetwork.CreateInferRequest()); - } + ASSERT_NO_THROW(executableNetwork.Export(strm)); + + IE_SUPPRESS_DEPRECATED_START + ASSERT_THROW(executableNetwork = ie.ImportNetwork(strm), Exception); + IE_SUPPRESS_DEPRECATED_END } TEST_P(IEClassImportExportTestP, smoke_ImportNetworkNoThrowWithDeviceName) { @@ -495,11 +504,9 @@ TEST_P(IEClassImportExportTestP, smoke_ImportNetworkNoThrowWithDeviceName) { std::stringstream strm; ExecutableNetwork executableNetwork; ASSERT_NO_THROW(executableNetwork = ie.LoadNetwork(actualNetwork, deviceName)); - SKIP_IF_NOT_IMPLEMENTED(executableNetwork.Export(strm)); - SKIP_IF_NOT_IMPLEMENTED(executableNetwork = ie.ImportNetwork(strm, deviceName)); - if (executableNetwork) { - ASSERT_NO_THROW(executableNetwork.CreateInferRequest()); - } + ASSERT_NO_THROW(executableNetwork.Export(strm)); + ASSERT_NO_THROW(executableNetwork = ie.ImportNetwork(strm, deviceName)); + ASSERT_NO_THROW(executableNetwork.CreateInferRequest()); } TEST_P(IEClassImportExportTestP, smoke_ExportUsingFileNameImportFromStreamNoThrowWithDeviceName) { @@ -509,18 +516,16 @@ TEST_P(IEClassImportExportTestP, smoke_ExportUsingFileNameImportFromStreamNoThro std::string fileName{"ExportedNetwork"}; { ASSERT_NO_THROW(executableNetwork = ie.LoadNetwork(simpleNetwork, deviceName)); - 
SKIP_IF_NOT_IMPLEMENTED(executableNetwork.Export(fileName)); + ASSERT_NO_THROW(executableNetwork.Export(fileName)); } - if (CommonTestUtils::fileExists(fileName)) { + { { std::ifstream strm(fileName); - SKIP_IF_NOT_IMPLEMENTED(executableNetwork = ie.ImportNetwork(strm, deviceName)); + ASSERT_NO_THROW(executableNetwork = ie.ImportNetwork(strm, deviceName)); } ASSERT_EQ(0, remove(fileName.c_str())); } - if (executableNetwork) { - ASSERT_NO_THROW(executableNetwork.CreateInferRequest()); - } + ASSERT_NO_THROW(executableNetwork.CreateInferRequest()); } // diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request_config.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request_config.hpp index cb364c80f8c..c9469401123 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request_config.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request_config.hpp @@ -42,8 +42,10 @@ TEST_P(InferConfigTests, canSetExclusiveAsyncRequests) { ASSERT_NO_THROW(ie->SetConfig(config, targetDevice)); } // Load CNNNetwork to target plugins - auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); - execNet.CreateInferRequest(); + if (targetDevice.find(CommonTestUtils::DEVICE_AUTO) == std::string::npos) { + auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); + execNet.CreateInferRequest(); + } if ((targetDevice == CommonTestUtils::DEVICE_HDDL) || (targetDevice == CommonTestUtils::DEVICE_GNA)) { ASSERT_EQ(0u, InferenceEngine::ExecutorManager::getInstance()->getExecutorsNumber()); @@ -71,8 +73,10 @@ TEST_P(InferConfigTests, withoutExclusiveAsyncRequests) { ASSERT_NO_THROW(ie->SetConfig(config, targetDevice)); } // Load CNNNetwork to target plugins - auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); - execNet.CreateInferRequest(); + if (targetDevice.find(CommonTestUtils::DEVICE_AUTO) == std::string::npos) { + auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); + execNet.CreateInferRequest(); + } if ((targetDevice == CommonTestUtils::DEVICE_GNA) || (targetDevice == CommonTestUtils::DEVICE_HDDL)) { ASSERT_EQ(0u, InferenceEngine::ExecutorManager::getInstance()->getExecutorsNumber()); diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp index de442f12c21..ff294866858 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp @@ -157,16 +157,17 @@ TEST_P(PreprocessTest, SetMeanImagePreProcessSetBlob) { auto &preProcess = cnnNet.getInputsInfo().begin()->second->getPreProcess(); preProcess.init(3); for (size_t i = 0; i < 3; i++) { - preProcess[i]->meanData = make_blob_with_precision(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, - {10, 10}, - InferenceEngine::Layout::HW)); - preProcess[i]->meanData->allocate(); - auto lockedMem = preProcess[i]->meanData->buffer(); + auto meanData = make_blob_with_precision( + InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {10, 10}, + InferenceEngine::Layout::HW)); + meanData->allocate(); + auto lockedMem = meanData->buffer(); auto* data = lockedMem.as(); for (size_t j = 0; j < 100; j++) { data[j] = 0; data[j] -= i * 100 + j; } + ASSERT_NO_THROW(preProcess.setMeanImageForChannel(meanData, i)); } preProcess.setVariant(InferenceEngine::MEAN_IMAGE); // Load 
CNNNetwork to target plugins diff --git a/inference-engine/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp b/inference-engine/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp index 0db7264cb74..c30945dc914 100644 --- a/inference-engine/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp @@ -69,13 +69,16 @@ void ImportNetworkTestBase::Run() { for (const auto& next_input : importedExecNetwork.GetInputsInfo()) { ASSERT_NO_THROW(compiledExecNetwork.GetInputsInfo()[next_input.first]); + Compare(next_input.second->getTensorDesc(), compiledExecNetwork.GetInputsInfo()[next_input.first]->getTensorDesc()); } for (const auto& next_output : importedExecNetwork.GetOutputsInfo()) { ASSERT_NO_THROW(compiledExecNetwork.GetOutputsInfo()[next_output.first]); } auto importedOutputs = GetOutputs(); ASSERT_EQ(actualOutputs.size(), importedOutputs.size()); + for (size_t i = 0; i < actualOutputs.size(); i++) { + Compare(actualOutputs[i]->getTensorDesc(), importedOutputs[i]->getTensorDesc()); Compare(actualOutputs[i], importedOutputs[i]); } } diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp index 9b8b78b6ef0..9d132515743 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp @@ -72,6 +72,8 @@ public: virtual void Compare(const InferenceEngine::Blob::Ptr &expected, const InferenceEngine::Blob::Ptr &actual); + virtual void Compare(const InferenceEngine::TensorDesc &actualDesc, const InferenceEngine::TensorDesc &expectedDesc); + virtual void SetRefMode(RefMode mode); std::shared_ptr GetFunction(); diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/activation.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/activation.hpp index bdf996cd141..684942ee184 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/activation.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/activation.hpp @@ -96,13 +96,11 @@ protected: }; class ActivationParamLayerTest : public ActivationLayerTest { -public: - void Infer() override; - protected: void SetUp() override; private: + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override; void generateActivationBlob(std::vector constantsValue); ngraph::ParameterVector createActivationParams( ngraph::element::Type ngPrc, std::vector inShape = {}); diff --git a/inference-engine/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp b/inference-engine/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp index cc3927b25c5..056826aff86 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp @@ -274,6 +274,17 @@ void LayerTestsCommon::Compare(const InferenceEngine::Blob::Ptr &expected, const } } +void LayerTestsCommon::Compare(const 
InferenceEngine::TensorDesc &actualDesc, const InferenceEngine::TensorDesc &expectedDesc) { + auto expectedDims = actualDesc.getDims(); + auto actualDims = expectedDesc.getDims(); + ASSERT_EQ(actualDims.size(), expectedDims.size()); + for (size_t j = 0; j < actualDims.size(); ++j) { + ASSERT_EQ(actualDims.at(j), expectedDims.at(j)); + } + ASSERT_EQ(actualDesc.getLayout(), expectedDesc.getLayout()); + ASSERT_EQ(actualDesc.getPrecision(), expectedDesc.getPrecision()); +} + void LayerTestsCommon::ConfigureNetwork() { for (const auto &in : cnnNetwork.getInputsInfo()) { if (inLayout != InferenceEngine::Layout::ANY) { diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/activation.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/activation.cpp index 3136c604e7d..5b90cfc2079 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/single_layer/activation.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/activation.cpp @@ -41,6 +41,13 @@ void ActivationLayerTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto params = ngraph::builder::makeParams(ngPrc, {shapes.first}); params[0]->set_friendly_name("Input"); + + if (activationType == ngraph::helpers::ActivationTypes::PReLu && constantsValue.empty()) { + const auto elemnts_count = ngraph::shape_size(shapes.second); + constantsValue.resize(elemnts_count); + std::iota(constantsValue.begin(), constantsValue.end(), -10); + } + auto activation = ngraph::builder::makeActivation(params[0], ngPrc, activationType, shapes.second, constantsValue); function = std::make_shared(ngraph::NodeVector{activation}, params); @@ -163,70 +170,26 @@ ngraph::ParameterVector ActivationParamLayerTest::createActivationParams(ngraph: } } -void ActivationParamLayerTest::generateActivationBlob(std::vector constantsValue) { - switch (activationType) { - case ngraph::helpers::ActivationTypes::PReLu: { - auto blobNegativeSlope = inferRequest.GetBlob("negativeSlope"); - float negativeSlope = constantsValue[0]; - blobNegativeSlope = FuncTestUtils::createAndFillBlobWithFloatArray(blobNegativeSlope->getTensorDesc(), &negativeSlope, 1); - inferRequest.SetBlob("negativeSlope", blobNegativeSlope); - inputs.push_back(blobNegativeSlope); - break; - } - case ngraph::helpers::ActivationTypes::LeakyRelu: { - auto blobLeakySlope = inferRequest.GetBlob("leakySlope"); - float leakySlope = constantsValue[0]; - blobLeakySlope = FuncTestUtils::createAndFillBlobWithFloatArray(blobLeakySlope->getTensorDesc(), &leakySlope, 1); - inferRequest.SetBlob("leakySlope", blobLeakySlope); - inputs.push_back(blobLeakySlope); - break; - } - case ngraph::helpers::ActivationTypes::HardSigmoid: { - auto blobHardSigmoidAlpha = inferRequest.GetBlob("alpha"); - auto blobHardSigmoidBeta = inferRequest.GetBlob("beta"); - float alpha = constantsValue[0], beta = constantsValue[1]; - blobHardSigmoidAlpha = FuncTestUtils::createAndFillBlobWithFloatArray(blobHardSigmoidAlpha->getTensorDesc(), &alpha, 1); - blobHardSigmoidBeta = FuncTestUtils::createAndFillBlobWithFloatArray(blobHardSigmoidBeta->getTensorDesc(), &beta, 1); - inferRequest.SetBlob("alpha", blobHardSigmoidAlpha); - inferRequest.SetBlob("beta", blobHardSigmoidBeta); - inputs.push_back(blobHardSigmoidAlpha); - inputs.push_back(blobHardSigmoidBeta); - break; - } - case ngraph::helpers::ActivationTypes::Selu: { - auto blobHardSigmoidAlpha = inferRequest.GetBlob("alpha"); - auto blobHardSigmoidLambda = 
inferRequest.GetBlob("lambda"); - float alpha = constantsValue[0], lambda = constantsValue[1]; - blobHardSigmoidAlpha = FuncTestUtils::createAndFillBlobWithFloatArray(blobHardSigmoidAlpha->getTensorDesc(), &alpha, 1); - blobHardSigmoidLambda = FuncTestUtils::createAndFillBlobWithFloatArray(blobHardSigmoidLambda->getTensorDesc(), &lambda, 1); - inferRequest.SetBlob("alpha", blobHardSigmoidAlpha); - inferRequest.SetBlob("lambda", blobHardSigmoidLambda); - inputs.push_back(blobHardSigmoidAlpha); - inputs.push_back(blobHardSigmoidLambda); - break; - } - default: - IE_THROW() << "Unsupported activation type for Params test type"; +InferenceEngine::Blob::Ptr ActivationParamLayerTest::GenerateInput(const InferenceEngine::InputInfo &info) const { + InferenceEngine::Blob::Ptr blobPtr; + const std::string& name = info.name(); + if (name == "negativeSlope") { + const auto elemnts_count = ngraph::shape_size(function->get_parameters()[1]->get_shape()); + std::vector param_data(elemnts_count); + std::iota(param_data.begin(), param_data.end(), -10); + blobPtr = FuncTestUtils::createAndFillBlobWithFloatArray(info.getTensorDesc(), ¶m_data[0], elemnts_count); + } else if (name == "leakySlope") { + const auto elemnts_count = ngraph::shape_size(function->get_parameters()[1]->get_shape()); + std::vector param_data(elemnts_count, constantsValue[0]); + blobPtr = FuncTestUtils::createAndFillBlobWithFloatArray(info.getTensorDesc(), ¶m_data[0], elemnts_count); + } else if (name == "alpha") { + blobPtr = FuncTestUtils::createAndFillBlobWithFloatArray(info.getTensorDesc(), &constantsValue[0], 1); + } else if (name == "beta" || name == "lambda") { + blobPtr = FuncTestUtils::createAndFillBlobWithFloatArray(info.getTensorDesc(), &constantsValue[1], 1); + } else { + blobPtr = FuncTestUtils::createAndFillBlob(info.getTensorDesc(), 20, -10, 1); } -} - -void ActivationParamLayerTest::Infer() { - inferRequest = executableNetwork.CreateInferRequest(); - - auto blobInput = inferRequest.GetBlob("Input"); - blobInput = FuncTestUtils::createAndFillBlobFloat(blobInput->getTensorDesc()); - inferRequest.SetBlob("Input", blobInput); - inputs.push_back(blobInput); - - generateActivationBlob(constantsValue); - - if (configuration.count(InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED) && - configuration.count(InferenceEngine::PluginConfigParams::YES)) { - auto batchSize = executableNetwork.GetInputsInfo().begin()->second->getTensorDesc().getDims()[0] / 2; - inferRequest.SetBatch(batchSize); - } - - inferRequest.Infer(); + return blobPtr; } void ActivationParamLayerTest::SetUp() { @@ -245,7 +208,8 @@ void ActivationParamLayerTest::SetUp() { params.insert(params.end(), activationParams.begin(), activationParams.end()); auto activation = ngraph::builder::makeActivation(params, ngPrc, activationType); - function = std::make_shared(ngraph::NodeVector{activation}, params); + ngraph::ResultVector results{std::make_shared(activation)}; + function = std::make_shared(results, params); } void ActivationDynamicLayerTest::Run() { diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/report_template.html b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/report_template.html index 26f0923e144..7d6f751f917 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/report_template.html +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/report_template.html @@ -43,11 +43,11 
@@ - + - + {% for d in devices -%} - + {% endfor %} diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp index 2a68e96c19e..82e2ff61f33 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp @@ -32,11 +32,8 @@ public: MOCK_METHOD1(AddExtension, void(InferenceEngine::IExtensionPtr ext_ptr)); MOCK_METHOD1(SetConfig, void(const std::map &)); - using InferenceEngine::IInferencePlugin::ImportNetwork; - - std::shared_ptr ImportNetworkImpl(std::istream& stream, - const std::map &) { - std::getline(stream, importedString); + std::shared_ptr + ImportNetwork(std::istream& stream, const std::map &) { return {}; } diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp index 4e221be90f7..c1cc30a944b 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp @@ -34,8 +34,4 @@ public: void WrapOstreamExport(std::ostream& networkModel) { IExecutableNetworkInternal::Export(networkModel); } - const std::string exportString = "MockExecutableNetworkInternal"; - void ExportImpl(std::ostream& networkModel) override { - networkModel << exportString << std::endl; - } }; diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.cpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.cpp index cd2e7b95f46..4408614f61c 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.cpp +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.cpp @@ -70,8 +70,8 @@ MockPlugin::LoadExeNetworkImpl(const CNNNetwork& network, } std::shared_ptr -MockPlugin::ImportNetworkImpl(std::istream& networkModel, - const std::map& config) { +MockPlugin::ImportNetwork(std::istream& networkModel, + const std::map& config) { if (_target) { return _target->ImportNetwork(networkModel, config); } else { @@ -80,9 +80,9 @@ MockPlugin::ImportNetworkImpl(std::istream& networkModel, } std::shared_ptr -MockPlugin::ImportNetworkImpl(std::istream& networkModel, - const std::shared_ptr& context, - const std::map& config) { +MockPlugin::ImportNetwork(std::istream& networkModel, + const std::shared_ptr& context, + const std::map& config) { if (_target) { return _target->ImportNetwork(networkModel, context, config); } else { diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.hpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.hpp index c01a8a8d175..c2654061abd 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.hpp +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.hpp @@ -35,13 +35,13 @@ public: const std::map &config) override; std::shared_ptr - ImportNetworkImpl(std::istream& 
networkModel, + ImportNetwork(std::istream& networkModel, const std::map& config) override; std::shared_ptr - ImportNetworkImpl(std::istream& networkModel, - const std::shared_ptr& context, - const std::map& config) override; + ImportNetwork(std::istream& networkModel, + const std::shared_ptr& context, + const std::map& config) override; InferenceEngine::Parameter GetMetric(const std::string& name, const std::map& options) const override; diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp index c0c1686ca55..f70f653efe2 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp @@ -51,6 +51,12 @@ public: const FakeQuantizeOnData& fqOnData1, const FakeQuantizeOnData& fqOnData2); + static std::shared_ptr getOriginalWithIntermediateAvgPool( + const ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const FakeQuantizeOnData& fqOnData1, + const FakeQuantizeOnData& fqOnData2); + static std::shared_ptr getOriginalWithSplitedIntermediate( const ngraph::element::Type precision, const ngraph::Shape& inputShape, @@ -134,6 +140,7 @@ public: const std::string& neighborType, const std::string& additionalLayer); + // TODO: refactor: dequantizationBefore2 <=> dequantizationOperations2 static std::shared_ptr getReferenceWithIntermediate( const ngraph::element::Type precision, const ngraph::Shape& inputShape, @@ -142,6 +149,18 @@ public: const FakeQuantizeOnData& fqOnData2, const ngraph::element::Type precisionBeforeOp, const DequantizationOperations& dequantizationBefore1, + const DequantizationOperations& dequantizationOperations2, + const ngraph::element::Type precisionAfterOperation, + const DequantizationOperations& dequantizationOperations1, + const DequantizationOperations& dequantizationBefore2); + + static std::shared_ptr getReferenceWithIntermediateAvgPool( + const ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const FakeQuantizeOnData& fqOnData1, + const FakeQuantizeOnData& fqOnData2, + const ngraph::element::Type precisionBeforeOp, + const DequantizationOperations& dequantizationBefore1, const DequantizationOperations& dequantizationBefore2, const ngraph::element::Type precisionAfterOperation, const DequantizationOperations& dequantizationOperations1, diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp index 15108abb73e..37387977eb7 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp @@ -272,6 +272,58 @@ std::shared_ptr ConcatFunction::getOriginalWithIntermediate( return function; } +std::shared_ptr ConcatFunction::getOriginalWithIntermediateAvgPool( + const ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const FakeQuantizeOnData& fqOnData1, + const FakeQuantizeOnData& fqOnData2) { + const std::vector inputShape1 = { inputShape[0], inputShape[1], inputShape[2] - 2, inputShape[3] - 2 }; + + const auto input1 = std::make_shared(precision, ngraph::Shape(inputShape1)); + input1->set_friendly_name("input1"); + const auto 
fakeQuantize1 = makeFakeQuantize(input1, precision, fqOnData1); + fakeQuantize1->set_friendly_name("fakeQuantize1"); + + const std::vector inputShape2 = { inputShape[0], inputShape[1], inputShape[2], inputShape[3] }; + const auto input2 = std::make_shared(precision, ngraph::Shape(inputShape2)); + input2->set_friendly_name("input2"); + + const auto fakeQuantize2 = makeFakeQuantize(input2, precision, fqOnData2); + fakeQuantize2->set_friendly_name("fakeQuantize2"); + + std::shared_ptr intermediateOp = makeMaxPool(fakeQuantize2->output(0), { 3, 3 }); + intermediateOp->set_friendly_name("intermediate"); + + const std::shared_ptr concat = std::make_shared( + ngraph::OutputVector{ fakeQuantize1->output(0), intermediateOp->output(0) }, 1); + concat->set_friendly_name("concat"); + + auto& rtInfo = concat->get_rt_info(); + rtInfo["Variant::std::string"] = std::make_shared>("concat"); + + std::shared_ptr parent2 = std::make_shared( + intermediateOp, + Strides{ 1, 1 }, + Shape{ 1, 1 }, + Shape{ 0, 0 }, + Shape{ 2, 2 }, + true, + op::RoundingType::FLOOR); + parent2->set_friendly_name("avgPool"); + + ngraph::ResultVector results { + std::make_shared(concat), + std::make_shared(parent2) + }; + + std::shared_ptr function = std::make_shared( + results, + ngraph::ParameterVector{ input1, input2 }, + "ConcatWithIntermediateTransformation"); + + return function; +} + std::shared_ptr ConcatFunction::getOriginalWithSplitedIntermediate( const ngraph::element::Type precision, const ngraph::Shape& inputShape, @@ -1056,6 +1108,77 @@ std::shared_ptr ConcatFunction::getReferenceWithIntermediate( return function; } +std::shared_ptr ConcatFunction::getReferenceWithIntermediateAvgPool( + const ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const FakeQuantizeOnData& fqOnData1, + const FakeQuantizeOnData& fqOnData2, + const ngraph::element::Type precisionBeforeOp, + const DequantizationOperations& dequantizationBefore1, + const DequantizationOperations& dequantizationBefore2, + const ngraph::element::Type precisionAfterOperation, + const DequantizationOperations& dequantizationAfter1, + const DequantizationOperations& dequantizationAfter2) { + const std::vector inputShape1 = { inputShape[0], inputShape[1], inputShape[2] - 2, inputShape[3] - 2}; + const auto input1 = std::make_shared(precision, ngraph::Shape(inputShape1)); + input1->set_friendly_name("input1"); + + const auto fakeQuantize1 = makeFakeQuantizeTypeRelaxed(input1, precision, fqOnData1); + low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(fakeQuantize1, precisionBeforeOp); + fakeQuantize1->set_friendly_name("fakeQuantize1"); + const auto deqBefore1 = makeDequantization(fakeQuantize1, dequantizationBefore1); + + const std::vector inputShape2 = { inputShape[0], inputShape[1], inputShape[2], inputShape[3] }; + const auto input2 = std::make_shared(precision, ngraph::Shape(inputShape2)); + input2->set_friendly_name("input2"); + + const auto fakeQuantize2 = makeFakeQuantizeTypeRelaxed(input2, precision, fqOnData2); + low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(fakeQuantize2, precisionBeforeOp); + fakeQuantize2->set_friendly_name("fakeQuantize2"); + const auto deqBefore2 = makeDequantization(fakeQuantize2, dequantizationBefore2); + + std::shared_ptr intermediateOp = makeMaxPool(deqBefore2, { 3, 3 }); + intermediateOp->set_friendly_name("intermediate"); + + const std::shared_ptr concat = std::make_shared( + ngraph::OutputVector { deqBefore1, intermediateOp }, + 1); + concat->set_friendly_name("concat"); + 
low_precision::NetworkHelper::setOutDataPrecision(concat, precisionAfterOperation); + + auto& rtInfo = concat->get_rt_info(); + rtInfo["Variant::std::string"] = std::make_shared>("concat"); + + const std::shared_ptr parent1 = makeDequantization(concat, dequantizationAfter1); + parent1->set_friendly_name("concat"); + + std::shared_ptr parent2 = std::make_shared>( + std::vector{ element::f32, element::f32 }, + std::vector{ element::f32 }, + ngraph::op::TemporaryReplaceOutputType(intermediateOp, element::f32).get(), + Strides{ 1, 1 }, + Shape{ 1, 1 }, + Shape{ 0, 0 }, + Shape{ 2, 2 }, + true, + op::RoundingType::FLOOR); + parent2->set_friendly_name("avgPool"); + + parent2 = makeDequantization(parent2, dequantizationAfter2); + + ngraph::ResultVector results { + std::make_shared(parent1), + std::make_shared(parent2) + }; + + std::shared_ptr function = std::make_shared( + results, + ngraph::ParameterVector{ input1, input2 }, + "ConcatWithIntermediateTransformation"); + + return function; +} + std::shared_ptr ConcatFunction::getReferenceWithSplitedIntermediate( const ngraph::element::Type precision, const ngraph::Shape& inputShape, diff --git a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp index 0945510d7a0..2d26c7bd0e2 100644 --- a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp +++ b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp @@ -149,29 +149,6 @@ TEST_F(InferenceEnginePluginInternalTest, failToSetNotAllocatedBlob) { } } -TEST_F(InferenceEnginePluginInternalTest, executableNetworkInternalExportsMagicAndName) { - std::stringstream strm; - ASSERT_NO_THROW(mockIExeNetworkInternal->WrapOstreamExport(strm)); - ExportMagic actualMagic = {}; - strm.read(actualMagic.data(), actualMagic.size()); - ASSERT_EQ(exportMagic, actualMagic); - std::string pluginName; - std::getline(strm, pluginName); - ASSERT_EQ(pluginId, pluginName); - std::string exportedString; - std::getline(strm, exportedString); - ASSERT_EQ(mockIExeNetworkInternal->exportString, exportedString); -} - -TEST_F(InferenceEnginePluginInternalTest, pluginInternalEraseMagicAndNameWhenImports) { - std::stringstream strm; - ASSERT_NO_THROW(mockIExeNetworkInternal->WrapOstreamExport(strm)); - ASSERT_NO_THROW(mock_plugin_impl->ImportNetwork(strm, {})); - ASSERT_EQ(mockIExeNetworkInternal->exportString, mock_plugin_impl->importedString); - mock_plugin_impl->importedString = {}; -} - - TEST(InferencePluginTests, throwsOnUninitializedGetVersion) { InferencePlugin plg; ASSERT_THROW(plg.GetVersion(), Exception); diff --git a/inference-engine/thirdparty/clDNN/src/program.cpp b/inference-engine/thirdparty/clDNN/src/program.cpp index ef988cb12da..97eacdf9ae1 100644 --- a/inference-engine/thirdparty/clDNN/src/program.cpp +++ b/inference-engine/thirdparty/clDNN/src/program.cpp @@ -1176,9 +1176,6 @@ void program_impl::set_layout_optimizer_attributes(layout_optimizer& lo) { size_t opt_deconv_layers_b_fs_zyx_fsv16 = 0; size_t total_crop_layers = 0; - size_t weighted_sum_feature_size = 0; - size_t weight_sum = 0; - for (auto& node : get_processing_order()) { auto &prim = *node; if (prim.type() == cldnn::convolution::type_id()) { @@ -1324,35 +1321,4 @@ void program_impl::set_layout_optimizer_attributes(layout_optimizer& lo) { if (should_use_bs_fs_yx_bsv16_fsv16) lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 1); - - - // This 
is to avoid using fsv16 for shallow-feature networks. - // This may not be exactly same as real execution graph as layer fusing is not done yet, - // but it is a reasonable approximation. - // Check the expected network efficiency after setting layer optimization attributes. - // If network depth is shallow, it is faster with fsv4. - for (auto& node : get_processing_order()) { - auto &prim = *node; - - if (prim.is_in_data_flow() && prim.type() == cldnn::convolution::type_id()) { - size_t num_feature = prim.get_output_layout().size.feature.vector()[0]; - size_t num_spatial = 1; - for (auto s : prim.get_output_layout().size.spatial.vector()) - num_spatial *= s; - - if (lo.get_preferred_format(prim) != format::b_fs_yx_fsv4) { - weight_sum += num_spatial; - weighted_sum_feature_size += num_spatial * num_feature; - } - } - } - - size_t weighted_average_feature_depth = weighted_sum_feature_size / std::max(weight_sum, static_cast(1)); - - // Need to confirm that weighted_average_feature_depth > 1 to keep unittest behavior. - if (is_quantized_int8_model && weighted_average_feature_depth < 8 && weighted_average_feature_depth > 1) { - lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::fs_b_yx_fsv32_network, 0); - lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::b_fs_yx_fsv16_network, 0); - lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 0); - } } diff --git a/inference-engine/thirdparty/mkl-dnn b/inference-engine/thirdparty/mkl-dnn index aa47fcd2a03..87516e47dae 160000 --- a/inference-engine/thirdparty/mkl-dnn +++ b/inference-engine/thirdparty/mkl-dnn @@ -1 +1 @@ -Subproject commit aa47fcd2a03ee5caac119b6417bc66abe3154aab +Subproject commit 87516e47dae71fc9c326d0f3685c1572c740e127 diff --git a/model-optimizer/automation/package_BOM.txt b/model-optimizer/automation/package_BOM.txt index be0dc1c37e0..e7c779543cd 100644 --- a/model-optimizer/automation/package_BOM.txt +++ b/model-optimizer/automation/package_BOM.txt @@ -57,6 +57,7 @@ extensions/back/ReverseInputChannels.py extensions/back/RNNSequenceTypeRename.py extensions/back/ScalarConstNormalize.py extensions/back/SelectBroadcast.py +extensions/back/ShapeOfConstFolding.py extensions/back/ShuffleChannelPatternOptimization.py extensions/back/ShufflenetReLUReorder.py extensions/back/SpecialNodesFinalization.py diff --git a/model-optimizer/extensions/back/ShapeOfConstFolding.py b/model-optimizer/extensions/back/ShapeOfConstFolding.py new file mode 100644 index 00000000000..b97b46fc35d --- /dev/null +++ b/model-optimizer/extensions/back/ShapeOfConstFolding.py @@ -0,0 +1,29 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from mo.back.replacement import BackReplacementPattern +from mo.graph.graph import Graph, rename_nodes +from mo.ops.const import Const + + +class ShapeOfConstFolding(BackReplacementPattern): + """ + The transformation folds ShapeOf(Const) -> Const + """ + enabled = True + + def run_after(self): + from extensions.back.MatMulNormalizer import SmartReshape_HC_Reshape_MatMul + return [SmartReshape_HC_Reshape_MatMul] + + def find_and_replace_pattern(self, graph: Graph): + for shapeof_node in graph.get_op_nodes(op='ShapeOf'): + in_node = shapeof_node.in_port(0).get_source().node + if in_node.op == 'Const': + shapeof_node.in_port(0).disconnect() + shape_name = shapeof_node.soft_get('name', shapeof_node.id) + shape_value = shapeof_node.out_port(0).data.get_value() + shape_const_node = 
Const(graph, {'name': shape_name + '/ExecutionConstValue', + 'value': shape_value}).create_node() + shapeof_node.out_port(0).get_connection().set_source(shape_const_node.out_port(0)) + rename_nodes([(shapeof_node, shape_name + '/TBD'), (shape_const_node, shape_name)]) diff --git a/model-optimizer/extensions/front/onnx/top_k_ext.py b/model-optimizer/extensions/front/onnx/top_k_ext.py index bae2bec3176..e073c593531 100644 --- a/model-optimizer/extensions/front/onnx/top_k_ext.py +++ b/model-optimizer/extensions/front/onnx/top_k_ext.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import numpy as np + from extensions.ops.topk import TopK from mo.front.extractor import FrontExtractorOp from mo.front.onnx.extractors.utils import onnx_attr, onnx_node_has_attr @@ -18,7 +20,8 @@ class TopKExtractor(FrontExtractorOp): TopK-11 (k as input, sorting manipulations through `sorted` and `largest` attrs) """ attrs = { - 'axis': onnx_attr(node, 'axis', 'i', default=-1) + 'axis': onnx_attr(node, 'axis', 'i', default=-1), + 'index_element_type': np.int64 } if onnx_node_has_attr(node, 'k'): attrs['k'] = onnx_attr(node, 'k', 'i') diff --git a/model-optimizer/extensions/middle/ONNXResize11ToInterpolate.py b/model-optimizer/extensions/middle/ONNXResize11ToInterpolate.py index ffd3c27eebd..c462d45285f 100644 --- a/model-optimizer/extensions/middle/ONNXResize11ToInterpolate.py +++ b/model-optimizer/extensions/middle/ONNXResize11ToInterpolate.py @@ -34,12 +34,13 @@ def replace_resize(graph: Graph, resize: Node): log.warning('The input shape is not 4D or 5D for op with name {}'.format(resize_name)) return - num_of_inputs = len([port for port in resize.in_ports().values() if not port.disconnected()]) - assert num_of_inputs in {3, 4}, \ - "Number of inputs of ONNXResize (with name {}) should be equal to 3 or 4".format(resize_name) + assert (resize.is_in_port_connected(0) and (resize.is_in_port_connected(2) or resize.is_in_port_connected(3))), \ + "Scales or sizes inputs must be connected to Node {} with op {}.".format(resize.soft_get("name", resize.id), + resize.op) assert resize.soft_get('coordinate_transformation_mode') != 'tf_crop_and_resize', \ - 'Mode tf_crop_and_resize is not supported for op {} with name {}'.format(resize.op, resize_name) + 'Mode tf_crop_and_resize is not supported for op {} with name {}'.format(resize.op, + resize.soft_get("name", resize.id)) layout = graph.graph['layout'] @@ -74,7 +75,7 @@ def replace_resize(graph: Graph, resize: Node): {'name': resize_name + '/axis', 'value': int64_array(np.arange(begin_dim, end_dim))}).create_node() - shape_calculation_mode = 'scales' if num_of_inputs == 3 else 'sizes' + shape_calculation_mode = 'sizes' if resize.is_in_port_connected(3) else 'scales' interpolate_node = Interpolate(graph, {'version': 'opset4', 'mode': convert_mode(resize.mode), @@ -96,7 +97,7 @@ def replace_resize(graph: Graph, resize: Node): dst_dtype = np.float32 # even if data_type=FP16 use float32 for shape values - if num_of_inputs == 3: + if not resize.is_in_port_connected(3): cast_shape_to_float = Cast(graph, {'dst_type': dst_dtype}).create_node() mul_node = Mul(graph, {'name': resize_name + '/Mul'}).create_node() shape_of.out_port(0).connect(cast_shape_to_float.in_port(0)) diff --git a/model-optimizer/extensions/ops/ONNXResize11.py b/model-optimizer/extensions/ops/ONNXResize11.py index 5ef8d7f6ac8..5476087a3d4 100644 --- a/model-optimizer/extensions/ops/ONNXResize11.py +++ b/model-optimizer/extensions/ops/ONNXResize11.py @@ -35,14 
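The ShapeOfConstFolding pass introduced above replaces ShapeOf(Const) with a precomputed Const. A tiny numpy illustration of why the fold is safe (illustration only, not Model Optimizer code): the shape of a compile-time constant is itself a compile-time constant.

```python
import numpy as np

# ShapeOf over a constant folds to the constant's static shape as int64 data,
# which is exactly what the new Const node carries.
const_value = np.random.rand(1, 3, 30, 30).astype(np.float32)
folded = np.array(const_value.shape, dtype=np.int64)  # stands in for ShapeOf(Const)
assert folded.tolist() == [1, 3, 30, 30]
```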
+35,15 @@ class ONNXResize11Op(Op): if input_shape is None: return - num_of_in_nodes = len(node.in_nodes()) - assert num_of_in_nodes in {3, 4}, \ - "Node {} with op {} number of inputs must be equal to 3 or 4.".format(node.name, node.op) + assert (node.is_in_port_connected(0) and (node.is_in_port_connected(2) or node.is_in_port_connected(3))), \ + "One of the scales or sizes inputs must be connected to Node {} with op {}.".format(node.soft_get("name", node.id), + node.op) assert node.coordinate_transformation_mode != 'tf_crop_and_resize', \ - 'Mode tf_crop_and_resize is not supported for op {} with name {}'.format(node.op, node.name) + 'Mode tf_crop_and_resize is not supported for op {} with name {}'.format(node.op, + node.soft_get("name", node.id)) - if num_of_in_nodes == 3: + if not node.is_in_port_connected(3): # i.e. input 'sizes' is not given input2_value = node.in_port(2).data.get_value() assert input2_value is not None, \ @@ -53,7 +54,7 @@ class ONNXResize11Op(Op): # i.e. input 'sizes' is given sizes = node.in_port(3).data.get_value() assert sizes is not None, \ - "Node {} with op {} has no value in input port 3".format(node.name, node.op) + "Node {} with op {} has no value in input port 3".format(node.soft_get("name", node.id), node.op) output_shape = input_shape.copy() spatial_dimension_indices = range(2, len(input_shape)) output_shape[spatial_dimension_indices] = int64_array(sizes)[2:] diff --git a/model-optimizer/mo/front/caffe/loader.py b/model-optimizer/mo/front/caffe/loader.py index 2ffca364fb6..14497c6108d 100644 --- a/model-optimizer/mo/front/caffe/loader.py +++ b/model-optimizer/mo/front/caffe/loader.py @@ -130,10 +130,16 @@ def load_caffe_proto_model(caffe_pb2, proto_path: str, model_path: [str, None] = map = mmap.mmap(infile.fileno(), 0, access=mmap.ACCESS_READ) model.MergeFromString(map) except Exception as e: + third_point = '' + if api_implementation._implementation_type == 'python': + third_point = ' 3. Python protobuf implementation was used. Some models can\'t be converted ' + \ + ' in this configuration. Please, use Python version with existing cpp implementation of ' + \ + 'protobuf library or build it by yourself\n' + refer_to_faq_msg(103) log.error('Exception message: {}\n\n'.format(e) + ' Possible reasons:\n' + ' 1. {} does not exist\n'.format(model_path) + - ' 2. {} does not have a valid structure\n'.format(model_path), extra={'framework_error': True}) + ' 2. 
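The ONNXResize11 changes above switch from counting connected inputs to checking whether port 3 ('sizes') is connected when choosing the shape calculation mode. A short numpy sketch of the resulting shape rule, under the assumption that spatial dims come either directly from 'sizes' or from input_shape * scales (the exact rounding and epsilon handling in MO may differ):

```python
import numpy as np

# 'sizes' connected: spatial dims are taken verbatim from input port 3.
# Otherwise: spatial dims are input_shape * scales (port 2).
def resize11_output_shape(input_shape, scales=None, sizes=None):
    input_shape = np.array(input_shape, dtype=np.int64)
    if sizes is not None:
        out = input_shape.copy()
        out[2:] = np.array(sizes, dtype=np.int64)[2:]
        return out
    return np.floor(input_shape * np.array(scales) + 1e-5).astype(np.int64)

print(resize11_output_shape([1, 3, 100, 150], scales=[1.0, 1.0, 2.0, 350 / 150]))  # [  1   3 200 350]
print(resize11_output_shape([1, 3, 100, 150], sizes=[1, 3, 200, 350]))             # [  1   3 200 350]
```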
{} does not have a valid structure\n'.format(model_path) + third_point, + extra={'framework_error': True}) raise FrameworkError('Model Optimizer is not able to parse {}'.format(model_path)) from e return proto, model diff --git a/model-optimizer/mo/middle/passes/fusing/fuse_grouped_conv.py b/model-optimizer/mo/middle/passes/fusing/fuse_grouped_conv.py index afb3de7648f..bc256ca4990 100644 --- a/model-optimizer/mo/middle/passes/fusing/fuse_grouped_conv.py +++ b/model-optimizer/mo/middle/passes/fusing/fuse_grouped_conv.py @@ -87,7 +87,10 @@ def concat_convolutions(graph: Graph, start_node: Node, last_node: Node): weights_value = np.array(weights_node.value) bias_value = np.array(bias_node.value) if has_biases else None - feature_dim = 3 if graph.graph['layout'] == 'NHWC' else 0 + # gconv.get_weights_permute.perm contains permutation indices + # where feature dimension is set to zero position, so 0 value + # in gconv.get_weights_permute.inv indicates original feature dimension index + feature_dim = np.where(gconv.get_weights_permute.inv == 0)[0][0] for conv in conv_nodes[1:]: weights_value = np.concatenate((weights_value, conv.in_node(1).value), axis=feature_dim) diff --git a/model-optimizer/unit_tests/extensions/back/ShapeOfConstFolding_test.py b/model-optimizer/unit_tests/extensions/back/ShapeOfConstFolding_test.py new file mode 100644 index 00000000000..562c1416342 --- /dev/null +++ b/model-optimizer/unit_tests/extensions/back/ShapeOfConstFolding_test.py @@ -0,0 +1,170 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import unittest + +import numpy as np + +from extensions.back.ShapeOfConstFolding import ShapeOfConstFolding +from mo.front.common.partial_infer.eltwise import eltwise_infer +from mo.front.common.partial_infer.utils import int64_array +from mo.utils.ir_engine.compare_graphs import compare_graphs +from unit_tests.utils.graph import build_graph + +const_value = np.random.rand(1, 3, 30, 30) +nodes_attributes = {'input': {'shape': int64_array([1, 3, 30, 30]), 'type': 'Parameter', 'kind': 'op', + 'op': 'Parameter'}, + 'input_data': {'value': None, 'shape': int64_array([1, 3, 30, 30]), 'kind': 'data'}, + 'const': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': const_value}, + 'const_data': {'kind': 'data', 'value': const_value}, + 'shapeof_input': {'kind': 'op', 'op': 'ShapeOf', 'value': int64_array([1, 3, 30, 30])}, + 'shapeof_input_data': {'value': None, 'shape': None, 'kind': 'data', + 'value': int64_array([1, 3, 30, 30])}, + + 'shapeof_const': {'kind': 'op', 'op': 'ShapeOf', 'value': int64_array([1, 3, 30, 30])}, + 'shapeof_const_data': {'value': None, 'shape': None, 'kind': 'data', + 'value': int64_array([1, 3, 30, 30])}, + + 'mul': {'kind': 'op', 'op': 'Mul', 'infer': lambda node: eltwise_infer(node, lambda a, b: a * b)}, + 'mul_data': {'kind': 'data', 'value': np.array([1, 9, 900, 900])}, + 'last': {'kind': 'op', 'op': 'Result'}, + + # new nodes + 'new_const_shapeof': {'type': 'Const', 'kind': 'op', 'op': 'Const', + 'value': int64_array([1, 3, 30, 30])} + } + +const_value2 = np.random.rand(30, 30) +nodes_attributes2 = {'input': {'shape': int64_array([1, 3, 30, 30]), 'type': 'Parameter', 'kind': 'op', + 'op': 'Parameter'}, + 'input_data': {'value': None, 'shape': int64_array([1, 3, 30, 30]), 'kind': 'data'}, + + 'const': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': const_value2}, + 'const_data': {'kind': 'data', 'value': const_value2}, + + 'shapeof_const': {'kind': 'op', 'op': 'ShapeOf', 'value': int64_array([2700, 
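The fuse_grouped_conv fix above derives the weights' feature axis from the stored permutation instead of hard-coding 3 (NHWC) or 0. A small numpy check of that relation, using the perm/inv values that also appear in the unit test added later in this diff:

```python
import numpy as np

# perm moves the output-channel axis of the framework weights to position 0;
# inv is its inverse, so the original feature axis is the index where inv == 0.
perm = np.array([3, 2, 0, 1])
inv = np.array([2, 3, 1, 0])
assert np.array_equal(np.argsort(perm), inv)   # inv really is the inverse of perm
feature_dim = np.where(inv == 0)[0][0]
assert feature_dim == 3                        # matches the old NHWC constant
```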
30])}, + 'shapeof_const_data': {'value': int64_array([2700, 30]), 'shape': None, 'kind': 'data'}, + + 'gather': {'kind': 'op', 'op': 'Gather', 'batch_dims': 0}, + 'gather_data': {'kind': 'data'}, + + 'const_concat': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': [1]}, + 'const_concat_data': {'kind': 'data', 'value': [1]}, + 'concat': {'kind': 'op', 'op': 'Concat'}, + 'concat_data': {'kind': 'data'}, + + 'reshape': {'kind': 'op', 'op': 'Reshape'}, + 'reshape_data': {'kind': 'data'}, + + 'matmul': {'kind': 'op', 'op': 'MatMul'}, + 'matmul_data': {'kind': 'data'}, + 'last': {'kind': 'op', 'op': 'Result'}, + + # new nodes + 'new_const_shapeof': {'type': 'Const', 'kind': 'op', 'op': 'Const', + 'value': int64_array([2700, 30])}, + } + + +class ShapeOfConstFoldingTests(unittest.TestCase): + def test_const_with_one_output(self): + graph = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'shapeof_input'), + ('shapeof_input', 'shapeof_input_data'), + ('shapeof_input_data', 'mul'), + ('const', 'const_data'), + ('const_data', 'shapeof_const'), + ('shapeof_const', 'shapeof_const_data'), + ('shapeof_const_data', 'mul'), + ('mul', 'mul_data'), + ('mul_data', 'last')], + { + 'input': {'shape': int64_array([1, 3, 30, 30])}, + 'input_data': {'shape': int64_array([1, 3, 30, 30])}, + 'shapeof_input': {'value': int64_array([1, 3, 30, 30])}, + 'shapeof_input_data': {'value': int64_array([1, 3, 30, 30])}, + 'const': {'value': const_value}, + 'const_data': {'value': const_value}, + 'shapeof_const': {'value': int64_array([1, 3, 30, 30])}, + 'shapeof_const_data': {'value': int64_array([1, 3, 30, 30])}, + 'mul_data': {'value': int64_array([1, 9, 900, 900])}, + }, + nodes_with_edges_only=True) + + graph_ref = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'shapeof_input'), + ('shapeof_input', 'shapeof_input_data'), + ('shapeof_input_data', 'mul'), + ('new_const_shapeof', 'shapeof_const_data'), + ('shapeof_const_data', 'mul'), + ('mul', 'mul_data'), + ('mul_data', 'last')], + { + 'input': {'shape': int64_array([1, 3, 30, 30])}, + 'input_data': {'shape': int64_array([1, 3, 30, 30])}, + 'shapeof_input': {'value': int64_array([1, 3, 30, 30])}, + 'shapeof_input_data': {'value': int64_array([1, 3, 30, 30])}, + 'new_const_shapeof': {'value': int64_array([1, 3, 30, 30])}, + 'shapeof_const_data': {'value': int64_array([1, 3, 30, 30])}, + 'mul_data': {'value': int64_array([1, 9, 900, 900])}, + }, + nodes_with_edges_only=True) + ShapeOfConstFolding().find_and_replace_pattern(graph) + (flag, resp) = compare_graphs(graph, graph_ref, 'last') + self.assertTrue(flag, resp) + + def test_const_with_two_outputs(self): + graph = build_graph(nodes_attributes2, + [('input', 'input_data'), + ('input_data', 'reshape'), + ('const', 'const_data'), + ('const_data', 'shapeof_const'), + ('shapeof_const', 'shapeof_const_data'), + ('shapeof_const_data', 'gather'), + ('gather', 'gather_data'), + ('const_concat', 'const_concat_data'), + ('const_concat_data', 'concat'), + ('gather_data', 'concat'), + ('concat', 'reshape'), + ('reshape', 'reshape_data'), + ('reshape_data', 'matmul'), + ('const_data', 'matmul'), + ('matmul', 'matmul_data'), + ('matmul_data', 'last') + ], + { + 'input': {'shape': int64_array([1, 3, 30, 30])}, + 'input_data': {'shape': int64_array([1, 3, 30, 30])}, + 'shapeof_const': {'value': int64_array([2700, 30])}, + 'shapeof_const_data': {'value': int64_array([2700, 30])}, + }, + nodes_with_edges_only=True) + + graph_ref = build_graph(nodes_attributes2, + [('input', 
'input_data'), + ('input_data', 'reshape'), + ('new_const_shapeof', 'shapeof_const_data'), + ('shapeof_const_data', 'gather'), + ('gather', 'gather_data'), + ('const_concat', 'const_concat_data'), + ('const_concat_data', 'concat'), + ('gather_data', 'concat'), + ('concat', 'reshape'), + ('reshape', 'reshape_data'), + ('reshape_data', 'matmul'), + ('const', 'const_data'), + ('const_data', 'matmul'), + ('matmul', 'matmul_data'), + ('matmul_data', 'last')], + { + 'input': {'shape': int64_array([1, 3, 30, 30])}, + 'input_data': {'shape': int64_array([1, 3, 30, 30])}, + 'new_const_shapeof': {'value': int64_array([2700, 30])}, + 'shapeof_const_data': {'value': int64_array([2700, 30])}, + }, + nodes_with_edges_only=True) + ShapeOfConstFolding().find_and_replace_pattern(graph) + (flag, resp) = compare_graphs(graph, graph_ref, 'last') + self.assertTrue(flag, resp) diff --git a/model-optimizer/unit_tests/extensions/ops/ONNXResize11_test.py b/model-optimizer/unit_tests/extensions/ops/ONNXResize11_test.py index b38773b8279..aac234c56ba 100644 --- a/model-optimizer/unit_tests/extensions/ops/ONNXResize11_test.py +++ b/model-optimizer/unit_tests/extensions/ops/ONNXResize11_test.py @@ -33,10 +33,8 @@ graph_edges_sizes = [ ('input', 'input_data'), ('roi', 'roi_data'), ('sizes', 'sizes_data'), - ('scales', 'scales_data'), ('input_data', 'onnx_resize11', {'in': 0}), ('roi_data', 'onnx_resize11', {'in': 1}), - ('scales_data', 'onnx_resize11', {'in': 2}), ('sizes_data', 'onnx_resize11', {'in': 3}), ('onnx_resize11', 'onnx_resize11_data'), ('onnx_resize11_data', 'op_output'), @@ -125,3 +123,69 @@ class TestONNXResize11Op(unittest.TestCase): self.assertTrue(np.array_equal(graph.node['onnx_resize11_data']['shape'], int64_array(output_shape)), msg.format(scales, output_shape, graph.node['onnx_resize11_data']['shape'])) + + @generate(*[([1, 260, 100, 150], [1, 260, 200, 350], [1, 260, 200, 350], [1.0, 1.0, 1.0, 1.0]), + ([1, 260, 100, 150], [1, 260, 200, 350], [1, 1, 200, 350], [1.0, 1.0, 1.0, 1.0]), + ([5, 14, 300, 40], [5, 14, 140, 280], [1, 1, 140, 280], [1.0, 1.0, 1.0, 1.0]), + ([5, 14, 300, 40], [5, 14, 140, 280], [5, 14, 140, 280], [1.0, 1.0, 1.0, 1.0]), + ([1, 3, 260, 100, 150], [1, 3, 780, 200, 350], [1, 3, 780, 200, 350], [1.0, 1.0, 1.0, 1.0, 1.0]), + ([1, 3, 450, 100, 150], [1, 3, 260, 200, 350], [1, 3, 260, 200, 350], [1.0, 1.0, 1.0, 1.0, 1.0]), + ([5, 14, 1000, 300, 40], [5, 14, 500, 140, 280], [1, 1, 500, 140, 280], [1.0, 1.0, 1.0, 1.0, 1.0]), + ([5, 14, 1000, 300, 40], [5, 14, 500, 140, 280], [5, 14, 500, 140, 280], [1.0, 1.0, 1.0, 1.0, 1.0])]) + def test_onnx_resize11_using_sizes_without_roi_input(self, input_shape, output_shape, sizes, scales): + np_scales = np.array(scales) + np_sizes = int64_array(sizes) + graph = build_graph(nodes_attrs=graph_node_attrs_sizes, + edges=[('input', 'input_data'), + ('sizes', 'sizes_data'), + ('input_data', 'onnx_resize11', {'in': 0}), + ('sizes_data', 'onnx_resize11', {'in': 3}), + ('onnx_resize11', 'onnx_resize11_data'), + ('onnx_resize11_data', 'op_output'), + ], + update_attributes={ + 'input_data': {'shape': int64_array(input_shape)}, + 'scales': {'shape': int64_array(np_scales.shape), 'value': np_scales}, + 'scales_data': {'shape': int64_array(np_scales.shape), 'value': np_scales}, + 'sizes': {'shape': int64_array(np_sizes.shape), 'value': np_sizes}, + 'sizes_data': {'shape': int64_array(np_sizes.shape), 'value': np_sizes}, + }) + node = Node(graph, 'onnx_resize11') + ONNXResize11Op.onnx_resize_infer(node) + + msg = "ONNXResize11 infer failed for case: 
sizes={}, scales={}, expected_shape={}, actual_shape={}" + + self.assertTrue(np.array_equal(graph.node['onnx_resize11_data']['shape'], int64_array(output_shape)), + msg.format(sizes, scales, output_shape, graph.node['onnx_resize11_data']['shape'])) + + @generate(*[([1, 260, 100, 150], [1, 260, 200, 350], [1.0, 1.0, 2.0, 350 / 150]), + ([1, 3, 100, 200], [1, 3, 350, 150], [1.0, 1.0, 3.5, 150 / 200]), + ([5, 14, 300, 40], [5, 14, 140, 280], [1.0, 1.0, 140 / 300, 7.0]), + ([5, 14, 300, 40], [5, 14, 140, 560], [1.0, 1.0, 140 / 300, 14.0]), + ([1, 3, 260, 100, 150], [1, 3, 780, 200, 350], [1.0, 1.0, 3.0, 2.0, 350 / 150]), + ([1, 3, 450, 100, 150], [1, 3, 260, 200, 350], [1.0, 1.0, 260 / 450, 2.0, 350 / 150]), + ([5, 14, 1000, 300, 40], [5, 14, 500, 140, 280], [1.0, 1.0, 0.5, 140 / 300, 7.0]), + ([4, 3, 180, 1340], [4, 3, 60, 804], [1.0, 1.0, 0.33333334, 0.6]), + ([4, 3, 500, 180, 1340], [4, 3, 750, 60, 804], [1.0, 1.0, 1.5, 0.33333334, 0.6])]) + def test_onnx_resize_using_scales_without_roi(self, input_shape, output_shape, scales): + np_scales = np.array(scales) + graph = build_graph(nodes_attrs=graph_node_attrs_scales, + edges=[('input', 'input_data'), + ('scales', 'scales_data'), + ('input_data', 'onnx_resize11', {'in': 0}), + ('scales_data', 'onnx_resize11', {'in': 2}), + ('onnx_resize11', 'onnx_resize11_data'), + ('onnx_resize11_data', 'op_output'), + ], + update_attributes={ + 'input_data': {'shape': int64_array(input_shape)}, + 'scales': {'shape': int64_array(np_scales.shape), 'value': np_scales}, + 'scales_data': {'shape': int64_array(np_scales.shape), 'value': np_scales}, + }) + node = Node(graph, 'onnx_resize11') + ONNXResize11Op.onnx_resize_infer(node) + + msg = "ONNXResize11 infer failed for case: scales={}, expected_shape={}, actual_shape={}" + + self.assertTrue(np.array_equal(graph.node['onnx_resize11_data']['shape'], int64_array(output_shape)), + msg.format(scales, output_shape, graph.node['onnx_resize11_data']['shape'])) diff --git a/model-optimizer/unit_tests/mo/middle/passes/fusing/fuse_grouped_conv_test.py b/model-optimizer/unit_tests/mo/middle/passes/fusing/fuse_grouped_conv_test.py new file mode 100644 index 00000000000..8e8bc61077a --- /dev/null +++ b/model-optimizer/unit_tests/mo/middle/passes/fusing/fuse_grouped_conv_test.py @@ -0,0 +1,106 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import unittest + +import numpy as np + +from mo.front.common.partial_infer.utils import int64_array +from mo.graph.graph import Node +from mo.middle.passes.fusing.fuse_grouped_conv import grouped_convolutions_fusing +from mo.ops.op import PermuteAttrs +from mo.utils.ir_engine.compare_graphs import compare_graphs +from unit_tests.utils.graph import build_graph, result, connect, regular_op_with_shaped_data, regular_op, shaped_data, \ + valued_const_with_data, shaped_const_with_data, valued_data + +nodes = { + **regular_op_with_shaped_data('placeholder1', [1, 16, 10, 10], {'type': 'Parameter'}), + + **valued_const_with_data('split_1_axis', int64_array(1), {'type': 'Const'}), + **regular_op('split_1', {'type': 'Split', 'can_be_fused': True}), + **shaped_data('split_1_data1', [1, 4, 10, 10]), + **shaped_data('split_1_data2', [1, 4, 10, 10]), + **shaped_data('split_1_data3', [1, 4, 10, 10]), + **shaped_data('split_1_data4', [1, 4, 10, 10]), + + **shaped_const_with_data('split_2_in_const_weights', int64_array([3, 3, 4, 16]), {'type': 'Const'}), + **regular_op('split_2', {'type': 'Split'}), + **valued_data('split_2_data1', np.zeros([3, 3, 4, 4])), + 
**valued_data('split_2_data2', np.zeros([3, 3, 4, 4])), + **valued_data('split_2_data3', np.zeros([3, 3, 4, 4])), + **valued_data('split_2_data4', np.zeros([3, 3, 4, 4])), + + **regular_op_with_shaped_data('conv2d_1', [1, 4, 8, 8], + {'type': 'Convolution', 'channel_dims': np.array([1]), 'pad': np.array([2, 2]), + 'stride': np.array([2, 2]), + 'get_weights_permute': PermuteAttrs.Permutation(perm=int64_array([3, 2, 0, 1]), + inv=int64_array([2, 3, 1, 0])), + 'group': 1, 'output': 4, 'output_shape': [1, 4, 8, 8], 'can_be_fused': True}), + **regular_op_with_shaped_data('conv2d_2', [1, 4, 8, 8], + {'type': 'Convolution', 'pad': np.array([2, 2]), 'stride': np.array([2, 2]), + 'can_be_fused': True}), + **regular_op_with_shaped_data('conv2d_3', [1, 4, 8, 8], + {'type': 'Convolution', 'pad': np.array([2, 2]), 'stride': np.array([2, 2]), + 'can_be_fused': True}), + **regular_op_with_shaped_data('conv2d_4', [1, 4, 8, 8], + {'type': 'Convolution', 'pad': np.array([2, 2]), 'stride': np.array([2, 2]), + 'can_be_fused': True}), + + **regular_op_with_shaped_data('concat', [1, 16, 8, 8], {'type': 'Concat', 'axis': np.array(1)}), + + **regular_op_with_shaped_data('fused_group_conv', [1, 16, 8, 8], + {'type': 'Convolution', 'channel_dims': np.array([1]), 'pad': np.array([2, 2]), + 'stride': np.array([2, 2]), + 'get_weights_permute': PermuteAttrs.Permutation(perm=int64_array([3, 2, 0, 1]), + inv=int64_array([2, 3, 1, 0])), + 'group': 1, 'output': 4, 'output_shape': [1, 4, 8, 8], 'can_be_fused': True}), + **shaped_const_with_data('new_weights_const', int64_array([3, 3, 4, 16]), {'type': 'Const'}), + + **result('result') +} + + +class FuseGroupedConvTest(unittest.TestCase): + def test_fuse_grouped_conv(self): + graph = build_graph(nodes, [*connect('placeholder1', '0:split_1'), *connect('split_1_axis', '1:split_1'), + ('split_1', 'split_1_data1', {'out': 0}), + ('split_1', 'split_1_data2', {'out': 1}), + ('split_1', 'split_1_data3', {'out': 2}), + ('split_1', 'split_1_data4', {'out': 3}), + + *connect('split_2_in_const_weights', 'split_2'), + ('split_2', 'split_2_data1', {'out': 0}), + ('split_2', 'split_2_data2', {'out': 1}), + ('split_2', 'split_2_data3', {'out': 2}), + ('split_2', 'split_2_data4', {'out': 3}), + + ('split_1_data1', 'conv2d_1', {'in': 0}), + ('split_1_data2', 'conv2d_2', {'in': 0}), + ('split_1_data3', 'conv2d_3', {'in': 0}), + ('split_1_data4', 'conv2d_4', {'in': 0}), + + ('split_2_data1', 'conv2d_1', {'in': 1}), + ('split_2_data2', 'conv2d_2', {'in': 1}), + ('split_2_data3', 'conv2d_3', {'in': 1}), + ('split_2_data4', 'conv2d_4', {'in': 1}), + + *connect('conv2d_1', '0:concat'), + *connect('conv2d_2', '1:concat'), + *connect('conv2d_3', '2:concat'), + *connect('conv2d_4', '3:concat'), + + *connect('concat', 'result')]) + + graph_ref = build_graph(nodes, [*connect('placeholder1', '0:fused_group_conv'), + *connect('new_weights_const', '1:fused_group_conv'), + *connect('fused_group_conv', 'result')]) + + graph.graph['layout'] = 'NCHW' + grouped_convolutions_fusing(graph) + + (flag, resp) = compare_graphs(graph, graph_ref, 'result') + self.assertTrue(flag, resp) + + group_conv_node = Node(graph, 'conv2d_1') + group_conv_weights_shape = group_conv_node.in_node(1).shape + self.assertTrue((group_conv_weights_shape == int64_array([3, 3, 4, 16])).all()) diff --git a/ngraph/core/CMakeLists.txt b/ngraph/core/CMakeLists.txt index fa3a91c34f8..72272046b01 100644 --- a/ngraph/core/CMakeLists.txt +++ b/ngraph/core/CMakeLists.txt @@ -105,6 +105,7 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ 
FILES_MATCHING PATTERN "*.hpp" PATTERN "*.h" + PATTERN "*version.in.hpp" EXCLUDE ) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/include/ngraph/version.hpp DESTINATION ${NGRAPH_INSTALL_INCLUDE}/ngraph diff --git a/ngraph/core/include/ngraph/op/log.hpp b/ngraph/core/include/ngraph/op/log.hpp index 6c147d43314..997732b6116 100644 --- a/ngraph/core/include/ngraph/op/log.hpp +++ b/ngraph/core/include/ngraph/op/log.hpp @@ -16,8 +16,7 @@ namespace ngraph class NGRAPH_API Log : public util::UnaryElementwiseArithmetic { public: - static constexpr NodeTypeInfo type_info{"Log", 0}; - const NodeTypeInfo& get_type_info() const override { return type_info; } + NGRAPH_RTTI_DECLARATION; /// \brief Constructs a natural log operation. Log() = default; /// \brief Constructs a natural log operation. diff --git a/ngraph/core/include/ngraph/op/util/op_types.hpp b/ngraph/core/include/ngraph/op/util/op_types.hpp index b672f5518c4..6d162157ab5 100644 --- a/ngraph/core/include/ngraph/op/util/op_types.hpp +++ b/ngraph/core/include/ngraph/op/util/op_types.hpp @@ -34,6 +34,8 @@ namespace ngraph NGRAPH_API bool is_output(const ngraph::Node* node); NGRAPH_API + bool is_sink(const ngraph::Node* node); + NGRAPH_API bool is_constant(const ngraph::Node* node); NGRAPH_API bool is_commutative(const ngraph::Node* node); @@ -60,6 +62,8 @@ namespace ngraph NGRAPH_API bool is_output(const std::shared_ptr& node); NGRAPH_API + bool is_sink(const std::shared_ptr& node); + NGRAPH_API bool is_constant(const std::shared_ptr& node); NGRAPH_API bool is_commutative(const std::shared_ptr& node); diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/prelu.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/prelu.hpp index 34a420399ac..d546bbd79af 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/prelu.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/prelu.hpp @@ -24,12 +24,21 @@ namespace ngraph const Shape& arg_shape, const Shape& slope_shape) { - int cnt = 0; - for (size_t i = 0; i < shape_size(arg_shape); ++i) + Shape slope_shape_tmp = slope_shape; + const auto channel_dim_idx = arg_shape.size() > 1 ? 1 : 0; + if (slope_shape.size() == 1 && arg_shape[channel_dim_idx] == slope_shape[0]) { - out[i] = - arg[i] < T(0) ? T(arg[i] * slope[cnt++ % shape_size(slope_shape)]) : arg[i]; + Shape channel_slope_shape(arg_shape.size(), 1); + channel_slope_shape[channel_dim_idx] = slope_shape[0]; + std::swap(slope_shape_tmp, channel_slope_shape); } + autobroadcast_binop(arg, + slope, + out, + arg_shape, + slope_shape_tmp, + ngraph::op::AutoBroadcastType::NUMPY, + [](T x, T y) -> T { return x < T(0) ? 
T(x * y) : x; }); } } // namespace reference } // namespace runtime diff --git a/ngraph/core/src/op/log.cpp b/ngraph/core/src/op/log.cpp index 49a41c0d931..7743dbed6bd 100644 --- a/ngraph/core/src/op/log.cpp +++ b/ngraph/core/src/op/log.cpp @@ -13,7 +13,7 @@ using namespace std; using namespace ngraph; -constexpr NodeTypeInfo op::Log::type_info; +NGRAPH_RTTI_DEFINITION(op::v0::Log, "Log", 0); op::Log::Log(const Output& arg) : UnaryElementwiseArithmetic(arg) diff --git a/ngraph/core/src/op/reduce_sum.cpp b/ngraph/core/src/op/reduce_sum.cpp index 1eab2dcefb8..70c2b39ddc0 100644 --- a/ngraph/core/src/op/reduce_sum.cpp +++ b/ngraph/core/src/op/reduce_sum.cpp @@ -6,9 +6,11 @@ #include "itt.hpp" #include "ngraph/graph_util.hpp" #include "ngraph/op/broadcast.hpp" +#include "ngraph/op/util/op_types.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/reference/sum.hpp" #include "ngraph/shape_util.hpp" +#include "util/evaluate_helpers.hpp" using namespace std; using namespace ngraph; @@ -73,7 +75,15 @@ bool op::v1::ReduceSum::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { NGRAPH_OP_SCOPE(v1_ReduceSum_evaluate); - return reduce_sum::evaluate_sum(inputs[0], outputs[0], get_reduction_axes(), get_keep_dims()); + + NGRAPH_CHECK(inputs.size() == 2, + "The ReduceSum operation expects 2 input tensors. Got: ", + inputs.size()); + + const auto reduction_axes = get_normalized_axes_from_tensor( + inputs[1], get_input_partial_shape(0).rank(), get_friendly_name()); + + return reduce_sum::evaluate_sum(inputs[0], outputs[0], reduction_axes, get_keep_dims()); } bool op::v1::ReduceSum::has_evaluate() const diff --git a/ngraph/core/src/op/util/evaluate_helpers.hpp b/ngraph/core/src/op/util/evaluate_helpers.hpp new file mode 100644 index 00000000000..d67a212f157 --- /dev/null +++ b/ngraph/core/src/op/util/evaluate_helpers.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph/axis_set.hpp" +#include "ngraph/descriptor/tensor.hpp" +#include "ngraph/util.hpp" +#include "ngraph/validation_util.hpp" + +namespace ngraph +{ + /// \brief Extracts the tensor data and returns a set of normalized axes created out of it. + /// + /// \param tensor A pointer to a HostTensor object containing the raw axes data + /// \param rank Rank of an operator's input data tensor (used to normalize the axes) + /// \param node_description An identifier of the operator's node (used to report errors) + /// + /// \return Normalized (positive only) axes as an AxisSet object. 
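The prelu.hpp rewrite earlier in this hunk maps a 1-D per-channel slope onto the channel axis and then relies on NUMPY-style broadcasting instead of cycling through the slope values. A numpy sketch of the same idea, assuming (as the reference does) that the channel axis is 1 for inputs of rank greater than 1:

```python
import numpy as np

# Per-channel PReLU: reshape a length-C slope to [1, C, 1, ...] so that plain
# NUMPY broadcasting applies one slope value per channel.
def prelu(arg, slope):
    channel_axis = 1 if arg.ndim > 1 else 0
    if slope.ndim == 1 and arg.shape[channel_axis] == slope.shape[0]:
        shape = [1] * arg.ndim
        shape[channel_axis] = slope.shape[0]
        slope = slope.reshape(shape)
    return np.where(arg < 0, arg * slope, arg)

x = np.array([[[-1.0, 2.0], [-3.0, 4.0]]])   # shape (1, 2, 2), 2 channels
print(prelu(x, np.array([0.5, 2.0])))        # [[[-0.5  2. ] [-6.   4. ]]]
```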
+ AxisSet get_normalized_axes_from_tensor(const HostTensorPtr tensor, + const ngraph::Rank& rank, + const std::string& node_description) + { + const auto axes_vector = host_tensor_2_vector(tensor); + const auto normalized_axes = ngraph::normalize_axes(node_description, axes_vector, rank); + return AxisSet{normalized_axes}; + } +} // namespace ngraph diff --git a/ngraph/core/src/op/util/op_types.cpp b/ngraph/core/src/op/util/op_types.cpp index f0852233ec7..354c605ced1 100644 --- a/ngraph/core/src/op/util/op_types.cpp +++ b/ngraph/core/src/op/util/op_types.cpp @@ -76,6 +76,11 @@ bool ngraph::op::is_output(const ngraph::Node* node) return dynamic_cast(node) != nullptr; } +bool ngraph::op::is_sink(const ngraph::Node* node) +{ + return dynamic_cast(node) != nullptr; +} + bool ngraph::op::is_constant(const ngraph::Node* node) { return dynamic_cast(node) != nullptr; @@ -134,6 +139,10 @@ bool ngraph::op::is_output(const std::shared_ptr& node) { return is_output(node.get()); } +bool ngraph::op::is_sink(const std::shared_ptr& node) +{ + return is_sink(node.get()); +} bool ngraph::op::is_constant(const std::shared_ptr& node) { return is_constant(node.get()); diff --git a/ngraph/frontend/frontend_manager/include/frontend_manager/frontend_exceptions.hpp b/ngraph/frontend/frontend_manager/include/frontend_manager/frontend_exceptions.hpp new file mode 100644 index 00000000000..46fa2be0888 --- /dev/null +++ b/ngraph/frontend/frontend_manager/include/frontend_manager/frontend_exceptions.hpp @@ -0,0 +1,123 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "frontend_manager_defs.hpp" +#include "ngraph/check.hpp" + +namespace ngraph +{ + namespace frontend + { + class FRONTEND_API GeneralFailure : public CheckFailure + { + public: + GeneralFailure(const CheckLocInfo& check_loc_info, + const std::string& context, + const std::string& explanation) + : CheckFailure(check_loc_info, + "FrontEnd API failed with GeneralFailure: " + context, + explanation) + { + } + }; + + class FRONTEND_API InitializationFailure : public CheckFailure + { + public: + InitializationFailure(const CheckLocInfo& check_loc_info, + const std::string& context, + const std::string& explanation) + : CheckFailure(check_loc_info, + "FrontEnd API failed with InitializationFailure: " + context, + explanation) + { + } + }; + + class FRONTEND_API OpValidationFailure : public CheckFailure + { + public: + OpValidationFailure(const CheckLocInfo& check_loc_info, + const std::string& context, + const std::string& explanation) + : CheckFailure(check_loc_info, + "FrontEnd API failed with OpValidationFailure: " + context, + explanation) + { + } + }; + + class FRONTEND_API OpConversionFailure : public CheckFailure + { + public: + OpConversionFailure(const CheckLocInfo& check_loc_info, + const std::string& context, + const std::string& explanation) + : CheckFailure(check_loc_info, + "FrontEnd API failed with OpConversionFailure: " + context, + explanation) + { + } + }; + + class FRONTEND_API NotImplementedFailure : public CheckFailure + { + public: + NotImplementedFailure(const CheckLocInfo& check_loc_info, + const std::string& context, + const std::string& explanation) + : CheckFailure(check_loc_info, + "FrontEnd API failed with NotImplementedFailure: " + context, + explanation) + { + } + }; + +/// \brief Macro to check whether a boolean condition holds. +/// \param cond Condition to check +/// \param ... 
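get_normalized_axes_from_tensor above turns the raw axes tensor into non-negative axes before ReduceSum::evaluate reduces over them. A short numpy sketch of the normalization, assuming negative axes wrap around the input rank and duplicates collapse into a set:

```python
import numpy as np

# Normalize possibly-negative reduction axes against the rank, then reduce -
# mirroring how ReduceSum::evaluate now uses its second input.
def normalize_axes(axes, rank):
    return sorted({a + rank if a < 0 else a for a in axes})

data = np.arange(24, dtype=np.float32).reshape(2, 3, 4)
axes = normalize_axes([-1, 0], data.ndim)                    # [0, 2]
print(np.sum(data, axis=tuple(axes), keepdims=True).shape)   # (1, 3, 1)
```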
Additional error message info to be added to the error message via the `<<` +/// stream-insertion operator. Note that the expressions here will be evaluated lazily, +/// i.e., only if the `cond` evalutes to `false`. +/// \throws ::ngraph::frontend::GeneralFailure if `cond` is false. +#define FRONT_END_GENERAL_CHECK(...) \ + NGRAPH_CHECK_HELPER(::ngraph::frontend::GeneralFailure, "", __VA_ARGS__) + +/// \brief Macro to check whether a boolean condition holds. +/// \param cond Condition to check +/// \param ... Additional error message info to be added to the error message via the `<<` +/// stream-insertion operator. Note that the expressions here will be evaluated lazily, +/// i.e., only if the `cond` evalutes to `false`. +/// \throws ::ngraph::frontend::InitializationFailure if `cond` is false. +#define FRONT_END_INITIALIZATION_CHECK(...) \ + NGRAPH_CHECK_HELPER(::ngraph::frontend::InitializationFailure, "", __VA_ARGS__) + +/// \brief Macro to check whether a boolean condition holds. +/// \param cond Condition to check +/// \param ... Additional error message info to be added to the error message via the `<<` +/// stream-insertion operator. Note that the expressions here will be evaluated lazily, +/// i.e., only if the `cond` evalutes to `false`. +/// \throws ::ngraph::frontend::OpConversionFailure if `cond` is false. +#define FRONT_END_OP_CONVERSION_CHECK(...) \ + NGRAPH_CHECK_HELPER(::ngraph::frontend::OpConversionFailure, "", __VA_ARGS__) + +/// \brief Assert macro. +/// \param NAME Name of the function that is not implemented +/// \throws ::ngraph::frontend::NotImplementedFailure +#define FRONT_END_NOT_IMPLEMENTED(NAME) \ + NGRAPH_CHECK_HELPER(::ngraph::frontend::NotImplementedFailure, \ + "", \ + false, \ + #NAME " is not implemented for this FrontEnd class") + +/// \brief Assert macro. 
+/// \param MSG Error message +/// \throws ::ngraph::frontend::GeneralFailure +#define FRONT_END_THROW(MSG) FRONT_END_GENERAL_CHECK(false, MSG) + + } // namespace frontend +} // namespace ngraph \ No newline at end of file diff --git a/ngraph/frontend/frontend_manager/src/frontend_manager.cpp b/ngraph/frontend/frontend_manager/src/frontend_manager.cpp index c1a864ffd6e..037a2522523 100644 --- a/ngraph/frontend/frontend_manager/src/frontend_manager.cpp +++ b/ngraph/frontend/frontend_manager/src/frontend_manager.cpp @@ -5,20 +5,13 @@ #include #include +#include "frontend_manager/frontend_exceptions.hpp" #include "frontend_manager/frontend_manager.hpp" #include "plugin_loader.hpp" using namespace ngraph; using namespace ngraph::frontend; -#define FRONT_END_NOT_IMPLEMENTED(NAME) \ - throw std::runtime_error(#NAME " is not implemented for this FrontEnd class") -#define FRONT_END_ASSERT(EXPRESSION) \ - { \ - if (!(EXPRESSION)) \ - throw "AssertionFailed"; \ - } - //----------- FrontEndManager --------------------------- class FrontEndManager::Impl { @@ -32,7 +25,8 @@ public: FrontEnd::Ptr loadByFramework(const std::string& framework, FrontEndCapFlags fec) { - FRONT_END_ASSERT(m_factories.count(framework)) + FRONT_END_INITIALIZATION_CHECK( + m_factories.count(framework), "FrontEnd for Framework ", framework, " is not found"); return m_factories[framework](fec); } diff --git a/ngraph/frontend/onnx_import/CMakeLists.txt b/ngraph/frontend/onnx_import/CMakeLists.txt index 9f728a3f7e8..f3ef7b10fe7 100644 --- a/ngraph/frontend/onnx_import/CMakeLists.txt +++ b/ngraph/frontend/onnx_import/CMakeLists.txt @@ -11,14 +11,10 @@ file(GLOB_RECURSE LIBRARY_PUBLIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/include/*.h # Remove disabled ops list(REMOVE_ITEM LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/op/conv_integer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/op/matmul_integer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/op/qlinear_matmul.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/op/quant_conv.cpp ) list(REMOVE_ITEM LIBRARY_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/src/op/conv_integer.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/op/matmul_integer.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/op/qlinear_matmul.hpp ${CMAKE_CURRENT_SOURCE_DIR}/src/op/quant_conv.hpp ) diff --git a/ngraph/frontend/onnx_import/src/op/matmul_integer.cpp b/ngraph/frontend/onnx_import/src/op/matmul_integer.cpp deleted file mode 100644 index 231256950f9..00000000000 --- a/ngraph/frontend/onnx_import/src/op/matmul_integer.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "op/matmul_integer.hpp" -#include "ngraph/builder/matmul_factory.hpp" -#include "ngraph/log.hpp" - -namespace ngraph -{ - namespace onnx_import - { - namespace op - { - namespace set_1 - { - OutputVector matmul_integer(const Node& node) - { - auto ng_inputs = node.get_ng_inputs(); - auto factory = builder::MatmulIntegerFactory( - OutputVector(std::begin(ng_inputs), std::end(ng_inputs))); - std::size_t left_rank{ng_inputs.at(0).get_shape().size()}; - std::size_t right_rank{ng_inputs.at(1).get_shape().size()}; - - if (left_rank == 0 || right_rank == 0) - { - NGRAPH_WARN - << (node) << " " - << "ONNX standard doesn't allow scalar operands, however nGraph " - "accepts them. 
Consider use of element-wise multiplication instead " - "to conform with ONNX standard."; - } - return factory.make_matmul_op(); - } - } // namespace set_1 - - } // namespace op - - } // namespace onnx_import - -} // namespace ngraph diff --git a/ngraph/frontend/onnx_import/src/op/matmul_integer.hpp b/ngraph/frontend/onnx_import/src/op/matmul_integer.hpp deleted file mode 100644 index 8e40e3b608b..00000000000 --- a/ngraph/frontend/onnx_import/src/op/matmul_integer.hpp +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "ngraph/node.hpp" -#include "onnx_import/core/node.hpp" - -namespace ngraph -{ - namespace onnx_import - { - namespace op - { - namespace set_1 - { - /// \brief Performs ONNX MatMulInteger operation. - /// - /// \param node The ONNX node object representing this operation. - /// - /// \return The vector containing Ngraph nodes producing output of quantized ONNX - /// matrix multiplication operation. - OutputVector matmul_integer(const Node& node); - - } // namespace set_1 - - } // namespace op - - } // namespace onnx_import - -} // namespace ngraph diff --git a/ngraph/frontend/onnx_import/src/op/qlinear_matmul.cpp b/ngraph/frontend/onnx_import/src/op/qlinear_matmul.cpp deleted file mode 100644 index 1baafa827ca..00000000000 --- a/ngraph/frontend/onnx_import/src/op/qlinear_matmul.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "op/qlinear_matmul.hpp" -#include "ngraph/builder/matmul_factory.hpp" -#include "ngraph/log.hpp" - -namespace ngraph -{ - namespace onnx_import - { - namespace op - { - namespace set_1 - { - OutputVector qlinear_matmul(const Node& node) - { - auto ng_inputs = node.get_ng_inputs(); - auto factory = builder::QLinearMatmulFactory( - (OutputVector(std::begin(ng_inputs), std::end(ng_inputs)))); - std::size_t left_rank{ng_inputs.at(0).get_shape().size()}; - std::size_t right_rank{ng_inputs.at(1).get_shape().size()}; - - if (left_rank == 0 || right_rank == 0) - { - NGRAPH_WARN - << (node) << " " - << "ONNX standard doesn't allow scalar operands, however nGraph " - "accepts them. 
Consider use of element-wise multiplication instead " - "to conform with ONNX standard."; - } - return factory.make_matmul_op(); - } - } // namespace set_1 - - } // namespace op - - } // namespace onnx_import - -} // namespace ngraph diff --git a/ngraph/frontend/onnx_import/src/op/qlinear_matmul.hpp b/ngraph/frontend/onnx_import/src/op/qlinear_matmul.hpp deleted file mode 100644 index 8e38f336836..00000000000 --- a/ngraph/frontend/onnx_import/src/op/qlinear_matmul.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "ngraph/node.hpp" -#include "onnx_import/core/node.hpp" - -namespace ngraph -{ - namespace onnx_import - { - namespace op - { - namespace set_1 - { - OutputVector qlinear_matmul(const Node& node); - } // namespace set_1 - - } // namespace op - - } // namespace onnx_import - -} // namespace ngraph diff --git a/ngraph/frontend/onnx_import/src/ops_bridge.cpp b/ngraph/frontend/onnx_import/src/ops_bridge.cpp index b807a31212d..f7864360ba7 100644 --- a/ngraph/frontend/onnx_import/src/ops_bridge.cpp +++ b/ngraph/frontend/onnx_import/src/ops_bridge.cpp @@ -73,7 +73,6 @@ #include "op/lrn.hpp" #include "op/lstm.hpp" #include "op/matmul.hpp" -//#include "op/matmul_integer.hpp" #include "op/max.hpp" #include "op/max_pool.hpp" #include "op/mean.hpp" @@ -90,7 +89,6 @@ #include "op/pad.hpp" #include "op/pow.hpp" #include "op/prelu.hpp" -//#include "op/qlinear_matmul.hpp" // #include "op/quant_conv.hpp" #include "op/quantize_linear.hpp" #include "op/range.hpp" @@ -376,7 +374,6 @@ namespace ngraph REGISTER_OPERATOR("LRN", 1, lrn); REGISTER_OPERATOR("LSTM", 1, lstm); REGISTER_OPERATOR("MatMul", 1, matmul); - // REGISTER_OPERATOR("MatMulInteger", 1, matmul_integer); REGISTER_OPERATOR("MaxPool", 1, max_pool); REGISTER_OPERATOR("Max", 1, max); REGISTER_OPERATOR("Max", 8, max); @@ -399,7 +396,6 @@ namespace ngraph REGISTER_OPERATOR("Pow", 1, pow); REGISTER_OPERATOR("PRelu", 1, prelu); // REGISTER_OPERATOR("QLinearConv", 1, quant_conv); - // REGISTER_OPERATOR("QLinearMatMul", 1, qlinear_matmul); REGISTER_OPERATOR("QuantizeLinear", 1, quantize_linear); REGISTER_OPERATOR("QuantizeLinear", 13, quantize_linear); REGISTER_OPERATOR("Range", 1, range); diff --git a/ngraph/frontend/onnx_import/src/utils/convpool.cpp b/ngraph/frontend/onnx_import/src/utils/convpool.cpp index 64644c5e11e..52924d78116 100644 --- a/ngraph/frontend/onnx_import/src/utils/convpool.cpp +++ b/ngraph/frontend/onnx_import/src/utils/convpool.cpp @@ -130,6 +130,11 @@ namespace ngraph auto pads_int64 = node.get_attribute_value>("pads"); pads = CoordinateDiff{std::begin(pads_int64), std::end(pads_int64)}; } + else if (node.has_attribute("paddings")) + { + auto pads_int64 = node.get_attribute_value>("paddings"); + pads = CoordinateDiff{std::begin(pads_int64), std::end(pads_int64)}; + } if (pads.size() == kernel_rank * 2) { diff --git a/ngraph/python/src/pyngraph/function.cpp b/ngraph/python/src/pyngraph/function.cpp index 6b60372a85a..ed84d9c458c 100644 --- a/ngraph/python/src/pyngraph/function.cpp +++ b/ngraph/python/src/pyngraph/function.cpp @@ -7,6 +7,7 @@ #include "ngraph/function.hpp" // ngraph::Function #include "ngraph/op/parameter.hpp" // ngraph::op::Parameter +#include "ngraph/op/sink.hpp" #include "pyngraph/function.hpp" namespace py = pybind11; @@ -17,6 +18,42 @@ void regclass_pyngraph_Function(py::module m) { py::class_> function(m, "Function"); function.doc() = "ngraph.impl.Function wraps ngraph::Function"; + + 
function.def(py::init([](const ngraph::ResultVector& res, + const std::vector>& nodes, + const ngraph::ParameterVector& params, + const std::string& name) { + ngraph::SinkVector sinks; + for (const auto& node : nodes) + { + auto sink = std::dynamic_pointer_cast(node); + NGRAPH_CHECK(sink != nullptr, "Node {} is not instance of Sink"); + sinks.push_back(sink); + } + return std::make_shared(res, sinks, params, name); + }), + py::arg("results"), + py::arg("sinks"), + py::arg("parameters"), + py::arg("name"), + R"( + Create user-defined Function which is a representation of a model. + + Parameters + ---------- + results : List[op.Result] + List of results. + + sinks : List[Node] + List of Nodes to be used as Sinks (e.g. Assign ops). + + parameters : List[op.Parameter] + List of parameters. + + name : str + String to set as function's friendly name. + )"); + function.def(py::init>&, const std::vector>&, const std::string&>(), @@ -37,6 +74,7 @@ void regclass_pyngraph_Function(py::module m) name : str String to set as function's friendly name. )"); + function.def(py::init&, const std::vector>&, const std::string&>(), diff --git a/ngraph/python/src/pyngraph/util.cpp b/ngraph/python/src/pyngraph/util.cpp index 5178e84fe90..69c0df89dcd 100644 --- a/ngraph/python/src/pyngraph/util.cpp +++ b/ngraph/python/src/pyngraph/util.cpp @@ -4,8 +4,8 @@ #include -#include "pyngraph/util.hpp" #include "ngraph/validation_util.hpp" +#include "pyngraph/util.hpp" namespace py = pybind11; diff --git a/ngraph/python/tests/__init__.py b/ngraph/python/tests/__init__.py index 930be465a63..817d4584534 100644 --- a/ngraph/python/tests/__init__.py +++ b/ngraph/python/tests/__init__.py @@ -47,12 +47,10 @@ xfail_issue_33651 = xfail_test(reason="RuntimeError: nGraph does not support the xfail_issue_33581 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:" "GatherElements") xfail_issue_33633 = xfail_test(reason="MaxPool: dilations unsupported") -xfail_issue_35911 = xfail_test(reason="Assertion error: Pad model mismatch error") xfail_issue_35923 = xfail_test(reason="RuntimeError: PReLU without weights is not supported") xfail_issue_35927 = xfail_test(reason="RuntimeError: B has zero dimension that is not allowable") xfail_issue_36486 = xfail_test(reason="RuntimeError: HardSigmoid operation should be converted " "to HardSigmoid_IE") -xfail_issue_36487 = xfail_test(reason="Assertion error - mvn operator computation mismatch") xfail_issue_38084 = xfail_test(reason="RuntimeError: AssertionFailed: layer->get_output_partial_shape(i)" "is_static() nGraph operation with name: cannot be" "converted to layer with name: because output" @@ -110,8 +108,6 @@ xfail_issue_46762 = xfail_test(reason="Incorrect result of Minimum op if uint da xfail_issue_47323 = xfail_test(reason="RuntimeError: The plugin does not support FP64") xfail_issue_47337 = xfail_test(reason="RuntimeError: Unsupported dynamic ops: v1::OneHot") xfail_issue_33593 = xfail_test(reason="Current implementation of MaxPool doesn't support indices output") -xfail_issue_51993 = xfail_test(reason="PRelu supports only 1D tensor for 'slope' input broadcasted" - "by channel") xfail_issue_55760 = xfail_test(reason="RuntimeError: Reversed axis have axes above the source space shape") # Model MSFT issues: diff --git a/ngraph/python/tests/test_ngraph/test_basic.py b/ngraph/python/tests/test_ngraph/test_basic.py index 210bcb99ae0..da6cf993d3f 100644 --- a/ngraph/python/tests/test_ngraph/test_basic.py +++ b/ngraph/python/tests/test_ngraph/test_basic.py @@ 
-403,3 +403,25 @@ def test_mutiple_outputs(): output = computation(input_data) assert np.equal(output, expected_output).all() + + +def test_sink_function_ctor(): + input_data = ng.parameter([2, 2], name="input_data", dtype=np.float32) + rv = ng.read_value(input_data, "var_id_667") + add = ng.add(rv, input_data, name="MemoryAdd") + node = ng.assign(add, "var_id_667") + res = ng.result(add, "res") + function = Function(results=[res], sinks=[node], parameters=[input_data], name="TestFunction") + + ordered_ops = function.get_ordered_ops() + op_types = [op.get_type_name() for op in ordered_ops] + assert op_types == ["Parameter", "ReadValue", "Add", "Assign", "Result"] + assert len(function.get_ops()) == 5 + assert function.get_output_size() == 1 + assert function.get_output_op(0).get_type_name() == "Result" + assert function.get_output_element_type(0) == input_data.get_element_type() + assert list(function.get_output_shape(0)) == [2, 2] + assert (function.get_parameters()[0].get_partial_shape()) == PartialShape([2, 2]) + assert len(function.get_parameters()) == 1 + assert len(function.get_results()) == 1 + assert function.get_friendly_name() == "TestFunction" diff --git a/ngraph/python/tests/test_ngraph/test_ops_fused.py b/ngraph/python/tests/test_ngraph/test_ops_fused.py index 48fff1623c5..d96f870f604 100644 --- a/ngraph/python/tests/test_ngraph/test_ops_fused.py +++ b/ngraph/python/tests/test_ngraph/test_ops_fused.py @@ -6,9 +6,7 @@ import pytest import ngraph as ng from tests.runtime import get_runtime -from tests import (xfail_issue_36486, - xfail_issue_36487, - xfail_issue_44976) +from tests import (xfail_issue_36486, xfail_issue_44976) def test_elu_operator_with_scalar_and_array(): @@ -424,14 +422,14 @@ def test_hard_sigmoid_operator(): assert np.allclose(result, expected) -@xfail_issue_36487 def test_mvn_operator(): runtime = get_runtime() data_shape = [3, 3, 3, 1] - across_channels = True + axes = [0, 2, 3] normalize_variance = True eps = np.float32(1e-9) + eps_mode = "outside_sqrt" data_value = np.array( [ @@ -456,7 +454,7 @@ def test_mvn_operator(): parameter_data = ng.parameter(data_shape, name="Data", dtype=np.float32) - model = ng.mvn(parameter_data, across_channels, normalize_variance, eps) + model = ng.mvn(parameter_data, axes, normalize_variance, eps, eps_mode) computation = runtime.computation(model, parameter_data) result = computation(data_value) @@ -464,21 +462,22 @@ def test_mvn_operator(): expected = np.array( [ [ - [[0.9951074], [0.14548765], [-1.410561]], - [[-1.4999886], [-1.1923014], [-0.03975919]], - [[0.8463296], [1.2926502], [1.3340596]], + [[1.3546423], [0.33053496], [-1.5450814]], + [[-1.2106764], [-0.8925952], [0.29888135]], + [[0.38083088], [0.81808794], [0.85865635]], ], [ - [[-1.0463363], [-0.1747985], [-0.7784088]], - [[0.47672555], [-1.5383], [0.32375798]], - [[1.2404392], [1.3878832], [-1.2228798]], + [[-1.1060555], [-0.05552877], [-0.78310335]], + [[0.83281356], [-1.250282], [0.67467856]], + [[0.7669372], [0.9113869], [-1.6463585]], ], [ - [[-0.3228847], [1.2063044], [0.22751297]], - [[0.91956615], [0.81839436], [-1.2279599]], - [[0.5312334], [0.067952], [-1.3592235]], + [[-0.23402764], [1.6092131], [0.42940593]], + [[1.2906139], [1.1860244], [-0.92945826]], + [[0.0721334], [-0.38174], [-1.7799333]], ], ], + dtype=np.float32, ) assert np.allclose(result, expected) diff --git a/ngraph/python/tests/test_onnx/test_backend.py b/ngraph/python/tests/test_onnx/test_backend.py index d411919e977..a8ee1cbb065 100644 --- 
a/ngraph/python/tests/test_onnx/test_backend.py +++ b/ngraph/python/tests/test_onnx/test_backend.py @@ -59,7 +59,6 @@ from tests import (BACKEND_NAME, xfail_issue_49753, xfail_issue_49754, xfail_issue_52463, - xfail_issue_51993, xfail_issue_55760) @@ -369,9 +368,7 @@ tests_expected_to_fail = [ "OnnxBackendNodeModelTest.test_quantizelinear_cpu"), (xfail_issue_33593, "OnnxBackendNodeModelTest.test_maxpool_with_argmax_2d_precomputed_strides_cpu", - "OnnxBackendNodeModelTest.test_maxpool_with_argmax_2d_precomputed_pads_cpu",), - (xfail_issue_51993, - "OnnxBackendNodeModelTest.test_prelu_broadcast_cpu",) + "OnnxBackendNodeModelTest.test_maxpool_with_argmax_2d_precomputed_pads_cpu",) ] for test_group in tests_expected_to_fail: diff --git a/ngraph/python/tests/test_onnx/test_ops_convpool.py b/ngraph/python/tests/test_onnx/test_ops_convpool.py index 7a507a2b4e0..6637a06463b 100644 --- a/ngraph/python/tests/test_onnx/test_ops_convpool.py +++ b/ngraph/python/tests/test_onnx/test_ops_convpool.py @@ -5,10 +5,10 @@ import numpy as np import onnx import pytest from onnx.helper import make_graph, make_model, make_node, make_tensor_value_info +from onnx.onnx_cpp2py_export.checker import ValidationError from tests.runtime import get_runtime from tests.test_onnx.utils import get_node_model, import_onnx_model, run_model, run_node -from tests import xfail_issue_35911 @pytest.fixture @@ -257,7 +257,6 @@ def test_2d_conv_transpose(): ) -@xfail_issue_35911 def test_pad_opset_1(): x = np.ones((2, 2), dtype=np.float32) y = np.pad(x, pad_width=1, mode="constant") @@ -281,7 +280,7 @@ def test_pad_opset_1(): # no paddings arttribute model = get_node_model("Pad", x) - with pytest.raises(RuntimeError): + with pytest.raises(ValidationError): import_onnx_model(model) diff --git a/ngraph/python/tox.ini b/ngraph/python/tox.ini index e0ccc85785e..de7bb8337b2 100644 --- a/ngraph/python/tox.ini +++ b/ngraph/python/tox.ini @@ -7,7 +7,7 @@ skip_install=True deps = -rrequirements.txt -rrequirements_test.txt - mypy + mypy<0.900 flake8-bugbear pytest-xdist setenv = diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt index 52148150e81..e25a40de184 100644 --- a/ngraph/test/CMakeLists.txt +++ b/ngraph/test/CMakeLists.txt @@ -232,6 +232,7 @@ set(SRC visitors/op/grn.cpp visitors/op/group_conv.cpp visitors/op/interpolate.cpp + visitors/op/log.cpp visitors/op/logical_xor.cpp visitors/op/lrn.cpp visitors/op/lstm_cell.cpp @@ -286,13 +287,21 @@ set_source_files_properties(includes.cpp PROPERTIES COMPILE_DEFINITIONS if (ENABLE_MKL_DNN) message(STATUS "NGRAPH_TESTS: IE:CPU enabled") set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} "IE:CPU") - list(APPEND UNIT_TESTS_DEPENDENCIES MKLDNNPlugin) + if (ENABLE_STRICT_DEPENDENCIES) + # For convenience add a runtime dependency to build along with this target. + # Warning: Parallel build with -GNinja may not be efficient. + list(APPEND UNIT_TESTS_DEPENDENCIES MKLDNNPlugin) + endif() endif() if (ENABLE_CLDNN) message(STATUS "NGRAPH_TESTS: IE:GPU enabled") set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} "IE:GPU") - list(APPEND UNIT_TESTS_DEPENDENCIES clDNNPlugin) + if (ENABLE_STRICT_DEPENDENCIES) + # For convenience add a runtime dependency to build along with this target. + # Warning: Parallel build with -GNinja may not be efficient. 
+ list(APPEND UNIT_TESTS_DEPENDENCIES clDNNPlugin) + endif() endif() if (NGRAPH_INTERPRETER_ENABLE) @@ -399,6 +408,7 @@ set(MULTI_TEST_SRC backend/pad.in.cpp backend/parameter_as_output.in.cpp backend/power.in.cpp + backend/prelu.in.cpp backend/proposal.in.cpp backend/psroi_pooling.in.cpp backend/range.in.cpp diff --git a/ngraph/test/backend/fused_op.in.cpp b/ngraph/test/backend/fused_op.in.cpp index 74117a7fb9f..4b6bb15cb51 100644 --- a/ngraph/test/backend/fused_op.in.cpp +++ b/ngraph/test/backend/fused_op.in.cpp @@ -41,22 +41,6 @@ static string s_manifest = "${MANIFEST}"; using TestEngine = test::ENGINE_CLASS_NAME(${BACKEND_NAME}); -NGRAPH_TEST(${BACKEND_NAME}, prelu) -{ - Shape shape{3, 2}; - Shape rshape{3}; - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, rshape); - auto prelu = make_shared(A, B); - auto f = make_shared(NodeVector{prelu}, ParameterVector{A, B}); - std::vector a{-2, 3, -2, 1, -1, 0}; - std::vector b{0, 0.5, 1}; - - auto test_case = test::TestCase(f); - test_case.add_multiple_inputs({a, b}); - test_case.add_expected_output(vector{0, 3, -1, 1, -1, 0}); - test_case.run(); -} NGRAPH_TEST(${BACKEND_NAME}, hardsigmoid) { @@ -94,39 +78,6 @@ NGRAPH_TEST(${BACKEND_NAME}, hardsigmoid) test_case.run(); } -NGRAPH_TEST(${BACKEND_NAME}, prelu_shared_slope) -{ - Shape shape{3, 2}; - Shape rshape{}; - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, rshape); - auto prelu = make_shared(A, B); - auto f = make_shared(NodeVector{prelu}, ParameterVector{A, B}); - std::vector a{-2, 3, -2, 1, -1, 0}; - std::vector b{0.5}; - - auto test_case = test::TestCase(f); - test_case.add_multiple_inputs({a, b}); - test_case.add_expected_output(vector{-1, 3, -1, 1, -0.5, 0}); - test_case.run(); -} - -NGRAPH_TEST(${BACKEND_NAME}, prelu_negative_slope) -{ - Shape shape{3, 2}; - Shape rshape{}; - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, rshape); - auto prelu = make_shared(A, B); - auto f = make_shared(NodeVector{prelu}, ParameterVector{A, B}); - std::vector a{-2, 3, -2, 1, -1, 0}; - std::vector b{-0.5}; - - auto test_case = test::TestCase(f); - test_case.add_multiple_inputs({a, b}); - test_case.add_expected_output(vector{1, 3, 1, 1, 0.5, 0}); - test_case.run(); -} NGRAPH_TEST(${BACKEND_NAME}, space_to_depth_block_first) { diff --git a/ngraph/test/backend/prelu.in.cpp b/ngraph/test/backend/prelu.in.cpp new file mode 100644 index 00000000000..8523aeb39c3 --- /dev/null +++ b/ngraph/test/backend/prelu.in.cpp @@ -0,0 +1,942 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include "gtest/gtest.h" +#include "ngraph/ngraph.hpp" +#include "util/engine/test_engines.hpp" +#include "util/test_case.hpp" +#include "util/test_control.hpp" + +using namespace std; +using namespace ngraph; + +static string s_manifest = "${MANIFEST}"; +using TestEngine = test::ENGINE_CLASS_NAME(${BACKEND_NAME}); + +NGRAPH_TEST(${BACKEND_NAME}, prelu_1d_1_param) +{ + Shape shape_a{6}; + Shape shape_slope{1}; + + std::vector a{1, 2, -3, -4, 5, 6}; + std::vector slope{2}; + std::vector out{1, 2, -6, -8, 5, 6}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, 
prelu_1d_1_const) +{ + Shape shape_a{6}; + Shape shape_slope{1}; + + std::vector a{1, 2, -3, -4, 5, 6}; + std::vector slope{2}; + std::vector out{1, 2, -6, -8, 5, 6}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_1d_param) +{ + Shape shape_a{6}; + Shape shape_slope{6}; + + std::vector a{1, 2, -3, -4, 5, 6}; + std::vector slope{2, 3, 4, 5, 6, 7}; + std::vector out{1, 2, -12, -20, 5, 6}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_1d_6_const) +{ + Shape shape_a{6}; + Shape shape_slope{6}; + + std::vector a{1, 2, -3, -4, 5, 6}; + std::vector slope{2, 3, 4, 5, 6, 7}; + std::vector out{1, 2, -12, -20, 5, 6}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_2d_2_W_param) +{ + Shape shape_a{3, 2}; + Shape shape_slope{2}; + + std::vector a{-2, 3, -2, 1, -1, 0}; + std::vector slope{0, 1}; + std::vector out{0, 3, 0, 1, 0, 0}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_2d_2_W_const) +{ + Shape shape_a{3, 2}; + Shape shape_slope{2}; + + std::vector a{-2, 3, -2, 1, -1, 0}; + std::vector slope{0, 1}; + std::vector out{0, 3, 0, 1, 0, 0}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_2d_6_W_param) +{ + Shape shape_a{2, 6}; + Shape shape_slope{6}; + + std::vector a{1, 2, -3, -4, 5, 6, + 7, 8, -9, -10, 11, 12}; + std::vector slope{1, 2, 3, 4, 5, 6}; + std::vector out{1, 2, -9, -16, 5, 6, + 7, 8, -27, -40, 11, 12}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_2d_6_W_const) +{ + Shape shape_a{2, 6}; + Shape shape_slope{6}; + + std::vector a{1, 2, -3, -4, 5, 6, + 7, 8, -9, -10, 11, 12}; + std::vector slope{1, 2, 3, 
4, 5, 6}; + std::vector out{1, 2, -9, -16, 5, 6, + 7, 8, -27, -40, 11, 12}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_2d_C_2_param) +{ + Shape shape_a{3, 2}; + Shape shape_slope{2}; + + std::vector a{-1, -1, -1, -1, -1, -1}; + std::vector slope{2, 0.5}; + std::vector out{-2, -0.5, -2, -0.5, -2, -0.5}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_2d_C_2_const) +{ + Shape shape_a{3, 2}; + Shape shape_slope{2}; + + std::vector a{-1, -1, -1, -1, -1, -1}; + std::vector slope{2, 0.5}; + std::vector out{-2, -0.5, -2, -0.5, -2, -0.5}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_negative_equal_dims_slope_param) +{ + Shape shape_a{2, 2, 2}; + Shape shape_slope{2}; + + std::vector a{-0.5, -2, -3, -4, -5, -6, -7, -8}; + std::vector slope{-0.5, -2}; + // std::vector out{0.25, 4, 1.5, 8, 2.5, 12, 3.5, 16}; // broadcast (1, 1, 2) + std::vector out{0.25, 1, 6, 8, 2.5, 3, 14, 16}; // broadcast (1, 2, 1) + // std::vector out{0.25, 1, 1.5, 2, 10, 12, 14, 16}; // broadcast (2, 1, 1) + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_negative_equal_dims_slope_const) +{ + Shape shape_a{2, 2, 2}; + Shape shape_slope{2}; + + std::vector a{-0.5, -2, -3, -4, -5, -6, -7, -8}; + std::vector slope{-0.5, -2}; + // std::vector out{0.25, 4, 1.5, 8, 2.5, 12, 3.5, 16}; // broadcast (1, 1, 2) + std::vector out{0.25, 1, 6, 8, 2.5, 3, 14, 16}; // broadcast (1, 2, 1) + // std::vector out{0.25, 1, 1.5, 2, 10, 12, 14, 16}; // broadcast (2, 1, 1) + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_negative_slope_param) +{ + Shape shape_a{3, 2}; + Shape shape_slope{2}; + + std::vector a{-2, 3, -2, 1, -1, 0}; + std::vector slope{-0.5, -1}; + std::vector out{1, 3, 1, 1, 0.5, 0}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto 
test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_negative_slope_const) +{ + Shape shape_a{3, 2}; + Shape shape_slope{2}; + + std::vector a{-2, 3, -2, 1, -1, 0}; + std::vector slope{-0.5, -1}; + std::vector out{1, 3, 1, 1, 0.5, 0}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_2d_same_shape_param) +{ + Shape shape_a{2, 6}; + Shape shape_slope{2, 6}; + + std::vector a{1, 2, -3, -4, 5, 6, + 1, 2, -3, -4, 5, 6}; + std::vector slope{2, 2, 2, 2, 2, 2, + 1, 1, 4, 2, 1, 1}; + std::vector out{1, 2, -6, -8, 5, 6, 1, 2, -12, -8, 5, 6}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_2d_same_shape_const) +{ + Shape shape_a{2, 6}; + Shape shape_slope{2, 6}; + + std::vector a{1, 2, -3, -4, 5, 6, + 1, 2, -3, -4, 5, 6}; + std::vector slope{2, 2, 2, 2, 2, 2, + 1, 1, 4, 2, 1, 1}; + std::vector out{1, 2, -6, -8, 5, 6, 1, 2, -12, -8, 5, 6}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_C_1_W_slope_param) +{ + Shape shape_a{2, 2, 2, 2}; + Shape shape_slope{2, 1, 2}; + + std::vector a{1, 2, -3, -4, 1, 2, -3, -4, 1, 2, -3, -4, 1, 2, -3, -4}; + std::vector slope{1, 2, 3, 4}; + std::vector out{1, 2, -3, -8, 1, 2, -9, -16, 1, 2, -3, -8, 1, 2, -9, -16}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_C_1_W_slope_const) +{ + Shape shape_a{2, 2, 2, 2}; + Shape shape_slope{2, 1, 2}; + + std::vector a{1, 2, -3, -4, 1, 2, -3, -4, 1, 2, -3, -4, 1, 2, -3, -4}; + std::vector slope{1, 2, 3, 4}; + std::vector out{1, 2, -3, -8, 1, 2, -9, -16, 1, 2, -3, -8, 1, 2, -9, -16}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_4d_slope_1_C_1_W_param) +{ + Shape shape_a{2, 2, 2, 2}; + Shape shape_slope{1, 2, 1, 2}; + + std::vector a{1, 2, -3, -4, 1, 2, -3, -4, 1, 2, -3, -4, 1, 2, -3, -4}; + std::vector slope{1, 2, 3, 4}; + std::vector out{1, 2, -3, -8, 
1, 2, -9, -16, 1, 2, -3, -8, 1, 2, -9, -16}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_4d_slope_1_C_1_W_const) +{ + Shape shape_a{2, 2, 2, 2}; + Shape shape_slope{1, 2, 1, 2}; + + std::vector a{1, 2, -3, -4, 1, 2, -3, -4, 1, 2, -3, -4, 1, 2, -3, -4}; + std::vector slope{1, 2, 3, 4}; + std::vector out{1, 2, -3, -8, 1, 2, -9, -16, 1, 2, -3, -8, 1, 2, -9, -16}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_3d_W_param) +{ + Shape shape_a{2, 2, 6}; + Shape shape_slope{2, 1, 6}; + + + std::vector a{1, 2, -3, -4, -5, 6, + -1, -2, -3, -4, -5, -6, + 1, 2, -3, -4, 5, 6, + -2, 4, -6, -8, 10, 12}; + std::vector slope{2, 1, 3, 4, 1, 7, + 1, 2, 3, 4, 5, 6}; + std::vector out{1, 2, -9, -16, -5, 6, + -2, -2, -9, -16, -5, -42, + 1, 2, -9, -16, 5, 6, + -2, 4, -18, -32, 10, 12}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_3d_W_const) +{ + Shape shape_a{2, 2, 6}; + Shape shape_slope{2, 1, 6}; + + std::vector a{1, 2, -3, -4, -5, 6, + -1, -2, -3, -4, -5, -6, + 1, 2, -3, -4, 5, 6, + -2, 4, -6, -8, 10, 12}; + std::vector slope{2, 1, 3, 4, 1, 7, + 1, 2, 3, 4, 5, 6}; + std::vector out{1, 2, -9, -16, -5, 6, + -2, -2, -9, -16, -5, -42, + 1, 2, -9, -16, 5, 6, + -2, 4, -18, -32, 10, 12}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_3d_same_shape_param) +{ + Shape shape_a{2, 3, 2}; + Shape shape_slope{2, 3, 2}; + + std::vector a{1, 2, -3, -4, -5, 6, + -1, -2, -3, -4,-5, -6,}; + std::vector slope{2, 1, 3, 4, 1, 7, + 1, 2, 3, 4, 5, 6}; + std::vector out{1, 2, -9, -16, -5, 6, + -1, -4, -9, -16, -25, -36}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_3d_same_shape_const) +{ + Shape shape_a{2, 3, 2}; + Shape shape_slope{2, 3, 2}; + + std::vector a{1, 2, -3, -4, -5, 6, + -1, -2, -3, -4,-5, -6,}; + std::vector slope{2, 1, 3, 4, 1, 7, + 1, 2, 3, 4, 5, 6}; + std::vector out{1, 2, -9, -16, -5, 6, + -1, -4, -9, -16, -25, -36}; + + const auto A = 
make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_3d_broadcast_C_W_slope_param) +{ + Shape shape_a{2, 1, 2}; + Shape shape_slope{2}; + + std::vector a{-10, -10, -10, -10}; + std::vector slope{0.1, 10}; + std::vector out{-1, -100, -1, -100}; // broadcast (1, 2, 1) + // std::vector out{-1, -1, -100, -100}; // broadcast (2, 1, 1) + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + + +NGRAPH_TEST(${BACKEND_NAME}, prelu_3d_broadcast_C_W_slope_const) +{ + Shape shape_a{2, 1, 2}; + Shape shape_slope{2}; + + std::vector a{-10, -10, -10, -10}; + std::vector slope{0.1, 10}; + std::vector out{-1, -100, -1, -100}; // broadcast (1, 2, 1) + // std::vector out{-1, -1, -100, -100}; // broadcast (2, 1, 1) + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_4d_broadcast_C_W_slope_param) +{ + Shape shape_a{1, 2, 1, 2}; + Shape shape_slope{2}; + + std::vector a{-10, -10, -10, -10}; + std::vector slope{0.1, 10}; + // std::vector out{-1, -100, -1, -100}; // broadcast (1, 1, 1, 2) + std::vector out{-1, -1, -100, -100}; // broadcast (1, 2, 1, 1) + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_4d_broadcast_C_W_slope_const) +{ + Shape shape_a{1, 2, 1, 2}; + Shape shape_slope{2}; + + std::vector a{-10, -10, -10, -10}; + std::vector slope{0.1, 10}; + // std::vector out{-1, -100, -1, -100}; // broadcast (1, 1, 1, 2) + std::vector out{-1, -1, -100, -100}; // broadcast (1, 2, 1, 1) + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_4d_broadcast_C_slope_param) +{ + Shape shape_a{1, 5, 1, 1}; + Shape shape_slope{5}; + + std::vector a{-1, 0, -1, -1, -1}; + std::vector slope{1, 2, 3, 4, 5}; + std::vector out{-1, 0, -3, -4, -5}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, 
out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_4d_broadcast_C_slope_const) +{ + Shape shape_a{1, 5, 1, 1}; + Shape shape_slope{5}; + + std::vector a{-1, 0, -1, -1, -1}; + std::vector slope{1, 2, 3, 4, 5}; + std::vector out{-1, 0, -3, -4, -5}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_batch_nd_elementwise_param) +{ + Shape shape_a{2, 3, 4, 5}; + Shape shape_slope{2, 3, 4, 5}; + + std::vector a{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}; + std::vector slope(shape_size(shape_slope)); + std::iota(std::begin(slope), std::end(slope), 0); + + std::vector out{-0., -1., -2., -3., -4., -5., -6., -7., -8., + -9., -10., -11., -12., -13., -14., -15., -16., -17., + -18., -19., -20., -21., -22., -23., -24., -25., -26., + -27., -28., -29., -30., -31., -32., -33., -34., -35., + -36., -37., -38., -39., -40., -41., -42., -43., -44., + -45., -46., -47., -48., -49., -50., -51., -52., -53., + -54., -55., -56., -57., -58., -59., -60., -61., -62., + -63., -64., -65., -66., -67., -68., -69., -70., -71., + -72., -73., -74., -75., -76., -77., -78., -79., -80., + -81., -82., -83., -84., -85., -86., -87., -88., -89., + -90., -91., -92., -93., -94., -95., -96., -97., -98., + -99., -100., -101., -102., -103., -104., -105., -106., -107., + -108., -109., -110., -111., -112., -113., -114., -115., -116., + -117., -118., -119.}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_batch_nd_elementwise_const) +{ + Shape shape_a{2, 3, 4, 5}; + Shape shape_slope{2, 3, 4, 5}; + + std::vector a{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}; + std::vector slope(shape_size(shape_slope)); + std::iota(std::begin(slope), std::end(slope), 0); + + std::vector out{-0., -1., -2., -3., -4., -5., -6., -7., -8., + -9., -10., -11., -12., 
-13., -14., -15., -16., -17., + -18., -19., -20., -21., -22., -23., -24., -25., -26., + -27., -28., -29., -30., -31., -32., -33., -34., -35., + -36., -37., -38., -39., -40., -41., -42., -43., -44., + -45., -46., -47., -48., -49., -50., -51., -52., -53., + -54., -55., -56., -57., -58., -59., -60., -61., -62., + -63., -64., -65., -66., -67., -68., -69., -70., -71., + -72., -73., -74., -75., -76., -77., -78., -79., -80., + -81., -82., -83., -84., -85., -86., -87., -88., -89., + -90., -91., -92., -93., -94., -95., -96., -97., -98., + -99., -100., -101., -102., -103., -104., -105., -106., -107., + -108., -109., -110., -111., -112., -113., -114., -115., -116., + -117., -118., -119.}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_1d_W_slope_param) +{ + Shape shape_a{2, 3, 4, 5}; + Shape shape_slope{5}; + + std::vector a{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}; + + std::vector slope{0, 1, 2, 3, 4}; + + std::vector out{-0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., + -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., + -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., + -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., + -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., + -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., + -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., + -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., + -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., + -2., -3., -4.}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_1d_W_slope_const) +{ + Shape shape_a{2, 3, 4, 5}; + Shape shape_slope{5}; + + std::vector a{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}; 
+ + std::vector slope{0, 1, 2, 3, 4}; + + std::vector out{-0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., + -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., + -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., + -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., + -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., + -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., + -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., + -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., + -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., + -2., -3., -4.}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_1d_C_slope_param) +{ + Shape shape_a{2, 3, 4, 5}; + Shape shape_slope{3}; + + std::vector a{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}; + + std::vector slope{0, 1, 2}; + + std::vector out{-0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2.}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_1d_C_slope_const) +{ + Shape shape_a{2, 3, 4, 5}; + Shape shape_slope{3}; + + std::vector a{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}; + + std::vector 
slope{0, 1, 2}; + + std::vector out{-0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2.}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_C_1_1_slope_param) +{ + Shape shape_a{2, 3, 4, 5}; + Shape shape_slope{3, 1, 1}; + + std::vector a{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}; + + std::vector slope{0, 1, 2}; + + std::vector out{-0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2.}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_C_1_1_slope_const) +{ + Shape shape_a{2, 3, 4, 5}; + Shape shape_slope{3, 1, 1}; + + std::vector a{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}; + + std::vector 
slope{0, 1, 2}; + + std::vector out{-0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2.}; + + auto A = make_shared(element::f32, shape_a); + auto SLOPE = make_shared(element::f32, shape_slope, slope); + auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} diff --git a/ngraph/test/frontend/frontend_manager.cpp b/ngraph/test/frontend/frontend_manager.cpp index 10e8eac8e60..af70885d237 100644 --- a/ngraph/test/frontend/frontend_manager.cpp +++ b/ngraph/test/frontend/frontend_manager.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include #include #include @@ -161,3 +162,150 @@ TEST(FrontEndManagerTest, testDefaultPlace) ASSERT_ANY_THROW(place->is_equal(nullptr)); ASSERT_ANY_THROW(place->is_equal_data(nullptr)); } + +TEST(FrontEndExceptionTest, frontend_general_error_no_throw) +{ + EXPECT_NO_THROW(FRONT_END_GENERAL_CHECK(true)); +} + +TEST(FrontEndExceptionTest, frontend_general_error_no_throw_info) +{ + EXPECT_NO_THROW(FRONT_END_GENERAL_CHECK(true, "msg example")); +} + +TEST(FrontEndExceptionTest, frontend_general_error_throw_no_info) +{ + EXPECT_THROW(FRONT_END_GENERAL_CHECK(false), ngraph::frontend::GeneralFailure); +} + +TEST(FrontEndExceptionTest, frontend_initialization_error_no_throw) +{ + EXPECT_NO_THROW(FRONT_END_INITIALIZATION_CHECK(true)); +} + +TEST(FrontEndExceptionTest, frontend_initialization_error_no_throw_info) +{ + EXPECT_NO_THROW(FRONT_END_INITIALIZATION_CHECK(true, "msg example")); +} + +TEST(FrontEndExceptionTest, frontend_initialization_error_throw_no_info) +{ + EXPECT_THROW(FRONT_END_INITIALIZATION_CHECK(false), ngraph::frontend::InitializationFailure); +} + +TEST(FrontEndExceptionTest, frontend_op_conversion_error_no_throw) +{ + EXPECT_NO_THROW(FRONT_END_OP_CONVERSION_CHECK(true)); +} + +TEST(FrontEndExceptionTest, frontend_op_conversion_error_no_throw_info) +{ + EXPECT_NO_THROW(FRONT_END_OP_CONVERSION_CHECK(true, "msg example")); +} + +TEST(FrontEndExceptionTest, frontend_op_conversion_error_throw_no_info) +{ + EXPECT_THROW(FRONT_END_OP_CONVERSION_CHECK(false), ngraph::frontend::OpConversionFailure); +} + +TEST(FrontEndExceptionTest, frontend_assert_throw_check_info) +{ + std::string msg("msg example"); + try + { + FRONT_END_THROW(msg); + } + catch (const ngraph::frontend::GeneralFailure& ex) + { + std::string caught_msg(ex.what()); + EXPECT_NE(caught_msg.find(msg), std::string::npos); + return; + } + catch (...) 
+ { + FAIL() << "Not expected exception type."; + } + FAIL() << "Test is expected to throw an exception."; +} + +TEST(FrontEndExceptionTest, frontend_not_implemented_throw_check_info) +{ + struct TestClass + { + }; + try + { + FRONT_END_NOT_IMPLEMENTED(TestClass); + } + catch (const ngraph::frontend::NotImplementedFailure& ex) + { + std::string caught_msg(ex.what()); + EXPECT_NE(caught_msg.find("TestClass"), std::string::npos); + return; + } + catch (...) + { + FAIL() << "Not expected exception type."; + } + FAIL() << "Test is expected to throw an exception."; +} + +TEST(FrontEndExceptionTest, frontend_general_error_throw_info) +{ + std::string msg("msg example"); + try + { + FRONT_END_GENERAL_CHECK(false, msg); + } + catch (const ngraph::frontend::GeneralFailure& ex) + { + std::string caught_msg(ex.what()); + EXPECT_NE(caught_msg.find(msg), std::string::npos); + return; + } + catch (...) + { + FAIL() << "Not expected exception type."; + } + FAIL() << "Test is expected to throw an exception."; +} + +TEST(FrontEndExceptionTest, frontend_op_conversion_error_throw_info) +{ + std::string msg("msg example"); + try + { + FRONT_END_OP_CONVERSION_CHECK(false, msg); + } + catch (const ngraph::frontend::OpConversionFailure& ex) + { + std::string caught_msg(ex.what()); + EXPECT_NE(caught_msg.find(msg), std::string::npos); + return; + } + catch (...) + { + FAIL() << "Not expected exception type."; + } + FAIL() << "Test is expected to throw an exception."; +} + +TEST(FrontEndExceptionTest, frontend_initialization_error_throw_info) +{ + std::string msg("msg example"); + try + { + FRONT_END_INITIALIZATION_CHECK(false, msg); + } + catch (const ngraph::frontend::InitializationFailure& ex) + { + std::string caught_msg(ex.what()); + EXPECT_NE(caught_msg.find(msg), std::string::npos); + return; + } + catch (...) 
+ { + FAIL() << "Not expected exception type."; + } + FAIL() << "Test is expected to throw an exception."; +} diff --git a/ngraph/test/models/onnx/prelu_1d.prototxt b/ngraph/test/models/onnx/prelu_1d.prototxt new file mode 100644 index 00000000000..9034d1d42f6 --- /dev/null +++ b/ngraph/test/models/onnx/prelu_1d.prototxt @@ -0,0 +1,72 @@ +ir_version: 7 +producer_name: "onnx-importer-test" +graph { + node { + input: "X" + input: "SLOPE" + output: "Y" + op_type: "PRelu" + } + name: "test-model-prelu" + input { + name: "X" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + input { + name: "SLOPE" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } +} +opset_import { + domain: "" + version: 12 +} diff --git a/ngraph/test/models/onnx/prelu_batch_nd.prototxt b/ngraph/test/models/onnx/prelu_batch_nd.prototxt new file mode 100644 index 00000000000..cf693db83c3 --- /dev/null +++ b/ngraph/test/models/onnx/prelu_batch_nd.prototxt @@ -0,0 +1,81 @@ +ir_version: 7 +producer_name: "onnx-importer-test" +graph { + node { + input: "X" + input: "SLOPE" + output: "Y" + op_type: "PRelu" + } + name: "test-model-prelu" + input { + name: "X" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + input { + name: "SLOPE" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } +} +opset_import { + domain: "" + version: 12 +} diff --git a/ngraph/test/models/onnx/prelu_c_1_1.prototxt b/ngraph/test/models/onnx/prelu_c_1_1.prototxt new file mode 100644 index 00000000000..83f2a748860 --- /dev/null +++ b/ngraph/test/models/onnx/prelu_c_1_1.prototxt @@ -0,0 +1,78 @@ +ir_version: 7 +producer_name: "onnx-importer-test" +graph { + node { + input: "X" + input: "SLOPE" + output: "Y" + op_type: "PRelu" + } + name: "test-model-prelu" + input { + name: "X" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + input { + name: "SLOPE" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 1 + } + dim { + dim_value: 1 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } +} +opset_import { + domain: "" + version: 12 +} diff --git a/ngraph/test/models/onnx/reduce_sum_13_axes_as_input.prototxt b/ngraph/test/models/onnx/reduce_sum_13_axes_as_input.prototxt index 6a458e74393..273e436803a 100644 --- a/ngraph/test/models/onnx/reduce_sum_13_axes_as_input.prototxt +++ b/ngraph/test/models/onnx/reduce_sum_13_axes_as_input.prototxt @@ -15,16 +15,10 @@ graph { elem_type: 1 shape { dim { - dim_value: 1 + dim_value: 2 } dim { - dim_value: 1 
- } - dim { - dim_value: 4 - } - dim { - dim_value: 4 + dim_value: 2 } } } @@ -37,7 +31,7 @@ graph { elem_type: 7 shape { dim { - dim_value: 4 + dim_value: 1 } } } @@ -50,6 +44,9 @@ graph { tensor_type { elem_type: 1 shape { + dim { + dim_value: 2 + } dim { dim_value: 1 } diff --git a/ngraph/test/onnx/onnx_import.in.cpp b/ngraph/test/onnx/onnx_import.in.cpp index 6a5ecfdcdf8..849a015c4c3 100644 --- a/ngraph/test/onnx/onnx_import.in.cpp +++ b/ngraph/test/onnx/onnx_import.in.cpp @@ -1238,25 +1238,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_reduce_sum_13_axes_as_input) file_util::path_join(SERIALIZED_ZOO, "onnx/reduce_sum_13_axes_as_input.prototxt")); auto test_case = test::TestCase(function); - test_case.add_input({1.0f, - 1.0f, - 1.0f, - 1.0f, - 1.0f, - 1.0f, - 1.0f, - 1.0f, - 1.0f, - 1.0f, - 1.0f, - 1.0f, - 1.0f, - 1.0f, - 1.0f, - 1.0f}); - test_case.add_input({0, 1, 2, 3}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); + test_case.add_input({1}); - test_case.add_expected_output(Shape{1, 1, 1, 1}, {16.0f}); + test_case.add_expected_output(Shape{2, 1}, {3.0f, 7.0f}); test_case.run(); } @@ -2101,7 +2086,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_leaky_relu) test_case.run(); } -NGRAPH_TEST(${BACKEND_NAME}, onnx_model_prelu) +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_prelu_nd) { auto function = onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/prelu.prototxt")); @@ -2133,6 +2118,129 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_prelu) test_case.run(); } +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_prelu_batch_nd_elementwise) +{ + auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/prelu_batch_nd.prototxt")); + + Inputs inputs; + // Shape{2, 3, 4, 5} + inputs.emplace_back(std::vector{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}); + + // Shape{2, 3, 4, 5} + std::vector slope(shape_size(Shape{2, 3, 4, 5})); + std::iota(std::begin(slope), std::end(slope), 0); + inputs.emplace_back(slope); + + // Shape{2, 3, 4, 5} + auto expected_output = std::vector{-0., -1., -2., -3., -4., -5., -6., -7., -8., + -9., -10., -11., -12., -13., -14., -15., -16., -17., + -18., -19., -20., -21., -22., -23., -24., -25., -26., + -27., -28., -29., -30., -31., -32., -33., -34., -35., + -36., -37., -38., -39., -40., -41., -42., -43., -44., + -45., -46., -47., -48., -49., -50., -51., -52., -53., + -54., -55., -56., -57., -58., -59., -60., -61., -62., + -63., -64., -65., -66., -67., -68., -69., -70., -71., + -72., -73., -74., -75., -76., -77., -78., -79., -80., + -81., -82., -83., -84., -85., -86., -87., -88., -89., + -90., -91., -92., -93., -94., -95., -96., -97., -98., + -99., -100., -101., -102., -103., -104., -105., -106., -107., + -108., -109., -110., -111., -112., -113., -114., -115., -116., + -117., -118., -119.}; + + auto test_case = test::TestCase(function); + test_case.add_multiple_inputs(inputs); + test_case.add_expected_output(expected_output); + test_case.run(); +} + 
+NGRAPH_TEST(${BACKEND_NAME}, onnx_model_prelu_1d) +{ + auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/prelu_1d.prototxt")); + + Inputs inputs; + // Shape{2, 3, 4, 5} + inputs.emplace_back(std::vector{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}); + + // Shape{5} + inputs.emplace_back(std::vector{0, 1, 2, 3, 4}); + + // Shape{2, 3, 4, 5} + auto expected_output = std::vector{-0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., + -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., + -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., + -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., + -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., + -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., + -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., + -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., + -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., + -2., -3., -4.}; + + auto test_case = test::TestCase(function); + test_case.add_multiple_inputs(inputs); + test_case.add_expected_output(expected_output); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_prelu_C_1_1) +{ + auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/prelu_c_1_1.prototxt")); + + Inputs inputs; + // Shape{2, 3, 4, 5} + inputs.emplace_back(std::vector{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}); + + // Shape{3, 1, 1} + inputs.emplace_back(std::vector{0, 1, 2}); + + // Shape{2, 3, 4, 5} + auto expected_output = std::vector{-0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2.}; + + auto test_case = test::TestCase(function); + test_case.add_multiple_inputs(inputs); + test_case.add_expected_output(expected_output); + test_case.run(); 
+} + NGRAPH_TEST(${BACKEND_NAME}, onnx_model_selu) { auto function = @@ -3000,7 +3108,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mod_sign) test_case.add_input({-4, 7, 5, 4, -7, 8}); test_case.add_input({2, -3, 8, -2, 3, 5}); - test_case.add_expected_output(Shape{6}, {0, -2, 5, 0, 2, 3}); + test_case.add_expected_output(Shape{6}, {0, -2, 5, 0, 2, 3}); test_case.run(); } @@ -3013,7 +3121,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mod_sign_i64) test_case.add_input({-4, 7, 5, 4, -7, 8}); test_case.add_input({2, -3, 8, -2, 3, 5}); - test_case.add_expected_output(Shape{6}, {0, -2, 5, 0, 2, 3}); + test_case.add_expected_output(Shape{6}, {0, -2, 5, 0, 2, 3}); test_case.run(); } @@ -3036,13 +3144,15 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mod_sign_f32) try { const auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/mod_sign_f32.prototxt")); + file_util::path_join(SERIALIZED_ZOO, "onnx/mod_sign_f32.prototxt")); FAIL() << "Expected exception was not thrown"; } catch (const ngraph::ngraph_error& e) { - EXPECT_HAS_SUBSTRING(e.what(), - std::string("If the input type is floating point, then `fmod` attribute must be set to 1.")); + EXPECT_HAS_SUBSTRING( + e.what(), + std::string( + "If the input type is floating point, then `fmod` attribute must be set to 1.")); } catch (...) { @@ -3084,7 +3194,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mod_sign_fmod_f32) test_case.add_input({-4.3, 7.2, 5.0, 4.3, -7.2, 8.0}); test_case.add_input({2.1, -3.4, 8.0, -2.1, 3.4, 5.0}); - test_case.add_expected_output(Shape{6}, {-0.10000038, 0.39999962, 5. , 0.10000038, -0.39999962, 3.}); + test_case.add_expected_output( + Shape{6}, {-0.10000038, 0.39999962, 5., 0.10000038, -0.39999962, 3.}); test_case.run(); } @@ -3094,13 +3205,13 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mod_incorrect_fmod) try { const auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/mod_incorrect_fmod.prototxt")); + file_util::path_join(SERIALIZED_ZOO, "onnx/mod_incorrect_fmod.prototxt")); FAIL() << "Expected exception was not thrown"; } catch (const ngraph::ngraph_error& e) { - EXPECT_HAS_SUBSTRING(e.what(), - std::string("Unsupported value of 'fmod' attribute (should be: 0 or 1)")); + EXPECT_HAS_SUBSTRING( + e.what(), std::string("Unsupported value of 'fmod' attribute (should be: 0 or 1)")); } catch (...) 
{ @@ -4241,8 +4352,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_negativelog_likelihood_loss) NGRAPH_TEST(${BACKEND_NAME}, onnx_constant_fill_input_as_shape_default_value) { - auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/constant_fill_input_as_shape_default_value.prototxt")); + auto function = onnx_import::import_onnx_model(file_util::path_join( + SERIALIZED_ZOO, "onnx/constant_fill_input_as_shape_default_value.prototxt")); auto test_case = test::TestCase(function); test_case.add_expected_output(Shape{1, 2, 3}, {0.f, 0.f, 0.f, 0.f, 0.f, 0.f}); @@ -4299,7 +4410,6 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_constant_bfloat_tensor) test_case.run(); } - NGRAPH_TEST(${BACKEND_NAME}, onnx_constant_float_scalar) { auto function = onnx_import::import_onnx_model( diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest index 9d6ea205f82..f95ce0d951e 100644 --- a/ngraph/test/runtime/ie/unit_test.manifest +++ b/ngraph/test/runtime/ie/unit_test.manifest @@ -758,9 +758,6 @@ lrn_2d_across_outermost_axis dyn_group_convolution_backprop_filters # Layer input port 1 is not connected to any data -prelu -prelu_shared_slope -prelu_negative_slope convolution_simple_padding # Cannot cast ngraph node Stack to CNNLayer! @@ -1670,14 +1667,6 @@ IE_CPU.evaluate_ctc_greedy_decoder_seq_len_f16 minimum_u16 IE_CPU/ElemTypesTests/1.onnx_test_add_abc_set_precission -# not implemented in CPU and GPU -evaluate_mvn_6_no_variance -evaluate_mvn_6 -evaluate_mvn_6_inside_sqrt -evaluate_mvn_6_across_chanells -evaluate_mvn_6_across_batch -IE_CPU.onnx_mvn_v6 - # not yet implemented on CPU/GPU Gather 7 gather_v7_data_int32_2d_indices_axis_1_negative_batch_dims gather_v7_1d_int32 diff --git a/ngraph/test/type_prop/unary_ops.cpp b/ngraph/test/type_prop/unary_ops.cpp index 3859dd7369d..788b50917fa 100644 --- a/ngraph/test/type_prop/unary_ops.cpp +++ b/ngraph/test/type_prop/unary_ops.cpp @@ -96,6 +96,6 @@ REGISTER_TYPED_TEST_CASE_P(UnaryOperator, dynamic_rank_input_shape_3D, dynamic_rank_input_shape_full); -using Types = ::testing::Types; +using Types = ::testing::Types; INSTANTIATE_TYPED_TEST_CASE_P(type_prop, UnaryOperator, Types); diff --git a/ngraph/test/util/engine/interpreter_engine.cpp b/ngraph/test/util/engine/interpreter_engine.cpp index 3254f395f08..876bd63d146 100644 --- a/ngraph/test/util/engine/interpreter_engine.cpp +++ b/ngraph/test/util/engine/interpreter_engine.cpp @@ -133,8 +133,9 @@ testing::AssertionResult if (expected_shape != result_shape) { comparison_result = testing::AssertionFailure(); - comparison_result << "Computed data shape does not match the expected shape for output " - << i << std::endl; + comparison_result << "Computed data shape(" << result_shape + << ") does not match the expected shape(" << expected_shape + << ") for output " << i << std::endl; break; } @@ -175,8 +176,9 @@ testing::AssertionResult test::INTERPRETER_Engine::compare_results(const size_t if (expected_shape != result_shape) { comparison_result = testing::AssertionFailure(); - comparison_result << "Computed data shape does not match the expected shape for output " - << i << std::endl; + comparison_result << "Computed data shape(" << result_shape + << ") does not match the expected shape(" << expected_shape + << ") for output " << i << std::endl; break; } diff --git a/ngraph/test/visitors/op/log.cpp b/ngraph/test/visitors/op/log.cpp new file mode 100644 index 00000000000..02d6293871c --- /dev/null +++ b/ngraph/test/visitors/op/log.cpp @@ -0,0 +1,29 @@ +// Copyright 
(C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "gtest/gtest.h" + +#include "ngraph/ngraph.hpp" +#include "ngraph/op/util/attr_types.hpp" +#include "ngraph/opsets/opset1.hpp" + +#include "util/visitor.hpp" + +using namespace std; +using namespace ngraph; +using ngraph::test::NodeBuilder; + +TEST(attributes, log_op) +{ + using namespace opset1; + + NodeBuilder::get_ops().register_factory(); + const auto data_input = make_shared(element::f32, Shape{1, 2, 3}); + const auto op = make_shared(data_input); + + NodeBuilder builder(op); + const auto expected_attr_count = 0; + + EXPECT_EQ(builder.get_value_map_size(), expected_attr_count); +} \ No newline at end of file diff --git a/openvino/conditional_compilation/CMakeLists.txt b/openvino/conditional_compilation/CMakeLists.txt index 4a27ac50b7c..1db12d4eefb 100644 --- a/openvino/conditional_compilation/CMakeLists.txt +++ b/openvino/conditional_compilation/CMakeLists.txt @@ -19,10 +19,7 @@ elseif(SELECTIVE_BUILD STREQUAL "ON") message(FATAL_ERROR "In case SELECTIVE_BUILD is enabled, the SELECTIVE_BUILD_STAT variable should contain the path to the collected InelSEAPI statistics.\ Usage: -DSELECTIVE_BUILD=ON -DSELECTIVE_BUILD_STAT=/path/*.csv") endif() - find_package (PythonInterp REQUIRED) - if(NOT PYTHON_VERSION_MAJOR EQUAL 3) - message(FATAL_ERROR " Python3 wasn't found!") - endif() + find_package (PythonInterp 3 REQUIRED) file(GLOB STAT_FILES ${SELECTIVE_BUILD_STAT}) diff --git a/tests/stress_tests/.automation/memcheck_tests/nightly_configs/myriad_references_config.xml b/tests/stress_tests/.automation/memcheck_tests/nightly_configs/myriad_references_config.xml new file mode 100644 index 00000000000..b4a8be108c3 --- /dev/null +++ b/tests/stress_tests/.automation/memcheck_tests/nightly_configs/myriad_references_config.xml @@ -0,0 +1,20 @@ + + + + # References were collected from DB with next query: {"commit_id": "aa2ae13c1ee6d700dd287ab809403e7de8c7c5e3", "commit_date": "2021-06-05 14:30:47+00:00"} + # and modified on FACTOR = 1.3 + + + + + + + + + + + + + + + diff --git a/tests/stress_tests/.automation/memcheck_tests/nightly_configs/myriad_test_config.xml b/tests/stress_tests/.automation/memcheck_tests/nightly_configs/myriad_test_config.xml new file mode 100644 index 00000000000..7409c6287da --- /dev/null +++ b/tests/stress_tests/.automation/memcheck_tests/nightly_configs/myriad_test_config.xml @@ -0,0 +1,115 @@ + + + MYRIAD + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/stress_tests/.automation/memleaks_tests/nightly_configs/myriad_test_config.xml b/tests/stress_tests/.automation/memleaks_tests/nightly_configs/myriad_test_config.xml new file mode 100644 index 00000000000..3718751b253 --- /dev/null +++ b/tests/stress_tests/.automation/memleaks_tests/nightly_configs/myriad_test_config.xml @@ -0,0 +1,19 @@ + + + 1 + + + 1 + + + 1000 + + + MYRIAD + + + + + + + \ No newline at end of file diff --git a/tests/stress_tests/.automation/unittests/nightly_configs/myriad_test_config.xml b/tests/stress_tests/.automation/unittests/nightly_configs/myriad_test_config.xml new file mode 100644 index 00000000000..979194a991a --- /dev/null +++ b/tests/stress_tests/.automation/unittests/nightly_configs/myriad_test_config.xml @@ -0,0 +1,20 @@ + + + 1 + + + 1 + 2 + + + 100 + + + MYRIAD + + + + + + + diff --git 
a/thirdparty/cnpy/cnpy.cpp b/thirdparty/cnpy/cnpy.cpp index a3a3e0ef406..26d0614bca1 100644 --- a/thirdparty/cnpy/cnpy.cpp +++ b/thirdparty/cnpy/cnpy.cpp @@ -183,9 +183,9 @@ void cnpy::parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_siz } cnpy::NpyArray load_the_npy_file(FILE* fp) { - std::vector shape; - size_t word_size; - bool fortran_order; + std::vector shape(0); + size_t word_size = 0; + bool fortran_order = false; cnpy::parse_npy_header(fp,word_size,shape,fortran_order); if (word_size >= 0 && word_size < ULLONG_MAX) { cnpy::NpyArray arr(shape, word_size, fortran_order); @@ -225,9 +225,9 @@ cnpy::NpyArray load_the_npz_array(FILE* fp, uint32_t compr_bytes, uint32_t uncom err = inflate(&d_stream, Z_FINISH); err = inflateEnd(&d_stream); - std::vector shape; - size_t word_size; - bool fortran_order; + std::vector shape(0); + size_t word_size = 0; + bool fortran_order = false; cnpy::parse_npy_header(&buffer_uncompr[0],word_size,shape,fortran_order); if (word_size >= 0 && word_size < ULLONG_MAX) { cnpy::NpyArray array(shape, word_size, fortran_order); diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 0a09df95faa..fdc6d9ef11a 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,24 +1,25 @@ # Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 cmake_minimum_required(VERSION 3.13) + project(python_tools) if(NOT DEFINED OpenVINO_MAIN_SOURCE_DIR) find_package(InferenceEngineDeveloperPackage QUIET) endif() -find_package(PythonInterp 3 REQUIRED) -set(PYTHON_VERSION python${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}) - -set(TARGET_NAME "python_tools") - -if(WIN32) - set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/$/python_api/${PYTHON_VERSION}/openvino) -else() - set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/python_api/${PYTHON_VERSION}/openvino) -endif() - if(ENABLE_PYTHON) + find_package(PythonInterp 3 REQUIRED) + set(PYTHON_VERSION python${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}) + + set(TARGET_NAME "python_tools") + + if(WIN32) + set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/$/python_api/${PYTHON_VERSION}/openvino) + else() + set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/python_api/${PYTHON_VERSION}/openvino) + endif() + # creates a copy inside bin directory for developers to have ability running python benchmark_app add_custom_target(${TARGET_NAME} ALL COMMAND ${CMAKE_COMMAND} -E make_directory ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}/tools diff --git a/tools/benchmark/main.py b/tools/benchmark/main.py index 29aff45742e..26ef6246f0c 100644 --- a/tools/benchmark/main.py +++ b/tools/benchmark/main.py @@ -152,7 +152,7 @@ def run(args): if MULTI_DEVICE_NAME in device_name and CPU_DEVICE_NAME in device_name: logger.warning("Turn on GPU trottling. Multi-device execution with the CPU + GPU performs best with GPU trottling hint, " + "which releases another CPU thread (that is otherwise used by the GPU driver for active polling)") - config[device]['CLDNN_PLUGIN_THROTTLE'] = '1' + config[device]['GPU_PLUGIN_THROTTLE'] = '1' elif device == MYRIAD_DEVICE_NAME: set_throughput_streams() config[device]['LOG_LEVEL'] = 'LOG_INFO'
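A minimal NumPy sketch of the slope broadcasting that the new prelu_1d, prelu_batch_nd and prelu_c_1_1 tests above exercise; prelu_reference is an illustrative helper name assumed here, not code or an API from this patch:

import numpy as np

def prelu_reference(x, slope):
    # PRelu: keep non-negative values, scale negative ones by the (broadcast) slope.
    return np.where(x >= 0, x, slope * x)

# Per-channel case mirroring prelu_c_1_1.prototxt: every input element is -1,
# the slope {0, 1, 2} has shape (3, 1, 1), so the channels map to -0, -1, -2.
x = -np.ones((2, 3, 4, 5), dtype=np.float32)
slope = np.arange(3, dtype=np.float32).reshape(3, 1, 1)
print(prelu_reference(x, slope)[0, :, 0, 0])                         # [-0. -1. -2.]

# 1-D case mirroring prelu_1d.prototxt: a slope of shape (5,) broadcasts along
# the last axis, giving the repeating -0, -1, -2, -3, -4 pattern in the output.
print(prelu_reference(x, np.arange(5, dtype=np.float32))[0, 0, 0])   # [-0. -1. -2. -3. -4.]

The same NumPy-style broadcasting is what lets a {3, 1, 1} slope act per channel on {2, 3, 4, 5} data, which is how the expected outputs hard-coded in the tests were derived.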