diff --git a/.gitignore b/.gitignore
index 1f7c2fbdb8f..1c5368e74d5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,7 @@ build/
.gdb_history
.vimspector.json
doc/
+!ngraph/doc
docs/build_documentation/work_dir/
inference-engine/plugins/
inference-engine/temp
@@ -55,4 +56,16 @@ __pycache__
/model-optimizer/!CMakeLists.txt
/model-optimizer/*.mapping
/model-optimizer/*.dat
-/model-optimizer/*.svg
\ No newline at end of file
+/model-optimizer/*.svg
+
+# ngraph
+ngraph/src/CPackConfig.cmake
+ngraph/src/CPackSourceConfig.cmake
+ngraph/src/VERSION
+ngraph/src/gtest/
+ngraph/src/json/
+ngraph/src/ngraphConfig.cmake
+ngraph/src/ngraphConfigVersion.cmake
+ngraph/src/protobuf/
+ngraph/src/src/
+ngraph/src/test/
diff --git a/cmake/developer_package.cmake b/cmake/developer_package.cmake
index 1deb23ff45b..e0027fdcc53 100644
--- a/cmake/developer_package.cmake
+++ b/cmake/developer_package.cmake
@@ -64,12 +64,11 @@ endmacro()
macro(ie_cpack)
set(CPACK_GENERATOR "TGZ")
+ string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
if(WIN32)
set(CPACK_PACKAGE_NAME inference-engine_${CMAKE_BUILD_TYPE})
- string(REPLACE "\\" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
else()
set(CPACK_PACKAGE_NAME inference-engine)
- string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
endif()
set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF)
set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
diff --git a/inference-engine/CMakeLists.txt b/inference-engine/CMakeLists.txt
index 79894a74a6c..2aa19ac73bb 100644
--- a/inference-engine/CMakeLists.txt
+++ b/inference-engine/CMakeLists.txt
@@ -159,6 +159,17 @@ if(ENABLE_PYTHON)
COMPONENT python_samples)
endif()
+# install speech demo files
+
+if(SPEECH_LIBS_AND_DEMOS)
+ ie_cpack_add_component(speech_demo_files REQUIRED)
+
+ install(DIRECTORY ${TEMP}/deployment_tools
+ ${TEMP}/data_processing
+ DESTINATION .
+ COMPONENT speech_demo_files)
+endif()
+
#
# Developer package
#
diff --git a/inference-engine/ie_bridges/python/CMakeLists.txt b/inference-engine/ie_bridges/python/CMakeLists.txt
index 519a5c64338..977260b9dff 100644
--- a/inference-engine/ie_bridges/python/CMakeLists.txt
+++ b/inference-engine/ie_bridges/python/CMakeLists.txt
@@ -57,7 +57,7 @@ add_subdirectory (src/openvino/inference_engine)
# Check Cython version
if("${CYTHON_VERSION}" VERSION_LESS "0.29")
- message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found verson ${CYTHON_VERSION}")
+ message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found version ${CYTHON_VERSION}")
else()
message(STATUS "Found Cython version ${CYTHON_VERSION}")
endif()
diff --git a/inference-engine/ie_bridges/python/cmake/FindCython.cmake b/inference-engine/ie_bridges/python/cmake/FindCython.cmake
index f960fe20998..5ac7c7049f7 100644
--- a/inference-engine/ie_bridges/python/cmake/FindCython.cmake
+++ b/inference-engine/ie_bridges/python/cmake/FindCython.cmake
@@ -58,6 +58,6 @@ FIND_PACKAGE_HANDLE_STANDARD_ARGS( Cython REQUIRED_VARS CYTHON_EXECUTABLE )
# Find Cython version
execute_process(COMMAND ${CYTHON_EXECUTABLE} -V ERROR_VARIABLE CYTHON_OUTPUT OUTPUT_QUIET)
-string(REGEX REPLACE "^Cython version ([0-9]+\\.[0-9]+\\.[0-9]+).*" "\\1" CYTHON_VERSION "${CYTHON_OUTPUT}")
+string(REGEX REPLACE "^Cython version ([0-9]+\\.[0-9]+(\\.[0-9]+)?).*" "\\1" CYTHON_VERSION "${CYTHON_OUTPUT}")
mark_as_advanced( CYTHON_EXECUTABLE CYTHON_VERSION )
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt
index 2ad199f8b35..aa2a30c0555 100644
--- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt
+++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt
@@ -23,6 +23,7 @@ foreach(PYX_FILE ${OTHER_SOURCES})
get_filename_component(PYX_NAME "${PYX_FILE}" NAME_WE)
set_source_files_properties(${PYX_FILE} PROPERTIES CYTHON_IS_CXX ON)
cython_add_module(${PYX_NAME} ${PYX_FILE})
+ add_dependencies(${TARGET_NAME} ${PYX_NAME})
target_include_directories(${PYX_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
target_link_libraries(${PYX_NAME} PRIVATE ${InferenceEngine_LIBRARIES})
endforeach()
diff --git a/inference-engine/ie_bridges/python/tests/test_InferRequest.py b/inference-engine/ie_bridges/python/tests/test_InferRequest.py
index 0dd7ef2a33c..4de736279d5 100644
--- a/inference-engine/ie_bridges/python/tests/test_InferRequest.py
+++ b/inference-engine/ie_bridges/python/tests/test_InferRequest.py
@@ -3,6 +3,7 @@ import os
import pytest
import warnings
import threading
+from datetime import datetime
from openvino.inference_engine import ie_api as ie
from conftest import model_path, image_path
@@ -195,11 +196,25 @@ def test_async_infer_wait_finish(device):
def test_async_infer_wait_time(device):
ie_core = ie.IECore()
net = ie_core.read_network(test_net_xml, test_net_bin)
- exec_net = ie_core.load_network(net, device, num_requests=1)
+ exec_net = ie_core.load_network(net, device, num_requests=2)
img = read_image()
request = exec_net.requests[0]
request.async_infer({'data': img})
- request.wait(100)
+ start_time = datetime.utcnow()
+ status = request.wait(ie.WaitMode.RESULT_READY)
+ assert status == ie.StatusCode.OK
+ time_delta = datetime.utcnow() - start_time
+ latency_ms = (time_delta.microseconds / 1000) + (time_delta.seconds * 1000)
+ timeout = max(100, latency_ms)
+ request = exec_net.requests[1]
+ request.async_infer({'data': img})
+ max_repeat = 10
+ status = ie.StatusCode.REQUEST_BUSY
+ i = 0
+ while i < max_repeat and status != ie.StatusCode.OK:
+ status = request.wait(timeout)
+ i += 1
+ assert status == ie.StatusCode.OK
res = request.output_blobs['fc_out'].buffer
assert np.argmax(res) == 2
del exec_net
diff --git a/inference-engine/samples/benchmark_app/benchmark_app.hpp b/inference-engine/samples/benchmark_app/benchmark_app.hpp
index 9affebbd8bf..7ab2469e45c 100644
--- a/inference-engine/samples/benchmark_app/benchmark_app.hpp
+++ b/inference-engine/samples/benchmark_app/benchmark_app.hpp
@@ -100,6 +100,9 @@ static const char dump_config_message[] = "Optional. Path to XML/YAML/JSON file
static const char shape_message[] = "Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or \"[1,3,224,224]\""
" in case of one input size.";
+/// @brief message for GNA quantization bits
+static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8 or 16 (default)";
+
/// @brief Define flag for showing help message
DEFINE_bool(h, false, help_message);
@@ -184,6 +187,9 @@ DEFINE_string(dump_config, "", dump_config_message);
/// @brief Define flag for input shape
DEFINE_string(shape, "", shape_message);
+/// @brief Define flag for quantization bits (default 16)
+DEFINE_int32(qb, 16, gna_qb_message);
+
/**
* @brief This function show a help message
*/
@@ -221,4 +227,5 @@ static void showUsage() {
std::cout << " -dump_config " << dump_config_message << std::endl;
std::cout << " -load_config " << load_config_message << std::endl;
#endif
+ std::cout << " -qb " << gna_qb_message << std::endl;
}
diff --git a/inference-engine/samples/benchmark_app/main.cpp b/inference-engine/samples/benchmark_app/main.cpp
index 684753b8b55..87da8c5d224 100644
--- a/inference-engine/samples/benchmark_app/main.cpp
+++ b/inference-engine/samples/benchmark_app/main.cpp
@@ -13,6 +13,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -274,6 +275,14 @@ int main(int argc, char *argv[]) {
}
} else if (device == "MYRIAD") {
device_config[CONFIG_KEY(LOG_LEVEL)] = CONFIG_VALUE(LOG_WARNING);
+ } else if (device == "GNA") {
+ if (FLAGS_qb == 8)
+ device_config[GNA_CONFIG_KEY(PRECISION)] = "I8";
+ else
+ device_config[GNA_CONFIG_KEY(PRECISION)] = "I16";
+
+ if (isFlagSetInCommandLine("nthreads"))
+ device_config[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(FLAGS_nthreads);
}
}
diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
index 94079aebdf3..80254dca3c0 100644
--- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
@@ -24,6 +24,7 @@
#include "details/caseless.hpp"
#include
#include
+#include
#include
#include
#include
@@ -73,7 +74,8 @@ InferenceEngine::ICNNNetwork::Ptr clDNNEngine::CloneNetwork(const InferenceEngin
std::shared_ptr clonedNetwork(nullptr);
if (network.getFunction()) {
const auto transformations_callback = [](const std::shared_ptr &node) -> bool {
- return std::dynamic_pointer_cast(node) != nullptr;
+ return std::dynamic_pointer_cast(node) ||
+ std::dynamic_pointer_cast(node);
};
CNNNetwork net(network.getFunction());
auto nGraphFunc = net.getFunction();
diff --git a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
index ae54fa67796..8a25d74d0ca 100644
--- a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
+++ b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
@@ -17,6 +17,8 @@
#include "blob_factory.hpp"
#include "precision_ex.hpp"
#include "layers/gna_layer_info.hpp"
+#include "weights_converter.hpp"
+#include "layer_transform.hpp"
namespace GNAPluginNS {
namespace frontend {
@@ -137,6 +139,48 @@ class Quant {
}
};
+template
+inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) {
+ auto prec_blob = InferenceEngine::make_shared_blob({ precision,
+ fp32_blob->getTensorDesc().getDims(), fp32_blob->getTensorDesc().getLayout() });
+ prec_blob->allocate();
+
+ int i = 0;
+ for (auto& precValue : *prec_blob) {
+ auto f32Value = fp32_blob->buffer().template as::value_type*>()[i++] * scale_factor;
+ if (f32Value > std::numeric_limits::max()) {
+ precValue = std::numeric_limits::max();
+ } else if (f32Value < std::numeric_limits::min()) {
+ precValue = std::numeric_limits::min();
+ } else {
+ precValue = static_cast(f32Value);
+ }
+ }
+
+ return static_cast(prec_blob);
+}
+
+inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) {
+ InferenceEngine::Blob::Ptr result_ptr = nullptr;
+ switch (precision) {
+ case InferenceEngine::Precision::FP32:
+ result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor);
+ break;
+ case InferenceEngine::Precision::I32:
+ result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor);
+ break;
+ case InferenceEngine::Precision::I16:
+ result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor);
+ break;
+ case InferenceEngine::Precision::I8:
+ result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor);
+ break;
+ default:
+ THROW_GNA_EXCEPTION << "FP32 to " << precision << " not supported";
+ }
+ return result_ptr;
+}
+
template
inline void quantizeWeightsBiases(const QuantDesc & quantDesc,
InferenceEngine::WeightableLayer *wl,
@@ -389,6 +433,18 @@ class DataQuantizer : public DataQuantizerBas
}
cnnLayer->precision = Desc::mandatory().getInputPrecision();
+ if (cnnLayer->type == "Const") {
+ if (cnnLayer->blobs["custom"]->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
+ cnnLayer->blobs["custom"] = make_fp32_blob(cnnLayer->blobs["custom"]);
+ }
+ auto const_scale_factor = InferenceEngine::getInjectedData(*cnnLayer)->_dst_quant.scale;
+ auto new_const_blob = InferenceEngine::Blob::CreateFromData(cnnLayer->outData[0]);
+ auto const_blob = cnnLayer->blobs["custom"];
+ if (const_blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) {
+ cnnLayer->blobs["custom"] = fp32_to_precision_blob(const_blob, cnnLayer->outData[0]->getPrecision(), const_scale_factor);
+ }
+ }
+
return true;
}
};
diff --git a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
index 9e9ebc10aaf..ba221f68ad3 100644
--- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
+++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
@@ -197,6 +197,36 @@ class ScaleFactorPerLayer {
return true;
}
+ if (cnnLayer->type == "Const") {
+ auto blob = cnnLayer->blobs["custom"];
+ if (blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
+ blob = make_fp32_blob(blob);
+ }
+ auto max_val = std::numeric_limits::min();
+ auto min_val = std::numeric_limits::max();
+
+ auto flt_buf = blob->buffer().as();
+ auto size = blob->size();
+
+ for (int i=0; i < size; i++) {
+ auto val = flt_buf[i];
+ if (val > max_val) max_val = val;
+ if (val < min_val) min_val = val;
+ }
+
+ auto abs_val = std::max(std::abs(max_val), std::abs(min_val));
+ auto scale_val = static_cast(std::numeric_limits::max()) / abs_val;
+
+ // TODO: Investigate what should be the scale in such cases (31910)
+ if (std::isinf(scale_val)) {
+ quant->_dst_quant.scale = quant->_src_quant.scale;
+ } else {
+ quant->_dst_quant.scale = scale_val;
+ }
+
+ return ScaleFactorUpdateResult();
+ }
+
if (!CNNNetHasPrevLayer(cnnLayer)) {
quant->_dst_quant.scale = quant->_src_quant.scale;
return ScaleFactorUpdateResult();
@@ -231,6 +261,7 @@ class ScaleFactorPerLayer {
auto quantParams0 = InferenceEngine::getInjectedData(in0);
auto quantParams1 = InferenceEngine::getInjectedData(in1);
+
auto quantData = InferenceEngine::getInjectedData(*eltwiseLayer);
switch (eltwiseLayer->_operation) {
@@ -239,6 +270,7 @@ class ScaleFactorPerLayer {
quantData->_dst_quant.scale = quantParams0->_dst_quant.scale * quantParams1->_dst_quant.scale;
break;
}
+ case InferenceEngine::EltwiseLayer::Sub:
case InferenceEngine::EltwiseLayer::Sum: {
// detect which input will be used as biases
if (LayerInfo(in0).has32BOutput()) {
@@ -247,6 +279,7 @@ class ScaleFactorPerLayer {
}
// this path might result in significant data loss
+ quantData->_bias_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale;
quantData->_weights_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale;
quantData->_dst_quant.scale = quantParams1->_dst_quant.scale;
diff --git a/inference-engine/src/gna_plugin/frontend/weights_converter.hpp b/inference-engine/src/gna_plugin/frontend/weights_converter.hpp
index 549b8ef9e98..040f7bb11f6 100644
--- a/inference-engine/src/gna_plugin/frontend/weights_converter.hpp
+++ b/inference-engine/src/gna_plugin/frontend/weights_converter.hpp
@@ -7,22 +7,28 @@
#include "quantized_layer_params.hpp"
#include "precision_utils.h"
+inline InferenceEngine::Blob::Ptr make_fp32_blob(InferenceEngine::Blob::Ptr fp16_blob) {
+ auto fp32_blob = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32,
+ fp16_blob->getTensorDesc().getDims(), fp16_blob->getTensorDesc().getLayout() });
+ fp32_blob->allocate();
+
+ int i = 0;
+ for (auto& f32Value : *fp32_blob) {
+ auto f16Value = fp16_blob->buffer().template as::value_type*>()[i++];
+ f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value);
+ }
+
+ return static_cast(fp32_blob);
+}
+
inline void fp16_to_fp32(InferenceEngine::WeightableLayer *lp) {
InferenceEngine::BlobMap newBlobs;
for (auto& blob : lp->blobs) {
if (blob.second->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP16) {
THROW_GNA_EXCEPTION << "Unsupported precision. Layer: " << lp->name << " , Blob: " << blob.first;
}
- auto tmp =
- InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32,
- blob.second->getTensorDesc().getDims(), InferenceEngine::Layout::C });
- tmp->allocate();
- int i = 0;
- for (auto& f32Value : *tmp) {
- auto f16Value = blob.second->buffer().template as::value_type*>()[i++];
- f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value);
- }
- newBlobs[blob.first] = tmp;
+ auto fp32_blob = make_fp32_blob(blob.second);
+ newBlobs[blob.first] = fp32_blob;
}
lp->_biases = newBlobs["biases"];
lp->_weights = newBlobs["weights"];
@@ -44,6 +50,18 @@ inline bool convertWeights(InferenceEngine::CNNLayer* lp) {
for (auto& dataItem : lp->outData) {
dataItem->setPrecision(InferenceEngine::Precision::FP32);
}
+ InferenceEngine::BlobMap newBlobs;
+ for (auto& blob_pair : lp->blobs) {
+ auto blob_name = blob_pair.first;
+ auto blob_ptr = blob_pair.second;
+ if (blob_ptr->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
+ auto new_blob = make_fp32_blob(blob_ptr);
+ newBlobs[blob_name] = new_blob;
+ } else {
+ newBlobs[blob_name] = blob_ptr;
+ }
+ }
+
return true;
}
diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
index 60ff272b6fb..46f41199ee9 100644
--- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
+++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
@@ -185,17 +185,16 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer)
if (constLayer->blobs.find("custom") == constLayer->blobs.end()) {
THROW_GNA_EXCEPTION << "const layer: " << constLayer->name << "doesn't have custom in blobs section";
}
- auto constBlob = constLayer->blobs["custom"];
+ auto const_blob = constLayer->blobs["custom"];
- void* ptr_for_const_blob = &ptr_for_const_blob;
- connectOutput(constLayer, ptr_for_const_blob, constBlob->size());
-
- const_connections[constLayer->name] = ptr_for_const_blob;
+ const_connections[constLayer->name] = &const_connections[constLayer->name];
+ void* ptr_for_const_blob = &const_connections[constLayer->name];
+ connectOutput(constLayer, ptr_for_const_blob, const_blob->byteSize());
// TODO: segment type for bind, bind initializer not used - need refactor to separate bind and allocation requests
// dont see practical use case when bind storage type need to be different that allocation type
- gnamem->readonly().bind_initializer(ptr_for_const_blob, [constBlob](void* data, size_t size) {
- ie_memcpy(data, size, constBlob->buffer(), constBlob->byteSize());
+ gnamem->readonly().bind_initializer(ptr_for_const_blob, [const_blob](void* data, size_t size) {
+ ie_memcpy(data, size, const_blob->buffer(), const_blob->byteSize());
});
}
@@ -602,15 +601,35 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
if (cropLayer == nullptr) {
return;
}
- if (cropLayer->axis.size() > 1) {
+
+ IE_ASSERT(!layer->insData.empty());
+ auto inputs = layer->insData.begin()->lock();
+
+ IE_ASSERT(!cropLayer->axis.empty());
+ IE_ASSERT(cropLayer->axis.size() == cropLayer->dim.size());
+ IE_ASSERT(cropLayer->axis.size() == cropLayer->offset.size());
+
+ std::vector axis, dim, offset;
+ for (int n = 0; n < cropLayer->axis.size(); n++) {
+ uint32_t input_dim = FROM_IR_DIM(inputs, inputs->getDims().size() - cropLayer->axis[n]);
+ // Exclude crop layer components that do nothing
+ if (cropLayer->offset[n] == 0 && cropLayer->dim[n] == input_dim) {
+ continue;
+ }
+ axis.push_back(cropLayer->axis[n]);
+ dim.push_back(cropLayer->dim[n]);
+ offset.push_back(cropLayer->offset[n]);
+ }
+
+ if (axis.size() > 1) {
THROW_GNA_EXCEPTION <<
- "Crop layer does not support the number of cropped dimensions = "
- << cropLayer->axis.size() << ".";
+ "Crop layer does not support the number of (non-trivial) cropped dimensions more than 1, provided: "
+ << axis.size() << ".";
}
auto quantized = InferenceEngine::getInjectedData(layer);
- size_t cropOffset = cropLayer->offset.back() * cropLayer->precision.size();
- size_t cropOutputSize = cropLayer->dim.back() * cropLayer->precision.size();
+ size_t cropOffset = offset.front() * cropLayer->precision.size();
+ size_t cropOutputSize = dim.front() * cropLayer->precision.size();
if (ALIGN64(cropOffset) == cropOffset) {
// leave crop as it is
@@ -637,20 +656,18 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
} else {
gnalog() << "Crop " << layer->name << " is being replaced by Affine layer...\n";
IE_ASSERT(!layer->outData.empty());
- IE_ASSERT(!layer->insData.empty());
auto outputs = *layer->outData.begin();
- auto inputs = layer->insData.begin()->lock();
// only 1D crops supported
- if (cropLayer->axis.size() != 1) {
+ if (axis.size() != 1) {
THROW_GNA_EXCEPTION << "only 1D crop layer supported: " << cropLayer->name;
}
// TODO: add unit tests for 4d crops blobs
- uint32_t num_rows_in = FROM_IR_DIM(inputs, inputs->getDims().size() - cropLayer->axis[0]);
+ uint32_t num_rows_in = FROM_IR_DIM(inputs, inputs->getDims().size() - axis.front());
uint32_t num_columns_in = 1;
- uint32_t num_rows_out = FROM_IR_DIM(outputs, inputs->getDims().size() - cropLayer->axis[0]);
+ uint32_t num_rows_out = FROM_IR_DIM(outputs, inputs->getDims().size() - axis.front());
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
void* ptr_inputs = nullptr;
@@ -686,7 +703,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
connectOutput(layer, ptr_outputs, num_data_bytes_out);
- FillWeightOfAligningFilter(layer, ptr_weights, cropLayer->offset.back(), (quantized == nullptr) ? false : true);
+ FillWeightOfAligningFilter(layer, ptr_weights, offset.front(), (quantized == nullptr) ? false : true);
(quantized == nullptr) ?
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64) :
@@ -713,17 +730,27 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
int biasesLayerIdx = 1;
if (quantized) {
- if (eltwise._operation == EltwiseLayer::Sum) {
+ switch (eltwise._operation) {
+ case InferenceEngine::EltwiseLayer::Sum:
+ case InferenceEngine::EltwiseLayer::Sub:
+ {
if (inputs4Bytes->getPrecision().size() != 4) {
std::swap(inputs4Bytes, inputs2Bytes);
biasesLayerIdx = 0;
}
GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2);
GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 4);
- } else {
+ break;
+ }
+ case InferenceEngine::EltwiseLayer::Prod:
+ {
// for mul both inputs should be 2 bytes precision
GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2);
GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 2);
+ break;
+ }
+ default:
+ THROW_GNA_EXCEPTION << "Unsupported eltwise operation for quantization: " << eltwise._operation;
}
}
@@ -767,6 +794,18 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 1 - biasesLayerIdx);
switch (eltwise._operation) {
+ case EltwiseLayer::Sub:
+ if (quantized == nullptr) {
+ gnamem->readonly().push_value(ptr_weights, -1.0f, num_rows_out, 64);
+ } else {
+ auto scaledIdentity = -quantized->_weights_quant.scale;
+
+ auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast(INT16_MAX)));
+
+ gnamem->readonly().push_value(ptr_weights, quantizedIdentity, num_rows_out, 64);
+ }
+ connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
+ break;
case EltwiseLayer::Sum:
if (quantized == nullptr) {
gnamem->readonly().push_value(ptr_weights, 1.0f, num_rows_out, 64);
diff --git a/inference-engine/src/gna_plugin/gna_plugin_config.cpp b/inference-engine/src/gna_plugin/gna_plugin_config.cpp
index 216a2180c8e..8a7613584c8 100644
--- a/inference-engine/src/gna_plugin/gna_plugin_config.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin_config.cpp
@@ -248,8 +248,6 @@ void Config::AdjustKeyMapValues() {
key_config_map[CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)] =
gnaFlags.exclusive_async_requests ? PluginConfigParams::YES: PluginConfigParams::NO;
key_config_map[GNA_CONFIG_KEY(PRECISION)] = gnaPrecision.name();
- key_config_map[CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)] =
- gnaFlags.exclusive_async_requests ? PluginConfigParams::YES: PluginConfigParams::NO;
key_config_map[GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN)] =
gnaFlags.uniformPwlDesign ? PluginConfigParams::YES: PluginConfigParams::NO;
key_config_map[CONFIG_KEY(PERF_COUNT)] =
diff --git a/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp b/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp
index 8b8f5591614..4cc135e1087 100644
--- a/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp
+++ b/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp
@@ -153,6 +153,15 @@ class LayerInfo {
return dynamic_cast(layer)->_operation ==
InferenceEngine::EltwiseLayer::Sum;
}
+ bool isEltwiseSub() const noexcept {
+ IS_VALID();
+ if (!isEltwise()) return false;
+ // dynamic_cast(layer) is validated in isEltwise function
+ // coverity[var_deref_op]
+ return dynamic_cast(layer)->_operation ==
+ InferenceEngine::EltwiseLayer::Sub;
+ }
+
bool isEltwiseMul() const noexcept {
IS_VALID();
if (!isEltwise()) return false;
diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
index 0c4b9f12961..b74d67710db 100644
--- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
+++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
@@ -150,6 +150,7 @@ static std::vector getCandidatesForIdentityInsertion(const CNNLayer
auto prev1 = PrevFunctionalLayer(l, 1);
switch (eltwise->_operation) {
+ case EltwiseLayer::Sub:
case EltwiseLayer::Sum:
if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) {
return prevLayers;
@@ -227,7 +228,7 @@ void InsertDiagonalLayerPass::run() {
// for e mul if we have 2-4 - inputs we need to insert identity to put 4 bytes input into weights
// for e mul if we have 4-4 - inputs we need to insert 2 identities to put both 4 bytes input into weights
- if (eltwise->_operation != EltwiseLayer::Sum)
+ if (eltwise->_operation != EltwiseLayer::Sum && eltwise->_operation != EltwiseLayer::Sub)
continue;
auto prevLayer1 = CNNNetPrevLayerSkipCertain(l, 1, [](CNNLayerPtr ptr) {
diff --git a/inference-engine/src/ir_readers/ie_ir_parser.cpp b/inference-engine/src/ir_readers/ie_ir_parser.cpp
index 27222c95d65..7f1047f4f9f 100644
--- a/inference-engine/src/ir_readers/ie_ir_parser.cpp
+++ b/inference-engine/src/ir_readers/ie_ir_parser.cpp
@@ -392,7 +392,7 @@ std::shared_ptr V10Parser::createNode(const std::vector V10Parser::LayerCreator::cre
std::vector activations_beta = getParameters(dn, "activations_beta", {});
float clip = GetFloatAttr(dn, "clip", 0.f);
return std::make_shared(inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], inputs[5],
- GetUIntAttr(dn, "hidden_size"), ngraph::op::LSTMWeightsFormat::IFCO,
+ GetUInt64Attr(dn, "hidden_size"), ngraph::op::LSTMWeightsFormat::IFCO,
activations, activations_alpha, activations_beta, clip);
}
@@ -1365,8 +1365,8 @@ std::shared_ptr V10Parser::LayerCreator::cre
if (dn.empty())
THROW_IE_EXCEPTION << "Cannot read parameter for " << getType() << " layer with name: " << layerParsePrms.name;
- size_t offset = GetUIntAttr(dn, "offset");
- size_t size = GetUIntAttr(dn, "size");
+ size_t offset = GetUInt64Attr(dn, "offset");
+ size_t size = GetUInt64Attr(dn, "size");
if (!weights || weights->cbuffer() == nullptr)
THROW_IE_EXCEPTION << "Cannot read network! The model requires weights data! "
diff --git a/inference-engine/src/ir_readers/ie_ir_parser.hpp b/inference-engine/src/ir_readers/ie_ir_parser.hpp
index 4b2d96e1dde..17b331cb3fb 100644
--- a/inference-engine/src/ir_readers/ie_ir_parser.hpp
+++ b/inference-engine/src/ir_readers/ie_ir_parser.hpp
@@ -166,12 +166,12 @@ private:
class XmlDeserializer : public ngraph::AttributeVisitor {
public:
explicit XmlDeserializer(const pugi::xml_node& node): node(node) {}
- void on_attribute(const std::string& name, std::string& value) override {
+ void on_adapter(const std::string& name, ngraph::ValueAccessor& value) override {
std::string val;
if (!getStrAttribute(node.child("data"), name, val)) return;
- value = val;
+ value.set(val);
}
- void on_attribute(const std::string& name, bool& value) override {
+ void on_adapter(const std::string& name, ngraph::ValueAccessor& value) override {
std::string val;
if (!getStrAttribute(node.child("data"), name, val)) return;
std::transform(val.begin(), val.end(), val.begin(), [](char ch) {
@@ -184,7 +184,7 @@ private:
bool is_false = false_names.find(val) != false_names.end();
if (!is_true && !is_false) return;
- value = is_true;
+ value.set(is_true);
}
void on_adapter(const std::string& name, ngraph::ValueAccessor& adapter) override {
std::string val;
diff --git a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp
index d1eef3df1f0..1c6ed0f3227 100644
--- a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp
+++ b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp
@@ -63,12 +63,8 @@ public:
CNNLayerPtr create();
- void on_attribute(const std::string& name, std::string& value) override {
- params[name] = value;
- }
-
- void on_attribute(const std::string& name, bool& value) override {
- params[name] = value ? "true" : "false";
+ void on_adapter(const std::string& name, ::ngraph::ValueAccessor &value) override {
+ params[name] = value.get() ? "true" : "false";
}
void addSpecificCreator(const std::vector& forTypes, const CreatorFor& creator) {
@@ -417,6 +413,15 @@ InferenceEngine::details::CNNLayerCreator::CNNLayerCreator(const std::shared_ptr
res->params = params;
return res;
});
+
+ addSpecificCreator({"StaticShapeTopK"}, [](const std::shared_ptr<::ngraph::Node>& node,
+ const std::map params) -> CNNLayerPtr {
+ LayerParams attrs = {node->get_friendly_name(), "TopK",
+ details::convertPrecision(node->get_output_element_type(0))};
+ auto res = std::make_shared(attrs);
+ res->params = params;
+ return res;
+ });
}
CNNLayerPtr InferenceEngine::details::CNNLayerCreator::create() {
@@ -530,7 +535,6 @@ std::shared_ptr convertFunctionToICNNNetwork(const std::shared_p
std::make_shared>(),
std::make_shared>(),
std::make_shared>(),
- std::make_shared>(),
std::make_shared>(),
std::make_shared>(),
std::make_shared>(),
diff --git a/inference-engine/src/legacy_api/src/graph_transformer.cpp b/inference-engine/src/legacy_api/src/graph_transformer.cpp
index cb5afbff4e2..180c63c0eda 100644
--- a/inference-engine/src/legacy_api/src/graph_transformer.cpp
+++ b/inference-engine/src/legacy_api/src/graph_transformer.cpp
@@ -16,6 +16,7 @@
#include
#include
#include
+#include
#include
#include "blob_factory.hpp"
@@ -228,6 +229,12 @@ std::vector ConstTransformer::foldConstSubgraphsInternal(const std:
return remainingConstLayers;
}
+static std::vector skipConstInfer = {
+ "FakeQuantize",
+ "Quantize",
+ "CumSum" // Const inference function for CumSum is not implemented!
+};
+
const std::map ConstTransformer::getConstLayers(const std::vector& sortedLayers) {
std::map mapConstLayers;
// collect all const layers, which inputs are const layers.
@@ -235,7 +242,7 @@ const std::map ConstTransformer::getConstLayers(const std::ve
// Layers with "Shape" and "Const" type are Const by definition
if (layer->type == "Shape" || layer->type == "Const") {
mapConstLayers[layer->name] = false;
- } else if ((layer->type != "FakeQuantize") && (layer->type != "Quantize") && (!isForFakeQuantzie(*layer))) {
+ } else if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end() && !isForFakeQuantzie(*layer)) {
bool isAllInputsConst = true;
for (auto const& data : layer->insData) {
auto creator = data.lock()->getCreatorLayer().lock();
@@ -336,7 +343,7 @@ const BlobMap ConstTransformer::getConstData(const std::map&
};
for (const auto& layer : sortedLayers) {
- if (layer->type == "FakeQuantize" || layer->type == "Quantize") {
+ if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) != skipConstInfer.end()) {
continue;
}
@@ -346,13 +353,13 @@ const BlobMap ConstTransformer::getConstData(const std::map&
auto implPtr = holder.getConstInferImpl(layer->type);
if (!implPtr && !isForShape)
- if (layer->type != "FakeQuantize" && layer->type != "Quantize")
+ if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end())
THROW_IE_EXCEPTION << "Failed to find reference implementation for `" + layer->name +
"` Layer with `" + layer->type + "` Type on constant propagation";
if (!isForShape) {
auto outputBlobs = getOutputBlobs(layer->outData);
auto inp = getInputBlobs(layer->insData, isForShape);
- if (layer->type != "FakeQuantize" && layer->type != "Quantize")
+ if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end())
implPtr->infer(inp, layer->params, layer->blobs, outputBlobs);
for (int i = 0; i < layer->outData.size(); i++) {
std::string dataName = layer->outData[i]->getName();
diff --git a/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp b/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp
index 290dc13e452..0bf0115d1db 100644
--- a/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp
+++ b/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp
@@ -1381,24 +1381,6 @@ CNNLayer::Ptr NodeConverter::createLayer(const st
return res;
}
-template <>
-CNNLayer::Ptr NodeConverter::createLayer(const std::shared_ptr& layer) const {
- LayerParams params = {layer->get_friendly_name(), "Select", details::convertPrecision(layer->get_output_element_type(0))};
-
- auto res = std::make_shared(params);
- auto castedLayer = ngraph::as_type_ptr(layer);
- if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
-
- auto broadcast = castedLayer->get_auto_broadcast().m_type;
- if (broadcast == ngraph::op::AutoBroadcastType::NUMPY) {
- res->params["auto_broadcast"] = "numpy";
- } else if (broadcast == ngraph::op::AutoBroadcastType::NONE) {
- res->params["auto_broadcast"] = "none";
- }
-
- return res;
-}
-
template <>
CNNLayer::Ptr NodeConverter::createLayer(
const std::shared_ptr& layer) const {
diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt
index 2bb9b37dfbd..2e7df2f0bdc 100644
--- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt
+++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt
@@ -4,6 +4,10 @@
set(TARGET_NAME "MKLDNNPlugin")
+if(ENABLE_LTO)
+ ie_enable_lto()
+endif()
+
if (WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNOMINMAX")
endif()
@@ -41,6 +45,7 @@ set(LAYERS
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_resample_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_normalize_node.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/nodes/list.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/batch_to_space.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/broadcast.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/convert.cpp
@@ -93,6 +98,7 @@ set(LAYERS
${CMAKE_CURRENT_SOURCE_DIR}/nodes/topk.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal_imp.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/nodes/cum_sum.cpp
)
foreach(LAYER ${LAYERS})
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp b/inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp
index 5095ea31d3a..12c9644b4cb 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp
@@ -51,7 +51,7 @@ struct primitive_desc_iterator : public handle
memory::primitive_desc fetch() const {
memory::primitive_desc adesc;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc = mkldnn_primitive_desc_iterator_fetch(get());
@@ -72,7 +72,7 @@ struct primitive_desc_iterator : public handle
memory::primitive_desc src_primitive_desc(size_t index = 0) const {
memory::primitive_desc adesc;
memory::primitive_desc cdesc_elem;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
const_mkldnn_primitive_desc_t const_cdesc =
mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -86,7 +86,7 @@ struct primitive_desc_iterator : public handle
memory::primitive_desc dst_primitive_desc(size_t index = 0) const {
memory::primitive_desc adesc;
memory::primitive_desc cdesc_elem;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
const_mkldnn_primitive_desc_t const_cdesc =
mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -101,7 +101,7 @@ struct primitive_desc_iterator : public handle
memory::primitive_desc diff_src_primitive_desc(size_t index = 0) const {
memory::primitive_desc adesc;
memory::primitive_desc cdesc_elem;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
const_mkldnn_primitive_desc_t const_cdesc =
mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -115,7 +115,7 @@ struct primitive_desc_iterator : public handle
memory::primitive_desc weights_primitive_desc(size_t index = 0) const {
memory::primitive_desc adesc;
memory::primitive_desc cdesc_elem;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
const_mkldnn_primitive_desc_t const_cdesc =
mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -129,7 +129,7 @@ struct primitive_desc_iterator : public handle
memory::primitive_desc diff_dst_primitive_desc(size_t index = 0) const {
memory::primitive_desc adesc;
memory::primitive_desc cdesc_elem;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
const_mkldnn_primitive_desc_t const_cdesc =
mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -152,7 +152,7 @@ struct primitive_desc_iterator : public handle
template
void getPrimitiveDescriptor(T& pdesc) const {
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
memory::primitive_desc cdescpd;
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp
index 43c0b93995f..209bcc44d61 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp
@@ -151,7 +151,9 @@ void MKLDNNEdge::allocate(const void* mem_ptr) {
auto inputDesc = getInputDesc();
auto outputDesc = getOutputDesc();
if (!MKLDNNExtensionUtils::initTensorsAreEqual(outputDesc, inputDesc) ||
- (inputDesc.getDims().size() > 0 && inputDesc.getDims()[0] != 1 && inputDesc != outputDesc))
+ (inputDesc.getDims().size() > 0 && inputDesc.getDims()[0] != 1 &&
+ (inputDesc.getPrecision() != outputDesc.getPrecision() ||
+ inputDesc.getBlockingDesc() != outputDesc.getBlockingDesc())))
THROW_IE_EXCEPTION << "Cannot allocate memory. Nodes have primitive descriptors with different formats.";
if (inputDesc.getLayout() == InferenceEngine::Layout::ANY)
THROW_IE_EXCEPTION << "Cannot get input descriptor!";
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.cpp
deleted file mode 100644
index c71790729ce..00000000000
--- a/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "mkldnn_layers_dispatcher.hpp"
-#include
-#include "nodes/list.hpp"
-#include
-
-using namespace InferenceEngine;
-
-namespace MKLDNNPlugin {
-
-void addDefaultExtensions(MKLDNNExtensionManager::Ptr mngr) {
- if (!mngr)
- THROW_IE_EXCEPTION << "Cannot add default extensions! Extension manager is empty.";
-
- auto defaultExtensions = std::make_shared();
- mngr->AddExtension(defaultExtensions);
-}
-
-} // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.hpp b/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.hpp
deleted file mode 100644
index 4c7e5b6cb67..00000000000
--- a/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.hpp
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include "mkldnn_extension_mngr.h"
-
-namespace MKLDNNPlugin {
-
-void addDefaultExtensions(MKLDNNExtensionManager::Ptr mngr);
-
-} // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp
index ad1bc551220..1ea2ecd2c0f 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp
@@ -23,11 +23,14 @@ MKLDNNMemory::MKLDNNMemory(const engine& eng) : eng(eng) {}
size_t MKLDNNMemory::GetSize() const {
uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(GetDataType()));
+ return GetElementsCount() * itemSize;
+}
+size_t MKLDNNMemory::GetElementsCount() const {
auto desc = GetDescriptor();
std::vector dims(desc.data.layout_desc.blocking.padding_dims,
desc.data.layout_desc.blocking.padding_dims + desc.data.ndims);
- return std::accumulate(std::begin(dims), std::end(dims), (size_t) 1, std::multiplies()) * itemSize;
+ return std::accumulate(std::begin(dims), std::end(dims), (size_t) 1, std::multiplies());
}
void MKLDNNMemory::Create(memory::dims dims, memory::data_type data_type, memory::format format, const void* data) {
@@ -182,6 +185,7 @@ bool MKLDNNMemory::isConsistant(memory::dims dims, memory::format format) {
case f::OhIw16o4i:
case f::OIhw4i16o4i:
case f::OhIw8o4i:
+ case f::IOhw16o16i:
ndims = 4; break;
// DHW
case f::ncdhw:
@@ -411,6 +415,7 @@ std::string MKLDNNMemory::formatToString(memory::format fmt) {
case memory::OhIw8o4i: return "OhIw8o4i";
case memory::OhIw16o4i: return "OhIw16o4i";
case memory::OIhw4i16o4i: return "OIhw4i16o4i";
+ case memory::IOhw16o16i: return "IOhw16o16i";
case memory::oidhw: return "oidhw";
case memory::dhwio: return "dhwio";
@@ -718,6 +723,33 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
blkDims.push_back(16);
layout = Layout::BLOCKED;
break;
+ case memory::OIhw8o8i:
+ order = {0, 1, 2, 3, 0, 1};
+ blkDims = dims;
+ blkDims[0] = blkDims[0] / 8 + (blkDims[0] % 8 ? 1 : 0);
+ blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0);
+ blkDims.push_back(8);
+ blkDims.push_back(8);
+ layout = Layout::BLOCKED;
+ break;
+ case memory::OIhw16o16i:
+ order = {0, 1, 2, 3, 0, 1};
+ blkDims = dims;
+ blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0);
+ blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
+ blkDims.push_back(16);
+ blkDims.push_back(16);
+ layout = Layout::BLOCKED;
+ break;
+ case memory::IOhw16o16i:
+ order = {1, 0, 2, 3, 0, 1};
+ blkDims = dims;
+ blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0);
+ blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
+ blkDims.push_back(16);
+ blkDims.push_back(16);
+ layout = Layout::BLOCKED;
+ break;
case memory::OIdhw8i8o:
order = {0, 1, 2, 3, 4, 1, 0};
blkDims = dims;
@@ -736,8 +768,26 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
blkDims.push_back(16);
layout = Layout::BLOCKED;
break;
+ case memory::OIdhw8o8i:
+ order = {0, 1, 2, 3, 4, 1, 0};
+ blkDims = dims;
+ blkDims[0] = blkDims[0] / 8 + (blkDims[0] % 8 ? 1 : 0);
+ blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0);
+ blkDims.push_back(8);
+ blkDims.push_back(8);
+ layout = Layout::BLOCKED;
+ break;
+ case memory::OIdhw16o16i:
+ order = {0, 1, 2, 3, 4, 0, 1};
+ blkDims = dims;
+ blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0);
+ blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
+ blkDims.push_back(16);
+ blkDims.push_back(16);
+ layout = Layout::BLOCKED;
+ break;
case memory::gOIhw4o4i:
- order = {0, 1, 2, 3, 4, 2, 1};
+ order = {0, 1, 2, 3, 4, 1, 2};
blkDims = dims;
blkDims[1] = blkDims[1] / 4 + (blkDims[1] % 4 ? 1 : 0);
blkDims[2] = blkDims[2] / 4 + (blkDims[2] % 4 ? 1 : 0);
@@ -754,6 +804,15 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
blkDims.push_back(8);
layout = Layout::BLOCKED;
break;
+ case memory::gOIhw8o8i:
+ order = {0, 1, 2, 3, 4, 1, 2};
+ blkDims = dims;
+ blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0);
+ blkDims[2] = blkDims[2] / 8 + (blkDims[2] % 8 ? 1 : 0);
+ blkDims.push_back(8);
+ blkDims.push_back(8);
+ layout = Layout::BLOCKED;
+ break;
case memory::gOIhw16i16o:
order = {0, 1, 2, 3, 4, 2, 1};
blkDims = dims;
@@ -763,6 +822,15 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
blkDims.push_back(16);
layout = Layout::BLOCKED;
break;
+ case memory::gOIhw16o16i:
+ order = {0, 1, 2, 3, 4, 1, 2};
+ blkDims = dims;
+ blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
+ blkDims[2] = blkDims[2] / 16 + (blkDims[2] % 16 ? 1 : 0);
+ blkDims.push_back(16);
+ blkDims.push_back(16);
+ layout = Layout::BLOCKED;
+ break;
case memory::OhIw8o4i:
order = {0, 2, 1, 3, 0, 1};
blkDims = dims;
@@ -1067,6 +1135,16 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
} else if (blkdDims[4] == 16 && blkdDims[5] == 16) {
mkldnnFormat = memory::format::OIhw16i16o;
}
+ } else if (order.size() == 6 && order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 0 && order[5] == 1) {
+ if (blkdDims[4] == 8 && blkdDims[5] == 8) {
+ mkldnnFormat = memory::format::OIhw8o8i;
+ } else if (blkdDims[4] == 16 && blkdDims[5] == 16) {
+ mkldnnFormat = memory::format::OIhw16o16i;
+ }
+ } else if (order.size() == 6 && order[0] == 1 && order[1] == 0 && order[2] == 2 && order[3] == 3 && order[4] == 0 && order[5] == 1) {
+ if (blkdDims[4] == 16 && blkdDims[5] == 16) {
+ mkldnnFormat = memory::format::IOhw16o16i;
+ }
} else if (order.size() == 5 && order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 0) {
if (blkdDims[4] == 8) {
mkldnnFormat = memory::format::Ohwi8o;
@@ -1122,6 +1200,13 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
} else if (blkdDims[6] == 16) {
mkldnnFormat = memory::format::OIdhw16i16o;
}
+ } else if (order.size() == 7 &&
+ order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 0 && order[6] == 1) {
+ if (blkdDims[6] == 8) {
+ mkldnnFormat = memory::format::OIdhw8o8i;
+ } else if (blkdDims[6] == 16) {
+ mkldnnFormat = memory::format::OIdhw16o16i;
+ }
} else if (order.size() == 7 &&
order[0] == 0 && order[1] == 2 && order[2] == 3 && order[3] == 1 && order[4] == 4 && order[5] == 0 && order[6] == 1) {
if (blkdDims[5] == 8) {
@@ -1136,12 +1221,21 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
} else if (order.size() == 7 &&
order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 2 && order[6] == 1) {
if (blkdDims[6] == 4) {
- mkldnnFormat = memory::format::gOIhw4o4i;
+ mkldnnFormat = memory::format::gOIhw4i4o;
} else if (blkdDims[6] == 8) {
mkldnnFormat = memory::format::gOIhw8i8o;
} else if (blkdDims[6] == 16) {
mkldnnFormat = memory::format::gOIhw16i16o;
}
+ } else if (order.size() == 7 &&
+ order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 1 && order[6] == 2) {
+ if (blkdDims[6] == 4) {
+ mkldnnFormat = memory::format::gOIhw4o4i;
+ } else if (blkdDims[6] == 8) {
+ mkldnnFormat = memory::format::gOIhw8o8i;
+ } else if (blkdDims[6] == 16) {
+ mkldnnFormat = memory::format::gOIhw16o16i;
+ }
} else if (order.size() == 7 &&
order[0] == 0 && order[1] == 1 && order[2] == 3 && order[3] == 2 && order[4] == 4 && order[5] == 1 && order[6] == 2) {
if (blkdDims[5] == 8 && blkdDims[6] == 4) {
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h
index 63b46c9e531..4b0d024d223 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h
@@ -87,6 +87,7 @@ public:
}
size_t GetSize() const;
+ size_t GetElementsCount() const;
mkldnn::memory::format GetFormat() const {
return static_cast(prim->get_primitive_desc().desc().data.format);
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
index e40475fa40d..e5afa640662 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
@@ -1017,12 +1017,17 @@ void MKLDNNNode::initOptimalPrimitiveDescriptor() {
auto config = selected_pd->getConfig();
if (!isInitConfig(config)) {
for (size_t i = 0; i < config.inConfs.size(); i++) {
- config.inConfs[i].desc = getConfiguredInputDesc(config, i);
+ // TensorDescriptor constructor which is called inside getConfiguredInputDesc incorrectly computes the offset field.
+ // That's why the MKLDNNMemoryDesc routine is used to reinitialize the TD with the expected offset values.
+ config.inConfs[i].desc = MKLDNNMemoryDesc(getConfiguredInputDesc(config, i));
}
for (size_t i = 0; i < config.outConfs.size(); i++) {
- config.outConfs[i].desc = getConfiguredOutputDesc(config, i);
+ // TensorDescriptor constructor which is called inside getConfiguredOutputDesc incorrectly computes the offset field.
+ // That's why the MKLDNNMemoryDesc routine is used to reinitialize the TD with the expected offset values.
+ config.outConfs[i].desc = MKLDNNMemoryDesc(getConfiguredOutputDesc(config, i));
}
+
initDescriptor(config);
} else if (getType() != RNNSeq && getType() != RNNCell) {
initDescriptor(config);
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h
index 46c3d26fe02..02c5083863c 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h
@@ -370,7 +370,10 @@ public:
if (srcDescs.empty() || selectedDescs.empty())
return false;
for (size_t i = 0; i < srcDescs.size() && i < selectedDescs.size(); i++) {
- if (srcDescs[i] != selectedDescs[i].desc && srcDescs[i].getLayout() != InferenceEngine::Layout::ANY)
+ if (!(srcDescs[i].getBlockingDesc() == selectedDescs[i].desc.getBlockingDesc() &&
+ srcDescs[i].getPrecision() == selectedDescs[i].desc.getPrecision() &&
+ srcDescs[i].getDims() == selectedDescs[i].desc.getDims()) &&
+ srcDescs[i].getLayout() != InferenceEngine::Layout::ANY)
return false;
}
return true;
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
index f626c75f0bf..95c7a38a59c 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
@@ -5,7 +5,6 @@
#include "ie_metric_helpers.hpp"
#include "mkldnn_plugin.h"
#include "mkldnn_extension_mngr.h"
-#include "mkldnn_layers_dispatcher.hpp"
#include "mkldnn_weights_cache.hpp"
#include
#include
@@ -15,6 +14,7 @@
#include
#include
#include
+#include
#include "convert_function_to_cnn_network.hpp"
#include
@@ -23,6 +23,7 @@
#include
#include
#include
+#include
#include
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
@@ -40,7 +41,7 @@ using namespace InferenceEngine;
Engine::Engine() {
_pluginName = "CPU";
- addDefaultExtensions(extensionManager);
+ extensionManager->AddExtension(std::make_shared());
}
Engine::~Engine() {
@@ -83,7 +84,8 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const st
const auto transformations_callback = [](const std::shared_ptr &node) -> bool {
return std::dynamic_pointer_cast(node) ||
std::dynamic_pointer_cast(node) ||
- std::dynamic_pointer_cast(node);
+ std::dynamic_pointer_cast(node) ||
+ std::dynamic_pointer_cast(node);
};
auto nGraphFunc = clonedNetwork->getFunction();
// Disable shape inference (WA for generic operations)
diff --git a/inference-engine/src/mkldnn_plugin/nodes/argmax.cpp b/inference-engine/src/mkldnn_plugin/nodes/argmax.cpp
index c7134eba5e0..449168f504c 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/argmax.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/argmax.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include "argmax_imp.hpp"
@@ -49,7 +48,7 @@ private:
argmax_conf conf;
};
-REG_FACTORY_FOR(ImplFactory, ArgMax);
+REG_FACTORY_FOR(ArgMaxImpl, ArgMax);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.cpp b/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.cpp
index 4fabb6010d1..b90851387d6 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.cpp
@@ -4,6 +4,7 @@
#include "argmax_imp.hpp"
+#include
#include
#include
#include
@@ -181,7 +182,7 @@ void argmax_many_classes_has_axis(const float* src_data, float* dst_data, Shape
vmask_type vmask;
int s_index = i0 * dim * after_num + ib1 * block_size;
- memset(reinterpret_cast(&vmax_values[0]), 0, sizeof(vmax_values));
+ std::memset(reinterpret_cast(&vmax_values[0]), 0, sizeof(vmax_values));
auto vswap_func = [&](int index1, int index2) {
vtmp = vmax_values[index1];
diff --git a/inference-engine/src/mkldnn_plugin/nodes/base.hpp b/inference-engine/src/mkldnn_plugin/nodes/base.hpp
index 2f9014ad081..db0268846f8 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/base.hpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/base.hpp
@@ -6,7 +6,7 @@
#include
#include "ie_util_internal.hpp"
-#include "list.hpp"
+#include "nodes/list.hpp"
#include
#include
@@ -176,6 +176,21 @@ protected:
IE_SUPPRESS_DEPRECATED_END
+template
+inline void extRegister(MKLDNNExtensions * extInstance, const char * __type) {
+ IE_SUPPRESS_DEPRECATED_START
+ extInstance->AddExt(__type,
+ [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* {
+ return new __prim(layer);
+ });
+ IE_SUPPRESS_DEPRECATED_END
+}
+
+#define REG_FACTORY_FOR(__prim, __type) \
+ void __prim ## __type(MKLDNNExtensions * extInstance) { \
+ extRegister>(extInstance, #__type); \
+ }
+
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
diff --git a/inference-engine/src/mkldnn_plugin/nodes/batch_to_space.cpp b/inference-engine/src/mkldnn_plugin/nodes/batch_to_space.cpp
index 2d0d3fc2948..60e15726fc9 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/batch_to_space.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/batch_to_space.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -219,7 +218,7 @@ private:
std::vector _crops_end;
};
-REG_FACTORY_FOR(ImplFactory, BatchToSpace);
+REG_FACTORY_FOR(BatchToSpaceImpl, BatchToSpace);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp b/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp
index cdaedb24d64..2e784c4b85c 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -129,7 +128,7 @@ private:
const size_t BROADCAST_SHAPE = 1;
};
-REG_FACTORY_FOR(ImplFactory, Broadcast);
+REG_FACTORY_FOR(BroadcastImpl, Broadcast);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp b/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp
index 278cd53b68b..bae370b59f0 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -123,7 +122,7 @@ private:
bool with_bins = false;
};
-REG_FACTORY_FOR(ImplFactory, Bucketize);
+REG_FACTORY_FOR(BucketizeImpl, Bucketize);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/uni_simd.h b/inference-engine/src/mkldnn_plugin/nodes/common/uni_simd.h
index 7f460dd9faa..bd55bb86294 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/common/uni_simd.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/common/uni_simd.h
@@ -14,8 +14,16 @@ namespace Cpu {
#if defined(HAVE_AVX512F)
namespace AVX512F {
+ static inline __m512 _mm_uni_any_ps() {
+ return __m512{};
+ }
+
+ static inline __m512i _mm_uni_any_epi32() {
+ return __m512i{};
+ }
+
static inline __m512 _mm_uni_loadu_ps(const float* psrc) {
- return _mm512_loadu_ps(psrc);
+ return _mm512_mask_loadu_ps(_mm_uni_any_ps(), (__mmask16)-1, psrc);
}
static inline void _mm_uni_storeu_ps(float* pdst, const __m512& vec) {
@@ -62,8 +70,12 @@ namespace AVX512F {
return _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vec0), _mm512_castps_si512(vec1)));
}
+ static inline __m512i _mm_uni_set1_epi32(int value) {
+ return _mm512_mask_set1_epi32(_mm_uni_any_epi32(), (__mmask16)-1, value);
+ }
+
static inline __m512 _mm_uni_blendv_ps(__m512 vec0, __m512 vec1, __m512 vmask) {
- return _mm512_mask_blend_ps(_mm512_cmpneq_epi32_mask(_mm512_castps_si512(vmask), _mm512_set1_epi32(0)), vec0, vec1);
+ return _mm512_mask_blend_ps(_mm512_cmpneq_epi32_mask(_mm512_castps_si512(vmask), _mm_uni_set1_epi32(0)), vec0, vec1);
}
static inline __m512 _mm_uni_blendv_ps(__m512 vec0, __m512 vec1, __mmask16 vmask) {
@@ -90,10 +102,6 @@ namespace AVX512F {
return _mm512_add_epi32(vec0, vec1);
}
- static inline __m512i _mm_uni_set1_epi32(int value) {
- return _mm512_set1_epi32(value);
- }
-
static inline __m512i _mm_uni_slli_epi32(__m512i vec, int value) {
return _mm512_sll_epi32(vec, _mm_set1_epi64x(value));
}
@@ -119,7 +127,7 @@ namespace AVX512F {
}
static inline __m512 _mm_uni_cvtepi32_ps(__m512i vec) {
- return _mm512_cvtepi32_ps(vec);
+ return _mm512_mask_cvtepi32_ps(_mm_uni_any_ps(), (__mmask16)-1, vec);
}
} // namespace AVX512F
#elif defined(HAVE_AVX2)
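
Note on the uni_simd.h change above: the plain AVX-512 intrinsics (_mm512_loadu_ps, _mm512_set1_epi32, _mm512_cvtepi32_ps) are swapped for their masked variants with an all-ones mask and a value-initialized pass-through vector, which leaves the results unchanged. A minimal standalone check of that equivalence (illustrative only; assumes an AVX-512F-capable CPU and a -mavx512f build, and uses _mm512_setzero_ps() where the header uses _mm_uni_any_ps()):

#include <immintrin.h>
#include <cstdio>

int main() {
    alignas(64) float src[16];
    for (int i = 0; i < 16; ++i) src[i] = static_cast<float>(i);

    // Plain unmasked load, as in the old code path.
    __m512 plain  = _mm512_loadu_ps(src);
    // Masked load: (__mmask16)-1 == 0xFFFF selects all 16 lanes, so the
    // pass-through source operand (a zero vector here) is never used.
    __m512 masked = _mm512_mask_loadu_ps(_mm512_setzero_ps(), (__mmask16)-1, src);

    alignas(64) float a[16], b[16];
    _mm512_storeu_ps(a, plain);
    _mm512_storeu_ps(b, masked);
    for (int i = 0; i < 16; ++i)
        if (a[i] != b[i]) { std::puts("mismatch"); return 1; }
    std::puts("identical");
    return 0;
}
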
diff --git a/inference-engine/src/mkldnn_plugin/nodes/convert.cpp b/inference-engine/src/mkldnn_plugin/nodes/convert.cpp
index f171a142828..5e4bd96fb46 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/convert.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/convert.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -129,7 +128,7 @@ private:
std::string precision;
};
-REG_FACTORY_FOR(ImplFactory<ConvertImpl>, Convert);
+REG_FACTORY_FOR(ConvertImpl, Convert);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy.cpp b/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy.cpp
index 372b237c7c4..717af9f9e3f 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -86,7 +85,7 @@ public:
}
};
-REG_FACTORY_FOR(ImplFactory<CTCGreedyDecoderImpl>, CTCGreedyDecoder);
+REG_FACTORY_FOR(CTCGreedyDecoderImpl, CTCGreedyDecoder);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp b/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp
new file mode 100644
index 00000000000..03a4f2dbc6f
--- /dev/null
+++ b/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp
@@ -0,0 +1,230 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "list.hpp"
+#include "base.hpp"
+
+#include
+#include
+#include "ie_parallel.hpp"
+#include "ie_precision.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class CumSumImpl: public ExtLayerBase {
+ enum { CUM_SUM_DATA, AXIS, numOfInputs };
+ enum { N, C, D, H, W, numOfDims };
+ bool exclusive;
+ bool reverse;
+ size_t axis = 0;
+ std::vector<size_t> shape5d;
+
+public:
+ explicit CumSumImpl(const CNNLayer* layer) {
+ try {
+ layerName = layer->name;
+ if ((layer->insData.size() != numOfInputs && layer->insData.size() != (numOfInputs - 1)) || layer->outData.size() != 1)
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has incorrect number of input/output edges!";
+
+ const auto &dataTensor = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc();
+ const auto &dataShape = dataTensor.getDims();
+ if (dataShape.size() < 1 || dataShape.size() > 5) {
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'data' input tensor with rank: " << dataShape.size();
+ }
+
+ exclusive = layer->GetParamAsBool("exclusive", false);
+ reverse = layer->GetParamAsBool("reverse", false);
+
+ const auto& dataPrecision = dataTensor.getPrecision();
+ if (dataPrecision != Precision::I8 && dataPrecision != Precision::U8 && dataPrecision != Precision::I16 && dataPrecision != Precision::I32 &&
+ dataPrecision != Precision::FP32 && dataPrecision != Precision::I64 && dataPrecision != Precision::U64 && dataPrecision != Precision::BF16)
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has unsupported 'data' input precision: " << dataPrecision.name();
+
+ if (layer->insData.size() == numOfInputs) {
+ const auto& axisTensor = layer->insData[AXIS].lock()->getTensorDesc();
+ const auto& axisTensorPrec = layer->insData[AXIS].lock()->getTensorDesc().getPrecision();
+ if (axisTensorPrec != Precision::I32 && axisTensorPrec != Precision::I64)
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has unsupported 'axis' input precision: " << axisTensorPrec.name();
+
+ const auto axisTensorRank = axisTensor.getDims().size();
+ if (axisTensorRank != 0)
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'axis' input tensor with rank: " << axisTensorRank;
+ }
+
+ if (dataShape != layer->outData[0]->getTensorDesc().getDims())
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has different 'data' input and output dimensions";
+
+ shape5d = get5dShape(dataShape);
+
+ LayerConfig config;
+ for (size_t i = 0; i < layer->insData.size(); i++) {
+ DataConfig inConfig;
+ inConfig.inPlace = -1;
+ inConfig.constant = false;
+
+ Precision inPrecision = layer->insData[i].lock()->getTensorDesc().getPrecision();
+ if (inPrecision == Precision::BF16)
+ inPrecision = Precision::FP32;
+ const SizeVector& inDims = layer->insData[i].lock()->getTensorDesc().getDims();
+ inConfig.desc = TensorDesc(inPrecision, inDims, InferenceEngine::TensorDesc::getLayoutByDims(inDims));
+
+ config.inConfs.push_back(inConfig);
+ }
+ DataConfig outConfig;
+ outConfig.inPlace = -1;
+ outConfig.constant = false;
+ Precision outPrecision = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc().getPrecision();
+ if (outPrecision == Precision::BF16)
+ outPrecision = Precision::FP32;
+ const SizeVector& outDims = layer->outData[0]->getTensorDesc().getDims();
+ outConfig.desc = TensorDesc(outPrecision, outDims, InferenceEngine::TensorDesc::getLayoutByDims(outDims));
+
+ config.outConfs.push_back(outConfig);
+
+ config.dynBatchSupport = false;
+ confs.push_back(config);
+ } catch (InferenceEngine::details::InferenceEngineException &ex) {
+ errorMsg = ex.what();
+ }
+ }
+
+ StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+ if (inputs.size() == numOfInputs)
+ axis = getAxis(inputs[AXIS], inputs[CUM_SUM_DATA]);
+
+ const auto &dataPrecision = inputs[CUM_SUM_DATA]->getTensorDesc().getPrecision();
+ switch (dataPrecision) {
+ case Precision::I8 : { execImpl<int8_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+ case Precision::U8 : { execImpl<uint8_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+ case Precision::I16 : { execImpl<int16_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+ case Precision::I32 : { execImpl<int32_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+ case Precision::FP32 : { execImpl<float>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+ case Precision::I64 : { execImpl<int64_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+ case Precision::U64 : { execImpl<uint64_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+ default : {
+ if (resp) {
+ std::string errorMsg = "CumSum layer with name '" + layerName + "' has unsupported 'data' input precision: " + dataPrecision.name();
+ errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+ }
+ return GENERAL_ERROR;
+ }
+ }
+ return OK;
+ }
+
+private:
+ template <typename dataType>
+ void execImpl(const Blob::CPtr& _input, const Blob::Ptr& _output) {
+ const auto *input = _input->cbuffer().as<const dataType *>() + _input->getTensorDesc().getBlockingDesc().getOffsetPadding();
+ auto *output = _output->buffer().as<dataType *>() + _output->getTensorDesc().getBlockingDesc().getOffsetPadding();
+ const size_t offset = _input->getTensorDesc().getBlockingDesc().getStrides()[axis];
+
+ if (reverse) {
+ if (exclusive) {
+ cumSum<true, true, dataType>(input, output, offset);
+ } else {
+ cumSum<true, false, dataType>(input, output, offset);
+ }
+ } else {
+ if (exclusive) {
+ cumSum<false, true, dataType>(input, output, offset);
+ } else {
+ cumSum<false, false, dataType>(input, output, offset);
+ }
+ }
+ }
+
+ template <bool reverse, bool exclusive, typename dataType>
+ void cumSum(const dataType *input, dataType *output, const size_t &offset) {
+ std::vector<size_t> iterationRange(numOfDims - 1);
+ size_t j = 0;
+ for (size_t i = 0; i < shape5d.size(); i++) {
+ if (i == axis)
+ continue;
+ iterationRange[j++] = shape5d[i];
+ }
+ parallel_for4d(iterationRange[0], iterationRange[1], iterationRange[2], iterationRange[3], [&](size_t ir0, size_t ir1, size_t ir2, size_t ir3) {
+ std::vector<size_t> forStartOffset;
+ forStartOffset.push_back(ir0); forStartOffset.push_back(ir1); forStartOffset.push_back(ir2); forStartOffset.push_back(ir3);
+ forStartOffset.insert(forStartOffset.begin() + axis, 0);
+ size_t startOffset = getStartOffset(forStartOffset);
+
+ const dataType *inputStart = input + startOffset;
+ dataType *outputStart = output + startOffset;
+
+ if (reverse) {
+ if (exclusive) {
+ outputStart[offset*(shape5d[axis] - 1)] = 0;
+ for (int64_t i = shape5d[axis] - 2; i >= 0; i--) {
+ outputStart[i*offset] = inputStart[(i+1)*offset] + outputStart[(i+1)*offset];
+ }
+ } else {
+ outputStart[offset*(shape5d[axis] - 1)] = inputStart[offset*(shape5d[axis] - 1)];
+ for (int64_t i = shape5d[axis] - 2; i >= 0; i--) {
+ outputStart[i*offset] = inputStart[i*offset] + outputStart[(i+1)*offset];
+ }
+ }
+ } else {
+ if (exclusive) {
+ outputStart[0] = 0;
+ for (size_t i = 1; i < shape5d[axis]; i++) {
+ outputStart[i*offset] = inputStart[(i-1)*offset] + outputStart[(i-1)*offset];
+ }
+ } else {
+ outputStart[0] = inputStart[0];
+ for (size_t i = 1; i < shape5d[axis]; i++) {
+ outputStart[i*offset] = inputStart[i*offset] + outputStart[(i-1)*offset];
+ }
+ }
+ }
+ });
+ }
+
+ size_t getStartOffset(std::vector<size_t> &forStartOffset) {
+ return forStartOffset[N]*shape5d[C]*shape5d[D]*shape5d[H]*shape5d[W] + forStartOffset[C]*shape5d[D]*shape5d[H]*shape5d[W] +
+ forStartOffset[D]*shape5d[H]*shape5d[W] + forStartOffset[H]*shape5d[W] + forStartOffset[W];
+ }
+
+ size_t getAxis(const Blob::CPtr& _axis, const Blob::CPtr& _data) {
+ const auto& axisPrecision = _axis->getTensorDesc().getPrecision();
+ const int64_t dataShapeSize = static_cast<int64_t>(_data->getTensorDesc().getDims().size());
+ int64_t axisValueFromBlob;
+ switch (axisPrecision) {
+ case Precision::I32 : {
+ const auto *axisPtr = _axis->cbuffer().as<const int32_t *>();
+ axisValueFromBlob = static_cast<int64_t>(axisPtr[0]);
+ break;
+ }
+ case Precision::I64 : {
+ const auto *axisPtr = _axis->cbuffer().as<const int64_t *>();
+ axisValueFromBlob = axisPtr[0];
+ break;
+ }
+ default : {
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'axis' input with precision: " << axisPrecision.name();
+ }
+ }
+ if (axisValueFromBlob < -dataShapeSize || axisValueFromBlob > dataShapeSize - 1)
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has axis with a value out of range: " << axisValueFromBlob;
+ return axisValueFromBlob >= 0 ? axisValueFromBlob : (axisValueFromBlob + dataShapeSize);
+ }
+
+ std::vector<size_t> get5dShape(const SizeVector& dims) {
+ std::vector<size_t> shape5d(numOfDims, 1);
+ for (size_t i = 0; i < dims.size(); i++)
+ shape5d[i] = dims[i];
+ return shape5d;
+ }
+
+private:
+ std::string layerName;
+};
+
+REG_FACTORY_FOR(CumSumImpl, CumSum);
+
+} // namespace Cpu
+} // namespace Extensions
+} // namespace InferenceEngine
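
For reference, the four CumSum modes implemented by the kernel above (inclusive/exclusive, forward/reverse along the chosen axis) reduce to the following scalar recurrence on a 1-D slice. This is an illustrative reference only, not the plugin code:

#include <cstddef>
#include <cstdio>
#include <vector>

// Scalar reference for the four CumSum modes. For data = {1, 2, 3, 4}:
//   inclusive, forward : 1 3 6 10
//   exclusive, forward : 0 1 3 6
//   inclusive, reverse : 10 9 7 4
//   exclusive, reverse : 9 7 4 0
// which matches the four branches of cumSum() above.
std::vector<float> cumSumRef(const std::vector<float>& in, bool exclusive, bool reverse) {
    const size_t n = in.size();
    std::vector<float> out(n, 0.0f);
    float acc = 0.0f;
    for (size_t k = 0; k < n; ++k) {
        const size_t i = reverse ? n - 1 - k : k;        // walk from the far end when reversed
        if (exclusive) { out[i] = acc; acc += in[i]; }   // sum of elements seen so far, excluding in[i]
        else           { acc += in[i]; out[i] = acc; }   // sum including in[i]
    }
    return out;
}

int main() {
    const std::vector<float> data{1, 2, 3, 4};
    for (bool reverse : {false, true})
        for (bool exclusive : {false, true}) {
            auto r = cumSumRef(data, exclusive, reverse);
            std::printf("exclusive=%d reverse=%d:", exclusive, reverse);
            for (float v : r) std::printf(" %g", v);
            std::printf("\n");
        }
    return 0;
}
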
diff --git a/inference-engine/src/mkldnn_plugin/nodes/depth_to_space.cpp b/inference-engine/src/mkldnn_plugin/nodes/depth_to_space.cpp
index f3b208b0994..69d9024029f 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/depth_to_space.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/depth_to_space.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -118,7 +117,7 @@ private:
size_t ownStrides[CNTR_SIZE];
};
-REG_FACTORY_FOR(ImplFactory<DepthToSpaceImpl>, DepthToSpace);
+REG_FACTORY_FOR(DepthToSpaceImpl, DepthToSpace);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp
index f80b48e29af..e5a7c09956a 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -604,7 +603,7 @@ void DetectionOutputImpl::nms_mx(const float* conf_data,
}
}
-REG_FACTORY_FOR(ImplFactory<DetectionOutputImpl>, DetectionOutput);
+REG_FACTORY_FOR(DetectionOutputImpl, DetectionOutput);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp
index ca8c8876343..c1f75770669 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -392,7 +391,7 @@ private:
-REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronDetectionOutputImpl>, ExperimentalDetectronDetectionOutput);
+REG_FACTORY_FOR(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/fill.cpp b/inference-engine/src/mkldnn_plugin/nodes/fill.cpp
index c8d347244ed..e08897184a1 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/fill.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/fill.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -121,7 +120,7 @@ private:
const size_t FILL_VALUE = 1;
};
-REG_FACTORY_FOR(ImplFactory<FillImpl>, Fill);
+REG_FACTORY_FOR(FillImpl, Fill);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/gather.cpp b/inference-engine/src/mkldnn_plugin/nodes/gather.cpp
index e624bf03651..cd7e0378f07 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/gather.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/gather.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -148,7 +147,7 @@ private:
};
-REG_FACTORY_FOR(ImplFactory<GatherImpl>, Gather);
+REG_FACTORY_FOR(GatherImpl, Gather);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/gather_tree.cpp b/inference-engine/src/mkldnn_plugin/nodes/gather_tree.cpp
index 5bb17c8a0bc..5e420b22ddd 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/gather_tree.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/gather_tree.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -150,7 +149,7 @@ private:
InferenceEngine::Precision precision;
};
-REG_FACTORY_FOR(ImplFactory<GatherTreeImpl>, GatherTree);
+REG_FACTORY_FOR(GatherTreeImpl, GatherTree);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/grn.cpp b/inference-engine/src/mkldnn_plugin/nodes/grn.cpp
index 46e647d7206..b5e4e214965 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/grn.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/grn.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -58,7 +57,7 @@ private:
float bias = 1.0f;
};
-REG_FACTORY_FOR(ImplFactory<GRNImpl>, GRN);
+REG_FACTORY_FOR(GRNImpl, GRN);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/interp.cpp b/inference-engine/src/mkldnn_plugin/nodes/interp.cpp
index 02138570a1b..873575b8be4 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/interp.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/interp.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
#include
@@ -434,7 +433,7 @@ private:
}
};
-REG_FACTORY_FOR(ImplFactory<InterpImpl>, Interp);
+REG_FACTORY_FOR(InterpImpl, Interp);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/list.cpp b/inference-engine/src/mkldnn_plugin/nodes/list.cpp
new file mode 100644
index 00000000000..e017bae6c38
--- /dev/null
+++ b/inference-engine/src/mkldnn_plugin/nodes/list.cpp
@@ -0,0 +1,29 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "nodes/list.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+#define FACTORY_DECLARATION(__prim, __type) \
+ void __prim ## __type(MKLDNNExtensions * extInstance)
+
+#define FACTORY_CALL(__prim, __type) \
+ __prim ## __type(this)
+
+#define MKLDNN_EXTENSION_NODE(__prim, __type) FACTORY_DECLARATION(__prim, __type)
+# include "list_tbl.hpp"
+#undef MKLDNN_EXTENSION_NODE
+
+MKLDNNExtensions::MKLDNNExtensions() {
+ #define MKLDNN_EXTENSION_NODE(__prim, __type) FACTORY_CALL(__prim, __type)
+ # include "list_tbl.hpp"
+ #undef MKLDNN_EXTENSION_NODE
+}
+
+} // namespace Cpu
+} // namespace Extensions
+} // namespace InferenceEngine
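
The file above switches extension registration from per-translation-unit static registrar objects to an explicit table: list_tbl.hpp is included twice with different definitions of MKLDNN_EXTENSION_NODE, first to declare one registration function per (implementation, layer type) pair and then, inside the MKLDNNExtensions constructor, to call each of them; REG_FACTORY_FOR in the node .cpp files supplies the function bodies. A stripped-down sketch of the same two-pass macro pattern (hypothetical Registry/NODE_TABLE names, with the table inlined instead of kept in a separate header):

#include <functional>
#include <iostream>
#include <map>
#include <string>

// Minimal sketch of the "expand the table twice" registration pattern used by
// list.cpp / list_tbl.hpp; Registry, NODE_TABLE and the node names are
// illustrative stand-ins, not the real plugin types.
struct Registry {
    std::map<std::string, std::function<void()>> factories;
    Registry();  // fills the map by calling the per-node registrars
    void add(const std::string& type, std::function<void()> f) { factories[type] = f; }
};

// The table: one entry per node, analogous to the MKLDNN_EXTENSION_NODE(...) lines.
#define NODE_TABLE(X) \
    X(ConvertImpl, Convert) \
    X(CumSumImpl, CumSum)

// Pass 1: define one registrar function per entry (REG_FACTORY_FOR's job).
#define DECLARE_REGISTRAR(impl, type) \
    void impl##type(Registry* r) { r->add(#type, [] { std::cout << #impl "\n"; }); }
NODE_TABLE(DECLARE_REGISTRAR)
#undef DECLARE_REGISTRAR

// Pass 2: the constructor calls every registrar, so registration no longer
// relies on static initializers scattered across translation units.
#define CALL_REGISTRAR(impl, type) impl##type(this);
Registry::Registry() { NODE_TABLE(CALL_REGISTRAR) }
#undef CALL_REGISTRAR

int main() {
    Registry r;
    for (auto& kv : r.factories) { std::cout << kv.first << ": "; kv.second(); }
    return 0;
}
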
diff --git a/inference-engine/src/mkldnn_plugin/nodes/list.hpp b/inference-engine/src/mkldnn_plugin/nodes/list.hpp
index 63b66b4ebd7..92ae6d80c69 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/list.hpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/list.hpp
@@ -12,17 +12,6 @@
#include
#include
-// WA for xbyak.h
-#ifdef _WIN32
-# ifndef _WINSOCKAPI_
-# define _WINSOCKAPI_
-# endif
-# ifndef _WINSOCK2API_
-# define _WINSOCK2API_
-# endif
-#endif
-#include
-
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
@@ -37,14 +26,16 @@ struct ExtensionsHolder {
class MKLDNNExtensions : public IExtension {
public:
+ MKLDNNExtensions();
+
StatusCode getPrimitiveTypes(char**& types, unsigned int& size, ResponseDesc* resp) noexcept override {
- collectTypes(types, size, MKLDNNExtensions::GetExtensionsHolder()->list);
+ collectTypes(types, size, extensionsHolder->list);
return OK;
}
StatusCode
getFactoryFor(ILayerImplFactory*& factory, const CNNLayer* cnnLayer, ResponseDesc* resp) noexcept override {
- auto& factories = MKLDNNExtensions::GetExtensionsHolder()->list;
+ auto& factories = extensionsHolder->list;
if (factories.find(cnnLayer->type) == factories.end()) {
std::string errorMsg = std::string("Factory for ") + cnnLayer->type + " wasn't found!";
errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
@@ -78,21 +69,13 @@ public:
delete this;
}
- static void AddExt(std::string name, ext_factory factory) {
- auto extensionsHolder = GetExtensionsHolder();
- if (extensionsHolder != nullptr)
- extensionsHolder->list[name] = factory;
- }
-
- static std::shared_ptr<ExtensionsHolder> GetExtensionsHolder() {
- static std::shared_ptr<ExtensionsHolder> localHolder;
- if (localHolder == nullptr) {
- localHolder = std::make_shared<ExtensionsHolder>();
- }
- return localHolder;
+ void AddExt(std::string name, ext_factory factory) {
+ extensionsHolder->list[name] = factory;
}
private:
+ std::shared_ptr<ExtensionsHolder> extensionsHolder = std::make_shared<ExtensionsHolder>();
+
template<class T>
void collectTypes(char**& types, unsigned int& size, const std::map<std::string, T> &factories) {
types = new char *[factories.size()];
@@ -108,22 +91,6 @@ private:
IE_SUPPRESS_DEPRECATED_END
-template<typename Ext>
-class ExtRegisterBase {
-public:
- explicit ExtRegisterBase(const std::string& type) {
- IE_SUPPRESS_DEPRECATED_START
- MKLDNNExtensions::AddExt(type,
- [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* {
- return new Ext(layer);
- });
- IE_SUPPRESS_DEPRECATED_END
- }
-};
-
-#define REG_FACTORY_FOR(__prim, __type) \
-static ExtRegisterBase<__prim> __reg__##__type(#__type)
-
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
diff --git a/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp b/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp
new file mode 100644
index 00000000000..e0ebf3ff8cd
--- /dev/null
+++ b/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp
@@ -0,0 +1,93 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifndef MKLDNN_EXTENSION_NODE
+# warning "MKLDNN_EXTENSION_NODE is not defined"
+# define MKLDNN_EXTENSION_NODE(__prim, __type)
+#endif
+
+MKLDNN_EXTENSION_NODE(PriorBoxImpl, PriorBox);
+MKLDNN_EXTENSION_NODE(MathImpl, Abs);
+MKLDNN_EXTENSION_NODE(MathImpl, Acos);
+MKLDNN_EXTENSION_NODE(MathImpl, Acosh);
+MKLDNN_EXTENSION_NODE(MathImpl, Asin);
+MKLDNN_EXTENSION_NODE(MathImpl, Asinh);
+MKLDNN_EXTENSION_NODE(MathImpl, Atan);
+MKLDNN_EXTENSION_NODE(MathImpl, Atanh);
+MKLDNN_EXTENSION_NODE(MathImpl, Ceil);
+MKLDNN_EXTENSION_NODE(MathImpl, Cos);
+MKLDNN_EXTENSION_NODE(MathImpl, Cosh);
+MKLDNN_EXTENSION_NODE(MathImpl, Erf);
+MKLDNN_EXTENSION_NODE(MathImpl, Floor);
+MKLDNN_EXTENSION_NODE(MathImpl, HardSigmoid);
+MKLDNN_EXTENSION_NODE(MathImpl, Log);
+MKLDNN_EXTENSION_NODE(MathImpl, Neg);
+MKLDNN_EXTENSION_NODE(MathImpl, Reciprocal);
+MKLDNN_EXTENSION_NODE(MathImpl, Selu);
+MKLDNN_EXTENSION_NODE(MathImpl, Sign);
+MKLDNN_EXTENSION_NODE(MathImpl, Sin);
+MKLDNN_EXTENSION_NODE(MathImpl, Sinh);
+MKLDNN_EXTENSION_NODE(MathImpl, Softplus);
+MKLDNN_EXTENSION_NODE(MathImpl, Softsign);
+MKLDNN_EXTENSION_NODE(MathImpl, Tan);
+MKLDNN_EXTENSION_NODE(ExperimentalDetectronTopKROIsImpl, ExperimentalDetectronTopKROIs);
+MKLDNN_EXTENSION_NODE(ReverseSequenceImpl, ReverseSequence);
+MKLDNN_EXTENSION_NODE(DetectionOutputImpl, DetectionOutput);
+MKLDNN_EXTENSION_NODE(ArgMaxImpl, ArgMax);
+MKLDNN_EXTENSION_NODE(UnsqueezeImpl, Unsqueeze);
+MKLDNN_EXTENSION_NODE(StridedSliceImpl, StridedSlice);
+MKLDNN_EXTENSION_NODE(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput);
+MKLDNN_EXTENSION_NODE(RegionYoloImpl, RegionYolo);
+MKLDNN_EXTENSION_NODE(LogSoftmaxImpl, LogSoftmax);
+MKLDNN_EXTENSION_NODE(ReorgYoloImpl, ReorgYolo);
+MKLDNN_EXTENSION_NODE(SqueezeImpl, Squeeze);
+MKLDNN_EXTENSION_NODE(ConvertImpl, Convert);
+MKLDNN_EXTENSION_NODE(FillImpl, Fill);
+MKLDNN_EXTENSION_NODE(UniqueImpl, Unique);
+MKLDNN_EXTENSION_NODE(PSROIPoolingImpl, PSROIPooling);
+MKLDNN_EXTENSION_NODE(DepthToSpaceImpl, DepthToSpace);
+MKLDNN_EXTENSION_NODE(ScatterImpl, ScatterUpdate);
+MKLDNN_EXTENSION_NODE(OneHotImpl, OneHot);
+MKLDNN_EXTENSION_NODE(BroadcastImpl, Broadcast);
+MKLDNN_EXTENSION_NODE(ExperimentalSparseWeightedReduceImpl, ExperimentalSparseWeightedSum);
+MKLDNN_EXTENSION_NODE(SparseToDenseImpl, SparseToDense);
+MKLDNN_EXTENSION_NODE(ExperimentalDetectronROIFeatureExtractorImpl, ExperimentalDetectronROIFeatureExtractor);
+MKLDNN_EXTENSION_NODE(ONNXCustomProposalImpl, ExperimentalDetectronGenerateProposalsSingleImage);
+MKLDNN_EXTENSION_NODE(NonMaxSuppressionImpl, NonMaxSuppression);
+MKLDNN_EXTENSION_NODE(TopKImpl, TopK);
+MKLDNN_EXTENSION_NODE(ShuffleChannelsImpl, ShuffleChannels);
+MKLDNN_EXTENSION_NODE(SpaceToDepthImpl, SpaceToDepth);
+MKLDNN_EXTENSION_NODE(PowerFileImpl, PowerFile);
+MKLDNN_EXTENSION_NODE(InterpImpl, Interp);
+MKLDNN_EXTENSION_NODE(BatchToSpaceImpl, BatchToSpace);
+MKLDNN_EXTENSION_NODE(ExperimentalDetectronPriorGridGeneratorImpl, ExperimentalDetectronPriorGridGenerator);
+MKLDNN_EXTENSION_NODE(SimplerNMSImpl, SimplerNMS);
+MKLDNN_EXTENSION_NODE(PadImpl, Pad);
+MKLDNN_EXTENSION_NODE(GRNImpl, GRN);
+MKLDNN_EXTENSION_NODE(SparseFillEmptyRowsImpl, SparseFillEmptyRows);
+MKLDNN_EXTENSION_NODE(BucketizeImpl, Bucketize);
+MKLDNN_EXTENSION_NODE(CTCGreedyDecoderImpl, CTCGreedyDecoder);
+MKLDNN_EXTENSION_NODE(GatherImpl, Gather);
+MKLDNN_EXTENSION_NODE(ProposalImpl, Proposal);
+MKLDNN_EXTENSION_NODE(RangeImpl, Range);
+MKLDNN_EXTENSION_NODE(SelectImpl, Select);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceAnd);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceL1);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceL2);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceLogSum);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceLogSumExp);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMax);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMean);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMin);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceOr);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceProd);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceSum);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceSumSquare);
+MKLDNN_EXTENSION_NODE(GatherTreeImpl, GatherTree);
+MKLDNN_EXTENSION_NODE(PriorBoxClusteredImpl, PriorBoxClustered);
+MKLDNN_EXTENSION_NODE(SpaceToBatchImpl, SpaceToBatch);
+MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentMean);
+MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSqrtN);
+MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSum);
+MKLDNN_EXTENSION_NODE(CumSumImpl, CumSum);
diff --git a/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp b/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp
index cea0169c56f..b79109d946b 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -105,7 +104,7 @@ private:
bool is_last_dim = false;
};
-REG_FACTORY_FOR(ImplFactory<LogSoftmaxImpl>, LogSoftmax);
+REG_FACTORY_FOR(LogSoftmaxImpl, LogSoftmax);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/math.cpp b/inference-engine/src/mkldnn_plugin/nodes/math.cpp
index 92f2059618e..2920badc7f2 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/math.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/math.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -272,29 +271,29 @@ private:
float gamma = 0.0f;
};
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Abs);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Acos);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Acosh);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Asin);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Asinh);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Atan);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Atanh);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Ceil);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Cos);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Cosh);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Erf);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Floor);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, HardSigmoid);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Log);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Neg);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Reciprocal);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Selu);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Sign);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Sin);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Sinh);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Softplus);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Softsign);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Tan);
+REG_FACTORY_FOR(MathImpl, Abs);
+REG_FACTORY_FOR(MathImpl, Acos);
+REG_FACTORY_FOR(MathImpl, Acosh);
+REG_FACTORY_FOR(MathImpl, Asin);
+REG_FACTORY_FOR(MathImpl, Asinh);
+REG_FACTORY_FOR(MathImpl, Atan);
+REG_FACTORY_FOR(MathImpl, Atanh);
+REG_FACTORY_FOR(MathImpl, Ceil);
+REG_FACTORY_FOR(MathImpl, Cos);
+REG_FACTORY_FOR(MathImpl, Cosh);
+REG_FACTORY_FOR(MathImpl, Erf);
+REG_FACTORY_FOR(MathImpl, Floor);
+REG_FACTORY_FOR(MathImpl, HardSigmoid);
+REG_FACTORY_FOR(MathImpl, Log);
+REG_FACTORY_FOR(MathImpl, Neg);
+REG_FACTORY_FOR(MathImpl, Reciprocal);
+REG_FACTORY_FOR(MathImpl, Selu);
+REG_FACTORY_FOR(MathImpl, Sign);
+REG_FACTORY_FOR(MathImpl, Sin);
+REG_FACTORY_FOR(MathImpl, Sinh);
+REG_FACTORY_FOR(MathImpl, Softplus);
+REG_FACTORY_FOR(MathImpl, Softsign);
+REG_FACTORY_FOR(MathImpl, Tan);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp
index 66303d74405..28b25e25751 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp
@@ -108,7 +108,7 @@ void MKLDNNBatchNormalizationNode::getSupportedDescriptors() {
MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetVarianceDesc(const memory::primitive_desc &primitive_desc) const {
memory::primitive_desc aprimitive_desc;
- mkldnn_primitive_desc_t bndesc;
+ mkldnn_primitive_desc_t bndesc = nullptr;
mkldnn_batch_normalization_desc_t *p;
error::wrap_c_api(mkldnn_primitive_desc_query(
primitive_desc.get(), mkldnn::convert_to_c(batch_normalization_d), 0, &p),
@@ -128,7 +128,7 @@ MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetVarianceDesc(const memory::pri
MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetMeanDesc(const memory::primitive_desc &primitive_desc) const {
memory::primitive_desc aprimitive_desc;
- mkldnn_primitive_desc_t bndesc;
+ mkldnn_primitive_desc_t bndesc = nullptr;
mkldnn_batch_normalization_desc_t *p;
error::wrap_c_api(mkldnn_primitive_desc_query(
primitive_desc.get(), mkldnn::convert_to_c(batch_normalization_d), 0, &p),
@@ -148,7 +148,7 @@ MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetMeanDesc(const memory::primiti
MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetScaleShiftWeightsDesc(const memory::primitive_desc &primitive_desc) const {
memory::primitive_desc adesc;
- mkldnn_primitive_desc_t bndesc;
+ mkldnn_primitive_desc_t bndesc = nullptr;
const_mkldnn_primitive_desc_t const_bndesc =
mkldnn_primitive_desc_query_pd(primitive_desc.get(),
mkldnn::convert_to_c(weights_pd), 0);
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp
index 9213a746438..e5f3b84f71e 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp
@@ -17,6 +17,16 @@
#include
#include
#include
+
+// WA for xbyak.h
+#ifdef _WIN32
+# ifndef _WINSOCKAPI_
+# define _WINSOCKAPI_
+# endif
+# ifndef _WINSOCK2API_
+# define _WINSOCK2API_
+# endif
+#endif
#include "cpu_isa_traits.hpp"
using namespace mkldnn;
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp
index c9e93ccc86e..e30e8842134 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp
@@ -279,8 +279,10 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
getParentEdgeAt(0)->getDims().ndims() == 5 ? memory::ndhwc : memory::nhwc);
createDescriptor({in_candidate}, {out_candidate});
} else {
- inputDataType = convLayer->input()->getPrecision() == Precision::BF16 ? memory::bf16 : memory::f32;
- outputDataType = convLayer->outData[0]->getPrecision() == Precision::BF16 ? memory::bf16 : memory::f32;
+ inputDataType = (convLayer->input()->getPrecision() == Precision::BF16
+ && !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::bf16 : memory::f32;
+ outputDataType = (convLayer->outData[0]->getPrecision() == Precision::BF16
+ && !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::bf16 : memory::f32;
eltwisePrecision = Precision::FP32;
for (int i = 0; i < fusedWith.size(); i++) {
auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp
index 8591bebfbfb..a7c3fdc3046 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp
@@ -37,7 +37,7 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
precision = InferenceEngine::Precision::FP32;
auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
- if (getParentEdges().size() != 1)
+ if (getParentEdges().empty() || getParentEdges().size() > 3)
THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
if (getChildEdges().empty())
THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
@@ -45,7 +45,7 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
auto * deconvLayer = dynamic_cast<DeconvolutionLayer *>(getCnnLayer().get());
if (deconvLayer == nullptr)
THROW_IE_EXCEPTION << "Cannot convert deconvolution layer.";
- if (deconvLayer->_weights == nullptr) {
+ if (getParentEdges().size() == 1 && deconvLayer->_weights == nullptr) {
THROW_IE_EXCEPTION << "Weights are empty for layer: " << deconvLayer->name
<< " used in MKLDNN node: " << getName() << "\n"
<< "Use the second argumemt of InferenceEngine::Core::ReadNetwork"
@@ -54,11 +54,22 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
withGroups = (deconvLayer->_group > 1);
isDW = withGroups && deconvLayer->_group == deconvLayer->_out_depth &&
deconvLayer->_group == deconvLayer->input()->getDims()[1];
- withBiases = (deconvLayer->_biases != nullptr && deconvLayer->_biases->size() != 0);
+
+ bool withBiases = (deconvLayer->_biases != nullptr && deconvLayer->_biases->size() != 0) || getParentEdges().size() == 3;
if (withBiases) {
- biases = deconvLayer->_biases;
+ Blob::Ptr biases;
+
+ if (getParentEdges().size() == 3) {
+ auto biasLayer = getParentEdgesAtPort(2)[0]->getParent()->getCnnLayer();
+ if (biasLayer->type != "Const")
+ THROW_IE_EXCEPTION << "Deconvolution layer with name '" << getName() << "' doesn't support non-constant biases";
+ biases = biasLayer->blobs["custom"];
+ } else {
+ biases = deconvLayer->_biases;
+ }
+
// WA: we add bias as depthwise post op
- setBiasAsPostOp();
+ setBiasAsPostOp(biases);
}
/* Original layout format for deconv weights is iohw (from Caffe).
@@ -83,7 +94,8 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
weightDims.push_back(deconvLayer->_kernel[deconvLayer->_kernel.size() - i]);
}
- internalBlobs.push_back(createInternalBlob(weightDims, true));
+ if (getParentEdges().size() == 1)
+ internalBlobs.push_back(createInternalBlob(weightDims, true));
invertVectorCopyUtoI(deconvLayer->_stride, stride);
for (int i = 1; i <= deconvLayer->_dilation.size(); i++) {
@@ -113,7 +125,7 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
}
}
-void MKLDNNDeconvolutionNode::setBiasAsPostOp() {
+void MKLDNNDeconvolutionNode::setBiasAsPostOp(const InferenceEngine::Blob::Ptr& biases) {
mkldnn::post_ops ops;
MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biases->size(), 16))});
@@ -157,7 +169,7 @@ void MKLDNNDeconvolutionNode::createPrimitive() {
prim.reset(new convolution_backward_data(prim_desc,
getParentEdgeAt(0)->getMemory().GetPrimitive(),
- internalBlobMemory[0]->GetPrimitive(),
+ getWeights(),
getChildEdgeAt(0)->getMemory().GetPrimitive()));
}
@@ -197,15 +209,32 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<InferenceEngine::TensorDesc> &inputDesc,
MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
- InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.diff_dst_primitive_desc(idx).desc());
- if (desc.getLayout() == InferenceEngine::Layout::ANY)
+ InferenceEngine::TensorDesc desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(idx - 1).desc())
+ : MKLDNNMemoryDesc(primitive_desc_it.diff_dst_primitive_desc(idx).desc());
+
+ if (desc.getLayout() == InferenceEngine::Layout::ANY) {
return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
getParentEdgeAt(idx)->getDims().ToSizeVector(),
desc.getLayout()));
- else
- return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
- getParentEdgeAt(idx)->getDims().ToSizeVector(),
- desc.getBlockingDesc()));
+ } else {
+ if (getParentEdgeAt(idx)->getDims().ToSizeVector().size() != *std::max_element(desc.getBlockingDesc().getOrder().begin(),
+ desc.getBlockingDesc().getOrder().end()) + 1) {
+ auto old_dims = getParentEdgeAt(idx)->getDims().ToSizeVector();
+ auto new_dims = weightsDims.ToSizeVector();
+
+ auto td = InferenceEngine::TensorDesc(desc.getPrecision(),
+ new_dims,
+ desc.getBlockingDesc());
+ if (new_dims.size() == desc.getBlockingDesc().getBlockDims().size()) {
+ td.setLayout(BLOCKED);
+ }
+ return MKLDNNMemoryDesc(td);
+ } else {
+ return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
+ getParentEdgeAt(idx)->getDims().ToSizeVector(),
+ desc.getBlockingDesc()));
+ }
+ }
}
MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
@@ -219,4 +248,9 @@ MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_i
getChildEdgeAt(idx)->getDims().ToSizeVector(),
desc.getBlockingDesc()));
}
+
+const mkldnn::memory& MKLDNNDeconvolutionNode::getWeights() const {
+ return getParentEdges().size() > 1 ? getParentEdgeAt(1)->getMemory().GetPrimitive() : internalBlobMemory[0]->GetPrimitive();
+}
+
REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution);
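
The deconvolution changes above let the node take weights and biases as extra input edges (1 to 3 parents) instead of only as layer blobs; because the underlying backward-data convolution primitive has no bias term, the bias is still folded in as a depthwise post-op with unit scale ("WA: we add bias as depthwise post op"). A scalar illustration of what that post-op computes (not the plugin code):

#include <cstdio>
#include <vector>

// "Bias as a depthwise post-op": after the deconvolution proper, each output
// channel c is transformed elementwise as dst = dst * scale[c] + shift[c],
// with scale[c] == 1.0f and shift[c] == bias[c].
void applyBiasAsDepthwise(std::vector<float>& dst, size_t channels, size_t spatial,
                          const std::vector<float>& bias) {
    for (size_t c = 0; c < channels; ++c)
        for (size_t s = 0; s < spatial; ++s)
            dst[c * spatial + s] = dst[c * spatial + s] * 1.0f + bias[c];
}

int main() {
    std::vector<float> dst{1, 2, 3, 4};        // 2 channels x 2 spatial elements
    applyBiasAsDepthwise(dst, 2, 2, {10, 20}); // per-channel bias
    for (float v : dst) std::printf("%g ", v); // prints: 11 12 23 24
    std::printf("\n");
    return 0;
}
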
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h
index e4a5ab26e8a..c2493f45d5b 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h
@@ -27,11 +27,14 @@ public:
return false;
}
+ size_t descInputNumbers(MKLDNNDescriptor desc) override {
+ return static_cast<size_t>(getParentEdges().size());
+ }
+
MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
private:
- bool withBiases = false;
bool withGroups = false;
bool isDW = false;
size_t groupNum = 1;
@@ -40,13 +43,14 @@ private:
std::vector dilation;
std::vector paddingR;
MKLDNNDims weightsDims;
- InferenceEngine::Blob::Ptr biases;
std::vector<std::shared_ptr<mkldnn::convolution_forward::desc>> descs_fwd;
std::vector