publish master branch snapshot, revision 8d31237e2c3f673cbb0f0ba110fc10f5cce1d2bb
parent eab7ef4895
commit deb008a26f
13	.gitignore	vendored
@@ -16,6 +16,7 @@ build/
.gdb_history
.vimspector.json
doc/
!ngraph/doc
docs/build_documentation/work_dir/
inference-engine/plugins/
inference-engine/temp
@@ -56,3 +57,15 @@ __pycache__
/model-optimizer/*.mapping
/model-optimizer/*.dat
/model-optimizer/*.svg

# ngraph
ngraph/src/CPackConfig.cmake
ngraph/src/CPackSourceConfig.cmake
ngraph/src/VERSION
ngraph/src/gtest/
ngraph/src/json/
ngraph/src/ngraphConfig.cmake
ngraph/src/ngraphConfigVersion.cmake
ngraph/src/protobuf/
ngraph/src/src/
ngraph/src/test/
@@ -64,12 +64,11 @@ endmacro()

macro(ie_cpack)
    set(CPACK_GENERATOR "TGZ")
    string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
    if(WIN32)
        set(CPACK_PACKAGE_NAME inference-engine_${CMAKE_BUILD_TYPE})
        string(REPLACE "\\" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
    else()
        set(CPACK_PACKAGE_NAME inference-engine)
        string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
    endif()
    set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF)
    set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
@@ -159,6 +159,17 @@ if(ENABLE_PYTHON)
            COMPONENT python_samples)
endif()

# install speech demo files

if(SPEECH_LIBS_AND_DEMOS)
    ie_cpack_add_component(speech_demo_files REQUIRED)

    install(DIRECTORY ${TEMP}/deployment_tools
                      ${TEMP}/data_processing
            DESTINATION .
            COMPONENT speech_demo_files)
endif()

#
# Developer package
#
@@ -57,7 +57,7 @@ add_subdirectory (src/openvino/inference_engine)

# Check Cython version
if("${CYTHON_VERSION}" VERSION_LESS "0.29")
    message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found verson ${CYTHON_VERSION}")
    message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found version ${CYTHON_VERSION}")
else()
    message(STATUS "Found Cython version ${CYTHON_VERSION}")
endif()
@@ -58,6 +58,6 @@ FIND_PACKAGE_HANDLE_STANDARD_ARGS( Cython REQUIRED_VARS CYTHON_EXECUTABLE )

# Find Cython version
execute_process(COMMAND ${CYTHON_EXECUTABLE} -V ERROR_VARIABLE CYTHON_OUTPUT OUTPUT_QUIET)
string(REGEX REPLACE "^Cython version ([0-9]+\\.[0-9]+\\.[0-9]+).*" "\\1" CYTHON_VERSION "${CYTHON_OUTPUT}")
string(REGEX REPLACE "^Cython version ([0-9]+\\.[0-9]+(\\.[0-9]+)?).*" "\\1" CYTHON_VERSION "${CYTHON_OUTPUT}")

mark_as_advanced( CYTHON_EXECUTABLE CYTHON_VERSION )
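The regex change above matters because `cython -V` can report a two-component version such as "Cython version 0.29": the old pattern demanded major.minor.patch and never matched it, while the new one makes the patch component optional. A minimal standalone C++ sketch of the two patterns (illustration only, not part of the commit; note that CMake's REGEX REPLACE leaves the input unchanged when nothing matches):

#include <iostream>
#include <regex>
#include <string>

int main() {
    // Output as printed by "cython -V" for a release without a patch component.
    const std::string output = "Cython version 0.29";
    const std::regex old_re("^Cython version ([0-9]+\\.[0-9]+\\.[0-9]+).*");    // requires major.minor.patch
    const std::regex new_re("^Cython version ([0-9]+\\.[0-9]+(\\.[0-9]+)?).*"); // patch is optional

    std::smatch m;
    std::cout << "old pattern matches: " << std::regex_match(output, m, old_re) << "\n";  // prints 0
    if (std::regex_match(output, m, new_re))
        std::cout << "new pattern captures: " << m[1] << "\n";                            // prints 0.29
    return 0;
}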
@@ -23,6 +23,7 @@ foreach(PYX_FILE ${OTHER_SOURCES})
    get_filename_component(PYX_NAME "${PYX_FILE}" NAME_WE)
    set_source_files_properties(${PYX_FILE} PROPERTIES CYTHON_IS_CXX ON)
    cython_add_module(${PYX_NAME} ${PYX_FILE})
    add_dependencies(${TARGET_NAME} ${PYX_NAME})
    target_include_directories(${PYX_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
    target_link_libraries(${PYX_NAME} PRIVATE ${InferenceEngine_LIBRARIES})
endforeach()
@@ -3,6 +3,7 @@ import os
import pytest
import warnings
import threading
from datetime import datetime

from openvino.inference_engine import ie_api as ie
from conftest import model_path, image_path
@@ -195,11 +196,25 @@ def test_async_infer_wait_finish(device):
def test_async_infer_wait_time(device):
    ie_core = ie.IECore()
    net = ie_core.read_network(test_net_xml, test_net_bin)
    exec_net = ie_core.load_network(net, device, num_requests=1)
    exec_net = ie_core.load_network(net, device, num_requests=2)
    img = read_image()
    request = exec_net.requests[0]
    request.async_infer({'data': img})
    request.wait(100)
    start_time = datetime.utcnow()
    status = request.wait(ie.WaitMode.RESULT_READY)
    assert status == ie.StatusCode.OK
    time_delta = datetime.utcnow() - start_time
    latency_ms = (time_delta.microseconds / 1000) + (time_delta.seconds * 1000)
    timeout = max(100, latency_ms)
    request = exec_net.requests[1]
    request.async_infer({'data': img})
    max_repeat = 10
    status = ie.StatusCode.REQUEST_BUSY
    i = 0
    while i < max_repeat and status != ie.StatusCode.OK:
        status = request.wait(timeout)
        i += 1
    assert status == ie.StatusCode.OK
    res = request.output_blobs['fc_out'].buffer
    assert np.argmax(res) == 2
    del exec_net
@@ -100,6 +100,9 @@ static const char dump_config_message[] = "Optional. Path to XML/YAML/JSON file
static const char shape_message[] = "Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or \"[1,3,224,224]\""
                                    " in case of one input size.";

// @brief message for quantization bits
static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8 or 16 (default)";

/// @brief Define flag for showing help message <br>
DEFINE_bool(h, false, help_message);

@@ -184,6 +187,9 @@ DEFINE_string(dump_config, "", dump_config_message);
/// @brief Define flag for input shape <br>
DEFINE_string(shape, "", shape_message);

/// @brief Define flag for quantization bits (default 16)
DEFINE_int32(qb, 16, gna_qb_message);

/**
 * @brief This function show a help message
 */
@@ -221,4 +227,5 @@ static void showUsage() {
    std::cout << "    -dump_config " << dump_config_message << std::endl;
    std::cout << "    -load_config " << load_config_message << std::endl;
#endif
    std::cout << "    -qb " << gna_qb_message << std::endl;
}
@@ -13,6 +13,7 @@
#include <inference_engine.hpp>
#include <vpu/vpu_plugin_config.hpp>
#include <cldnn/cldnn_config.hpp>
#include <gna/gna_config.hpp>
#include <samples/common.hpp>
#include <samples/slog.hpp>
#include <samples/args_helper.hpp>
@@ -274,6 +275,14 @@ int main(int argc, char *argv[]) {
            }
        } else if (device == "MYRIAD") {
            device_config[CONFIG_KEY(LOG_LEVEL)] = CONFIG_VALUE(LOG_WARNING);
        } else if (device == "GNA") {
            if (FLAGS_qb == 8)
                device_config[GNA_CONFIG_KEY(PRECISION)] = "I8";
            else
                device_config[GNA_CONFIG_KEY(PRECISION)] = "I16";

            if (isFlagSetInCommandLine("nthreads"))
                device_config[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(FLAGS_nthreads);
        }
    }

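The new -qb flag only selects between the plugin's I8 and I16 weight precisions; the quantization itself happens inside the GNA plugin. As a rough illustration of what the weight-bit choice implies, here is a hedged sketch of symmetric per-tensor weight quantization (simplified, not the plugin's actual routine):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Hedged sketch: quantize weights symmetrically to the bit width selected by a
// flag like -qb (8 or 16). Fewer bits mean a coarser grid over the same range.
std::vector<int32_t> quantize_weights(const std::vector<float>& w, int bits) {
    const int32_t qmax = (bits == 8) ? INT8_MAX : INT16_MAX;
    float max_abs = 0.f;
    for (float v : w) max_abs = std::max(max_abs, std::abs(v));
    const float scale = (max_abs > 0.f) ? qmax / max_abs : 1.f;
    std::vector<int32_t> q;
    q.reserve(w.size());
    for (float v : w) {
        const float clamped = std::min(std::max(v * scale, float(-qmax)), float(qmax));
        q.push_back(static_cast<int32_t>(std::lround(clamped)));
    }
    return q;
}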
@@ -24,6 +24,7 @@
#include "details/caseless.hpp"
#include <details/ie_cnn_network_tools.h>
#include <ngraph/opsets/opset2.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/op/fused/gelu.hpp>
#include <generic_ie.hpp>
#include <transformations/common_optimizations/common_optimizations.hpp>
@@ -73,7 +74,8 @@ InferenceEngine::ICNNNetwork::Ptr clDNNEngine::CloneNetwork(const InferenceEngin
    std::shared_ptr<ICNNNetwork> clonedNetwork(nullptr);
    if (network.getFunction()) {
        const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
            return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) != nullptr;
            return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
                   std::dynamic_pointer_cast<const ::ngraph::opset3::ShuffleChannels>(node);
        };
        CNNNetwork net(network.getFunction());
        auto nGraphFunc = net.getFunction();
@@ -17,6 +17,8 @@
#include "blob_factory.hpp"
#include "precision_ex.hpp"
#include "layers/gna_layer_info.hpp"
#include "weights_converter.hpp"
#include "layer_transform.hpp"

namespace GNAPluginNS {
namespace frontend {
@@ -137,6 +139,48 @@ class Quant<QuantI8> {
    }
};

template <typename T>
inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) {
    auto prec_blob = InferenceEngine::make_shared_blob<T>({ precision,
        fp32_blob->getTensorDesc().getDims(), fp32_blob->getTensorDesc().getLayout() });
    prec_blob->allocate();

    int i = 0;
    for (auto& precValue : *prec_blob) {
        auto f32Value = fp32_blob->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type*>()[i++] * scale_factor;
        if (f32Value > std::numeric_limits<T>::max()) {
            precValue = std::numeric_limits<T>::max();
        } else if (f32Value < std::numeric_limits<T>::min()) {
            precValue = std::numeric_limits<T>::min();
        } else {
            precValue = static_cast<T>(f32Value);
        }
    }

    return static_cast<InferenceEngine::Blob::Ptr>(prec_blob);
}

inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) {
    InferenceEngine::Blob::Ptr result_ptr = nullptr;
    switch (precision) {
    case InferenceEngine::Precision::FP32:
        result_ptr = fp32_to_precision_blob<float>(fp32_blob, precision, scale_factor);
        break;
    case InferenceEngine::Precision::I32:
        result_ptr = fp32_to_precision_blob<int32_t>(fp32_blob, precision, scale_factor);
        break;
    case InferenceEngine::Precision::I16:
        result_ptr = fp32_to_precision_blob<int16_t>(fp32_blob, precision, scale_factor);
        break;
    case InferenceEngine::Precision::I8:
        result_ptr = fp32_to_precision_blob<int8_t>(fp32_blob, precision, scale_factor);
        break;
    default:
        THROW_GNA_EXCEPTION << "FP32 to " << precision << " not supported";
    }
    return result_ptr;
}

template<class QuantDesc, class QuantFunc>
inline void quantizeWeightsBiases(const QuantDesc & quantDesc,
    InferenceEngine::WeightableLayer *wl,
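One caveat worth noting about the clamping in fp32_to_precision_blob above: std::numeric_limits<T>::min() is the most negative value only for integer T; for float it is the smallest positive normal number, so in the FP32 branch of the dispatcher negative inputs would be clamped up to FLT_MIN rather than passed through. A hedged sketch of a saturating cast that avoids that pitfall (illustration only, not the commit's code):

#include <cstdint>
#include <limits>

// Hedged sketch of a saturating float-to-T cast. lowest() is the correct lower
// bound for both integer and floating-point T; min() is not, because for
// floating-point types it denotes the smallest positive normal value.
template <typename T>
T saturate_cast(float v) {
    if (v > static_cast<float>(std::numeric_limits<T>::max()))
        return std::numeric_limits<T>::max();
    if (v < static_cast<float>(std::numeric_limits<T>::lowest()))
        return std::numeric_limits<T>::lowest();
    return static_cast<T>(v);
}

// Usage: saturate_cast<int16_t>(1.0e6f) == 32767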
@@ -389,6 +433,18 @@ class DataQuantizer<Desc, InferenceEngine::CNNLayer *> : public DataQuantizerBas
        }
        cnnLayer->precision = Desc::mandatory().getInputPrecision();

        if (cnnLayer->type == "Const") {
            if (cnnLayer->blobs["custom"]->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
                cnnLayer->blobs["custom"] = make_fp32_blob(cnnLayer->blobs["custom"]);
            }
            auto const_scale_factor = InferenceEngine::getInjectedData<QuantizedLayerParams>(*cnnLayer)->_dst_quant.scale;
            auto new_const_blob = InferenceEngine::Blob::CreateFromData(cnnLayer->outData[0]);
            auto const_blob = cnnLayer->blobs["custom"];
            if (const_blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) {
                cnnLayer->blobs["custom"] = fp32_to_precision_blob(const_blob, cnnLayer->outData[0]->getPrecision(), const_scale_factor);
            }
        }

        return true;
    }
};
@@ -197,6 +197,36 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
            return true;
        }

        if (cnnLayer->type == "Const") {
            auto blob = cnnLayer->blobs["custom"];
            if (blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
                blob = make_fp32_blob(blob);
            }
            auto max_val = std::numeric_limits<float>::min();
            auto min_val = std::numeric_limits<float>::max();

            auto flt_buf = blob->buffer().as<float*>();
            auto size = blob->size();

            for (int i=0; i < size; i++) {
                auto val = flt_buf[i];
                if (val > max_val) max_val = val;
                if (val < min_val) min_val = val;
            }

            auto abs_val = std::max(std::abs(max_val), std::abs(min_val));
            auto scale_val = static_cast<float>(std::numeric_limits<int16_t>::max()) / abs_val;

            // TODO: Investigate what should be the scale in such cases (31910)
            if (std::isinf(scale_val)) {
                quant->_dst_quant.scale = quant->_src_quant.scale;
            } else {
                quant->_dst_quant.scale = scale_val;
            }

            return ScaleFactorUpdateResult();
        }

        if (!CNNNetHasPrevLayer(cnnLayer)) {
            quant->_dst_quant.scale = quant->_src_quant.scale;
            return ScaleFactorUpdateResult();
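The new Const branch derives the layer's output scale from the dynamic range of the constant data: the largest representable int16 value divided by the largest absolute value in the blob. When the blob is all zeros that division yields +inf, hence the std::isinf fallback to the source scale. A condensed restatement (illustrative, assuming a plain float array as input):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <vector>

// Hedged sketch of the scale-factor rule above: scale = INT16_MAX / max|v|,
// falling back when the blob contains only zeros (division by zero -> +inf).
float const_scale_factor(const std::vector<float>& values, float fallback) {
    float abs_max = 0.f;
    for (float v : values) abs_max = std::max(abs_max, std::abs(v));
    const float scale = static_cast<float>(std::numeric_limits<int16_t>::max()) / abs_max;
    return std::isinf(scale) ? fallback : scale;
}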
@@ -231,6 +261,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {

        auto quantParams0 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in0);
        auto quantParams1 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in1);

        auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*eltwiseLayer);

        switch (eltwiseLayer->_operation) {
@@ -239,6 +270,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
            quantData->_dst_quant.scale = quantParams0->_dst_quant.scale * quantParams1->_dst_quant.scale;
            break;
        }
        case InferenceEngine::EltwiseLayer::Sub:
        case InferenceEngine::EltwiseLayer::Sum: {
            // detect which input will be used as biases
            if (LayerInfo(in0).has32BOutput()) {
@@ -247,6 +279,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
            }

            // this path might result in significant data loss
            quantData->_bias_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale;
            quantData->_weights_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale;
            quantData->_dst_quant.scale = quantParams1->_dst_quant.scale;

@@ -7,22 +7,28 @@
#include "quantized_layer_params.hpp"
#include "precision_utils.h"

inline InferenceEngine::Blob::Ptr make_fp32_blob(InferenceEngine::Blob::Ptr fp16_blob) {
    auto fp32_blob = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
        fp16_blob->getTensorDesc().getDims(), fp16_blob->getTensorDesc().getLayout() });
    fp32_blob->allocate();

    int i = 0;
    for (auto& f32Value : *fp32_blob) {
        auto f16Value = fp16_blob->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type*>()[i++];
        f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value);
    }

    return static_cast<InferenceEngine::Blob::Ptr>(fp32_blob);
}

inline void fp16_to_fp32(InferenceEngine::WeightableLayer *lp) {
    InferenceEngine::BlobMap newBlobs;
    for (auto& blob : lp->blobs) {
        if (blob.second->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP16) {
            THROW_GNA_EXCEPTION << "Unsupported precision. Layer: " << lp->name << " , Blob: " << blob.first;
        }
        auto tmp =
            InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
                blob.second->getTensorDesc().getDims(), InferenceEngine::Layout::C });
        tmp->allocate();
        int i = 0;
        for (auto& f32Value : *tmp) {
            auto f16Value = blob.second->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type*>()[i++];
            f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value);
        }
        newBlobs[blob.first] = tmp;
        auto fp32_blob = make_fp32_blob(blob.second);
        newBlobs[blob.first] = fp32_blob;
    }
    lp->_biases = newBlobs["biases"];
    lp->_weights = newBlobs["weights"];
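make_fp32_blob centralizes the FP16-to-FP32 widening that was previously open-coded per blob; the per-value conversion is delegated to PrecisionUtils::f16tof32. For reference, a self-contained sketch of what such a half-to-float conversion does (hypothetical helper, not the library routine):

#include <cmath>
#include <cstdint>

// Hypothetical stand-in for InferenceEngine::PrecisionUtils::f16tof32:
// expand an IEEE-754 binary16 value stored in a uint16_t to float.
float half_to_float(uint16_t h) {
    const uint32_t sign = (h >> 15) & 1u;
    const uint32_t exp  = (h >> 10) & 0x1Fu;
    const uint32_t frac = h & 0x3FFu;
    float result;
    if (exp == 0) {
        result = std::ldexp(static_cast<float>(frac), -24);       // zero or subnormal
    } else if (exp == 31) {
        result = frac ? NAN : INFINITY;                           // NaN or infinity
    } else {
        result = std::ldexp(static_cast<float>(frac | 0x400u),
                            static_cast<int>(exp) - 25);          // normal: (1 + frac/1024) * 2^(exp-15)
    }
    return sign ? -result : result;
}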
@@ -44,6 +50,18 @@ inline bool convertWeights(InferenceEngine::CNNLayer* lp) {
    for (auto& dataItem : lp->outData) {
        dataItem->setPrecision(InferenceEngine::Precision::FP32);
    }
    InferenceEngine::BlobMap newBlobs;
    for (auto& blob_pair : lp->blobs) {
        auto blob_name = blob_pair.first;
        auto blob_ptr = blob_pair.second;
        if (blob_ptr->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
            auto new_blob = make_fp32_blob(blob_ptr);
            newBlobs[blob_name] = new_blob;
        } else {
            newBlobs[blob_name] = blob_ptr;
        }
    }

    return true;
}

@@ -185,17 +185,16 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer)
    if (constLayer->blobs.find("custom") == constLayer->blobs.end()) {
        THROW_GNA_EXCEPTION << "const layer: " << constLayer->name << "doesn't have custom in blobs section";
    }
    auto constBlob = constLayer->blobs["custom"];
    auto const_blob = constLayer->blobs["custom"];

    void* ptr_for_const_blob = &ptr_for_const_blob;
    connectOutput(constLayer, ptr_for_const_blob, constBlob->size());

    const_connections[constLayer->name] = ptr_for_const_blob;
    const_connections[constLayer->name] = &const_connections[constLayer->name];
    void* ptr_for_const_blob = &const_connections[constLayer->name];

    connectOutput(constLayer, ptr_for_const_blob, const_blob->byteSize());
    // TODO: segment type for bind, bind initializer not used - need refactor to separate bind and allocation requests
    // dont see practical use case when bind storage type need to be different that allocation type
    gnamem->readonly().bind_initializer(ptr_for_const_blob, [constBlob](void* data, size_t size) {
        ie_memcpy(data, size, constBlob->buffer(), constBlob->byteSize());
    gnamem->readonly().bind_initializer(ptr_for_const_blob, [const_blob](void* data, size_t size) {
        ie_memcpy(data, size, const_blob->buffer(), const_blob->byteSize());
    });
}

@@ -602,15 +601,35 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
    if (cropLayer == nullptr) {
        return;
    }
    if (cropLayer->axis.size() > 1) {

    IE_ASSERT(!layer->insData.empty());
    auto inputs = layer->insData.begin()->lock();

    IE_ASSERT(!cropLayer->axis.empty());
    IE_ASSERT(cropLayer->axis.size() == cropLayer->dim.size());
    IE_ASSERT(cropLayer->axis.size() == cropLayer->offset.size());

    std::vector<int> axis, dim, offset;
    for (int n = 0; n < cropLayer->axis.size(); n++) {
        uint32_t input_dim = FROM_IR_DIM(inputs, inputs->getDims().size() - cropLayer->axis[n]);
        // Exclude crop layer components that do nothing
        if (cropLayer->offset[n] == 0 && cropLayer->dim[n] == input_dim) {
            continue;
        }
        axis.push_back(cropLayer->axis[n]);
        dim.push_back(cropLayer->dim[n]);
        offset.push_back(cropLayer->offset[n]);
    }

    if (axis.size() > 1) {
        THROW_GNA_EXCEPTION <<
            "Crop layer does not support the number of cropped dimensions = "
            << cropLayer->axis.size() << ".";
            "Crop layer does not support the number of (non-trivial) cropped dimensions more than 1, provided: "
            << axis.size() << ".";
    }

    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
    size_t cropOffset = cropLayer->offset.back() * cropLayer->precision.size();
    size_t cropOutputSize = cropLayer->dim.back() * cropLayer->precision.size();
    size_t cropOffset = offset.front() * cropLayer->precision.size();
    size_t cropOutputSize = dim.front() * cropLayer->precision.size();

    if (ALIGN64(cropOffset) == cropOffset) {
        // leave crop as it is
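The refactor above first discards crop components that select an entire axis (offset 0 and size equal to the input dimension), so the single-axis restriction only applies to axes that actually crop. Restated in isolation (simplified indexing; the real code resolves dimensions through FROM_IR_DIM, which indexes from the back):

#include <cstddef>
#include <vector>

// Hedged sketch of the filtering step: drop no-op crop components before
// enforcing the one-cropped-axis limit. CropSpec is an illustrative type.
struct CropSpec { std::vector<int> axis, dim, offset; };

CropSpec drop_trivial_components(const CropSpec& in, const std::vector<int>& input_dims) {
    CropSpec out;
    for (std::size_t n = 0; n < in.axis.size(); ++n) {
        if (in.offset[n] == 0 && in.dim[n] == input_dims[in.axis[n]])
            continue;  // this component keeps the whole axis; nothing is cropped
        out.axis.push_back(in.axis[n]);
        out.dim.push_back(in.dim[n]);
        out.offset.push_back(in.offset[n]);
    }
    return out;
}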
@@ -637,20 +656,18 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
    } else {
        gnalog() << "Crop " << layer->name << " is being replaced by Affine layer...\n";
        IE_ASSERT(!layer->outData.empty());
        IE_ASSERT(!layer->insData.empty());
        auto outputs = *layer->outData.begin();
        auto inputs = layer->insData.begin()->lock();

        // only 1D crops supported
        if (cropLayer->axis.size() != 1) {
        if (axis.size() != 1) {
            THROW_GNA_EXCEPTION << "only 1D crop layer supported: " << cropLayer->name;
        }

        // TODO: add unit tests for 4d crops blobs
        uint32_t num_rows_in = FROM_IR_DIM(inputs, inputs->getDims().size() - cropLayer->axis[0]);
        uint32_t num_rows_in = FROM_IR_DIM(inputs, inputs->getDims().size() - axis.front());
        uint32_t num_columns_in = 1;

        uint32_t num_rows_out = FROM_IR_DIM(outputs, inputs->getDims().size() - cropLayer->axis[0]);
        uint32_t num_rows_out = FROM_IR_DIM(outputs, inputs->getDims().size() - axis.front());
        uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;

        void* ptr_inputs = nullptr;
@@ -686,7 +703,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
        connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
        connectOutput(layer, ptr_outputs, num_data_bytes_out);

        FillWeightOfAligningFilter(layer, ptr_weights, cropLayer->offset.back(), (quantized == nullptr) ? false : true);
        FillWeightOfAligningFilter(layer, ptr_weights, offset.front(), (quantized == nullptr) ? false : true);

        (quantized == nullptr) ?
            gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64) :
@@ -713,17 +730,27 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
    int biasesLayerIdx = 1;

    if (quantized) {
        if (eltwise._operation == EltwiseLayer::Sum) {
        switch (eltwise._operation) {
        case InferenceEngine::EltwiseLayer::Sum:
        case InferenceEngine::EltwiseLayer::Sub:
        {
            if (inputs4Bytes->getPrecision().size() != 4) {
                std::swap(inputs4Bytes, inputs2Bytes);
                biasesLayerIdx = 0;
            }
            GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2);
            GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 4);
        } else {
            break;
        }
        case InferenceEngine::EltwiseLayer::Prod:
        {
            // for mul both inputs should be 2 bytes precision
            GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2);
            GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 2);
            break;
        }
        default:
            THROW_GNA_EXCEPTION << "Unsupported eltwise operation for quantization: " << eltwise._operation;
        }
    }

@@ -767,6 +794,18 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
    connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 1 - biasesLayerIdx);

    switch (eltwise._operation) {
    case EltwiseLayer::Sub:
        if (quantized == nullptr) {
            gnamem->readonly().push_value(ptr_weights, -1.0f, num_rows_out, 64);
        } else {
            auto scaledIdentity = -quantized->_weights_quant.scale;

            auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));

            gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
        }
        connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
        break;
    case EltwiseLayer::Sum:
        if (quantized == nullptr) {
            gnamem->readonly().push_value(ptr_weights, 1.0f, num_rows_out, 64);
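The new Sub case reuses the Sum lowering: GNA has no native elementwise subtract, so the second operand is fed through "weights" of -1 (or the negated weight scale, saturated to int16, on the quantized path) and then summed. In scalar form the idea is simply:

#include <cstddef>
#include <vector>

// Hedged sketch of subtraction expressed as a weighted sum, the scheme the
// Sub case above implements in hardware terms (weights of -1 on operand b).
std::vector<float> eltwise_sub(const std::vector<float>& a, const std::vector<float>& b) {
    const float weight = -1.0f;  // quantized path: -weights_scale, saturated to int16
    std::vector<float> out(a.size());
    for (std::size_t i = 0; i < a.size(); ++i)
        out[i] = a[i] + weight * b[i];
    return out;
}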
@@ -248,8 +248,6 @@ void Config::AdjustKeyMapValues() {
    key_config_map[CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)] =
        gnaFlags.exclusive_async_requests ? PluginConfigParams::YES: PluginConfigParams::NO;
    key_config_map[GNA_CONFIG_KEY(PRECISION)] = gnaPrecision.name();
    key_config_map[CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)] =
        gnaFlags.exclusive_async_requests ? PluginConfigParams::YES: PluginConfigParams::NO;
    key_config_map[GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN)] =
        gnaFlags.uniformPwlDesign ? PluginConfigParams::YES: PluginConfigParams::NO;
    key_config_map[CONFIG_KEY(PERF_COUNT)] =
@@ -153,6 +153,15 @@ class LayerInfo {
        return dynamic_cast<const InferenceEngine::EltwiseLayer *>(layer)->_operation ==
            InferenceEngine::EltwiseLayer::Sum;
    }
    bool isEltwiseSub() const noexcept {
        IS_VALID();
        if (!isEltwise()) return false;
        // dynamic_cast<const InferenceEngine::EltwiseLayer *>(layer) is validated in isEltwise function
        // coverity[var_deref_op]
        return dynamic_cast<const InferenceEngine::EltwiseLayer *>(layer)->_operation ==
            InferenceEngine::EltwiseLayer::Sub;
    }

    bool isEltwiseMul() const noexcept {
        IS_VALID();
        if (!isEltwise()) return false;
@@ -150,6 +150,7 @@ static std::vector<CNNLayerPtr> getCandidatesForIdentityInsertion(const CNNLayer
    auto prev1 = PrevFunctionalLayer(l, 1);

    switch (eltwise->_operation) {
    case EltwiseLayer::Sub:
    case EltwiseLayer::Sum:
        if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) {
            return prevLayers;
@@ -227,7 +228,7 @@ void InsertDiagonalLayerPass::run() {
        // for e mul if we have 2-4 - inputs we need to insert identity to put 4 bytes input into weights
        // for e mul if we have 4-4 - inputs we need to insert 2 identities to put both 4 bytes input into weights

        if (eltwise->_operation != EltwiseLayer::Sum)
        if (eltwise->_operation != EltwiseLayer::Sum && eltwise->_operation != EltwiseLayer::Sub)
            continue;

        auto prevLayer1 = CNNNetPrevLayerSkipCertain(l, 1, [](CNNLayerPtr ptr) {
@@ -392,7 +392,7 @@ std::shared_ptr<ngraph::Node> V10Parser::createNode(const std::vector<ngraph::Ou
    auto blobs = node.child("blobs");
    if (!blobs.empty()) {
        for (pugi::xml_node blob = blobs.first_child(); !blob.empty(); blob = blob.next_sibling()) {
            size_t size = GetUIntAttr(blob, "size", 0);
            size_t size = GetUInt64Attr(blob, "size", 0);
            uint64_t offset = GetUInt64Attr(blob, "offset", 0);
            Precision precision(Precision::U8);
            const std::string& preStr = GetStrAttr(blob, "precision", "");
@@ -787,7 +787,7 @@ std::shared_ptr<ngraph::Node> V10Parser::LayerCreator<ngraph::op::LSTMCell>::cre
    std::vector<float> activations_beta = getParameters<float>(dn, "activations_beta", {});
    float clip = GetFloatAttr(dn, "clip", 0.f);
    return std::make_shared<ngraph::op::LSTMCell>(inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], inputs[5],
        GetUIntAttr(dn, "hidden_size"), ngraph::op::LSTMWeightsFormat::IFCO,
        GetUInt64Attr(dn, "hidden_size"), ngraph::op::LSTMWeightsFormat::IFCO,
        activations, activations_alpha, activations_beta, clip);
}

@@ -1365,8 +1365,8 @@ std::shared_ptr<ngraph::Node> V10Parser::LayerCreator<ngraph::op::Constant>::cre
    if (dn.empty())
        THROW_IE_EXCEPTION << "Cannot read parameter for " << getType() << " layer with name: " << layerParsePrms.name;

    size_t offset = GetUIntAttr(dn, "offset");
    size_t size = GetUIntAttr(dn, "size");
    size_t offset = GetUInt64Attr(dn, "offset");
    size_t size = GetUInt64Attr(dn, "size");

    if (!weights || weights->cbuffer() == nullptr)
        THROW_IE_EXCEPTION << "Cannot read network! The model requires weights data! "
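These GetUIntAttr to GetUInt64Attr switches matter for models whose weights file exceeds 4 GiB: a 32-bit read of the blob "offset" and "size" attributes silently wraps around. A tiny illustration of the truncation:

#include <cstdint>
#include <iostream>

// Hedged illustration of why the parser moves to a 64-bit attribute reader:
// values above UINT32_MAX are silently truncated by a 32-bit parse.
int main() {
    const uint64_t blob_offset = 5000000000ull;  // an offset past the 4 GiB mark
    const uint32_t truncated = static_cast<uint32_t>(blob_offset);
    std::cout << blob_offset << " -> " << truncated << "\n";  // 5000000000 -> 705032704
    return 0;
}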
@@ -166,12 +166,12 @@ private:
class XmlDeserializer : public ngraph::AttributeVisitor {
public:
    explicit XmlDeserializer(const pugi::xml_node& node): node(node) {}
    void on_attribute(const std::string& name, std::string& value) override {
    void on_adapter(const std::string& name, ngraph::ValueAccessor<std::string>& value) override {
        std::string val;
        if (!getStrAttribute(node.child("data"), name, val)) return;
        value = val;
        value.set(val);
    }
    void on_attribute(const std::string& name, bool& value) override {
    void on_adapter(const std::string& name, ngraph::ValueAccessor<bool>& value) override {
        std::string val;
        if (!getStrAttribute(node.child("data"), name, val)) return;
        std::transform(val.begin(), val.end(), val.begin(), [](char ch) {
@@ -184,7 +184,7 @@ private:
        bool is_false = false_names.find(val) != false_names.end();

        if (!is_true && !is_false) return;
        value = is_true;
        value.set(is_true);
    }
    void on_adapter(const std::string& name, ngraph::ValueAccessor<void>& adapter) override {
        std::string val;
@@ -63,12 +63,8 @@ public:

    CNNLayerPtr create();

    void on_attribute(const std::string& name, std::string& value) override {
        params[name] = value;
    }

    void on_attribute(const std::string& name, bool& value) override {
        params[name] = value ? "true" : "false";
    void on_adapter(const std::string& name, ::ngraph::ValueAccessor<bool> &value) override {
        params[name] = value.get() ? "true" : "false";
    }

    void addSpecificCreator(const std::vector<std::string>& forTypes, const CreatorFor& creator) {
@@ -417,6 +413,15 @@ InferenceEngine::details::CNNLayerCreator::CNNLayerCreator(const std::shared_ptr
        res->params = params;
        return res;
    });

    addSpecificCreator({"StaticShapeTopK"}, [](const std::shared_ptr<::ngraph::Node>& node,
                                               const std::map<std::string, std::string> params) -> CNNLayerPtr {
        LayerParams attrs = {node->get_friendly_name(), "TopK",
                             details::convertPrecision(node->get_output_element_type(0))};
        auto res = std::make_shared<TopKLayer>(attrs);
        res->params = params;
        return res;
    });
}

CNNLayerPtr InferenceEngine::details::CNNLayerCreator::create() {
@@ -530,7 +535,6 @@ std::shared_ptr<CNNNetworkImpl> convertFunctionToICNNNetwork(const std::shared_p
        std::make_shared<Builder::NodeConverter<::ngraph::op::Sign>>(),
        std::make_shared<Builder::NodeConverter<::ngraph::op::Sinh>>(),
        std::make_shared<Builder::NodeConverter<::ngraph::op::SquaredDifference>>(),
        std::make_shared<Builder::NodeConverter<::ngraph::op::v1::Select>>(),
        std::make_shared<Builder::NodeConverter<::ngraph::op::v1::Softmax>>(),
        std::make_shared<Builder::NodeConverter<::ngraph::op::v1::Split>>(),
        std::make_shared<Builder::NodeConverter<::ngraph::op::VariadicSplit>>(),
@@ -16,6 +16,7 @@
#include <string>
#include <vector>
#include <mutex>
#include <algorithm>

#include <cnn_network_ngraph_impl.hpp>
#include "blob_factory.hpp"
@@ -228,6 +229,12 @@ std::vector<CNNLayerPtr> ConstTransformer::foldConstSubgraphsInternal(const std:
    return remainingConstLayers;
}

static std::vector<std::string> skipConstInfer = {
    "FakeQuantize",
    "Quantize",
    "CumSum" // Const inference function for CumSum is not implemented!
};

const std::map<std::string, bool> ConstTransformer::getConstLayers(const std::vector<CNNLayerPtr>& sortedLayers) {
    std::map<std::string, bool> mapConstLayers;
    // collect all const layers, which inputs are const layers.
@@ -235,7 +242,7 @@ const std::map<std::string, bool> ConstTransformer::getConstLayers(const std::ve
        // Layers with "Shape" and "Const" type are Const by definition
        if (layer->type == "Shape" || layer->type == "Const") {
            mapConstLayers[layer->name] = false;
        } else if ((layer->type != "FakeQuantize") && (layer->type != "Quantize") && (!isForFakeQuantzie(*layer))) {
        } else if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end() && !isForFakeQuantzie(*layer)) {
            bool isAllInputsConst = true;
            for (auto const& data : layer->insData) {
                auto creator = data.lock()->getCreatorLayer().lock();
@@ -336,7 +343,7 @@ const BlobMap ConstTransformer::getConstData(const std::map<std::string, bool>&
    };

    for (const auto& layer : sortedLayers) {
        if (layer->type == "FakeQuantize" || layer->type == "Quantize") {
        if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) != skipConstInfer.end()) {
            continue;
        }

@@ -346,13 +353,13 @@ const BlobMap ConstTransformer::getConstData(const std::map<std::string, bool>&

        auto implPtr = holder.getConstInferImpl(layer->type);
        if (!implPtr && !isForShape)
            if (layer->type != "FakeQuantize" && layer->type != "Quantize")
            if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end())
                THROW_IE_EXCEPTION << "Failed to find reference implementation for `" + layer->name +
                    "` Layer with `" + layer->type + "` Type on constant propagation";
        if (!isForShape) {
            auto outputBlobs = getOutputBlobs(layer->outData);
            auto inp = getInputBlobs(layer->insData, isForShape);
            if (layer->type != "FakeQuantize" && layer->type != "Quantize")
            if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end())
                implPtr->infer(inp, layer->params, layer->blobs, outputBlobs);
            for (int i = 0; i < layer->outData.size(); i++) {
                std::string dataName = layer->outData[i]->getName();
@@ -1381,24 +1381,6 @@ CNNLayer::Ptr NodeConverter<ngraph::op::SquaredDifference>::createLayer(const st
    return res;
}

template <>
CNNLayer::Ptr NodeConverter<ngraph::op::v1::Select>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
    LayerParams params = {layer->get_friendly_name(), "Select", details::convertPrecision(layer->get_output_element_type(0))};

    auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
    auto castedLayer = ngraph::as_type_ptr<ngraph::op::v1::Select>(layer);
    if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;

    auto broadcast = castedLayer->get_auto_broadcast().m_type;
    if (broadcast == ngraph::op::AutoBroadcastType::NUMPY) {
        res->params["auto_broadcast"] = "numpy";
    } else if (broadcast == ngraph::op::AutoBroadcastType::NONE) {
        res->params["auto_broadcast"] = "none";
    }

    return res;
}

template <>
CNNLayer::Ptr NodeConverter<ngraph::op::DetectionOutput>::createLayer(
    const std::shared_ptr<ngraph::Node>& layer) const {
@@ -4,6 +4,10 @@

set(TARGET_NAME "MKLDNNPlugin")

if(ENABLE_LTO)
    ie_enable_lto()
endif()

if (WIN32)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNOMINMAX")
endif()
@@ -41,6 +45,7 @@ set(LAYERS
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_resample_node.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_normalize_node.cpp

    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/list.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/batch_to_space.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/broadcast.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/convert.cpp
@@ -93,6 +98,7 @@ set(LAYERS
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/topk.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal_imp.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/cum_sum.cpp
)

foreach(LAYER ${LAYERS})
@@ -51,7 +51,7 @@ struct primitive_desc_iterator : public handle<mkldnn_primitive_desc_iterator_t>

    memory::primitive_desc fetch() const {
        memory::primitive_desc adesc;
        mkldnn_primitive_desc_t cdesc;
        mkldnn_primitive_desc_t cdesc = nullptr;

        cdesc = mkldnn_primitive_desc_iterator_fetch(get());

@@ -72,7 +72,7 @@ struct primitive_desc_iterator : public handle<mkldnn_primitive_desc_iterator_t>
    memory::primitive_desc src_primitive_desc(size_t index = 0) const {
        memory::primitive_desc adesc;
        memory::primitive_desc cdesc_elem;
        mkldnn_primitive_desc_t cdesc;
        mkldnn_primitive_desc_t cdesc = nullptr;
        cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
        const_mkldnn_primitive_desc_t const_cdesc =
            mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -86,7 +86,7 @@ struct primitive_desc_iterator : public handle<mkldnn_primitive_desc_iterator_t>
    memory::primitive_desc dst_primitive_desc(size_t index = 0) const {
        memory::primitive_desc adesc;
        memory::primitive_desc cdesc_elem;
        mkldnn_primitive_desc_t cdesc;
        mkldnn_primitive_desc_t cdesc = nullptr;
        cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
        const_mkldnn_primitive_desc_t const_cdesc =
            mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -101,7 +101,7 @@ struct primitive_desc_iterator : public handle<mkldnn_primitive_desc_iterator_t>
    memory::primitive_desc diff_src_primitive_desc(size_t index = 0) const {
        memory::primitive_desc adesc;
        memory::primitive_desc cdesc_elem;
        mkldnn_primitive_desc_t cdesc;
        mkldnn_primitive_desc_t cdesc = nullptr;
        cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
        const_mkldnn_primitive_desc_t const_cdesc =
            mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -115,7 +115,7 @@ struct primitive_desc_iterator : public handle<mkldnn_primitive_desc_iterator_t>
    memory::primitive_desc weights_primitive_desc(size_t index = 0) const {
        memory::primitive_desc adesc;
        memory::primitive_desc cdesc_elem;
        mkldnn_primitive_desc_t cdesc;
        mkldnn_primitive_desc_t cdesc = nullptr;
        cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
        const_mkldnn_primitive_desc_t const_cdesc =
            mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -129,7 +129,7 @@ struct primitive_desc_iterator : public handle<mkldnn_primitive_desc_iterator_t>
    memory::primitive_desc diff_dst_primitive_desc(size_t index = 0) const {
        memory::primitive_desc adesc;
        memory::primitive_desc cdesc_elem;
        mkldnn_primitive_desc_t cdesc;
        mkldnn_primitive_desc_t cdesc = nullptr;
        cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
        const_mkldnn_primitive_desc_t const_cdesc =
            mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -152,7 +152,7 @@ struct primitive_desc_iterator : public handle<mkldnn_primitive_desc_iterator_t>

    template <typename T>
    void getPrimitiveDescriptor(T& pdesc) const {
        mkldnn_primitive_desc_t cdesc;
        mkldnn_primitive_desc_t cdesc = nullptr;

        memory::primitive_desc cdescpd;

@@ -151,7 +151,9 @@ void MKLDNNEdge::allocate(const void* mem_ptr) {
    auto inputDesc = getInputDesc();
    auto outputDesc = getOutputDesc();
    if (!MKLDNNExtensionUtils::initTensorsAreEqual(outputDesc, inputDesc) ||
        (inputDesc.getDims().size() > 0 && inputDesc.getDims()[0] != 1 && inputDesc != outputDesc))
        (inputDesc.getDims().size() > 0 && inputDesc.getDims()[0] != 1 &&
         (inputDesc.getPrecision() != outputDesc.getPrecision() ||
          inputDesc.getBlockingDesc() != outputDesc.getBlockingDesc())))
        THROW_IE_EXCEPTION << "Cannot allocate memory. Nodes have primitive descriptors with different formats.";
    if (inputDesc.getLayout() == InferenceEngine::Layout::ANY)
        THROW_IE_EXCEPTION << "Cannot get input descriptor!";
@@ -1,22 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "mkldnn_layers_dispatcher.hpp"
#include <details/ie_exception.hpp>
#include "nodes/list.hpp"
#include <memory>

using namespace InferenceEngine;

namespace MKLDNNPlugin {

void addDefaultExtensions(MKLDNNExtensionManager::Ptr mngr) {
    if (!mngr)
        THROW_IE_EXCEPTION << "Cannot add default extensions! Extension manager is empty.";

    auto defaultExtensions = std::make_shared<Extensions::Cpu::MKLDNNExtensions>();
    mngr->AddExtension(defaultExtensions);
}

} // namespace MKLDNNPlugin
@@ -1,13 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "mkldnn_extension_mngr.h"

namespace MKLDNNPlugin {

void addDefaultExtensions(MKLDNNExtensionManager::Ptr mngr);

} // namespace MKLDNNPlugin
@@ -23,11 +23,14 @@ MKLDNNMemory::MKLDNNMemory(const engine& eng) : eng(eng) {}

size_t MKLDNNMemory::GetSize() const {
    uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(GetDataType()));
    return GetElementsCount() * itemSize;
}

size_t MKLDNNMemory::GetElementsCount() const {
    auto desc = GetDescriptor();
    std::vector<int> dims(desc.data.layout_desc.blocking.padding_dims,
                          desc.data.layout_desc.blocking.padding_dims + desc.data.ndims);
    return std::accumulate(std::begin(dims), std::end(dims), (size_t) 1, std::multiplies<size_t>()) * itemSize;
    return std::accumulate(std::begin(dims), std::end(dims), (size_t) 1, std::multiplies<size_t>());
}

void MKLDNNMemory::Create(memory::dims dims, memory::data_type data_type, memory::format format, const void* data) {
@@ -182,6 +185,7 @@ bool MKLDNNMemory::isConsistant(memory::dims dims, memory::format format) {
        case f::OhIw16o4i:
        case f::OIhw4i16o4i:
        case f::OhIw8o4i:
        case f::IOhw16o16i:
            ndims = 4; break;
        // DHW
        case f::ncdhw:
@@ -411,6 +415,7 @@ std::string MKLDNNMemory::formatToString(memory::format fmt) {
        case memory::OhIw8o4i: return "OhIw8o4i";
        case memory::OhIw16o4i: return "OhIw16o4i";
        case memory::OIhw4i16o4i: return "OIhw4i16o4i";
        case memory::IOhw16o16i: return "IOhw16o16i";

        case memory::oidhw: return "oidhw";
        case memory::dhwio: return "dhwio";
@@ -718,6 +723,33 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
            blkDims.push_back(16);
            layout = Layout::BLOCKED;
            break;
        case memory::OIhw8o8i:
            order = {0, 1, 2, 3, 0, 1};
            blkDims = dims;
            blkDims[0] = blkDims[0] / 8 + (blkDims[0] % 8 ? 1 : 0);
            blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0);
            blkDims.push_back(8);
            blkDims.push_back(8);
            layout = Layout::BLOCKED;
            break;
        case memory::OIhw16o16i:
            order = {0, 1, 2, 3, 0, 1};
            blkDims = dims;
            blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0);
            blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
            blkDims.push_back(16);
            blkDims.push_back(16);
            layout = Layout::BLOCKED;
            break;
        case memory::IOhw16o16i:
            order = {1, 0, 2, 3, 0, 1};
            blkDims = dims;
            blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0);
            blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
            blkDims.push_back(16);
            blkDims.push_back(16);
            layout = Layout::BLOCKED;
            break;
        case memory::OIdhw8i8o:
            order = {0, 1, 2, 3, 4, 1, 0};
            blkDims = dims;
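Each new blocked-layout case repeats one idiom: divide the outer channel count by the block size, rounding up, then append the block sizes to the blocked dims. The rounding idiom in isolation:

#include <cstddef>

// Hedged sketch: the blkDims arithmetic above is a ceiling division, padding a
// channel count up to a whole number of 8- or 16-channel blocks.
inline std::size_t ceil_div(std::size_t x, std::size_t block) {
    return x / block + (x % block ? 1 : 0);
}

// e.g. ceil_div(17, 16) == 2: 17 output channels occupy two 16-channel blocks.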
@@ -736,8 +768,26 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
            blkDims.push_back(16);
            layout = Layout::BLOCKED;
            break;
        case memory::OIdhw8o8i:
            order = {0, 1, 2, 3, 4, 1, 0};
            blkDims = dims;
            blkDims[0] = blkDims[0] / 8 + (blkDims[0] % 8 ? 1 : 0);
            blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0);
            blkDims.push_back(8);
            blkDims.push_back(8);
            layout = Layout::BLOCKED;
            break;
        case memory::OIdhw16o16i:
            order = {0, 1, 2, 3, 4, 0, 1};
            blkDims = dims;
            blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0);
            blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
            blkDims.push_back(16);
            blkDims.push_back(16);
            layout = Layout::BLOCKED;
            break;
        case memory::gOIhw4o4i:
            order = {0, 1, 2, 3, 4, 2, 1};
            order = {0, 1, 2, 3, 4, 1, 2};
            blkDims = dims;
            blkDims[1] = blkDims[1] / 4 + (blkDims[1] % 4 ? 1 : 0);
            blkDims[2] = blkDims[2] / 4 + (blkDims[2] % 4 ? 1 : 0);
@@ -754,6 +804,15 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
            blkDims.push_back(8);
            layout = Layout::BLOCKED;
            break;
        case memory::gOIhw8o8i:
            order = {0, 1, 2, 3, 4, 1, 2};
            blkDims = dims;
            blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0);
            blkDims[2] = blkDims[2] / 8 + (blkDims[2] % 8 ? 1 : 0);
            blkDims.push_back(8);
            blkDims.push_back(8);
            layout = Layout::BLOCKED;
            break;
        case memory::gOIhw16i16o:
            order = {0, 1, 2, 3, 4, 2, 1};
            blkDims = dims;
@@ -763,6 +822,15 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
            blkDims.push_back(16);
            layout = Layout::BLOCKED;
            break;
        case memory::gOIhw16o16i:
            order = {0, 1, 2, 3, 4, 1, 2};
            blkDims = dims;
            blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
            blkDims[2] = blkDims[2] / 16 + (blkDims[2] % 16 ? 1 : 0);
            blkDims.push_back(16);
            blkDims.push_back(16);
            layout = Layout::BLOCKED;
            break;
        case memory::OhIw8o4i:
            order = {0, 2, 1, 3, 0, 1};
            blkDims = dims;
@@ -1067,6 +1135,16 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
        } else if (blkdDims[4] == 16 && blkdDims[5] == 16) {
            mkldnnFormat = memory::format::OIhw16i16o;
        }
    } else if (order.size() == 6 && order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 0 && order[5] == 1) {
        if (blkdDims[4] == 8 && blkdDims[5] == 8) {
            mkldnnFormat = memory::format::OIhw8o8i;
        } else if (blkdDims[4] == 16 && blkdDims[5] == 16) {
            mkldnnFormat = memory::format::OIhw16o16i;
        }
    } else if (order.size() == 6 && order[0] == 1 && order[1] == 0 && order[2] == 2 && order[3] == 3 && order[4] == 0 && order[5] == 1) {
        if (blkdDims[4] == 16 && blkdDims[5] == 16) {
            mkldnnFormat = memory::format::IOhw16o16i;
        }
    } else if (order.size() == 5 && order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 0) {
        if (blkdDims[4] == 8) {
            mkldnnFormat = memory::format::Ohwi8o;
@@ -1122,6 +1200,13 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
        } else if (blkdDims[6] == 16) {
            mkldnnFormat = memory::format::OIdhw16i16o;
        }
    } else if (order.size() == 7 &&
               order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 0 && order[6] == 1) {
        if (blkdDims[6] == 8) {
            mkldnnFormat = memory::format::OIdhw8o8i;
        } else if (blkdDims[6] == 16) {
            mkldnnFormat = memory::format::OIdhw16o16i;
        }
    } else if (order.size() == 7 &&
               order[0] == 0 && order[1] == 2 && order[2] == 3 && order[3] == 1 && order[4] == 4 && order[5] == 0 && order[6] == 1) {
        if (blkdDims[5] == 8) {
@@ -1136,12 +1221,21 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
    } else if (order.size() == 7 &&
               order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 2 && order[6] == 1) {
        if (blkdDims[6] == 4) {
            mkldnnFormat = memory::format::gOIhw4o4i;
            mkldnnFormat = memory::format::gOIhw4i4o;
        } else if (blkdDims[6] == 8) {
            mkldnnFormat = memory::format::gOIhw8i8o;
        } else if (blkdDims[6] == 16) {
            mkldnnFormat = memory::format::gOIhw16i16o;
        }
    } else if (order.size() == 7 &&
               order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 1 && order[6] == 2) {
        if (blkdDims[6] == 4) {
            mkldnnFormat = memory::format::gOIhw4o4i;
        } else if (blkdDims[6] == 8) {
            mkldnnFormat = memory::format::gOIhw8o8i;
        } else if (blkdDims[6] == 16) {
            mkldnnFormat = memory::format::gOIhw16o16i;
        }
    } else if (order.size() == 7 &&
               order[0] == 0 && order[1] == 1 && order[2] == 3 && order[3] == 2 && order[4] == 4 && order[5] == 1 && order[6] == 2) {
        if (blkdDims[5] == 8 && blkdDims[6] == 4) {
@@ -87,6 +87,7 @@ public:
    }

    size_t GetSize() const;
    size_t GetElementsCount() const;

    mkldnn::memory::format GetFormat() const {
        return static_cast<mkldnn::memory::format>(prim->get_primitive_desc().desc().data.format);
@@ -1017,12 +1017,17 @@ void MKLDNNNode::initOptimalPrimitiveDescriptor() {
    auto config = selected_pd->getConfig();
    if (!isInitConfig(config)) {
        for (size_t i = 0; i < config.inConfs.size(); i++) {
            config.inConfs[i].desc = getConfiguredInputDesc(config, i);
            // TensorDescriptor constructor which is called inside getConfiguredInputDesc incorrectly computes offset field.
            // What's why MKLDNNMemoryDesc routine is used to reinitialize TD with expected offset values.
            config.inConfs[i].desc = MKLDNNMemoryDesc(getConfiguredInputDesc(config, i));
        }

        for (size_t i = 0; i < config.outConfs.size(); i++) {
            config.outConfs[i].desc = getConfiguredOutputDesc(config, i);
            // TensorDescriptor constructor which is called inside getConfiguredOutputDesc incorrectly computes offset field.
            // What's why MKLDNNMemoryDesc routine is used to reinitialize TD with expected offset values.
            config.outConfs[i].desc = MKLDNNMemoryDesc(getConfiguredOutputDesc(config, i));
        }

        initDescriptor(config);
    } else if (getType() != RNNSeq && getType() != RNNCell) {
        initDescriptor(config);
@@ -370,7 +370,10 @@ public:
        if (srcDescs.empty() || selectedDescs.empty())
            return false;
        for (size_t i = 0; i < srcDescs.size() && i < selectedDescs.size(); i++) {
            if (srcDescs[i] != selectedDescs[i].desc && srcDescs[i].getLayout() != InferenceEngine::Layout::ANY)
            if (!(srcDescs[i].getBlockingDesc() == selectedDescs[i].desc.getBlockingDesc() &&
                  srcDescs[i].getPrecision() == selectedDescs[i].desc.getPrecision() &&
                  srcDescs[i].getDims() == selectedDescs[i].desc.getDims()) &&
                srcDescs[i].getLayout() != InferenceEngine::Layout::ANY)
                return false;
        }
        return true;
@@ -5,7 +5,6 @@
#include "ie_metric_helpers.hpp"
#include "mkldnn_plugin.h"
#include "mkldnn_extension_mngr.h"
#include "mkldnn_layers_dispatcher.hpp"
#include "mkldnn_weights_cache.hpp"
#include <cpp_interfaces/base/ie_plugin_base.hpp>
#include <threading/ie_executor_manager.hpp>
@@ -15,6 +14,7 @@
#include <tuple>
#include <ie_system_conf.h>
#include <generic_ie.hpp>
#include <nodes/list.hpp>

#include "convert_function_to_cnn_network.hpp"
#include <transformations/common_optimizations/common_optimizations.hpp>
@@ -23,6 +23,7 @@
#include <transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset2.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/op/fused/gelu.hpp>

#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
@@ -40,7 +41,7 @@ using namespace InferenceEngine;

Engine::Engine() {
    _pluginName = "CPU";
    addDefaultExtensions(extensionManager);
    extensionManager->AddExtension(std::make_shared<Extensions::Cpu::MKLDNNExtensions>());
}

Engine::~Engine() {
@@ -83,7 +84,8 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const st
    const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
        return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
               std::dynamic_pointer_cast<const ::ngraph::opset2::BatchToSpace>(node) ||
               std::dynamic_pointer_cast<const ::ngraph::opset2::SpaceToBatch>(node);
               std::dynamic_pointer_cast<const ::ngraph::opset2::SpaceToBatch>(node) ||
               std::dynamic_pointer_cast<const ::ngraph::opset3::ShuffleChannels>(node);
    };
    auto nGraphFunc = clonedNetwork->getFunction();
    // Disable shape inference (WA for generic operations)
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include "argmax_imp.hpp"
@@ -49,7 +48,7 @@ private:
    argmax_conf conf;
};

REG_FACTORY_FOR(ImplFactory<ArgMaxImpl>, ArgMax);
REG_FACTORY_FOR(ArgMaxImpl, ArgMax);

} // namespace Cpu
} // namespace Extensions
@@ -4,6 +4,7 @@

#include "argmax_imp.hpp"

#include <cstring>
#include <algorithm>
#include <string>
#include <vector>
@@ -181,7 +182,7 @@ void argmax_many_classes_has_axis(const float* src_data, float* dst_data, Shape
    vmask_type vmask;
    int s_index = i0 * dim * after_num + ib1 * block_size;

    memset(reinterpret_cast<void*>(&vmax_values[0]), 0, sizeof(vmax_values));
    std::memset(reinterpret_cast<void*>(&vmax_values[0]), 0, sizeof(vmax_values));

    auto vswap_func = [&](int index1, int index2) {
        vtmp = vmax_values[index1];
@@ -6,7 +6,7 @@

#include <ie_iextension.h>
#include "ie_util_internal.hpp"
#include "list.hpp"
#include "nodes/list.hpp"

#include <string>
#include <vector>
@@ -176,6 +176,21 @@ protected:

IE_SUPPRESS_DEPRECATED_END

template <typename __prim>
inline void extRegister(MKLDNNExtensions * extInstance, const char * __type) {
    IE_SUPPRESS_DEPRECATED_START
    extInstance->AddExt(__type,
        [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* {
            return new __prim(layer);
        });
    IE_SUPPRESS_DEPRECATED_END
}

#define REG_FACTORY_FOR(__prim, __type) \
void __prim ## __type(MKLDNNExtensions * extInstance) { \
    extRegister<ImplFactory<__prim>>(extInstance, #__type); \
}

} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
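The reworked REG_FACTORY_FOR takes the implementation class directly, with the ImplFactory wrapping moved inside the macro; that is why the call sites in the node files below shrink from REG_FACTORY_FOR(ImplFactory<X>, Y) to REG_FACTORY_FOR(X, Y). A reduced sketch of the pattern (stand-in types; the real macro registers into an MKLDNNExtensions instance):

#include <functional>
#include <map>
#include <memory>
#include <string>

// Illustrative stand-ins for the real CNNLayer / factory types.
struct Layer { std::string name; };
struct FactoryBase { virtual ~FactoryBase() = default; };
template <class Impl>
struct ImplFactory : FactoryBase {
    explicit ImplFactory(const Layer*) {}
};

using Creator = std::function<std::unique_ptr<FactoryBase>(const Layer*)>;
static std::map<std::string, Creator> g_registry;  // stand-in for the extension instance

template <class Factory>
void extRegister(const char* type) {
    g_registry[type] = [](const Layer* l) { return std::unique_ptr<FactoryBase>(new Factory(l)); };
}

// The macro folds the ImplFactory wrapping in, so call sites name only the impl class.
#define REG_FACTORY_FOR(Impl, Type) \
    void Impl##Type() { extRegister<ImplFactory<Impl>>(#Type); }

struct ArgMaxImpl { explicit ArgMaxImpl(const Layer*) {} };
REG_FACTORY_FOR(ArgMaxImpl, ArgMax)  // defines ArgMaxImplArgMax(), registering "ArgMax"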
@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -219,7 +218,7 @@ private:
    std::vector<size_t> _crops_end;
};

REG_FACTORY_FOR(ImplFactory<BatchToSpaceImpl>, BatchToSpace);
REG_FACTORY_FOR(BatchToSpaceImpl, BatchToSpace);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -129,7 +128,7 @@ private:
    const size_t BROADCAST_SHAPE = 1;
};

REG_FACTORY_FOR(ImplFactory<BroadcastImpl>, Broadcast);
REG_FACTORY_FOR(BroadcastImpl, Broadcast);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -123,7 +122,7 @@ private:
    bool with_bins = false;
};

REG_FACTORY_FOR(ImplFactory<BucketizeImpl>, Bucketize);
REG_FACTORY_FOR(BucketizeImpl, Bucketize);

} // namespace Cpu
} // namespace Extensions

@ -14,8 +14,16 @@ namespace Cpu {

#if defined(HAVE_AVX512F)
namespace AVX512F {
    static inline __m512 _mm_uni_any_ps() {
        return __m512{};
    }

    static inline __m512i _mm_uni_any_epi32() {
        return __m512i{};
    }

    static inline __m512 _mm_uni_loadu_ps(const float* psrc) {
        return _mm512_loadu_ps(psrc);
        return _mm512_mask_loadu_ps(_mm_uni_any_ps(), (__mmask16)-1, psrc);
    }

    static inline void _mm_uni_storeu_ps(float* pdst, const __m512& vec) {

@ -62,8 +70,12 @@ namespace AVX512F {
        return _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vec0), _mm512_castps_si512(vec1)));
    }

    static inline __m512i _mm_uni_set1_epi32(int value) {
        return _mm512_mask_set1_epi32(_mm_uni_any_epi32(), (__mmask16)-1, value);
    }

    static inline __m512 _mm_uni_blendv_ps(__m512 vec0, __m512 vec1, __m512 vmask) {
        return _mm512_mask_blend_ps(_mm512_cmpneq_epi32_mask(_mm512_castps_si512(vmask), _mm512_set1_epi32(0)), vec0, vec1);
        return _mm512_mask_blend_ps(_mm512_cmpneq_epi32_mask(_mm512_castps_si512(vmask), _mm_uni_set1_epi32(0)), vec0, vec1);
    }

    static inline __m512 _mm_uni_blendv_ps(__m512 vec0, __m512 vec1, __mmask16 vmask) {

@ -90,10 +102,6 @@ namespace AVX512F {
        return _mm512_add_epi32(vec0, vec1);
    }

    static inline __m512i _mm_uni_set1_epi32(int value) {
        return _mm512_set1_epi32(value);
    }

    static inline __m512i _mm_uni_slli_epi32(__m512i vec, int value) {
        return _mm512_sll_epi32(vec, _mm_set1_epi64x(value));
    }

@ -119,7 +127,7 @@ namespace AVX512F {
    }

    static inline __m512 _mm_uni_cvtepi32_ps(__m512i vec) {
        return _mm512_cvtepi32_ps(vec);
        return _mm512_mask_cvtepi32_ps(_mm_uni_any_ps(), (__mmask16)-1, vec);
    }
} // namespace AVX512F
#elif defined(HAVE_AVX2)
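A note on the masked intrinsic forms introduced above: (__mmask16)-1 is the all-lanes mask 0xFFFF, so the pass-through value coming from _mm_uni_any_ps() / _mm_uni_any_epi32() is never selected and the masked calls behave exactly like their unmasked counterparts. A minimal self-check of that equivalence (assumes an AVX-512F capable compiler and CPU; build with -mavx512f):

    #include <immintrin.h>
    #include <cassert>

    int main() {
        alignas(64) float src[16];
        for (int i = 0; i < 16; ++i) src[i] = static_cast<float>(i);

        const __mmask16 all = (__mmask16)-1;                  // 0xFFFF: every lane selected
        __m512 a = _mm512_loadu_ps(src);                      // plain load
        __m512 b = _mm512_mask_loadu_ps(__m512{}, all, src);  // masked load, full mask

        alignas(64) float ra[16], rb[16];
        _mm512_storeu_ps(ra, a);
        _mm512_storeu_ps(rb, b);
        for (int i = 0; i < 16; ++i) assert(ra[i] == rb[i]);  // identical lane for lane
        return 0;
    }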
@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -129,7 +128,7 @@ private:
    std::string precision;
};

REG_FACTORY_FOR(ImplFactory<ConvertImpl>, Convert);
REG_FACTORY_FOR(ConvertImpl, Convert);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -86,7 +85,7 @@ public:
    }
};

REG_FACTORY_FOR(ImplFactory<CTCGreedyDecoderImpl>, CTCGreedyDecoder);
REG_FACTORY_FOR(CTCGreedyDecoderImpl, CTCGreedyDecoder);

} // namespace Cpu
} // namespace Extensions

230
inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp
Normal file
@ -0,0 +1,230 @@

// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <string>
#include <vector>
#include "ie_parallel.hpp"
#include "ie_precision.hpp"

namespace InferenceEngine {
namespace Extensions {
namespace Cpu {

class CumSumImpl: public ExtLayerBase {
    enum { CUM_SUM_DATA, AXIS, numOfInputs };
    enum { N, C, D, H, W, numOfDims };
    bool exclusive;
    bool reverse;
    size_t axis = 0;
    std::vector<size_t> shape5d;

public:
    explicit CumSumImpl(const CNNLayer* layer) {
        try {
            layerName = layer->name;
            if ((layer->insData.size() != numOfInputs && layer->insData.size() != (numOfInputs - 1)) || layer->outData.size() != 1)
                THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has incorrect number of input/output edges!";

            const auto &dataTensor = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc();
            const auto &dataShape = dataTensor.getDims();
            if (dataShape.size() < 1 || dataShape.size() > 5) {
                THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'data' input tensor with rank: " << dataShape.size();
            }

            exclusive = layer->GetParamAsBool("exclusive", false);
            reverse = layer->GetParamAsBool("reverse", false);

            const auto& dataPrecision = dataTensor.getPrecision();
            if (dataPrecision != Precision::I8 && dataPrecision != Precision::U8 && dataPrecision != Precision::I16 && dataPrecision != Precision::I32 &&
                    dataPrecision != Precision::FP32 && dataPrecision != Precision::I64 && dataPrecision != Precision::U64 && dataPrecision != Precision::BF16)
                THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has unsupported 'data' input precision: " << dataPrecision.name();

            if (layer->insData.size() == numOfInputs) {
                const auto& axisTensor = layer->insData[AXIS].lock()->getTensorDesc();
                const auto& axisTensorPrec = layer->insData[AXIS].lock()->getTensorDesc().getPrecision();
                if (axisTensorPrec != Precision::I32 && axisTensorPrec != Precision::I64)
                    THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has unsupported 'axis' input precision: " << axisTensorPrec.name();

                const auto axisTensorRank = axisTensor.getDims().size();
                if (axisTensorRank != 0)
                    THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'axis' input tensor with rank: " << axisTensorRank;
            }

            if (dataShape != layer->outData[0]->getTensorDesc().getDims())
                THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has different 'data' input and output dimensions";

            shape5d = get5dShape(dataShape);

            LayerConfig config;
            for (size_t i = 0; i < layer->insData.size(); i++) {
                DataConfig inConfig;
                inConfig.inPlace = -1;
                inConfig.constant = false;

                Precision inPrecision = layer->insData[i].lock()->getTensorDesc().getPrecision();
                if (inPrecision == Precision::BF16)
                    inPrecision = Precision::FP32;
                const SizeVector& inDims = layer->insData[i].lock()->getTensorDesc().getDims();
                inConfig.desc = TensorDesc(inPrecision, inDims, InferenceEngine::TensorDesc::getLayoutByDims(inDims));

                config.inConfs.push_back(inConfig);
            }
            DataConfig outConfig;
            outConfig.inPlace = -1;
            outConfig.constant = false;
            Precision outPrecision = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc().getPrecision();
            if (outPrecision == Precision::BF16)
                outPrecision = Precision::FP32;
            const SizeVector& outDims = layer->outData[0]->getTensorDesc().getDims();
            outConfig.desc = TensorDesc(outPrecision, outDims, InferenceEngine::TensorDesc::getLayoutByDims(outDims));

            config.outConfs.push_back(outConfig);

            config.dynBatchSupport = false;
            confs.push_back(config);
        } catch (InferenceEngine::details::InferenceEngineException &ex) {
            errorMsg = ex.what();
        }
    }

    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
        if (inputs.size() == numOfInputs)
            axis = getAxis(inputs[AXIS], inputs[CUM_SUM_DATA]);

        const auto &dataPrecision = inputs[CUM_SUM_DATA]->getTensorDesc().getPrecision();
        switch (dataPrecision) {
            case Precision::I8   : { execImpl<int8_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
            case Precision::U8   : { execImpl<uint8_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
            case Precision::I16  : { execImpl<int16_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
            case Precision::I32  : { execImpl<int32_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
            case Precision::FP32 : { execImpl<float>(inputs[CUM_SUM_DATA], outputs[0]); break; }
            case Precision::I64  : { execImpl<int64_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
            case Precision::U64  : { execImpl<uint64_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
            default : {
                if (resp) {
                    std::string errorMsg = "CumSum layer with name '" + layerName + "' has unsupported 'data' input precision: " + dataPrecision.name();
                    errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
                }
                return GENERAL_ERROR;
            }
        }
        return OK;
    }

private:
    template <typename dataType>
    void execImpl(const Blob::CPtr& _input, const Blob::Ptr& _output) {
        const auto *input = _input->cbuffer().as<const dataType *>() + _input->getTensorDesc().getBlockingDesc().getOffsetPadding();
        auto *output = _output->buffer().as<dataType *>() + _output->getTensorDesc().getBlockingDesc().getOffsetPadding();
        const size_t offset = _input->getTensorDesc().getBlockingDesc().getStrides()[axis];

        if (reverse) {
            if (exclusive) {
                cumSum<true, true, dataType>(input, output, offset);
            } else {
                cumSum<true, false, dataType>(input, output, offset);
            }
        } else {
            if (exclusive) {
                cumSum<false, true, dataType>(input, output, offset);
            } else {
                cumSum<false, false, dataType>(input, output, offset);
            }
        }
    }

    template <bool reverse, bool exclusive, typename dataType>
    void cumSum(const dataType *input, dataType *output, const size_t &offset) {
        std::vector<size_t> iterationRange(numOfDims - 1);
        size_t j = 0;
        for (size_t i = 0; i < shape5d.size(); i++) {
            if (i == axis)
                continue;
            iterationRange[j++] = shape5d[i];
        }
        parallel_for4d(iterationRange[0], iterationRange[1], iterationRange[2], iterationRange[3], [&](size_t ir0, size_t ir1, size_t ir2, size_t ir3) {
            std::vector<size_t> forStartOffset;
            forStartOffset.push_back(ir0); forStartOffset.push_back(ir1); forStartOffset.push_back(ir2); forStartOffset.push_back(ir3);
            forStartOffset.insert(forStartOffset.begin() + axis, 0);
            size_t startOffset = getStartOffset(forStartOffset);

            const dataType *inputStart = input + startOffset;
            dataType *outputStart = output + startOffset;

            if (reverse) {
                if (exclusive) {
                    outputStart[offset*(shape5d[axis] - 1)] = 0;
                    for (int64_t i = shape5d[axis] - 2; i >= 0; i--) {
                        outputStart[i*offset] = inputStart[(i+1)*offset] + outputStart[(i+1)*offset];
                    }
                } else {
                    outputStart[offset*(shape5d[axis] - 1)] = inputStart[offset*(shape5d[axis] - 1)];
                    for (int64_t i = shape5d[axis] - 2; i >= 0; i--) {
                        outputStart[i*offset] = inputStart[i*offset] + outputStart[(i+1)*offset];
                    }
                }
            } else {
                if (exclusive) {
                    outputStart[0] = 0;
                    for (size_t i = 1; i < shape5d[axis]; i++) {
                        outputStart[i*offset] = inputStart[(i-1)*offset] + outputStart[(i-1)*offset];
                    }
                } else {
                    outputStart[0] = inputStart[0];
                    for (size_t i = 1; i < shape5d[axis]; i++) {
                        outputStart[i*offset] = inputStart[i*offset] + outputStart[(i-1)*offset];
                    }
                }
            }
        });
    }

    size_t getStartOffset(std::vector<size_t> &forStartOffset) {
        return forStartOffset[N]*shape5d[C]*shape5d[D]*shape5d[H]*shape5d[W] + forStartOffset[C]*shape5d[D]*shape5d[H]*shape5d[W] +
               forStartOffset[D]*shape5d[H]*shape5d[W] + forStartOffset[H]*shape5d[W] + forStartOffset[W];
    }

    size_t getAxis(const Blob::CPtr& _axis, const Blob::CPtr& _data) {
        const auto& axisPrecision = _axis->getTensorDesc().getPrecision();
        const int64_t dataShapeSize = static_cast<int64_t>(_data->getTensorDesc().getDims().size());
        int64_t axisValueFromBlob;
        switch (axisPrecision) {
            case Precision::I32 : {
                const auto *axisPtr = _axis->cbuffer().as<const int32_t *>();
                axisValueFromBlob = static_cast<int64_t>(axisPtr[0]);
                break;
            }
            case Precision::I64 : {
                const auto *axisPtr = _axis->cbuffer().as<const int64_t *>();
                axisValueFromBlob = axisPtr[0];
                break;
            }
            default : {
                THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'axis' input with precision: " << axisPrecision.name();
            }
        }
        if (axisValueFromBlob < -dataShapeSize || axisValueFromBlob > dataShapeSize - 1)
            THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has axis with a value out of range: " << axisValueFromBlob;
        return axisValueFromBlob >= 0 ? axisValueFromBlob : (axisValueFromBlob + dataShapeSize);
    }

    std::vector<size_t> get5dShape(const SizeVector& dims) {
        std::vector<size_t> shape5d(numOfDims, 1);
        for (size_t i = 0; i < dims.size(); i++)
            shape5d[i] = dims[i];
        return shape5d;
    }

private:
    std::string layerName;
};

REG_FACTORY_FOR(CumSumImpl, CumSum);

} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
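To make the reverse/exclusive template parameters above concrete, here is the same set of four loops reduced to a standalone 1-D case (axis stride fixed at 1); expected outputs are noted in the comments.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    template <bool reverse, bool exclusive>
    void cumSum1d(const float* in, float* out, size_t n) {
        if (reverse) {
            if (exclusive) {               // running sum of everything to the right
                out[n - 1] = 0;
                for (int64_t i = static_cast<int64_t>(n) - 2; i >= 0; i--)
                    out[i] = in[i + 1] + out[i + 1];
            } else {                       // suffix sums including the element itself
                out[n - 1] = in[n - 1];
                for (int64_t i = static_cast<int64_t>(n) - 2; i >= 0; i--)
                    out[i] = in[i] + out[i + 1];
            }
        } else {
            if (exclusive) {               // running sum of everything to the left
                out[0] = 0;
                for (size_t i = 1; i < n; i++)
                    out[i] = in[i - 1] + out[i - 1];
            } else {                       // ordinary prefix sums
                out[0] = in[0];
                for (size_t i = 1; i < n; i++)
                    out[i] = in[i] + out[i - 1];
            }
        }
    }

    int main() {
        std::vector<float> in{1, 2, 3, 4}, out(4);
        cumSum1d<false, false>(in.data(), out.data(), 4);  // 1 3 6 10
        cumSum1d<false, true >(in.data(), out.data(), 4);  // 0 1 3 6
        cumSum1d<true,  false>(in.data(), out.data(), 4);  // 10 9 7 4
        cumSum1d<true,  true >(in.data(), out.data(), 4);  // 9 7 4 0
        return 0;
    }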
@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -118,7 +117,7 @@ private:
    size_t ownStrides[CNTR_SIZE];
};

REG_FACTORY_FOR(ImplFactory<DepthToSpaceImpl>, DepthToSpace);
REG_FACTORY_FOR(DepthToSpaceImpl, DepthToSpace);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cfloat>

@ -604,7 +603,7 @@ void DetectionOutputImpl::nms_mx(const float* conf_data,
    }
}

REG_FACTORY_FOR(ImplFactory<DetectionOutputImpl>, DetectionOutput);
REG_FACTORY_FOR(DetectionOutputImpl, DetectionOutput);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cassert>

@ -392,7 +391,7 @@ private:


REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronDetectionOutputImpl>, ExperimentalDetectronDetectionOutput);
REG_FACTORY_FOR(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -121,7 +120,7 @@ private:
    const size_t FILL_VALUE = 1;
};

REG_FACTORY_FOR(ImplFactory<FillImpl>, Fill);
REG_FACTORY_FOR(FillImpl, Fill);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -148,7 +147,7 @@ private:
};


REG_FACTORY_FOR(ImplFactory<GatherImpl>, Gather);
REG_FACTORY_FOR(GatherImpl, Gather);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -150,7 +149,7 @@ private:
    InferenceEngine::Precision precision;
};

REG_FACTORY_FOR(ImplFactory<GatherTreeImpl>, GatherTree);
REG_FACTORY_FOR(GatherTreeImpl, GatherTree);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -58,7 +57,7 @@ private:
    float bias = 1.0f;
};

REG_FACTORY_FOR(ImplFactory<GRNImpl>, GRN);
REG_FACTORY_FOR(GRNImpl, GRN);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"
#include <string>
#include <vector>

@ -434,7 +433,7 @@ private:
    }
};

REG_FACTORY_FOR(ImplFactory<InterpImpl>, Interp);
REG_FACTORY_FOR(InterpImpl, Interp);

} // namespace Cpu
} // namespace Extensions

29
inference-engine/src/mkldnn_plugin/nodes/list.cpp
Normal file
@ -0,0 +1,29 @@

// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "nodes/list.hpp"

namespace InferenceEngine {
namespace Extensions {
namespace Cpu {

#define FACTORY_DECLARATION(__prim, __type) \
    void __prim ## __type(MKLDNNExtensions * extInstance)

#define FACTORY_CALL(__prim, __type) \
    __prim ## __type(this)

#define MKLDNN_EXTENSION_NODE(__prim, __type) FACTORY_DECLARATION(__prim, __type)
# include "list_tbl.hpp"
#undef MKLDNN_EXTENSION_NODE

MKLDNNExtensions::MKLDNNExtensions() {
#define MKLDNN_EXTENSION_NODE(__prim, __type) FACTORY_CALL(__prim, __type)
# include "list_tbl.hpp"
#undef MKLDNN_EXTENSION_NODE
}

} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
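The two includes of list_tbl.hpp above are a classic X-macro: the same table expands once into function declarations and once into calls from the constructor. A minimal sketch of the pattern, with a hypothetical two-entry table inlined in place of the real header:

    #include <cstdio>

    struct Extensions;  // forward declaration; stands in for MKLDNNExtensions

    #define NODE_TABLE(X) \
        X(FooImpl, Foo)   \
        X(BarImpl, Bar)

    // Pass 1: declare void FooImplFoo(Extensions*) and void BarImplBar(Extensions*).
    #define DECLARE(__prim, __type) void __prim ## __type(Extensions* ext);
    NODE_TABLE(DECLARE)
    #undef DECLARE

    struct Extensions {
        Extensions() {
            // Pass 2: call every registration function exactly once, in table order.
    #define CALL(__prim, __type) __prim ## __type(this);
            NODE_TABLE(CALL)
    #undef CALL
        }
    };

    // Definitions would normally live next to each node implementation.
    void FooImplFoo(Extensions*) { std::puts("registered Foo"); }
    void BarImplBar(Extensions*) { std::puts("registered Bar"); }

    int main() { Extensions e; return 0; }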
@ -12,17 +12,6 @@

#include <memory>
#include <algorithm>

// WA for xbyak.h
#ifdef _WIN32
# ifndef _WINSOCKAPI_
# define _WINSOCKAPI_
# endif
# ifndef _WINSOCK2API_
# define _WINSOCK2API_
# endif
#endif
#include <cpu_isa_traits.hpp>

namespace InferenceEngine {
namespace Extensions {
namespace Cpu {

@ -37,14 +26,16 @@ struct ExtensionsHolder {

class MKLDNNExtensions : public IExtension {
public:
    MKLDNNExtensions();

    StatusCode getPrimitiveTypes(char**& types, unsigned int& size, ResponseDesc* resp) noexcept override {
        collectTypes(types, size, MKLDNNExtensions::GetExtensionsHolder()->list);
        collectTypes(types, size, extensionsHolder->list);
        return OK;
    }

    StatusCode
    getFactoryFor(ILayerImplFactory*& factory, const CNNLayer* cnnLayer, ResponseDesc* resp) noexcept override {
        auto& factories = MKLDNNExtensions::GetExtensionsHolder()->list;
        auto& factories = extensionsHolder->list;
        if (factories.find(cnnLayer->type) == factories.end()) {
            std::string errorMsg = std::string("Factory for ") + cnnLayer->type + " wasn't found!";
            errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);

@ -78,21 +69,13 @@ public:
        delete this;
    }

    static void AddExt(std::string name, ext_factory factory) {
        auto extensionsHolder = GetExtensionsHolder();
        if (extensionsHolder != nullptr)
            extensionsHolder->list[name] = factory;
    }

    static std::shared_ptr<ExtensionsHolder> GetExtensionsHolder() {
        static std::shared_ptr<ExtensionsHolder> localHolder;
        if (localHolder == nullptr) {
            localHolder = std::make_shared<ExtensionsHolder>();
        }
        return localHolder;
    void AddExt(std::string name, ext_factory factory) {
        extensionsHolder->list[name] = factory;
    }

private:
    std::shared_ptr<ExtensionsHolder> extensionsHolder = std::make_shared<ExtensionsHolder>();

    template<class T>
    void collectTypes(char**& types, unsigned int& size, const std::map<std::string, T> &factories) {
        types = new char *[factories.size()];

@ -108,22 +91,6 @@ private:

IE_SUPPRESS_DEPRECATED_END

template<typename Ext>
class ExtRegisterBase {
public:
    explicit ExtRegisterBase(const std::string& type) {
        IE_SUPPRESS_DEPRECATED_START
        MKLDNNExtensions::AddExt(type,
            [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* {
                return new Ext(layer);
            });
        IE_SUPPRESS_DEPRECATED_END
    }
};

#define REG_FACTORY_FOR(__prim, __type) \
    static ExtRegisterBase<__prim> __reg__##__type(#__type)

} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
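A sketch of the ownership change in this header, under the reading that the old code relied on a shared function-local static holder filled by static registrar objects, while the new code gives every extension instance its own holder filled deterministically in the constructor. Types are simplified stand-ins, not the real API:

    #include <functional>
    #include <map>
    #include <memory>
    #include <string>

    struct Holder { std::map<std::string, std::function<void()>> list; };

    class ExtensionsSketch {
    public:
        ExtensionsSketch() {
            // Explicit, ordered registration in the constructor replaces
            // static-initializer side effects scattered across translation units.
            AddExt("ArgMax", [] {});
            AddExt("CumSum", [] {});
        }
        void AddExt(std::string name, std::function<void()> f) {
            holder->list[std::move(name)] = std::move(f);
        }
    private:
        // Per-instance state: two extension objects no longer share one map.
        std::shared_ptr<Holder> holder = std::make_shared<Holder>();
    };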
93
inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp
Normal file
@ -0,0 +1,93 @@

// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#ifndef MKLDNN_EXTENSION_NODE
# warning "MKLDNN_EXTENSION_NODE is not defined"
# define MKLDNN_EXTENSION_NODE(__prim, __type)
#endif

MKLDNN_EXTENSION_NODE(PriorBoxImpl, PriorBox);
MKLDNN_EXTENSION_NODE(MathImpl, Abs);
MKLDNN_EXTENSION_NODE(MathImpl, Acos);
MKLDNN_EXTENSION_NODE(MathImpl, Acosh);
MKLDNN_EXTENSION_NODE(MathImpl, Asin);
MKLDNN_EXTENSION_NODE(MathImpl, Asinh);
MKLDNN_EXTENSION_NODE(MathImpl, Atan);
MKLDNN_EXTENSION_NODE(MathImpl, Atanh);
MKLDNN_EXTENSION_NODE(MathImpl, Ceil);
MKLDNN_EXTENSION_NODE(MathImpl, Cos);
MKLDNN_EXTENSION_NODE(MathImpl, Cosh);
MKLDNN_EXTENSION_NODE(MathImpl, Erf);
MKLDNN_EXTENSION_NODE(MathImpl, Floor);
MKLDNN_EXTENSION_NODE(MathImpl, HardSigmoid);
MKLDNN_EXTENSION_NODE(MathImpl, Log);
MKLDNN_EXTENSION_NODE(MathImpl, Neg);
MKLDNN_EXTENSION_NODE(MathImpl, Reciprocal);
MKLDNN_EXTENSION_NODE(MathImpl, Selu);
MKLDNN_EXTENSION_NODE(MathImpl, Sign);
MKLDNN_EXTENSION_NODE(MathImpl, Sin);
MKLDNN_EXTENSION_NODE(MathImpl, Sinh);
MKLDNN_EXTENSION_NODE(MathImpl, Softplus);
MKLDNN_EXTENSION_NODE(MathImpl, Softsign);
MKLDNN_EXTENSION_NODE(MathImpl, Tan);
MKLDNN_EXTENSION_NODE(ExperimentalDetectronTopKROIsImpl, ExperimentalDetectronTopKROIs);
MKLDNN_EXTENSION_NODE(ReverseSequenceImpl, ReverseSequence);
MKLDNN_EXTENSION_NODE(DetectionOutputImpl, DetectionOutput);
MKLDNN_EXTENSION_NODE(ArgMaxImpl, ArgMax);
MKLDNN_EXTENSION_NODE(UnsqueezeImpl, Unsqueeze);
MKLDNN_EXTENSION_NODE(StridedSliceImpl, StridedSlice);
MKLDNN_EXTENSION_NODE(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput);
MKLDNN_EXTENSION_NODE(RegionYoloImpl, RegionYolo);
MKLDNN_EXTENSION_NODE(LogSoftmaxImpl, LogSoftmax);
MKLDNN_EXTENSION_NODE(ReorgYoloImpl, ReorgYolo);
MKLDNN_EXTENSION_NODE(SqueezeImpl, Squeeze);
MKLDNN_EXTENSION_NODE(ConvertImpl, Convert);
MKLDNN_EXTENSION_NODE(FillImpl, Fill);
MKLDNN_EXTENSION_NODE(UniqueImpl, Unique);
MKLDNN_EXTENSION_NODE(PSROIPoolingImpl, PSROIPooling);
MKLDNN_EXTENSION_NODE(DepthToSpaceImpl, DepthToSpace);
MKLDNN_EXTENSION_NODE(ScatterImpl, ScatterUpdate);
MKLDNN_EXTENSION_NODE(OneHotImpl, OneHot);
MKLDNN_EXTENSION_NODE(BroadcastImpl, Broadcast);
MKLDNN_EXTENSION_NODE(ExperimentalSparseWeightedReduceImpl, ExperimentalSparseWeightedSum);
MKLDNN_EXTENSION_NODE(SparseToDenseImpl, SparseToDense);
MKLDNN_EXTENSION_NODE(ExperimentalDetectronROIFeatureExtractorImpl, ExperimentalDetectronROIFeatureExtractor);
MKLDNN_EXTENSION_NODE(ONNXCustomProposalImpl, ExperimentalDetectronGenerateProposalsSingleImage);
MKLDNN_EXTENSION_NODE(NonMaxSuppressionImpl, NonMaxSuppression);
MKLDNN_EXTENSION_NODE(TopKImpl, TopK);
MKLDNN_EXTENSION_NODE(ShuffleChannelsImpl, ShuffleChannels);
MKLDNN_EXTENSION_NODE(SpaceToDepthImpl, SpaceToDepth);
MKLDNN_EXTENSION_NODE(PowerFileImpl, PowerFile);
MKLDNN_EXTENSION_NODE(InterpImpl, Interp);
MKLDNN_EXTENSION_NODE(BatchToSpaceImpl, BatchToSpace);
MKLDNN_EXTENSION_NODE(ExperimentalDetectronPriorGridGeneratorImpl, ExperimentalDetectronPriorGridGenerator);
MKLDNN_EXTENSION_NODE(SimplerNMSImpl, SimplerNMS);
MKLDNN_EXTENSION_NODE(PadImpl, Pad);
MKLDNN_EXTENSION_NODE(GRNImpl, GRN);
MKLDNN_EXTENSION_NODE(SparseFillEmptyRowsImpl, SparseFillEmptyRows);
MKLDNN_EXTENSION_NODE(BucketizeImpl, Bucketize);
MKLDNN_EXTENSION_NODE(CTCGreedyDecoderImpl, CTCGreedyDecoder);
MKLDNN_EXTENSION_NODE(GatherImpl, Gather);
MKLDNN_EXTENSION_NODE(ProposalImpl, Proposal);
MKLDNN_EXTENSION_NODE(RangeImpl, Range);
MKLDNN_EXTENSION_NODE(SelectImpl, Select);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceAnd);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceL1);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceL2);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceLogSum);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceLogSumExp);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMax);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMean);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMin);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceOr);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceProd);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceSum);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceSumSquare);
MKLDNN_EXTENSION_NODE(GatherTreeImpl, GatherTree);
MKLDNN_EXTENSION_NODE(PriorBoxClusteredImpl, PriorBoxClustered);
MKLDNN_EXTENSION_NODE(SpaceToBatchImpl, SpaceToBatch);
MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentMean);
MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSqrtN);
MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSum);
MKLDNN_EXTENSION_NODE(CumSumImpl, CumSum);
@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -105,7 +104,7 @@ private:
    bool is_last_dim = false;
};

REG_FACTORY_FOR(ImplFactory<LogSoftmaxImpl>, LogSoftmax);
REG_FACTORY_FOR(LogSoftmaxImpl, LogSoftmax);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -272,29 +271,29 @@ private:
    float gamma = 0.0f;
};

REG_FACTORY_FOR(ImplFactory<MathImpl>, Abs);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Acos);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Acosh);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Asin);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Asinh);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Atan);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Atanh);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Ceil);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Cos);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Cosh);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Erf);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Floor);
REG_FACTORY_FOR(ImplFactory<MathImpl>, HardSigmoid);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Log);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Neg);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Reciprocal);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Selu);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Sign);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Sin);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Sinh);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Softplus);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Softsign);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Tan);
REG_FACTORY_FOR(MathImpl, Abs);
REG_FACTORY_FOR(MathImpl, Acos);
REG_FACTORY_FOR(MathImpl, Acosh);
REG_FACTORY_FOR(MathImpl, Asin);
REG_FACTORY_FOR(MathImpl, Asinh);
REG_FACTORY_FOR(MathImpl, Atan);
REG_FACTORY_FOR(MathImpl, Atanh);
REG_FACTORY_FOR(MathImpl, Ceil);
REG_FACTORY_FOR(MathImpl, Cos);
REG_FACTORY_FOR(MathImpl, Cosh);
REG_FACTORY_FOR(MathImpl, Erf);
REG_FACTORY_FOR(MathImpl, Floor);
REG_FACTORY_FOR(MathImpl, HardSigmoid);
REG_FACTORY_FOR(MathImpl, Log);
REG_FACTORY_FOR(MathImpl, Neg);
REG_FACTORY_FOR(MathImpl, Reciprocal);
REG_FACTORY_FOR(MathImpl, Selu);
REG_FACTORY_FOR(MathImpl, Sign);
REG_FACTORY_FOR(MathImpl, Sin);
REG_FACTORY_FOR(MathImpl, Sinh);
REG_FACTORY_FOR(MathImpl, Softplus);
REG_FACTORY_FOR(MathImpl, Softsign);
REG_FACTORY_FOR(MathImpl, Tan);

} // namespace Cpu
} // namespace Extensions

@ -108,7 +108,7 @@ void MKLDNNBatchNormalizationNode::getSupportedDescriptors() {

MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetVarianceDesc(const memory::primitive_desc &primitive_desc) const {
    memory::primitive_desc aprimitive_desc;
    mkldnn_primitive_desc_t bndesc;
    mkldnn_primitive_desc_t bndesc = nullptr;
    mkldnn_batch_normalization_desc_t *p;
    error::wrap_c_api(mkldnn_primitive_desc_query(
        primitive_desc.get(), mkldnn::convert_to_c(batch_normalization_d), 0, &p),

@ -128,7 +128,7 @@ MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetVarianceDesc(const memory::pri

MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetMeanDesc(const memory::primitive_desc &primitive_desc) const {
    memory::primitive_desc aprimitive_desc;
    mkldnn_primitive_desc_t bndesc;
    mkldnn_primitive_desc_t bndesc = nullptr;
    mkldnn_batch_normalization_desc_t *p;
    error::wrap_c_api(mkldnn_primitive_desc_query(
        primitive_desc.get(), mkldnn::convert_to_c(batch_normalization_d), 0, &p),

@ -148,7 +148,7 @@ MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetMeanDesc(const memory::primiti

MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetScaleShiftWeightsDesc(const memory::primitive_desc &primitive_desc) const {
    memory::primitive_desc adesc;
    mkldnn_primitive_desc_t bndesc;
    mkldnn_primitive_desc_t bndesc = nullptr;
    const_mkldnn_primitive_desc_t const_bndesc =
        mkldnn_primitive_desc_query_pd(primitive_desc.get(),
                                       mkldnn::convert_to_c(weights_pd), 0);

@ -17,6 +17,16 @@

#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include <ie_layers_internal.hpp>

// WA for xbyak.h
#ifdef _WIN32
# ifndef _WINSOCKAPI_
# define _WINSOCKAPI_
# endif
# ifndef _WINSOCK2API_
# define _WINSOCK2API_
# endif
#endif
#include "cpu_isa_traits.hpp"

using namespace mkldnn;

@ -279,8 +279,10 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
        getParentEdgeAt(0)->getDims().ndims() == 5 ? memory::ndhwc : memory::nhwc);
        createDescriptor({in_candidate}, {out_candidate});
    } else {
        inputDataType = convLayer->input()->getPrecision() == Precision::BF16 ? memory::bf16 : memory::f32;
        outputDataType = convLayer->outData[0]->getPrecision() == Precision::BF16 ? memory::bf16 : memory::f32;
        inputDataType = (convLayer->input()->getPrecision() == Precision::BF16
            && !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::bf16 : memory::f32;
        outputDataType = (convLayer->outData[0]->getPrecision() == Precision::BF16
            && !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::bf16 : memory::f32;
        eltwisePrecision = Precision::FP32;
        for (int i = 0; i < fusedWith.size(); i++) {
            auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());

@ -37,7 +37,7 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
    precision = InferenceEngine::Precision::FP32;
    auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);

    if (getParentEdges().size() != 1)
    if (getParentEdges().empty() || getParentEdges().size() > 3)
        THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
    if (getChildEdges().empty())
        THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();

@ -45,7 +45,7 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
    auto * deconvLayer = dynamic_cast<DeconvolutionLayer*>(getCnnLayer().get());
    if (deconvLayer == nullptr)
        THROW_IE_EXCEPTION << "Cannot convert deconvolution layer.";
    if (deconvLayer->_weights == nullptr) {
    if (getParentEdges().size() == 1 && deconvLayer->_weights == nullptr) {
        THROW_IE_EXCEPTION << "Weights are empty for layer: " << deconvLayer->name
                           << " used in MKLDNN node: " << getName() << "\n"
                           << "Use the second argument of InferenceEngine::Core::ReadNetwork"

@ -54,11 +54,22 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
    withGroups = (deconvLayer->_group > 1);
    isDW = withGroups && deconvLayer->_group == deconvLayer->_out_depth &&
           deconvLayer->_group == deconvLayer->input()->getDims()[1];
    withBiases = (deconvLayer->_biases != nullptr && deconvLayer->_biases->size() != 0);

    bool withBiases = (deconvLayer->_biases != nullptr && deconvLayer->_biases->size() != 0) || getParentEdges().size() == 3;
    if (withBiases) {
        biases = deconvLayer->_biases;
        Blob::Ptr biases;

        if (getParentEdges().size() == 3) {
            auto biasLayer = getParentEdgesAtPort(2)[0]->getParent()->getCnnLayer();
            if (biasLayer->type != "Const")
                THROW_IE_EXCEPTION << "Deconvolution layer with name '" << getName() << "' doesn't support non-constant biases";
            biases = biasLayer->blobs["custom"];
        } else {
            biases = deconvLayer->_biases;
        }

        // WA: we add bias as depthwise post op
        setBiasAsPostOp();
        setBiasAsPostOp(biases);
    }

    /* Original layout format for deconv weights is iohw (from Caffe).

@ -83,7 +94,8 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
        weightDims.push_back(deconvLayer->_kernel[deconvLayer->_kernel.size() - i]);
    }

    internalBlobs.push_back(createInternalBlob(weightDims, true));
    if (getParentEdges().size() == 1)
        internalBlobs.push_back(createInternalBlob(weightDims, true));

    invertVectorCopyUtoI(deconvLayer->_stride, stride);
    for (int i = 1; i <= deconvLayer->_dilation.size(); i++) {

@ -113,7 +125,7 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
    }
}

void MKLDNNDeconvolutionNode::setBiasAsPostOp() {
void MKLDNNDeconvolutionNode::setBiasAsPostOp(const InferenceEngine::Blob::Ptr& biases) {
    mkldnn::post_ops ops;
    MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biases->size(), 16))});

@ -157,7 +169,7 @@ void MKLDNNDeconvolutionNode::createPrimitive() {

    prim.reset(new convolution_backward_data(prim_desc,
        getParentEdgeAt(0)->getMemory().GetPrimitive(),
        internalBlobMemory[0]->GetPrimitive(),
        getWeights(),
        getChildEdgeAt(0)->getMemory().GetPrimitive()));
}

@ -197,15 +209,32 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<InferenceEngine
}

MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
    InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.diff_dst_primitive_desc(idx).desc());
    if (desc.getLayout() == InferenceEngine::Layout::ANY)
    InferenceEngine::TensorDesc desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(idx - 1).desc())
                                               : MKLDNNMemoryDesc(primitive_desc_it.diff_dst_primitive_desc(idx).desc());

    if (desc.getLayout() == InferenceEngine::Layout::ANY) {
        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                            getParentEdgeAt(idx)->getDims().ToSizeVector(),
                                                            desc.getLayout()));
    else
        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                            getParentEdgeAt(idx)->getDims().ToSizeVector(),
                                                            desc.getBlockingDesc()));
    } else {
        if (getParentEdgeAt(idx)->getDims().ToSizeVector().size() != *std::max_element(desc.getBlockingDesc().getOrder().begin(),
                                                                                       desc.getBlockingDesc().getOrder().end()) + 1) {
            auto old_dims = getParentEdgeAt(idx)->getDims().ToSizeVector();
            auto new_dims = weightsDims.ToSizeVector();

            auto td = InferenceEngine::TensorDesc(desc.getPrecision(),
                                                  new_dims,
                                                  desc.getBlockingDesc());
            if (new_dims.size() == desc.getBlockingDesc().getBlockDims().size()) {
                td.setLayout(BLOCKED);
            }
            return MKLDNNMemoryDesc(td);
        } else {
            return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                                getParentEdgeAt(idx)->getDims().ToSizeVector(),
                                                                desc.getBlockingDesc()));
        }
    }
}

MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {

@ -219,4 +248,9 @@ MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_i
                                                        getChildEdgeAt(idx)->getDims().ToSizeVector(),
                                                        desc.getBlockingDesc()));
}

const mkldnn::memory& MKLDNNDeconvolutionNode::getWeights() const {
    return getParentEdges().size() > 1 ? getParentEdgeAt(1)->getMemory().GetPrimitive() : internalBlobMemory[0]->GetPrimitive();
}

REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution);
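For reference, a hedged sketch of the rnd_up helper used above when sizing the depthwise post-op blob. Its real definition lives elsewhere in the plugin; rounding up to the next multiple of the factor is the assumed semantics.

    #include <cstddef>

    // Assumed semantics: round value up to the next multiple of factor.
    constexpr size_t rnd_up(size_t value, size_t factor) {
        return (value + factor - 1) / factor * factor;
    }

    static_assert(rnd_up(20, 16) == 32, "20 rounds up to the next multiple of 16");
    static_assert(rnd_up(32, 16) == 32, "exact multiples are unchanged");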
@ -27,11 +27,14 @@ public:
        return false;
    }

    size_t descInputNumbers(MKLDNNDescriptor desc) override {
        return static_cast<size_t>(getParentEdges().size());
    }

    MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
    MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;

private:
    bool withBiases = false;
    bool withGroups = false;
    bool isDW = false;
    size_t groupNum = 1;

@ -40,13 +43,14 @@ private:
    std::vector<ptrdiff_t> dilation;
    std::vector<ptrdiff_t> paddingR;
    MKLDNNDims weightsDims;
    InferenceEngine::Blob::Ptr biases;
    std::vector<std::shared_ptr<mkldnn::convolution_forward::desc>> descs_fwd;
    std::vector<std::shared_ptr<mkldnn::convolution_backward_data::desc>> descs_bwd;

    mkldnn::primitive_attr attr;
    std::vector<MKLDNNMemoryPtr> PostOpsIntBlobMemory;
    void setBiasAsPostOp();
    void setBiasAsPostOp(const InferenceEngine::Blob::Ptr& biases);

    const mkldnn::memory& getWeights() const;
};

} // namespace MKLDNNPlugin

@ -715,7 +715,9 @@ void MKLDNNNormalizeNode::initSupportedPrimitiveDescriptors() {
    setPostOps(attr, true);

    Precision inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision();
    inputPrecision = inputPrecision == Precision::BF16 ? Precision(Precision::FP32) : inputPrecision;
    Precision outputPrecision = getCnnLayer()->outData[0]->getPrecision();
    outputPrecision = outputPrecision == Precision::BF16 ? Precision(Precision::FP32) : outputPrecision;

    if (!fusedWith.empty()) {
        auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer();

@ -119,22 +119,38 @@ void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc &srcDe
    // MKLDNN doesn't support direct reorders from planar data formats to grouped weights formats.
    // Code block below tries to detect such cases and reinterpret data planar formats (e.g. nchw)
    // as grouped weights planar formats (e.g. goihw) since they have same physical memory layout.
    if (MKLDNNMemory::GetPlainFormat(src_blocked->GetDims()) == src_blocked->GetFormat() && MKLDNNMemory::IsGroupedFormat(dst_blocked->GetFormat())) {
    if (MKLDNNMemory::GetPlainFormat(src_blocked->GetDims()) == src_blocked->GetFormat() &&
            MKLDNNMemory::IsGroupedFormat(dst_blocked->GetFormat())) {
        try {
            mkldnn::memory::dims newDims = dst_blocked->GetDims();
            mkldnn::memory::format newFormat = src_blocked->GetDims().size() == 4 ? memory::goihw :
                                               src_blocked->GetDims().size() == 5 ? memory::goidhw :
                                               src_blocked->GetFormat();
            mkldnn::memory::format newFormat;
            newFormat = src_blocked->GetDims().size() == 4 ? memory::goihw :
                        src_blocked->GetDims().size() == 5 ? memory::goidhw :
                        src_blocked->GetFormat();

            auto newDesc = mkldnn::memory::desc(newDims, src_blocked->GetDataType(), newFormat);
            src_blocked->Create(newDesc, srcPtr, false);

            createReorder();
        } catch (const std::exception&) {
        } catch (...) {
            THROW_IE_EXCEPTION << "Cannot create reorder primitive: unsupported reorder case";
        }
    // MKLDNN doesn't support direct reorders between planar data formats in case they have different rank but the same number of elements.
    // Code block below detects these cases and substitutes src dims with dst ones.
    } else if (MKLDNNMemory::GetPlainFormat(src_blocked->GetDims()) == src_blocked->GetFormat() &&
               MKLDNNMemory::GetPlainFormat(dst_blocked->GetDims()) == dst_blocked->GetFormat() &&
               src_blocked->GetElementsCount() == dst_blocked->GetElementsCount()) {
        try {
            auto newDesc = mkldnn::memory::desc(dst_blocked->GetDims(), src_blocked->GetDataType(), dst_blocked->GetFormat());
            src_blocked->Create(newDesc, srcPtr, false);

            createReorder();
        } catch (...) {
            THROW_IE_EXCEPTION << "Cannot create reorder primitive: unsupported reorder case";
        }
    } else {
        THROW_IE_EXCEPTION << "Cannot create reorder primitive: unsupported reorder case";
    }
    // TODO: shouldn't we throw exception in this case?
    }
}

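The reinterpretation above is legal because a planar 4-D weights tensor of shape [G*O, I, H, W] and its grouped 5-D view [G, O, I, H, W] address the same linear memory. A small index-math check of that claim (plain C++, no mkldnn):

    #include <cassert>
    #include <cstddef>

    int main() {
        const size_t G = 2, O = 3, I = 4, H = 5, W = 6;
        for (size_t g = 0; g < G; ++g)
            for (size_t o = 0; o < O; ++o)
                for (size_t i = 0; i < I; ++i)
                    for (size_t h = 0; h < H; ++h)
                        for (size_t w = 0; w < W; ++w) {
                            const size_t oc = g * O + o;  // flattened output channel of the 4-D view
                            const size_t plain4d   = ((oc * I + i) * H + h) * W + w;             // oihw with O' = G*O
                            const size_t grouped5d = ((((g * O) + o) * I + i) * H + h) * W + w;  // goihw
                            assert(plain4d == grouped5d);  // same linear offset, element for element
                        }
        return 0;
    }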
@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -242,7 +241,7 @@ private:
    bool sort_result_descending = true;
};

REG_FACTORY_FOR(ImplFactory<NonMaxSuppressionImpl>, NonMaxSuppression);
REG_FACTORY_FOR(NonMaxSuppressionImpl, NonMaxSuppression);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"
#include "ie_parallel.hpp"

@ -103,7 +102,7 @@ private:
    Precision input_precision;
};

REG_FACTORY_FOR(ImplFactory<OneHotImpl>, OneHot);
REG_FACTORY_FOR(OneHotImpl, OneHot);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -251,7 +250,7 @@ void PadImpl::pad_symmetric(const float *src_data, float* dst_data) {
    });
}

REG_FACTORY_FOR(ImplFactory<PadImpl>, Pad);
REG_FACTORY_FOR(PadImpl, Pad);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -57,7 +56,7 @@ private:
    std::vector<int> shift_;
};

REG_FACTORY_FOR(ImplFactory<PowerFileImpl>, PowerFile);
REG_FACTORY_FOR(PowerFileImpl, PowerFile);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <vector>

@ -338,7 +337,7 @@ private:
    int _num_priors = 0;
};

REG_FACTORY_FOR(ImplFactory<PriorBoxImpl>, PriorBox);
REG_FACTORY_FOR(PriorBoxImpl, PriorBox);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"
#include <algorithm>
#include <vector>

@ -117,7 +116,7 @@ private:
    float offset_;
};

REG_FACTORY_FOR(ImplFactory<PriorBoxClusteredImpl>, PriorBoxClustered);
REG_FACTORY_FOR(PriorBoxClusteredImpl, PriorBoxClustered);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"
#include <algorithm>
#include <cassert>

@ -90,7 +89,7 @@ private:
};


REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronPriorGridGeneratorImpl>, ExperimentalDetectronPriorGridGenerator);
REG_FACTORY_FOR(ExperimentalDetectronPriorGridGeneratorImpl, ExperimentalDetectronPriorGridGenerator);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include "proposal_imp.hpp"

@ -179,7 +178,7 @@ private:
    bool store_prob;  // store blob with proposal probabilities
};

REG_FACTORY_FOR(ImplFactory<ProposalImpl>, Proposal);
REG_FACTORY_FOR(ProposalImpl, Proposal);

} // namespace Cpu
} // namespace Extensions

@ -4,6 +4,7 @@

#include "proposal_imp.hpp"

#include <cstring>
#include <cmath>
#include <string>
#include <vector>

@ -137,7 +138,7 @@ static void nms_cpu(const int num_boxes, int is_dead[],
    const float* x1 = boxes + 2 * num_proposals;
    const float* y1 = boxes + 3 * num_proposals;

    memset(is_dead, 0, num_boxes * sizeof(int));
    std::memset(is_dead, 0, num_boxes * sizeof(int));

#if defined(HAVE_AVX2)
    __m256 vc_fone = _mm256_set1_ps(coordinates_offset);

@ -2,9 +2,9 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cstring>
#include <cassert>
#include <cmath>
#include <string>

@ -130,7 +130,7 @@ void nms_cpu(const int num_boxes, int is_dead[],
    const float* x1 = boxes + 2 * num_proposals;
    const float* y1 = boxes + 3 * num_proposals;

    memset(is_dead, 0, num_boxes * sizeof(int));
    std::memset(is_dead, 0, num_boxes * sizeof(int));

#if defined(HAVE_AVX2)
    __m256 vc_fone = _mm256_set1_ps(coordinates_offset);

@ -410,7 +410,7 @@ private:
    std::vector<int> roi_indices_;
};

REG_FACTORY_FOR(ImplFactory<ONNXCustomProposalImpl>, ExperimentalDetectronGenerateProposalsSingleImage);
REG_FACTORY_FOR(ONNXCustomProposalImpl, ExperimentalDetectronGenerateProposalsSingleImage);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"
#include <cmath>
#include <vector>

@ -303,7 +302,7 @@ private:
    float trans_std_;
};

REG_FACTORY_FOR(ImplFactory<PSROIPoolingImpl>, PSROIPooling);
REG_FACTORY_FOR(PSROIPoolingImpl, PSROIPooling);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -125,7 +124,7 @@ StatusCode RangeImpl::range(data_t start, data_t limit, data_t delta, Blob::Ptr
    });
    return OK;
}
REG_FACTORY_FOR(ImplFactory<RangeImpl>, Range);
REG_FACTORY_FOR(RangeImpl, Range);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -388,18 +387,18 @@ void ReduceImpl::reduce(
    }
}

REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceAnd);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceL1);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceL2);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceLogSum);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceLogSumExp);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceMax);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceMean);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceMin);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceOr);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceProd);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceSum);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceSumSquare);
REG_FACTORY_FOR(ReduceImpl, ReduceAnd);
REG_FACTORY_FOR(ReduceImpl, ReduceL1);
REG_FACTORY_FOR(ReduceImpl, ReduceL2);
REG_FACTORY_FOR(ReduceImpl, ReduceLogSum);
REG_FACTORY_FOR(ReduceImpl, ReduceLogSumExp);
REG_FACTORY_FOR(ReduceImpl, ReduceMax);
REG_FACTORY_FOR(ReduceImpl, ReduceMean);
REG_FACTORY_FOR(ReduceImpl, ReduceMin);
REG_FACTORY_FOR(ReduceImpl, ReduceOr);
REG_FACTORY_FOR(ReduceImpl, ReduceProd);
REG_FACTORY_FOR(ReduceImpl, ReduceSum);
REG_FACTORY_FOR(ReduceImpl, ReduceSumSquare);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"
#include "common/defs.h"
#include "common/softmax.h"

@ -304,7 +303,7 @@ private:
    }
};

REG_FACTORY_FOR(ImplFactory<RegionYoloImpl>, RegionYolo);
REG_FACTORY_FOR(RegionYoloImpl, RegionYolo);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"
#include <vector>

@ -64,7 +63,7 @@ private:
    int stride;
};

REG_FACTORY_FOR(ImplFactory<ReorgYoloImpl>, ReorgYolo);
REG_FACTORY_FOR(ReorgYoloImpl, ReorgYolo);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -172,7 +171,7 @@ private:
    size_t work_amount_dst;
};

REG_FACTORY_FOR(ImplFactory<ReverseSequenceImpl>, ReverseSequence);
REG_FACTORY_FOR(ReverseSequenceImpl, ReverseSequence);

} // namespace Cpu
} // namespace Extensions

@ -7,7 +7,6 @@

// https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp
//

#include "list.hpp"
#include "base.hpp"
#include <cassert>
#include <cmath>

@ -406,7 +405,7 @@ private:
    int nw = 0;
};

REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronROIFeatureExtractorImpl>, ExperimentalDetectronROIFeatureExtractor);
REG_FACTORY_FOR(ExperimentalDetectronROIFeatureExtractorImpl, ExperimentalDetectronROIFeatureExtractor);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -182,7 +181,7 @@ private:
    const size_t SCATTER_UPDATES = 2;
};

REG_FACTORY_FOR(ImplFactory<ScatterImpl>, ScatterUpdate);
REG_FACTORY_FOR(ScatterImpl, ScatterUpdate);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <string>

@ -221,7 +220,7 @@ private:
    }
};

REG_FACTORY_FOR(ImplFactory<SelectImpl>, Select);
REG_FACTORY_FOR(SelectImpl, Select);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -142,7 +141,7 @@ private:
    size_t ownStrides[CNTR_SIZE];
};

REG_FACTORY_FOR(ImplFactory<ShuffleChannelsImpl>, ShuffleChannels);
REG_FACTORY_FOR(ShuffleChannelsImpl, ShuffleChannels);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -332,7 +331,7 @@ private:
    std::vector<simpler_nms_anchor> anchors_;
};

REG_FACTORY_FOR(ImplFactory<SimplerNMSImpl>, SimplerNMS);
REG_FACTORY_FOR(SimplerNMSImpl, SimplerNMS);

} // namespace Cpu
} // namespace Extensions

@ -4,7 +4,6 @@

#include "base.hpp"
#include "ie_parallel.hpp"
#include "list.hpp"

#include <cmath>
#include <string>

@ -222,7 +221,7 @@ private:
    std::vector<size_t> _pads_end;
};

REG_FACTORY_FOR(ImplFactory<SpaceToBatchImpl>, SpaceToBatch);
REG_FACTORY_FOR(SpaceToBatchImpl, SpaceToBatch);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -119,7 +118,7 @@ private:
    size_t ownStrides[CNTR_SIZE];
};

REG_FACTORY_FOR(ImplFactory<SpaceToDepthImpl>, SpaceToDepth);
REG_FACTORY_FOR(SpaceToDepthImpl, SpaceToDepth);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -225,7 +224,7 @@ private:
    size_t outMaxNumValues = 0;
};

REG_FACTORY_FOR(ImplFactory<SparseFillEmptyRowsImpl>, SparseFillEmptyRows);
REG_FACTORY_FOR(SparseFillEmptyRowsImpl, SparseFillEmptyRows);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -202,9 +201,9 @@ private:
    ReducedOp reduction_op;
};

REG_FACTORY_FOR(ImplFactory<SparseSegmentReduceImpl>, SparseSegmentMean);
REG_FACTORY_FOR(ImplFactory<SparseSegmentReduceImpl>, SparseSegmentSqrtN);
REG_FACTORY_FOR(ImplFactory<SparseSegmentReduceImpl>, SparseSegmentSum);
REG_FACTORY_FOR(SparseSegmentReduceImpl, SparseSegmentMean);
REG_FACTORY_FOR(SparseSegmentReduceImpl, SparseSegmentSqrtN);
REG_FACTORY_FOR(SparseSegmentReduceImpl, SparseSegmentSum);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -152,7 +151,7 @@ private:
    bool with_default_value = false;
};

REG_FACTORY_FOR(ImplFactory<SparseToDenseImpl>, SparseToDense);
REG_FACTORY_FOR(SparseToDenseImpl, SparseToDense);

} // namespace Cpu
} // namespace Extensions

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>
@@ -226,7 +225,7 @@ private:
    Precision input_default_value_precision;
};

-REG_FACTORY_FOR(ImplFactory<ExperimentalSparseWeightedReduceImpl>, ExperimentalSparseWeightedSum);
+REG_FACTORY_FOR(ExperimentalSparseWeightedReduceImpl, ExperimentalSparseWeightedSum);

} // namespace Cpu
} // namespace Extensions

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>
@@ -57,7 +56,7 @@ public:
    }
};

-REG_FACTORY_FOR(ImplFactory<SqueezeImpl>, Squeeze);
+REG_FACTORY_FOR(SqueezeImpl, Squeeze);

} // namespace Cpu
} // namespace Extensions

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>
@@ -376,7 +375,7 @@ void StridedSliceImpl::strided_slice_p(const float *src_data, float* dst_data) {
    });
}

-REG_FACTORY_FOR(ImplFactory<StridedSliceImpl>, StridedSlice);
+REG_FACTORY_FOR(StridedSliceImpl, StridedSlice);

} // namespace Cpu
} // namespace Extensions

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>
@@ -578,7 +577,7 @@ private:
    }
};

-REG_FACTORY_FOR(ImplFactory<TopKImpl>, TopK);
+REG_FACTORY_FOR(TopKImpl, TopK);

} // namespace Cpu
} // namespace Extensions

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"
#include <algorithm>
#include <cassert>
@@ -71,7 +70,7 @@ private:
    int max_rois_num_;
};

-REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronTopKROIsImpl>, ExperimentalDetectronTopKROIs);
+REG_FACTORY_FOR(ExperimentalDetectronTopKROIsImpl, ExperimentalDetectronTopKROIs);

} // namespace Cpu
} // namespace Extensions

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>
@@ -199,7 +198,7 @@ private:
    size_t num_elements = 0;
};

-REG_FACTORY_FOR(ImplFactory<UniqueImpl>, Unique);
+REG_FACTORY_FOR(UniqueImpl, Unique);

} // namespace Cpu
} // namespace Extensions

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>
@@ -52,7 +51,7 @@ public:
    }
};

-REG_FACTORY_FOR(ImplFactory<UnsqueezeImpl>, Unsqueeze);
+REG_FACTORY_FOR(UnsqueezeImpl, Unsqueeze);

} // namespace Cpu
} // namespace Extensions

@@ -38,9 +38,9 @@ class INFERENCE_ENGINE_API_CLASS(ConvFusion);
class ngraph::pass::ConvFusion: public ngraph::pass::GraphRewrite {
public:
    ConvFusion() : GraphRewrite() {
-        fuse_convolution_with<op::ConvolutionIE, op::v1::Multiply>();
-        fuse_convolution_with<op::ConvolutionIE, op::v1::Add>();
-        fuse_convolution_with<op::DeconvolutionIE, op::v1::Add>();
+        fuse_convolution_with<op::ConvolutionIE, opset1::Multiply>();
+        fuse_convolution_with<op::ConvolutionIE, opset1::Add>();
+        fuse_convolution_with<op::DeconvolutionIE, opset1::Add>();
    }

private:
@@ -53,8 +53,8 @@ private:

template <class Conv, class Eltwise>
void ngraph::pass::ConvFusion::fuse_convolution_with() {
-    static_assert(std::is_same<Eltwise, ngraph::op::v1::Multiply>() || std::is_same<Eltwise, ngraph::op::v1::Add>(),
-                  "This transformation works only with ngraph::op::v1::Add and ngraph::op::v1::Multiply");
+    static_assert(std::is_same<Eltwise, ngraph::opset1::Multiply>() || std::is_same<Eltwise, ngraph::opset1::Add>(),
+                  "This transformation works only with ngraph::opset1::Add and ngraph::opset1::Multiply");

    static_assert(std::is_same<Conv, ngraph::op::ConvolutionIE>() || std::is_same<Conv, ngraph::op::DeconvolutionIE>(),
                  "This transformation works only with ngraph::op::ConvolutionIE and ngraph::op::DeconvolutionIE");
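Note: the rewritten static_asserts are type-for-type equivalent to the old ones, because `ngraph::opset1` re-exports the versioned op classes through using-declarations. A one-line check (assuming the standard `opset1.hpp` header of this ngraph era) makes that concrete:

```cpp
#include <type_traits>
#include <ngraph/opsets/opset1.hpp>

// opset1::Add is a using-declaration for op::v1::Add, so the two names denote
// the identical type; likewise for Multiply.
static_assert(std::is_same<ngraph::opset1::Add, ngraph::op::v1::Add>::value,
              "opset1::Add aliases op::v1::Add");
static_assert(std::is_same<ngraph::opset1::Multiply, ngraph::op::v1::Multiply>::value,
              "opset1::Multiply aliases op::v1::Multiply");
```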
@@ -85,56 +85,63 @@ ngraph::graph_rewrite_callback ngraph::pass::ConvFusion::get_callback() {
        }

        // TODO: check that constant can be scalar and do not match [1, C, 1, 1] layout
-        auto constant_shape = m_const->get_shape();
-        auto output_shape = m_conv->get_shape();
-        size_t constant_size = std::accumulate(constant_shape.begin(), constant_shape.end(), 1, std::multiplies<size_t>());
-        if (constant_size != output_shape[1]) {
+        const auto constant_shape = m_const->get_shape();
+        const auto output_pshape = m_conv->get_output_partial_shape(0);
+
+        if (output_pshape.rank().is_dynamic() || output_pshape[1].is_dynamic()) {
            return false;
        }

-        std::shared_ptr<ngraph::Node> constant(m_const);
+        const auto channel_dim = output_pshape[1].get_length();
+
+        size_t constant_size = std::accumulate(constant_shape.begin(), constant_shape.end(), 1, std::multiplies<size_t>());
+        if (constant_size != channel_dim) {
+            return false;
+        }
+
+        Output<Node> constant(m_const);

        if (constant_shape.size() > 1) {
-            constant = std::make_shared<op::v1::Reshape>(constant, op::Constant::create(element::i64, Shape{1}, {output_shape[1]}), true);
+            constant = std::make_shared<opset1::Reshape>(constant, op::Constant::create(element::i64, Shape{1}, {channel_dim}), true);
        }

        if (m_conv->output(0).get_target_inputs().size() != 1) {
            return false;
        }

-        std::shared_ptr<Node> new_conv, new_weights, new_bias;
-        if (std::dynamic_pointer_cast<op::v1::Add>(eltwise)) {
+        Output<Node> new_conv, new_weights, new_bias;
+        if (std::dynamic_pointer_cast<opset1::Add>(eltwise)) {
            // Fuse: ConvolutionIE/DeconvolutionIE->Add
            if (m_conv->inputs().size() == 2) {
                new_bias = constant;
            } else {
-                new_bias = std::make_shared<op::v1::Add>(constant, m_conv->input_value(2));
+                new_bias = std::make_shared<opset1::Add>(constant, m_conv->input_value(2));
            }
            new_conv = m_conv->clone_with_new_inputs({m_conv->input_value(0), m_conv->input_value(1), new_bias});
-        } else if (std::is_same<Conv, op::ConvolutionIE>() && std::dynamic_pointer_cast<op::v1::Multiply>(eltwise)) {
+        } else if (std::is_same<Conv, op::ConvolutionIE>() && std::dynamic_pointer_cast<opset1::Multiply>(eltwise)) {
            // Fuse: ConvolutionIE->Mul
            auto weights_shape = m_conv->input(1).get_shape();

            Shape const_shape(weights_shape.size(), 1);
            const_shape[0] = weights_shape[0];

-            auto const_reshape = std::make_shared<op::v1::Reshape>(constant,
+            auto const_reshape = std::make_shared<opset1::Reshape>(constant,
                    op::Constant::create(element::i64, Shape{const_shape.size()}, const_shape), true);
-            new_weights = std::make_shared<op::v1::Multiply> (m_conv->input_value(1), const_reshape);
+            new_weights = std::make_shared<opset1::Multiply> (m_conv->input_value(1), const_reshape);
            if (m_conv->inputs().size() == 2) {
                new_conv = m_conv->clone_with_new_inputs({m_conv->input_value(0), new_weights});
            } else {
-                auto bias_reshape = std::make_shared<op::v1::Reshape>(constant, op::Constant::create(element::i64, Shape{1}, {weights_shape[0]}), true);
-                new_bias = std::make_shared<op::v1::Multiply>(bias_reshape, constant);
+                auto bias_reshape = std::make_shared<opset1::Reshape>(constant, op::Constant::create(element::i64, Shape{1}, {weights_shape[0]}), true);
+                new_bias = std::make_shared<opset1::Multiply>(bias_reshape, constant);
                new_conv = m_conv->clone_with_new_inputs({m_conv->input_value(0), new_weights, new_bias});
            }
        } else {
            return false;
        }

-        ngraph::copy_runtime_info({m_conv, eltwise}, new_conv);
-        new_conv->set_friendly_name(m.get_match_root()->get_friendly_name());
-        ngraph::replace_node(m.get_match_root(), new_conv);
+        ngraph::copy_runtime_info({m_conv, eltwise}, new_conv.get_node_shared_ptr());
+        new_conv.get_node_shared_ptr()->set_friendly_name(m.get_match_root()->get_friendly_name());
+        ngraph::replace_node(m.get_match_root(), new_conv.get_node_shared_ptr());
        return true;
    };
    return callback;

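Note: the reworked callback swaps `get_shape()` for `get_output_partial_shape(0)` so the fusion can return `false` on dynamically shaped outputs instead of throwing. A small standalone sketch of that guard, using only the PartialShape calls that appear in the hunk:

```cpp
#include <cstdint>
#include <memory>
#include <ngraph/node.hpp>

// Returns true and fills channel_dim only when both the rank and the channel
// dimension of the node's first output are static; mirrors the early-return
// guard the updated callback performs before validating the constant size.
bool try_get_channel_dim(const std::shared_ptr<ngraph::Node>& node, int64_t& channel_dim) {
    const auto pshape = node->get_output_partial_shape(0);
    if (pshape.rank().is_dynamic() || pshape[1].is_dynamic())
        return false;  // shape not known at transformation time
    channel_dim = pshape[1].get_length();
    return true;
}
```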
@@ -17,5 +17,6 @@
NGRAPH_PASS(ConvertBroadcast3, ::ngraph::pass)
NGRAPH_PASS(ConvertNMS3, ::ngraph::pass)
NGRAPH_PASS(ConvertShapeOf3, ::ngraph::pass)
+NGRAPH_PASS(ConvertShuffleChannels3, ::ngraph::pass)
NGRAPH_PASS(ConvertTopK3, ::ngraph::pass)

@@ -0,0 +1,30 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+
+class INFERENCE_ENGINE_API_CLASS(ConvertShuffleChannels3);
+
+}  // namespace pass
+}  // namespace ngraph
+
+class ngraph::pass::ConvertShuffleChannels3: public ngraph::pass::GraphRewrite {
+public:
+    ConvertShuffleChannels3() : GraphRewrite() {
+        convert_shuffle_channels3();
+    }
+
+private:
+    void convert_shuffle_channels3();
+};
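Note: the new header only declares the pass; it is applied through the `NGRAPH_PASS` table extended earlier in this diff. For standalone use, pass-manager wiring along these lines would run it — the include path below is an assumption for illustration, not taken from the commit:

```cpp
#include <memory>
#include <ngraph/function.hpp>
#include <ngraph/pass/manager.hpp>
// Assumed location of the header introduced above:
#include <transformations/convert_opset3_to_opset2/convert_shuffle_channels3.hpp>

// Runs only the ShuffleChannels-v3 conversion on a function.
void convert_shuffle_channels(const std::shared_ptr<ngraph::Function>& f) {
    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::ConvertShuffleChannels3>();
    manager.run_passes(f);
}
```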
@@ -54,43 +54,11 @@ op::ConvolutionIE::ConvolutionIE(const Output<Node>& data_batch,
}

void op::ConvolutionIE::validate_and_infer_types() {
-    const PartialShape& data_batch_pshape = get_input_partial_shape(0);
+    PartialShape data_batch_shape = get_input_partial_shape(0);
    element::Type data_batch_et = get_input_element_type(0);
-    const PartialShape& filters_pshape = get_input_partial_shape(1);
+    PartialShape filters_shape = get_input_partial_shape(1);
    element::Type filters_et = get_input_element_type(1);

-    PartialShape result_shape{PartialShape::dynamic()};
-
-    // we need to adjust filters_shape to reuse helpers for normal convolution
-    if (filters_pshape.is_static() && data_batch_pshape.is_static()) {
-        auto filters_shape = filters_pshape.to_shape();
-        auto groups = m_group;
-        auto data_batch_shape = data_batch_pshape.to_shape();
-        data_batch_shape[1] /= groups;
-
-        if (m_auto_pad == PadType::SAME_UPPER || m_auto_pad == PadType::SAME_LOWER) {
-            m_pads_begin.clear();
-            m_pads_end.clear();
-            infer_auto_padding(
-                data_batch_shape,
-                Shape(filters_shape.begin() + 2, filters_shape.end()),  // Remove {O,I}
-                m_strides,
-                m_dilations,
-                m_auto_pad,
-                m_pads_end,
-                m_pads_begin);
-        }
-
-        result_shape =
-            infer_convolution_forward(this,
-                                      data_batch_shape,
-                                      Strides(m_strides.size(), 1),  // dummy data dilations
-                                      m_pads_begin,
-                                      m_pads_end,
-                                      filters_shape,
-                                      m_strides,
-                                      m_dilations);
-    }
    element::Type result_et;

    NODE_VALIDATION_CHECK(
@@ -102,6 +70,45 @@ void op::ConvolutionIE::validate_and_infer_types() {
        filters_et,
        ").");

+    PartialShape result_shape{PartialShape::dynamic()};
+
+    // In case if number of groups greater than 1 and channel dimension is dynamic we can't calculate output shape
+    if (m_group > 1) {
+        if (data_batch_shape.rank().is_dynamic() || data_batch_shape[1].is_dynamic()) {
+            set_output_type(0, result_et, result_shape);
+            return;
+        } else {
+            // Update channel dimension according to groups count
+            data_batch_shape[1] = data_batch_shape[1].get_length() / m_group;
+        }
+    }
+
+    // we need to adjust filters_shape to reuse helpers for normal convolution
+    if (filters_shape.is_static() && data_batch_shape.is_static()) {
+        if (m_auto_pad == PadType::SAME_UPPER || m_auto_pad == PadType::SAME_LOWER) {
+            m_pads_begin.clear();
+            m_pads_end.clear();
+            auto filter_shape = filters_shape.to_shape();
+            filter_shape.erase(filter_shape.begin(), filter_shape.begin() + 2);  // Remove {O,I}
+            infer_auto_padding(data_batch_shape.to_shape(),
+                               filter_shape,
+                               m_strides,
+                               m_dilations,
+                               m_auto_pad,
+                               m_pads_end,
+                               m_pads_begin);
+        }
+    }
+
+    result_shape = infer_convolution_forward(this,
+                                             data_batch_shape,
+                                             Strides(m_strides.size(), 1),  // dummy data dilations
+                                             m_pads_begin,
+                                             m_pads_end,
+                                             filters_shape,
+                                             m_strides,
+                                             m_dilations);
+
    set_output_type(0, result_et, result_shape);
}

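Note: the new group handling divides the channel dimension by the group count before reusing the single-group shape helper, and falls back to a fully dynamic output when that dimension is unknown. A toy rendition of just the channel adjustment, assuming only the public PartialShape API:

```cpp
#include <cstdint>
#include <ngraph/partial_shape.hpp>

// For a grouped convolution each group convolves over C / group input
// channels, so the shape handed to the ordinary convolution helper gets a
// reduced channel dim, e.g. {1, 8, 32, 32} with group = 2 -> {1, 4, 32, 32}.
ngraph::PartialShape grouped_input_shape(ngraph::PartialShape shape, size_t group) {
    if (group > 1 && shape.rank().is_static() && shape[1].is_static())
        shape[1] = shape[1].get_length() / static_cast<int64_t>(group);
    return shape;  // left untouched when rank or channel dim is dynamic
}
```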
@@ -47,10 +47,16 @@ void ngraph::pass::ConvertInterpolateToInterpOrResample::convert_interpolate_to_
    interpolate_attrs.pads_end = std::vector<size_t>{0};

    std::vector<size_t> useless_axes;
-    for (const auto & axis : interpolate_axes)
-        if (input_shape[axis] == out_spatial_shape[axis] && axis < 2)
-            // keeping only those not spatial dimensions that are going to be changed
-            useless_axes.push_back(axis);
+    size_t axis_idx = 0;
+    for (auto axis = 0; axis < input_shape.size(); ++axis) {
+        if (interpolate_axes.count(axis)) {
+            if (input_shape[axis] == out_spatial_shape[axis_idx] && axis < 2)
+                // keeping only those not spatial dimensions that are going to be changed
+                useless_axes.push_back(axis);
+            ++axis_idx;
+        }
+    }

    std::reverse(useless_axes.begin(), useless_axes.end());
    for (const auto & axis : useless_axes) {
        interpolate_axes.erase(axis);
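Note: this hunk fixes an indexing mismatch — `out_spatial_shape` holds one entry per axis listed in `interpolate_axes`, so it must be indexed by the axis's position in that list, not by the axis value itself. A standalone rendition of the corrected loop (container types are assumptions matching the snippet's usage):

```cpp
#include <cstddef>
#include <set>
#include <vector>

// Returns the axes whose extent actually changes. Before the fix the code
// read out_spatial_shape[axis] (e.g. index 3 into a two-element vector for
// axes {2, 3}); after it, the k-th listed axis maps to entry k.
std::vector<size_t> changed_axes(const std::vector<size_t>& input_shape,
                                 const std::set<size_t>& interpolate_axes,
                                 const std::vector<size_t>& out_spatial_shape) {
    std::vector<size_t> changed;
    size_t axis_idx = 0;  // position of the current axis within out_spatial_shape
    for (size_t axis = 0; axis < input_shape.size(); ++axis) {
        if (interpolate_axes.count(axis)) {
            if (input_shape[axis] != out_spatial_shape[axis_idx])
                changed.push_back(axis);
            ++axis_idx;
        }
    }
    return changed;
}
```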
Some files were not shown because too many files have changed in this diff.