[CPU] Plugin migration on ngraph (#4344)

Gorokhov Dmitriy 2021-05-06 19:49:24 +03:00 committed by GitHub
parent 2bb8e9facc
commit a19413c0c0
411 changed files with 14378 additions and 51599 deletions

View File

@ -54,21 +54,16 @@ if(SELECTIVE_BUILD STREQUAL "ON")
endif()
endif()
target_link_libraries(${TARGET_NAME} PRIVATE mkldnn inference_engine inference_engine_legacy
inference_engine_transformations inference_engine_lp_transformations)
target_link_libraries(${TARGET_NAME} PRIVATE mkldnn
inference_engine
inference_engine_transformations
inference_engine_lp_transformations)
target_include_directories(${TARGET_NAME} PRIVATE
$<TARGET_PROPERTY:mkldnn,INCLUDE_DIRECTORIES>)
# Cross compiled function
# TODO: The same for proposal, proposalONNX, topk
cross_compiled_file(${TARGET_NAME}
ARCH AVX512F AVX2 SSE42 ANY
nodes/argmax_imp.cpp
API nodes/argmax_imp.hpp
NAME arg_max_execute
NAMESPACE InferenceEngine::Extensions::Cpu::XARCH
)
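# Note (descriptive, not from this diff): cross_compiled_file() builds the listed source
# once per ISA named under ARCH and dispatches arg_max_execute (declared in the API header)
# to the best variant available on the target CPU at run time.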
cross_compiled_file(${TARGET_NAME}
ARCH AVX2 ANY
nodes/proposal_imp.cpp
@ -85,7 +80,6 @@ add_library(${TARGET_NAME}_obj OBJECT ${SOURCES} ${HEADERS})
target_link_libraries(${TARGET_NAME}_obj PUBLIC mkldnn)
target_include_directories(${TARGET_NAME}_obj PRIVATE $<TARGET_PROPERTY:inference_engine_preproc_s,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:inference_engine_legacy,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:openvino::itt,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:inference_engine_lp_transformations,INTERFACE_INCLUDE_DIRECTORIES>

View File

@ -0,0 +1,124 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
namespace MKLDNNPlugin {
enum Algorithm {
Undefined,
// Pooling algorithms
PoolingMax,
PoolingAvg,
// Convolution algorithms
ConvolutionCommon,
ConvolutionGrouped,
// Deconvolution algorithms
DeconvolutionCommon,
DeconvolutionGrouped,
// Elementwise algorithms
EltwiseAdd,
EltwiseMultiply,
EltwiseSubtract,
EltwiseDivide,
EltwiseFloorMod,
EltwiseMod,
EltwiseMaximum,
EltwiseMinimum,
EltwiseSquaredDifference,
EltwisePowerDynamic,
EltwisePowerStatic,
EltwiseMulAdd,
EltwiseEqual,
EltwiseNotEqual,
EltwiseGreater,
EltwiseGreaterEqual,
EltwiseLess,
EltwiseLessEqual,
EltwiseLogicalAnd,
EltwiseLogicalOr,
EltwiseLogicalXor,
EltwiseLogicalNot,
EltwiseRelu,
EltwiseGelu,
EltwiseElu,
EltwiseTanh,
EltwiseSigmoid,
EltwiseAbs,
EltwiseSqrt,
EltwiseSoftRelu,
EltwiseExp,
EltwiseClamp,
EltwiseSwish,
EltwisePrelu,
EltwiseMish,
EltwiseHswish,
EltwiseHsigmoid,
EltwiseRoundHalfToEven,
EltwiseRoundHalfAwayFromZero,
EltwiseErf,
// FakeQuantize algorithms
FQCommon,
FQQuantization,
FQBinarization,
// ROIPooling algorithms
ROIPoolingMax,
ROIPoolingBilinear,
// ROIAlign algorithms
ROIAlignMax,
ROIAlignAvg,
// PSROIPooling algorithms
PSROIPoolingAverage,
PSROIPoolingBilinear,
PSROIPoolingBilinearDeformable,
// Reduce algorithms
ReduceL1,
ReduceL2,
ReduceAnd,
ReduceOr,
ReduceMax,
ReduceMean,
ReduceMin,
ReduceProd,
ReduceSum,
ReduceLogSum,
ReduceLogSumExp,
ReduceSumSquare,
// Math algorithms
MathAbs,
MathAcos,
MathAcosh,
MathAsin,
MathAsinh,
MathAtan,
MathAtanh,
MathCeiling,
MathCos,
MathCosh,
MathErf,
MathFloor,
MathHardSigmoid,
MathLog,
MathNegative,
MathReciprocal,
MathSelu,
MathSign,
MathSin,
MathSinh,
MathSoftPlus,
MathSoftsign,
MathTan
};
} // namespace MKLDNNPlugin
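// A minimal usage sketch (illustration only, not part of this header): a node could map
// its ngraph operation onto one of the Algorithm values above, e.g. for pooling.
// The helper name below is hypothetical.
#include <ngraph/opsets/opset1.hpp>
#include <memory>

namespace MKLDNNPlugin {
inline Algorithm poolingAlgorithmFor(const std::shared_ptr<const ngraph::Node>& op) {
    if (ngraph::is_type<ngraph::opset1::MaxPool>(op))
        return PoolingMax;
    if (ngraph::is_type<ngraph::opset1::AvgPool>(op))
        return PoolingAvg;
    return Undefined;
}
}  // namespace MKLDNNPlugin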

View File

@ -4,9 +4,8 @@
#include "jit_eltwise_emitters.hpp"
#include <cpu/x64/jit_uni_eltwise.hpp>
#include "legacy/ie_layers.h"
#include <ngraph/opsets/opset1.hpp>
#include <nodes/mkldnn_eltwise_node.h>
using namespace InferenceEngine;
using namespace mkldnn::impl::utils;
@ -1305,15 +1304,16 @@ jit_power_static_emitter::jit_power_static_emitter(jit_generator *host, cpu_isa_
prepare_table();
}
jit_power_static_emitter::jit_power_static_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
: jit_emitter(host, host_isa, node, exec_prc) {
auto *powerLayer = dynamic_cast<InferenceEngine::PowerLayer *>(node->getCnnLayer().get());
if (powerLayer == nullptr)
IE_THROW() << "Cannot convert power layer.";
power = powerLayer->power;
scale = powerLayer->scale;
shift = powerLayer->offset;
const MKLDNNEltwiseNode *powerNode = dynamic_cast<const MKLDNNEltwiseNode *>(node);
if (powerNode == nullptr) {
IE_THROW() << "Can't cast to MKLDNNEltwiseNode";
}
power = powerNode->getAlpha();
scale = powerNode->getBeta();
shift = powerNode->getGamma();
prepare_table();
}

View File

@ -4,7 +4,6 @@
#include "jit_emitter.hpp"
#include "jit_load_store_emitters.hpp"
#include "legacy/ie_layers.h"
#include <cpu/x64/jit_generator.hpp>
#include "utils/bfloat16.hpp"

View File

@ -25,7 +25,7 @@ jit_mkldnn_emitter::jit_mkldnn_emitter(jit_generator *host, cpu_isa_t host_isa,
jit_mkldnn_emitter::jit_mkldnn_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, InferenceEngine::Precision exec_prc)
: jit_emitter(host, host_isa, node, exec_prc) {
auto eltwiseNode = dynamic_cast<const MKLDNNEltwiseNode*>(node);
kind = static_cast<mkldnn_alg_kind_t>(eltwiseNode->getAlgorithm());
kind = static_cast<mkldnn_alg_kind_t>(eltwiseNode->getMKLDNNAlgorithm());
alpha = eltwiseNode->getAlpha();
beta = eltwiseNode->getBeta();
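// Note: after the migration, getAlgorithm() returns the plugin-level Algorithm enum
// (see the new header above), while getMKLDNNAlgorithm() returns the mkldnn::algorithm
// used to create the primitive, which is what the emitter needs for the cast above.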

View File

@ -23,18 +23,6 @@ size_t MKLDNNDescriptor::outputNumbers() const {
return 1;
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::batch_normalization_forward::desc> desc) {
this->desc.reset(new DescFwdImpl<mkldnn::batch_normalization_forward::desc>(desc));
}
MKLDNNDescriptor::operator std::shared_ptr<mkldnn::batch_normalization_forward::desc>() {
auto typeDesc = std::dynamic_pointer_cast<DescFwdImpl<mkldnn::batch_normalization_forward::desc>>(desc);
if (typeDesc == nullptr) {
IE_THROW() << "Cannot cast descriptor!";
}
return typeDesc->getPtr();
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::convolution_forward::desc> desc) {
this->desc.reset(new DescFwdImpl<mkldnn::convolution_forward::desc>(desc));
}

View File

@ -10,9 +10,6 @@
class MKLDNNDescriptor {
public:
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::batch_normalization_forward::desc> desc);
operator std::shared_ptr<mkldnn::batch_normalization_forward::desc>();
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::convolution_forward::desc> desc);
operator std::shared_ptr<mkldnn::convolution_forward::desc>();

View File

@ -6,6 +6,7 @@
#include "mkldnn_node.h"
#include "mkldnn_extension_utils.h"
#include <blob_factory.hpp>
#include "utils/cpu_utils.hpp"
using namespace mkldnn;
namespace MKLDNNPlugin {
@ -603,7 +604,7 @@ InferenceEngine::Blob::Ptr MKLDNNEdge::getBlob() {
else
desc = InferenceEngine::TensorDesc(desc.getPrecision(), dims.ToSizeVector(), desc.getBlockingDesc());
return make_blob_with_precision(desc, memoryPtr->GetData());
return isEmptyTensorDesc(desc) ? make_blob_with_precision(desc) : make_blob_with_precision(desc, memoryPtr->GetData());
}
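// A plausible sketch of the isEmptyTensorDesc() helper pulled in from "utils/cpu_utils.hpp"
// (an assumption for illustration, not quoted from the diff): a descriptor counts as empty
// when any of its dimensions is zero, in which case no memory pointer is attached to the blob.
#include <algorithm>
inline bool isEmptyTensorDesc(const InferenceEngine::TensorDesc& td) {
    const auto& dims = td.getDims();
    return std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; });
}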
void MKLDNNEdge::sharedMemFrom(const MKLDNNEdgePtr &edge) {

View File

@ -4,7 +4,6 @@
#include <ie_metric_helpers.hpp>
#include <precision_utils.h>
#include <legacy/net_pass.h>
#include "mkldnn_exec_network.h"
#include "mkldnn_async_infer_request.h"
@ -12,8 +11,6 @@
#include "mkldnn_memory_state.h"
#include "mkldnn_itt.h"
#include "nodes/mkldnn_memory_node.hpp"
#include <legacy/ie_util_internal.hpp>
#include <legacy/graph_tools.hpp>
#include <threading/ie_executor_manager.hpp>
#include <threading/ie_cpu_streams_executor.hpp>
@ -23,7 +20,8 @@
#include <unordered_set>
#include <utility>
#include <cstring>
#include <legacy/details/ie_cnn_network_tools.h>
#include <ngraph/opsets/opset1.hpp>
#include <transformations/utils/utils.hpp>
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
@ -43,189 +41,17 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
extensionManager(extMgr),
_cfg{cfg},
_name{network.getName()},
_numaNodesWeights(numaNodesWeights) {
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "MKLDNNExecNetwork", "cloneNet");
// we clone the network if we have statistics and we can transform the network.
_clonedNetwork = cloneNetwork(network);
bool isFloatModel = true;
if (_cfg.lpTransformsMode == Config::LPTransformsMode::On) {
// Check if network is INT8 or Binary.
CNNNetworkIterator iter(network);
while (iter != CNNNetworkIterator()) {
if (CaselessEq<std::string>()((*iter)->type, "FakeQuantize")) {
isFloatModel = false;
break;
}
iter++;
}
auto changePrecisionBF16 = [&](Precision current, Precision target) {
InputsDataMap inputs = _clonedNetwork.getInputsInfo();
OutputsDataMap outputs = _clonedNetwork.getOutputsInfo();
CNNNetworkIterator iter(_clonedNetwork);
while (iter != CNNNetworkIterator()) {
// check, if memory output node needs to be transformed
if (current == Precision::FP32 &&
(*iter)->type == "Memory" && (*iter)->outData.size() == 0 &&
(*iter)->insData[0].lock()->getPrecision() == current) {
(*iter)->insData[0].lock()->setPrecision(target);
}
for (size_t o = 0; o < (*iter)->outData.size(); o++) {
if (inputs.find((*iter)->outData[o]->getName()) == inputs.end()
&& outputs.find((*iter)->outData[o]->getName()) == outputs.end()
&& !CaselessEq<std::string>()((*iter)->type, "const")
&& (*iter)->outData[o]->getPrecision() == current) {
(*iter)->outData[o]->setPrecision(target);
}
}
iter++;
}
};
if (with_cpu_x86_avx512_core()) {
// If the enforceBF16 flag was set, the BF16 transformation applies to all layers supported by the CPU plugin.
// Otherwise, only layers marked as BF16 in '_clonedNetwork' will be executed in bfloat16 mode.
// The CPU plugin throws an exception if layers marked as BF16 are not supported by the CPU plugin.
// BF16 + INT8 or BF16 + BIN models will be executed in mixed precision only if the
// enforceBF16 flag was set manually
if (isFloatModel == false) {
if (cfg.manualEnforceBF16 == true)
changePrecisionBF16(Precision::FP32, Precision::BF16);
} else if (cfg.enforceBF16 == true) {
changePrecisionBF16(Precision::FP32, Precision::BF16);
}
} else {
changePrecisionBF16(Precision::BF16, Precision::FP32);
}
_numaNodesWeights(numaNodesWeights),
_network(network) {
auto function = network.getFunction();
if (function == nullptr) {
IE_THROW() << "CPU plug-in doesn't support not ngraph-based model!";
}
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "createConstInputs");
auto createConstInputTo = [&](CNNLayerPtr layer, Blob::Ptr blob, const std::vector<size_t>& shape, const std::string& name) {
LayerParams attrs = {layer->name + "_const_" + name, "Const", blob->getTensorDesc().getPrecision()};
auto constLayer = std::make_shared<InferenceEngine::CNNLayer>(attrs);
constLayer->blobs["custom"] = blob;
const TensorDesc& td = {blob->getTensorDesc().getPrecision(), shape, TensorDesc::getLayoutByDims(shape)};
DataPtr newEdgeAfterLayer(new Data(constLayer->name, td));
newEdgeAfterLayer->setName(constLayer->name);
getCreatorLayer(newEdgeAfterLayer) = constLayer;
getInputTo(newEdgeAfterLayer).clear();
IE_SUPPRESS_DEPRECATED_START
auto icnnnet = static_cast<ICNNNetwork::Ptr>(_clonedNetwork);
IE_SUPPRESS_DEPRECATED_END
auto implNetwork = std::dynamic_pointer_cast<details::CNNNetworkImpl>(icnnnet);
IE_ASSERT(implNetwork != nullptr);
implNetwork->addData(constLayer->name.c_str(), newEdgeAfterLayer);
implNetwork->addLayer(constLayer);
constLayer->outData.push_back(newEdgeAfterLayer);
getInputTo(newEdgeAfterLayer)[layer->name] = layer;
layer->insData.push_back(newEdgeAfterLayer);
};
// The code block below transforms legacy layers to the form more compatible with opset1 in order to simplify future migration
// TODO: remove after plug-in is migrated on opset1
auto all_layers = details::CNNNetSortTopologically(_clonedNetwork);
for (auto &layer : all_layers) {
if (layer->type == "ScaleShift" && layer->insData.size() == 1) {
auto constDimsRank = layer->insData[0].lock()->getDims().size();
Blob::Ptr scalesBlob = layer->blobs["weights"];
if (scalesBlob != nullptr) {
std::vector<size_t> shape(constDimsRank, 1);
shape[shape.size() > 1 ? 1 : 0] = scalesBlob->size();
createConstInputTo(layer, scalesBlob, shape, "weights");
}
Blob::Ptr shiftBlob = layer->blobs["biases"];
if (shiftBlob != nullptr) {
std::vector<size_t> shape(constDimsRank, 1);
shape[shape.size() > 1 ? 1 : 0] = shiftBlob->size();
createConstInputTo(layer, shiftBlob, shape, "biases");
} else if (scalesBlob != nullptr) {
Blob::Ptr biases = make_shared_blob<float>(scalesBlob->getTensorDesc());
if (biases == nullptr)
IE_THROW() << "Cannot make 'biases' shared blob";
biases->allocate();
auto biasesPtr = biases->buffer().as<float*>();
for (size_t i = 0; i < biases->size(); i++)
biasesPtr[i] = 0;
std::vector<size_t> shape(constDimsRank, 1);
shape[shape.size() > 1 ? 1 : 0] = biases->size();
createConstInputTo(layer, biases, shape, "biases");
}
} else if (layer->type == "PReLU" && layer->insData.size() == 1) {
Blob::Ptr scalesBlob = layer->blobs["weights"];
if (scalesBlob != nullptr) {
std::vector<size_t> shape(layer->insData[0].lock()->getDims().size(), 1);
shape[shape.size() > 1 ? 1 : 0] = scalesBlob->size();
createConstInputTo(layer, scalesBlob, shape, "weights");
}
} else if (layer->type == "DeformableConvolution") {
auto * defConvLayer = dynamic_cast<DeformableConvolutionLayer*>(layer.get());
if (defConvLayer == nullptr)
IE_THROW() << "Cannot convert deformable convolution layer.";
Blob::Ptr weightsBlob = defConvLayer->blobs["weights"];
if (weightsBlob != nullptr) {
std::vector<size_t> shape;
if (defConvLayer->_group != 1) {
shape.push_back(defConvLayer->_group);
}
shape.push_back(defConvLayer->_out_depth);
shape.push_back(defConvLayer->input()->getDims()[1]);
for (int i = 1; i <= defConvLayer->_kernel.size(); i++) {
shape.push_back(defConvLayer->_kernel[defConvLayer->_kernel.size() - i]);
}
createConstInputTo(layer, weightsBlob, shape, "weights");
defConvLayer->blobs.clear();
defConvLayer->_weights = nullptr;
}
} else if (layer->type == "BinaryConvolution") {
auto * binConvLayer = dynamic_cast<BinaryConvolutionLayer*>(layer.get());
if (binConvLayer == nullptr)
IE_THROW() << "Cannot convert binary convolution layer.";
Blob::Ptr weightsBlob = binConvLayer->blobs["weights"];
if (weightsBlob != nullptr) {
std::vector<size_t> shape;
if (binConvLayer->_group != 1) {
shape.push_back(binConvLayer->_group);
}
shape.push_back(binConvLayer->_out_depth);
shape.push_back(binConvLayer->input()->getDims()[1]);
for (int i = 1; i <= binConvLayer->_kernel.size(); i++) {
shape.push_back(binConvLayer->_kernel[binConvLayer->_kernel.size() - i]);
}
createConstInputTo(layer, weightsBlob, shape, "weights");
binConvLayer->blobs.clear();
binConvLayer->_weights = nullptr;
}
}
}
OV_ITT_TASK_SKIP(taskChain);
bool isFloatModel = !ngraph::op::util::has_op_with_type<ngraph::op::FakeQuantize>(function);
if (_cfg.batchLimit > 1) {
// check topology for applicability
if (!CanProcessDynBatch(_clonedNetwork)) {
if (!CanProcessDynBatch(_network)) {
IE_THROW() << "MKLDNNGraph::CreateGraph: such topology cannot be compiled for dynamic batch!";
}
}
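// For reference, has_op_with_type<T>() from <transformations/utils/utils.hpp> behaves
// roughly like the sketch below (paraphrased, not quoted from the library): it walks
// the function's ops and reports whether any of them is of the requested type.
template <typename T>
bool has_op_with_type(const std::shared_ptr<const ngraph::Function>& function) {
    for (const auto& op : function->get_ops()) {
        if (std::dynamic_pointer_cast<T>(op))
            return true;
    }
    return false;
}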
@ -293,12 +119,11 @@ MKLDNNExecNetwork::Graph::Lock MKLDNNExecNetwork::GetGraph() {
std::exception_ptr exception;
auto makeGraph = [&] {
try {
auto localNetwork = cloneNetwork(_clonedNetwork);
{
std::lock_guard<std::mutex> lock{_cfgMutex};
graphLock._graph.setConfig(_cfg);
}
graphLock._graph.CreateGraph(localNetwork, extensionManager, _numaNodesWeights[numaNodeId]);
graphLock._graph.CreateGraph(_network, extensionManager, _numaNodesWeights[numaNodeId]);
} catch(...) {
exception = std::current_exception();
}
@ -386,53 +211,48 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name)
bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::CNNNetwork &network) const {
InputsDataMap inputs = network.getInputsInfo();
CNNLayerSet inputLayers;
std::unordered_set<CNNLayer *> allLayers;
if (inputs.empty())
return false;
auto & secondLayers = getInputTo(inputs.begin()->second->getInputData());
if (secondLayers.empty())
return false;
auto function = network.getFunction();
if (function == nullptr) {
IE_THROW() << "CPU plug-in doesn't support not ngraph-based model!";
}
bool check_result = true;
details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) {
auto type = TypeFromName(layer->type);
// This is WA for Tile layer
auto tileLayer = dynamic_cast<TileLayer *>(layer.get());
if (tileLayer && tileLayer->axis)
return;
auto ops = function->get_ordered_ops();
for (auto op : ops) {
auto type = TypeFromName(op->get_type_name());
if (type == Tile) {
const auto tile = std::dynamic_pointer_cast<const ngraph::opset1::Tile>(op);
const auto repeatsNode = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(tile->get_input_node_shared_ptr(1));
if (!repeatsNode)
return false;
if (tile && repeatsNode->cast_vector<int64_t>()[0] == 1)
continue;
}
auto reshapeLayer = dynamic_cast<ReshapeLayer *>(layer.get());
if (reshapeLayer &&
type == Reshape &&
(reshapeLayer->outData[0]->getTensorDesc().getDims()[0] ==
reshapeLayer->insData[0].lock()->getTensorDesc().getDims()[0])) {
return;
if (type == Reshape) {
if (op->get_input_shape(0)[0] == op->get_output_shape(0)[0])
continue;
}
if (type != Input &&
type != Output &&
type != Convolution &&
type != Deconvolution &&
type != Activation &&
type != Depthwise &&
type != Lrn &&
type != Pooling &&
type != FullyConnected &&
type != Gemm &&
type != SoftMax &&
type != MatMul &&
type != Softmax &&
type != Split &&
type != Concatenation &&
type != Eltwise &&
type != BatchNormalization &&
type != Copy) {
check_result = false;
type != Eltwise) {
return false;
}
}, false);
}
return check_result;
return true;
}
IE_SUPPRESS_DEPRECATED_START

View File

@ -14,7 +14,6 @@
#include <memory>
#include <map>
#include <string>
#include <legacy/cnn_network_impl.hpp>
#include <unordered_map>
namespace MKLDNNPlugin {
@ -49,7 +48,7 @@ protected:
friend class MKLDNNInferRequest;
MKLDNNExtensionManager::Ptr extensionManager;
std::vector<InferenceEngine::IVariableStateInternal::Ptr> memoryStates;
InferenceEngine::CNNNetwork _clonedNetwork;
const InferenceEngine::CNNNetwork _network;
std::mutex _cfgMutex;
Config _cfg;
std::atomic_int _numRequests = {0};

View File

@ -31,17 +31,14 @@ InferenceEngine::ILayerImpl::Ptr MKLDNNExtensionManager::CreateImplementation(co
return nullptr;
}
std::shared_ptr<InferenceEngine::ILayerImplFactory> MKLDNNExtensionManager::CreateExtensionFactory(
const InferenceEngine::CNNLayerPtr &layer) {
if (!layer)
IE_THROW() << "Cannot get cnn layer!";
std::shared_ptr<InferenceEngine::ILayerImplFactory> MKLDNNExtensionManager::CreateExtensionFactory(const std::shared_ptr<ngraph::Node>& op) {
std::shared_ptr<ILayerImplFactory> factory;
for (auto& ext : _extensions) {
ResponseDesc responseDesc;
StatusCode rc = GENERAL_ERROR;
ILayerImplFactory* factory_ptr = nullptr;
if (auto mkldnnExt = std::dynamic_pointer_cast<Extensions::Cpu::MKLDNNExtensions>(ext))
rc = mkldnnExt->getFactoryFor(factory_ptr, layer.get(), &responseDesc);
rc = mkldnnExt->getFactoryFor(factory_ptr, op, &responseDesc);
if (rc != OK) {
factory = nullptr;
continue;

View File

@ -8,7 +8,6 @@
#include <vector>
#include <memory>
#include <ie_iextension.h>
#include <legacy/ie_layers.h>
#include "nodes/list.hpp"
namespace MKLDNNPlugin {
@ -18,7 +17,7 @@ public:
using Ptr = std::shared_ptr<MKLDNNExtensionManager>;
MKLDNNExtensionManager() = default;
InferenceEngine::ILayerImpl::Ptr CreateImplementation(const std::shared_ptr<ngraph::Node>& op);
std::shared_ptr<InferenceEngine::ILayerImplFactory> CreateExtensionFactory(const InferenceEngine::CNNLayerPtr& Layer);
std::shared_ptr<InferenceEngine::ILayerImplFactory> CreateExtensionFactory(const std::shared_ptr<ngraph::Node>& op);
void AddExtension(InferenceEngine::IExtensionPtr extension);
private:

View File

@ -26,11 +26,9 @@
#include <nodes/mkldnn_reorder_node.h>
#include <nodes/mkldnn_convert_node.h>
#include <legacy/graph_tools.hpp>
#include <ie_algorithm.hpp>
#include <blob_factory.hpp>
#include <legacy/net_pass.h>
#include <legacy/details/ie_cnn_network_tools.h>
#include "nodes/common/cpu_memcpy.h"
#include "nodes/common/cpu_convert.h"
#include "precision_utils.h"
@ -39,6 +37,14 @@
#include "utils/general_utils.h"
#include "utils/debug_capabilities.h"
#include "utils/node_dumper.h"
#include "utils/ngraph_utils.hpp"
#include "utils/cpu_utils.hpp"
#include <ngraph/node.hpp>
#include <ngraph/function.hpp>
#include <ngraph/variant.hpp>
#include <ngraph/ops.hpp>
#include <transformations/utils/utils.hpp>
/*****************************************************
* Debug capability
@ -60,31 +66,7 @@ typedef std::vector<edge_cluster_t> edge_clusters_t;
mkldnn::engine MKLDNNGraph::eng(mkldnn::engine::kind::cpu, 0);
template<typename NET>
void MKLDNNGraph::ApplyUnrollPasses(NET &net) {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::ApplyUnrollPasses");
NetPass::CombineRNNSeq(net);
bool ti_proc_ok = NetPass::UnrollRNN_if(net, [] (const RNNCellBase &rnn) -> bool {
if (rnn.clip != 0.0f)
return true;
if ((rnn.cellType == RNNCellBase::GRU || rnn.cellType == RNNCellBase::GRU_LBR) &&
rnn.activations != std::vector<std::string> {"sigmoid", "tanh"})
return true;
if (rnn.cellType == RNNCellBase::LSTM &&
rnn.activations != std::vector<std::string> {"sigmoid", "tanh", "tanh"})
return true;
return false;
});
if (!ti_proc_ok)
IE_THROW() << "Plugin doesn't support Tensor Iterator in pure form. "
"None TI optimization pattern has been applied successfully";
}
template void MKLDNNGraph::ApplyUnrollPasses(TensorIterator::Body&);
template void MKLDNNGraph::ApplyUnrollPasses(CNNNetwork&);
template<typename NET>
void MKLDNNGraph::CreateGraph(const NET &net, const MKLDNNExtensionManager::Ptr& extMgr,
void MKLDNNGraph::CreateGraph(NET &net, const MKLDNNExtensionManager::Ptr& extMgr,
MKLDNNWeightsSharing::Ptr &w_cache) {
OV_ITT_SCOPE(FIRST_INFERENCE, MKLDNNPlugin::itt::domains::MKLDNN_LT, "CreateGraph");
@ -98,233 +80,252 @@ void MKLDNNGraph::CreateGraph(const NET &net, const MKLDNNExtensionManager::Ptr&
status = Ready;
}
template void MKLDNNGraph::CreateGraph(const TensorIterator::Body&,
template void MKLDNNGraph::CreateGraph(const std::shared_ptr<const ngraph::Function>&,
const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&);
template void MKLDNNGraph::CreateGraph(const CNNNetwork&,
const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&);
void MKLDNNGraph::Replicate(const TensorIterator::Body &subgraph, const MKLDNNExtensionManager::Ptr& extMgr) {
void MKLDNNGraph::Replicate(const std::shared_ptr<const ngraph::Function> &subgraph, const MKLDNNExtensionManager::Ptr& extMgr) {
this->_name = "subgraph";
this->reuse_io_tensors = false;
// Map data object onto producer layer(node)
std::unordered_map<Data*, std::pair<MKLDNNNodePtr, int>> data2node;
// Map data object onto producer node
std::map<std::shared_ptr<ngraph::Node>, std::pair<MKLDNNNodePtr, int>> op2node;
// nodes which have no consumers (output or just unused) but aren't marked as graph outputs.
// They will be stored as fake outputs separately.
std::unordered_set<DataPtr> unused_data;
std::deque<ngraph::Output<ngraph::Node>> unusedOutputs;
// Step 1. Replicate input nodes
for (const auto &input : subgraph.inputs) {
if (input->getPrecision() == Precision::UNSPECIFIED) continue; // const node holder
auto creator = getCreatorLayer(input).lock();
if (creator == nullptr) {
creator.reset(new CNNLayer({input->getName(), "Input", input->getTensorDesc().getPrecision()}));
creator->outData.push_back(input);
auto getParentOutputPort = [](const std::shared_ptr<ngraph::Node> childOp, const std::shared_ptr<ngraph::Node> parentOp,
const size_t childInputPort) -> int {
for (size_t parentPort = 0; parentPort < parentOp->get_output_size(); parentPort++) {
if (childOp->input(childInputPort).get_tensor_ptr() == parentOp->output(parentPort).get_tensor_ptr()) {
return static_cast<int>(parentPort);
}
}
const MKLDNNNodePtr node(MKLDNNNode::factory().create(creator, getEngine(), extMgr, weightsCache));
data2node[input.get()] = {node, 0};
return -1;
};
for (const auto op : subgraph->get_ordered_ops()) {
const MKLDNNNodePtr node {MKLDNNNode::factory().create(op, getEngine(), extMgr, weightsCache)};
graphNodes.push_back(node);
inputNodes[input->getName()] = node;
if (getInputTo(input).empty()) {
unused_data.insert(input);
if (op->get_type_info() == ngraph::op::v0::Parameter::type_info) {
inputNodesMap[node->getName()] = node;
}
}
// Step 2. Replicate all internal nodes.
for (const auto layer : NetPass::TIBodySortTopologically(subgraph)) {
const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)};
graphNodes.push_back(node);
if (op->get_type_info() == ngraph::op::v0::Result::type_info) {
auto prev = op->get_input_node_shared_ptr(0);
std::string inputID;
inputID = prev->get_friendly_name();
if (prev->get_output_size() > 1) {
inputID += "." + std::to_string(op->get_input_source_output(0).get_index());
}
for (int port = 0; port < layer->insData.size(); port++) {
auto data = layer->insData[port].lock();
outputNodesMap[inputID] = node;
}
auto port_info = data2node[data.get()];
auto parent_node = port_info.first;
auto parent_port_idx = port_info.second;
for (size_t port = 0; port < op->get_input_size(); port++) {
auto parentOp = op->get_input_node_shared_ptr(port);
MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, port));
auto portInfo = op2node[parentOp];
auto parentNode = portInfo.first;
MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, node, getParentOutputPort(op, parentOp, port), port));
node->addEdge(edge);
graphEdges.push_back(edge);
}
int out_port_idx = 0;
for (auto &out_data : layer->outData) {
data2node[out_data.get()] = {node, out_port_idx++};
if (getInputTo(out_data).empty()) {
unused_data.insert(out_data);
if (!MKLDNNPlugin::one_of(op->get_type_info(),
ngraph::op::v0::Result::type_info,
ngraph::op::v3::Assign::type_info,
ngraph::op::v6::Assign::type_info)) {
int outPortIdx = 0;
for (int oi = 0; oi < op->get_output_size(); oi++) {
op2node[op->output(oi).get_node_shared_ptr()] = {node, outPortIdx++};
if (op->get_output_target_inputs(oi).empty()) {
unusedOutputs.push_back(op->output(oi));
}
}
}
}
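// MKLDNNPlugin::one_of() (from "utils/general_utils.h") simply checks whether its first
// argument equals any of the remaining ones; a plausible sketch, not quoted from the header:
template <typename T, typename U>
inline bool one_of(T val, U item) {
    return val == item;
}
template <typename T, typename U, typename... Rest>
inline bool one_of(T val, U item, Rest... rest) {
    return val == item || one_of(val, rest...);
}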
// Step 3. Add output nodes and output stubs for unused data objects.
for (const auto &output : subgraph.outputs) {
auto port_info = data2node[output.get()];
auto parent_node = port_info.first;
auto parent_port_idx = port_info.second;
CNNLayerPtr layer(new CNNLayer({"out_" + output->getName(), "Output", output->getTensorDesc().getPrecision()}));
layer->insData.push_back(output);
const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)};
MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0));
node->addEdge(edge);
graphEdges.push_back(edge);
graphNodes.push_back(node);
outputNodes.push_back(node);
unused_data.erase(output);
}
// Add stub output node for unused data
for (auto to_stub_data : unused_data) {
auto port_info = data2node[to_stub_data.get()];
auto parent_node = port_info.first;
auto parent_port_idx = port_info.second;
CNNLayerPtr layer(new CNNLayer({"stub_" + to_stub_data->getName(), "Output", to_stub_data->getTensorDesc().getPrecision()}));
layer->insData.push_back(to_stub_data);
const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache));
MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0));
node->addEdge(edge);
for (auto unusedOutput : unusedOutputs) {
auto portInfo = op2node[unusedOutput.get_node_shared_ptr()];
auto parentNode = portInfo.first;
auto port = portInfo.second;
const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName();
const MKLDNNNodePtr outNode = std::make_shared<MKLDNNInputNode>(parentNode->outDims[port].ToSizeVector(),
parentNode->getOriginalOutputPrecisionAtPort(port),
nodeName, "Result", getEngine(), weightsCache);
MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0));
outNode->addEdge(edge);
graphEdges.push_back(edge);
graphNodes.push_back(node);
graphNodes.push_back(outNode);
}
}
void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) {
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "MKLDNNGraph::Replicate", "CNNNetwork");
InputsDataMap inputs = network.getInputsInfo();
InputsDataMap inputsInfo = network.getInputsInfo();
OutputsDataMap outputsInfo = network.getOutputsInfo();
this->_name = network.getName();
// The input layer precision has to be equal to the InputData precision
std::map<std::string, Precision> changedPrecision;
for (const auto& input : inputs) {
auto inputLayer = getCreatorLayer(input.second->getInputData()).lock();
if (inputLayer) {
inputLayer->precision = inputLayer->outData[0]->getTensorDesc().getPrecision();
}
std::shared_ptr<const ngraph::Function> func = network.getFunction();
if (!func) {
IE_THROW() << "Function pointer inside CNNNetwork is nullptr";
}
std::unordered_map<CNNLayerPtr, MKLDNNNodePtr> layer2node;
std::unordered_set<DataPtr> unused_data; // nodes which has no consumers (output or just unused)
auto orderedOps = func->get_ordered_ops();
// TODO [NM]: unordered_map is preferred from performance perspective. Needs hash for ngraph::Node
std::map<std::shared_ptr<ngraph::Node>, MKLDNNNodePtr> op2node;
std::deque<ngraph::Output<ngraph::Node>> unusedOutputs; // nodes which have no consumers (output or just unused)
auto getParentOutputPort = [](const std::shared_ptr<ngraph::Node> childOp, const std::shared_ptr<ngraph::Node> parentOp,
const size_t childInputPort) -> int {
for (size_t parentPort = 0; parentPort < parentOp->get_output_size(); parentPort++) {
if (childOp->input(childInputPort).get_tensor_ptr() == parentOp->output(parentPort).get_tensor_ptr()) {
return static_cast<int>(parentPort);
}
}
auto _parent_port = [] (const DataPtr &data) -> int {
auto parent = getCreatorLayer(data).lock();
for (int i = 0; parent->outData.size(); i++)
if (data == parent->outData[i])
return i;
return -1;
};
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "AllNodes");
// Replicate All Nodes in topological order
for (const auto layer : CNNNetSortTopologically(network)) {
CNNLayerPtr _layer = layer;
if (layer->type == "Memory" && layer->GetParamAsString("index") == "1") {
auto memoryId = layer->GetParamAsString("id");
Precision portPrecision = layer->outData[0]->getTensorDesc().getPrecision();
_layer.reset(new CNNLayer({layer->name + "/id=" + memoryId, "MemoryInput", portPrecision}));
_layer->params = layer->params;
_layer->outData = layer->outData;
}
const MKLDNNNodePtr node(MKLDNNNode::factory().create(_layer, getEngine(), extMgr, weightsCache));
for (const auto& op : orderedOps) {
const MKLDNNNodePtr node(MKLDNNNode::factory().create(op, getEngine(), extMgr, weightsCache));
graphNodes.push_back(node);
layer2node[layer] = node;
if (layer->params.count("originalLayersNames")) {
node->originalLayers = layer->params["originalLayersNames"];
if (op->get_type_info() == ngraph::op::v0::Parameter::type_info) {
if (inputsInfo.count(node->getName()) != 0) {
inputNodesMap[node->getName()] = node;
}
}
for (int port = 0; port < layer->insData.size(); port++) {
auto data = layer->insData[port].lock();
auto parent_layer = getCreatorLayer(data).lock();
if (!parent_layer) continue; // no parent means that it is input data node (or memory/const layer)
if (op->get_type_info() == ngraph::op::v0::Result::type_info) {
// [NM] TODO: Several networks have model outputs whose names mismatch the Result node names
const auto &input = op->input_value(0);
NGRAPH_SUPPRESS_DEPRECATED_START
auto name = input.get_tensor().get_name();
NGRAPH_SUPPRESS_DEPRECATED_END
if (name.empty()) {
name = ngraph::op::util::create_ie_output_name(input);
}
auto parent_node = layer2node[parent_layer];
if (outputsInfo.count(name) != 0) {
outputNodesMap[name] = node;
}
}
MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(data), port));
op2node[op] = node;
for (size_t port = 0; port < op->get_input_size(); port++) {
auto parentOp = op->get_input_node_shared_ptr(port);
auto parentNode = op2node[parentOp];
MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, node, getParentOutputPort(op, parentOp, port), static_cast<int>(port)));
node->addEdge(edge);
graphEdges.push_back(edge);
}
for (auto &out_data : layer->outData) {
if (getInputTo(out_data).empty()) {
unused_data.insert(out_data);
if (!MKLDNNPlugin::one_of(op->get_type_info(),
ngraph::op::v0::Result::type_info,
ngraph::op::v3::Assign::type_info,
ngraph::op::v6::Assign::type_info)) {
for (int oi = 0; oi < op->get_output_size(); oi++) {
if (op->get_output_target_inputs(oi).empty()) {
unusedOutputs.push_back(op->output(oi));
}
}
}
}
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "Outputs");
OutputsDataMap outputs = network.getOutputsInfo();
for (const auto &output : outputs) {
const auto data = output.second;
auto parent_layer = getCreatorLayer(data).lock();
auto parent_node = layer2node[parent_layer];
CNNLayerPtr layer(new CNNLayer({"out_" + output.first, "Output", data->getTensorDesc().getPrecision()}));
layer->insData.push_back(data);
const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache));
MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(data), 0));
node->addEdge(edge);
// Add stub output node for unused outputs
for (auto unusedOutput : unusedOutputs) {
auto parentNode = op2node[unusedOutput.get_node_shared_ptr()];
const auto port = unusedOutput.get_index();
const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName();
const MKLDNNNodePtr outNode = std::make_shared<MKLDNNInputNode>(parentNode->outDims[port].ToSizeVector(),
parentNode->getOriginalOutputPrecisionAtPort(port),
nodeName, "Result", getEngine(), weightsCache);
MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0));
outNode->addEdge(edge);
graphEdges.push_back(edge);
graphNodes.push_back(node);
outputNodes.push_back(node);
unused_data.erase(data);
graphNodes.push_back(outNode);
}
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "AddStubs");
// We set all non const data paths precision to BF16 in case enforceBF16 flag is switched on.
if (config.enforceBF16) {
bool isQuantizedModel = false;
for (auto& node : graphNodes) {
if (node->getType() == FakeQuantize)
isQuantizedModel = true;
}
// Add stub output node for unused data
for (auto to_stub_data : unused_data) {
auto parent_layer = getCreatorLayer(to_stub_data).lock();
auto parent_node = layer2node[parent_layer];
// Floating point parts of FP32 + INT8 or FP32 + BIN mixed precision models will be executed in BF16 precision
// only if enforceBF16 flag was set manually because current performance is not good enough to enable it by default
if (implication(isQuantizedModel, config.manualEnforceBF16)) {
for (auto &node : graphNodes) {
if (node->getType() != Input && node->getType() != Output) {
for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) {
auto &parent = node->getParentEdgesAtPort(i)[0]->getParent();
if (!(parent->getType() == Input && parent->isConstant()) && node->getOriginalInputPrecisionAtPort(i) == Precision::FP32)
node->setOriginalInputPrecisionAtPort(i, Precision::BF16);
}
CNNLayerPtr layer(new CNNLayer({"stub_" + parent_layer->name, "Output", to_stub_data->getTensorDesc().getPrecision()}));
layer->insData.push_back(to_stub_data);
const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache));
MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(to_stub_data), 0));
node->addEdge(edge);
graphEdges.push_back(edge);
graphNodes.push_back(node);
}
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "Inputs");
// Replicate input nodes
for (const auto& input : inputs) {
auto inputLayer = getCreatorLayer(input.second->getInputData()).lock();
inputNodes[input.first] = layer2node[inputLayer];
// Loading mean images
MKLDNNDims outDims;
if (!inputNodes[input.first]->getChildEdgeAt(0)->getDims().ndims())
outDims = MKLDNNDims(InferenceEngine::SizeVector(1, 1));
else
outDims = MKLDNNDims(inputNodes[input.first]->getChildEdgeAt(0)->getDims());
if (inputs.find(input.first) != inputs.end()) {
InputInfo::Ptr ii = inputs[input.first];
if (ii && ii->getPreProcess().getNumberOfChannels()) {
_meanImages[input.first].Load(outDims, ii);
for (size_t i = 0; i < node->getOriginalOutputsNumber(); i++) {
if (node->getOriginalOutputPrecisionAtPort(i) == Precision::FP32)
node->setOriginalOutputPrecisionAtPort(i, Precision::BF16);
}
}
}
}
}
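// implication() above comes from "utils/general_utils.h"; it is plain logical implication,
// roughly (a sketch, not quoted from the header):
constexpr inline bool implication(bool cause, bool cond) {
    return !cause || cond;
}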
// change precision for input/output nodes to avoid extra data conversion when set input/output blobs
// also we need to change input/output precisions for consumers/producers to avoid inserting reorder
for (auto &input : inputNodesMap) {
const auto precToSet = normalizeToSupportedPrecision(inputsInfo.at(input.first)->getPrecision());
input.second->setOriginalOutputPrecisionAtPort(0, precToSet);
const auto childEdges = input.second->getChildEdgesAtPort(0);
for (size_t i = 0; i < childEdges.size(); i++) {
const auto child = childEdges[i]->getChild();
if (child->getOriginalInputPrecisionAtPort(childEdges[i]->getOutputNum()) != Precision::BF16)
child->setOriginalInputPrecisionAtPort(childEdges[i]->getOutputNum(), precToSet);
}
}
for (auto &output : outputNodesMap) {
const auto precToSet = normalizeToSupportedPrecision(outputsInfo.at(output.first)->getPrecision());
output.second->setOriginalInputPrecisionAtPort(0, precToSet);
const auto parentEdges = output.second->getParentEdgesAtPort(0);
for (size_t i = 0; i < parentEdges.size(); i++) {
const auto parent = parentEdges[i]->getParent();
parent->setOriginalOutputPrecisionAtPort(parentEdges[i]->getInputNum(), precToSet);
}
}
// Loading mean images
for (const auto& input : inputsInfo) {
MKLDNNDims outDims;
if (!inputNodesMap[input.first]->getChildEdgeAt(0)->getDims().ndims()) {
outDims = MKLDNNDims(InferenceEngine::SizeVector(1, 1));
} else {
outDims = inputNodesMap[input.first]->getChildEdgeAt(0)->getDims();
}
InputInfo::Ptr ii = inputsInfo[input.first];
if (ii && ii->getPreProcess().getNumberOfChannels()) {
_meanImages[input.first].Load(outDims, ii);
}
}
}
void MKLDNNGraph::InitGraph() {
@ -349,11 +350,6 @@ void MKLDNNGraph::InitGraph() {
CreatePrimitives();
SetOriginalLayerNames();
if (!config.dumpToDot.empty())
dumpToDotFile(config.dumpToDot + "_init.dot");
#ifndef CPU_DEBUG_CAPS
for (auto &graphNode : graphNodes) {
graphNode->cleanup();
@ -366,31 +362,6 @@ void MKLDNNGraph::InitGraph() {
ExecuteConstantNodesOnly();
}
void MKLDNNGraph::SetOriginalLayerNames() {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::SetOriginalLayerNames");
// Do it before cleanup. Because it will lose original layers information
for (auto &graphNode : graphNodes) {
auto nodeType = graphNode->getType();
if (nodeType == Reorder || nodeType == Output) continue;
if (graphNode->getOriginalLayers().empty()) {
graphNode->addOriginalLayer(graphNode->getCnnLayer());
}
if (graphNode->getFusedWith().size() || graphNode->getMergeWith().size()) {
// Original layer names
std::vector<MKLDNNNodePtr> internal = graphNode->getFusedWith();
auto &merged = graphNode->getMergeWith();
internal.insert(internal.end(), merged.begin(), merged.end());
for (auto &sub_node : internal) {
graphNode->addOriginalLayer(sub_node->getCnnLayer());
}
}
}
}
void MKLDNNGraph::InitNodes() {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::InitNodes");
for (auto &node : graphNodes) {
@ -500,7 +471,7 @@ void MKLDNNGraph::InitEdges() {
std::unordered_set<std::string> uniqueLayerNames;
for (auto node : graphNodes) {
uniqueLayerNames.insert(node->getCnnLayer()->name);
uniqueLayerNames.insert(node->getName());
}
for (auto i = 0; i < numberOfEdges; i++) {
@ -510,14 +481,17 @@ void MKLDNNGraph::InitEdges() {
// Check if there is a reorder that supports the type conversion
if (edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() &&
!isReorderAvailable(edge->getInputDesc(), edge->getOutputDesc(), this->getEngine())) {
//If we are here, then we need to insert Convert, because there are no reorders that support such type conversion
std::string convertName = edge->getParent()->getName() + "_" +
edge->getInputDesc().getPrecision().name() + "_" + edge->getOutputDesc().getPrecision().name();
!isReorderAvailable(edge->getInputDesc(), edge->getOutputDesc(), this->getEngine())) {
// If we are here, then we need to insert Convert, because there are no reorders that support such type conversion
const auto inDesc = edge->getInputDesc();
const auto outDesc = edge->getOutputDesc();
CNNLayerPtr convert(new CNNLayer(LayerParams{convertName, "Convert", edge->getInputDesc().getPrecision()}));
auto convertNode = std::make_shared<MKLDNNConvertNode>(convert, this->getEngine(), this->weightsCache);
convertNode->setDescs(edge->getInputDesc(), edge->getOutputDesc());
std::string convertName = edge->getParent()->getName() + "_" +
inDesc.getPrecision().name() + "_" + outDesc.getPrecision().name();
auto convertNode = std::make_shared<MKLDNNConvertNode>(inDesc.getDims(), inDesc.getPrecision(), outDesc.getPrecision(), convertName,
this->getEngine(), this->weightsCache);
convertNode->setDescs(inDesc, outDesc);
InsertNode(edge, convertNode, true);
//Check if reorder is still needed
@ -741,8 +715,8 @@ void MKLDNNGraph::CreatePrimitives() {
void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in) {
if (!IsReady()) IE_THROW()<< "Wrong state. Topology not ready.";
auto input = inputNodes.find(name);
if (input != inputNodes.end()) {
auto input = inputNodesMap.find(name);
if (input != inputNodesMap.end()) {
MKLDNNDims outDims = input->second->getChildEdgeAt(0)->getDims();
const void *ext_data_ptr = in->cbuffer();
@ -774,11 +748,12 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
if (!IsReady())
IE_THROW() << "Wrong state. Topology not ready.";
for (MKLDNNNodePtr &node : outputNodes) {
// remove out_ from node name
std::string name = node->getName().substr(4);
for (auto &outputMap : outputNodesMap) {
auto name = outputMap.first;
auto node = outputMap.second;
const MKLDNNMemory& intr_blob = node->getParentEdgeAt(0)->getMemory();
if (out.find(name) == out.end()) {
// TODO [NM]: Do we really need this path?
// TODO: Create blob from MemoryDesc
Blob::Ptr outBlob = make_shared_blob<float>({Precision::FP32, node->getParentEdgeAt(0)->getDims().ToSizeVector(),
TensorDesc::getLayoutByDims(node->getParentEdgeAt(0)->getDims().ToSizeVector())},
@ -816,7 +791,29 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
MB_to_process = std::min<int>(config.batchLimit, MB_to_process);
size_t size_to_copy = intr_blob.GetElementsCount() * MB_to_process / MB;
cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy);
const auto actualDesc = node->getParentEdgeAt(0)->getDesc();
const auto expectedDesc = ext_blob->getTensorDesc();
// TODO [NM]: need to create a universal reorder which will detect cases when we really need to use it
// WA: for cases when output shape after transformation will be 1x1x1x1 but model output is scalar
bool isScalarOutput = false;
if (actualDesc.getLayout() == SCALAR) {
isScalarOutput = expectedDesc.getLayout() == SCALAR ||
std::accumulate(expectedDesc.getDims().begin(), expectedDesc.getDims().end(), (size_t)1, std::multiplies<size_t>()) == 1;
} else if (expectedDesc.getLayout() == SCALAR) {
isScalarOutput = actualDesc.getLayout() == SCALAR ||
std::accumulate(actualDesc.getDims().begin(), actualDesc.getDims().end(), (size_t)1, std::multiplies<size_t>()) == 1;
}
if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) {
auto outBlobDesc = MKLDNNMemoryDesc{expectedDesc};
auto outBloMem = MKLDNNMemory(eng);
outBloMem.Create(outBlobDesc, ext_blob_ptr, false);
outBloMem.SetData(intr_blob, 0, false);
} else {
cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy);
}
}
}
@ -966,8 +963,6 @@ void MKLDNNGraph::GetPerfData(std::map<std::string, InferenceEngine::InferenceEn
for (int i = 1; i < graphNodes.size(); i++) {
getPerfMapFor(perfMap, graphNodes[i]);
}
if (!config.dumpToDot.empty()) dumpToDotFile(config.dumpToDot + "_perf.dot");
}
void MKLDNNGraph::setConfig(const Config &cfg) {
@ -983,18 +978,14 @@ Config MKLDNNGraph::getProperty() const {
}
void MKLDNNGraph::getInputBlobs(InferenceEngine::BlobMap &resp) {
for (auto &it : inputNodes) {
MKLDNNInputNode* node = dynamic_cast<MKLDNNInputNode*>(it.second.get());
if (!node || node->isConstant())
continue;
resp[it.first] = node->getChildEdgeAt(0)->getBlob();
for (auto &it : inputNodesMap) {
resp[it.first] = it.second->getChildEdgeAt(0)->getBlob();
}
}
void MKLDNNGraph::getOutputBlobs(InferenceEngine::BlobMap &resp) {
for (auto &it : outputNodes) {
std::string name = it->getName().substr(4);
resp[name] = it->getParentEdgeAt(0)->getBlob();
for (auto &it : outputNodesMap) {
resp[it.first] = it.second->getParentEdgeAt(0)->getBlob();
}
}
@ -1150,10 +1141,7 @@ void MKLDNNGraph::RemoveDroppedEdges() {
MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const TensorDesc& inDesc, const TensorDesc& outDesc,
bool isOptimized, InferenceEngine::Blob::Ptr scales) {
CNNLayerPtr layer(new CNNLayer({layerName,
"Reorder",
inDesc.getPrecision()}));
MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, getEngine(), weightsCache));
MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layerName, getEngine(), weightsCache));
auto *reorderPtr = dynamic_cast<MKLDNNReorderNode *>(newReorder.get());
if (reorderPtr == nullptr) {
IE_THROW() << "MKLDNNGraph::InsertReorder: Cannot cast to MKLDNNReorderNode";
@ -1165,7 +1153,7 @@ MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerNa
InsertNode(edge, newReorder, true);
// Using the method MKLDNNEdge::getDesc() we can check that input and output tensor descriptors are equal.
// Due to the specificity of MKLDNNGraphOptimizer::MergePermuteAndReorder() that isOptimized flag uses, we shouldn't do these checks.
// Due to the specificity of MKLDNNGraphOptimizer::MergeTransposeAndReorder() that isOptimized flag uses, we shouldn't do these checks.
if (!isOptimized) {
newReorder->getParentEdgeAt(0)->getDesc();
newReorder->getChildEdgeAt(0)->getDesc();
@ -1218,15 +1206,6 @@ InferenceEngine::CNNNetwork MKLDNNGraph::dump() const {
return dump_graph_as_ie_ngraph_net(*this);
}
void MKLDNNGraph::dumpToDotFile(std::string file) const {
std::ofstream dot;
dot.open(file);
if (!dot.is_open()) IE_THROW() << "CPU Plugin cannot create dot file " << file << ".";
dump_graph_as_dot(*this, dot);
dot.close();
}
void MKLDNNGraph::printGraphInfo() const {
for (auto &graphNode : graphNodes) {
std::cout << "name: " << graphNode->getName() << " [ ";

View File

@ -4,14 +4,12 @@
#pragma once
#include "ie_parallel.hpp"
#include "cpp/ie_cnn_network.h"
#include "config.h"
#include "mkldnn_memory.h"
#include "mean_image.h"
#include "mkldnn_node.h"
#include "mkldnn_edge.h"
#include "threading/ie_thread_local.hpp"
#include <map>
#include <string>
#include <vector>
@ -48,7 +46,7 @@ public:
void getOutputBlobs(InferenceEngine::BlobMap &out_map);
template<typename NET>
void CreateGraph(const NET &network,
void CreateGraph(NET &network,
const MKLDNNExtensionManager::Ptr& extMgr,
MKLDNNWeightsSharing::Ptr &w_cache);
@ -73,15 +71,14 @@ public:
return graphEdges;
}
std::vector<MKLDNNNodePtr>& GetOutputNodes() {
return outputNodes;
std::map<std::string, MKLDNNNodePtr>& GetInputNodesMap() {
return inputNodesMap;
}
std::map<std::string, MKLDNNNodePtr>& GetInputNodes() {
return inputNodes;
std::map<std::string, MKLDNNNodePtr>& GetOutputNodesMap() {
return outputNodesMap;
}
mkldnn::engine getEngine() const {
return eng;
}
@ -152,9 +149,6 @@ public:
InferenceEngine::CNNNetwork dump() const;
template<typename NET>
static void ApplyUnrollPasses(NET &net);
void ResetInferCount() { infer_count = 0; }
void SortTopologically();
@ -166,8 +160,8 @@ protected:
status = NotReady;
eng = mkldnn::engine(mkldnn::engine::kind::cpu, 0);
inputNodes.clear();
outputNodes.clear();
inputNodesMap.clear();
outputNodesMap.clear();
graphNodes.clear();
graphEdges.clear();
_meanImages.clear();
@ -183,8 +177,8 @@ protected:
MKLDNNMemoryPtr memWorkspace;
std::map<std::string, MKLDNNNodePtr> inputNodes;
std::vector<MKLDNNNodePtr> outputNodes;
std::map<std::string, MKLDNNNodePtr> inputNodesMap;
std::map<std::string, MKLDNNNodePtr> outputNodesMap;
std::vector<MKLDNNNodePtr> graphNodes;
std::vector<MKLDNNEdgePtr> graphEdges;
@ -194,7 +188,7 @@ protected:
static mkldnn::engine eng;
void Replicate(const InferenceEngine::CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr);
void Replicate(const InferenceEngine::TensorIterator::Body &subgraph, const MKLDNNExtensionManager::Ptr& extMgr);
void Replicate(const std::shared_ptr<const ngraph::Function> &subgraph, const MKLDNNExtensionManager::Ptr& extMgr);
void InitGraph();
void InitNodes();
void InitDescriptors();
@ -204,22 +198,13 @@ protected:
void AllocateWithReuse();
void CreatePrimitives();
void ExecuteConstantNodesOnly();
void SetOriginalLayerNames();
friend class MKLDNNInferRequest;
friend class MKLDNNGraphlessInferRequest;
friend InferenceEngine::CNNNetwork dump_graph_as_ie_net(const MKLDNNGraph &graph);
friend InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph);
private:
void dumpToDotFile(std::string file) const;
void printGraphInfo() const;
struct ParsedLayer {
MKLDNNNodePtr parent;
InferenceEngine::CNNLayerPtr cnnLayer;
size_t outIdx;
};
};
} // namespace MKLDNNPlugin

View File

@ -3,8 +3,6 @@
//
#include "mkldnn_graph_dumper.h"
#include <legacy/cnn_network_impl.hpp>
#include <legacy/ie_util_internal.hpp>
#include <ie_ngraph_utils.hpp>
#include "exec_graph_info.hpp"
#include "mkldnn_debug.h"
@ -22,188 +20,6 @@ namespace MKLDNNPlugin {
namespace {
std::map<std::string, std::string> extract_node_metadata(const MKLDNNNodePtr &);
void drawer_callback(const InferenceEngine::CNNLayerPtr, ordered_properties &, ordered_properties &);
} // namespace
CNNLayer::Ptr create_cnnlayer(const MKLDNNNodePtr &node) {
CNNLayer::Ptr layer(new CNNLayer({node->getName(), "type", Precision::FP32}));
layer->params = extract_node_metadata(node);
layer->type = layer->params[ExecGraphInfoSerialization::LAYER_TYPE];
layer->params.erase(ExecGraphInfoSerialization::LAYER_TYPE);
auto &cfg = node->getSelectedPrimitiveDescriptor()->getConfig();
layer->insData.resize(cfg.inConfs.size());
layer->outData.resize(cfg.outConfs.size());
return layer;
}
InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph) {
std::map<MKLDNNNodePtr, std::shared_ptr<ngraph::Node> > node2layer;
ngraph::ResultVector results;
ngraph::ParameterVector params;
ngraph::NodeVector to_hold;
auto get_inputs = [&] (const MKLDNNNodePtr & node) {
auto pr_edges = node->getParentEdges();
ngraph::OutputVector inputs(pr_edges.size());
for (int i = 0; i < pr_edges.size(); i++) {
auto edge = node->getParentEdgeAt(i);
int pr_port = edge->getInputNum();
int ch_port = edge->getOutputNum();
auto pr_node = edge->getParent();
IE_ASSERT(node2layer.count(pr_node) == 1);
auto pr = node2layer[pr_node];
inputs[ch_port] = pr->output(pr_port);
}
return inputs;
};
auto create_ngraph_node = [&](const MKLDNNNodePtr &node) {
bool is_input = false, is_output = false, should_be_hold = false;
for (auto && kvp : graph.inputNodes) {
if (kvp.second == node) {
is_input = true;
break;
}
}
for (auto && onode : graph.outputNodes) {
if (onode == node) {
is_output = true;
break;
}
}
if (!is_output && node->getChildEdges().empty()) {
// The node has no consumer and is not an output.
// Should be hold in other irregular way.
should_be_hold = true;
}
auto meta_data = extract_node_metadata(node);
std::shared_ptr<ngraph::Node> return_node;
if (is_input) {
auto desc = node->getChildEdgeAt(0)->getDesc();
auto param = std::make_shared<ngraph::op::Parameter>(
details::convertPrecision(desc.getPrecision()),
ngraph::PartialShape(desc.getDims()));
return_node = param;
params.push_back(param);
} else if (is_output) {
results.emplace_back(std::make_shared<ngraph::op::Result>(get_inputs(node).back()));
return_node = results.back();
} else {
return_node = std::make_shared<ExecGraphInfoSerialization::ExecutionNode>(
get_inputs(node), node->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size());
for (size_t port = 0; port < return_node->get_output_size(); ++port) {
auto desc = node->getChildEdgeAt(port)->getDesc();
return_node->set_output_type(port,
details::convertPrecision(desc.getPrecision()),
ngraph::PartialShape(desc.getDims()));
}
}
if (should_be_hold) {
to_hold.push_back(return_node);
}
for (auto && kvp : meta_data)
return_node->get_rt_info()[kvp.first] = std::make_shared<::ngraph::VariantWrapper<std::string>>(kvp.second);
return_node->set_friendly_name(node->getName());
return return_node;
};
ngraph::NodeVector nodes;
nodes.reserve(graph.graphNodes.size());
for (auto &node : graph.graphNodes) { // important: graph.graphNodes are in topological order
nodes.emplace_back(create_ngraph_node(node));
node2layer[node] = nodes.back();
}
auto holder = results[0];
for (auto &node : to_hold) {
holder->add_control_dependency(node);
}
auto function = std::make_shared<ngraph::Function>(results, params, graph._name);
InferenceEngine::CNNNetwork net(function);
return net;
}
InferenceEngine::CNNNetwork dump_graph_as_ie_net(const MKLDNNGraph &graph) {
auto net = std::make_shared<details::CNNNetworkImpl>();
net->setName(graph._name);
std::map<MKLDNNNodePtr, CNNLayerPtr> node2layer;
// Copy all nodes to network
for (auto &node : graph.graphNodes) {
auto layer = create_cnnlayer(node);
node2layer[node] = layer;
net->addLayer(layer);
}
// Copy all edges to network
for (auto &node : graph.graphNodes) {
auto pr = node2layer[node];
auto ch_edges = node->getChildEdges();
for (int i = 0; i < ch_edges.size(); i++) {
auto edge = node->getChildEdgeAt(i);
int in_port = edge->getOutputNum();
auto ch_node = edge->getChild();
auto ch = node2layer[ch_node];
DataPtr data;
if (i < pr->outData.size()) {
std::string data_name = node->getName() + "_out" + std::to_string(i);
pr->outData[i] = std::make_shared<Data>(data_name, edge->getDesc());
data = pr->outData[i];
getCreatorLayer(data) = pr;
} else {
data = pr->outData[0];
}
getInputTo(data)[ch->name] = ch;
ch->insData[in_port] = data;
}
}
// Specify inputs data
for (auto kvp : graph.inputNodes) {
auto in_node = kvp.second;
auto in_layer = node2layer[in_node];
auto in_info = std::make_shared<InputInfo>();
in_info->setInputData(in_layer->outData[0]);
net->setInputInfo(in_info);
}
return InferenceEngine::CNNNetwork{net};
}
void dump_graph_as_dot(const MKLDNNGraph &graph, std::ostream &out) {
InferenceEngine::CNNNetwork dump_net = dump_graph_as_ie_net(graph);
InferenceEngine::saveGraphToDot(dump_net, out, drawer_callback);
}
//**********************************
// Special converters of meta data
//**********************************
namespace {
std::map<std::string, std::string> extract_node_metadata(const MKLDNNNodePtr &node) {
std::map<std::string, std::string> serialization_info;
@ -289,39 +105,106 @@ std::map<std::string, std::string> extract_node_metadata(const MKLDNNNodePtr &no
return serialization_info;
}
const char BLUE[] = "#D8D9F1";
const char GREEN[] = "#D9EAD3";
void drawer_callback(const InferenceEngine::CNNLayerPtr layer,
ordered_properties &printed_properties,
ordered_properties &node_properties) {
const auto &params = layer->params;
// Implementation
auto impl = params.find(ExecGraphInfoSerialization::IMPL_TYPE);
if (impl != params.end()) {
printed_properties.push_back({"impl", impl->second});
}
// Original names
auto orig = params.find(ExecGraphInfoSerialization::ORIGINAL_NAMES);
if (orig != params.end()) {
printed_properties.push_back({"originals", orig->second});
}
// Precision
auto prec = params.find(ExecGraphInfoSerialization::OUTPUT_PRECISIONS);
if (prec != params.end()) {
printed_properties.push_back({"precision", prec->second});
// Set color
node_properties.push_back({"fillcolor", prec->second == "FP32" ? GREEN : BLUE});
}
// Set xlabel containing PM data if calculated
auto perf = layer->params.find(ExecGraphInfoSerialization::PERF_COUNTER);
node_properties.push_back({"xlabel", (perf != layer->params.end()) ? perf->second : ""});
}
} // namespace
InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph) {
std::map<MKLDNNNodePtr, std::shared_ptr<ngraph::Node> > node2layer;
ngraph::ResultVector results;
ngraph::ParameterVector params;
ngraph::NodeVector to_hold;
auto get_inputs = [&] (const MKLDNNNodePtr & node) {
auto pr_edges = node->getParentEdges();
ngraph::OutputVector inputs(pr_edges.size());
for (int i = 0; i < pr_edges.size(); i++) {
auto edge = node->getParentEdgeAt(i);
int pr_port = edge->getInputNum();
int ch_port = edge->getOutputNum();
auto pr_node = edge->getParent();
IE_ASSERT(node2layer.count(pr_node) == 1);
auto pr = node2layer[pr_node];
inputs[ch_port] = pr->output(pr_port);
}
return inputs;
};
auto create_ngraph_node = [&](const MKLDNNNodePtr &node) {
bool is_input = false, is_output = false, should_be_hold = false;
for (auto && kvp : graph.inputNodesMap) {
if (kvp.second == node) {
is_input = true;
break;
}
}
for (auto && kvp : graph.outputNodesMap) {
if (kvp.second == node) {
is_output = true;
break;
}
}
if (!is_output && node->getChildEdges().empty()) {
// The node has no consumers and is not an output.
// It has to be held alive in another way.
should_be_hold = true;
}
auto meta_data = extract_node_metadata(node);
std::shared_ptr<ngraph::Node> return_node;
if (is_input) {
auto desc = node->getChildEdgeAt(0)->getDesc();
auto param = std::make_shared<ngraph::op::Parameter>(
details::convertPrecision(desc.getPrecision()),
ngraph::PartialShape(desc.getDims()));
return_node = param;
params.push_back(param);
} else if (is_output) {
results.emplace_back(std::make_shared<ngraph::op::Result>(get_inputs(node).back()));
return_node = results.back();
} else {
return_node = std::make_shared<ExecGraphInfoSerialization::ExecutionNode>(
get_inputs(node), node->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size());
for (size_t port = 0; port < return_node->get_output_size(); ++port) {
auto desc = node->getChildEdgeAt(port)->getDesc();
return_node->set_output_type(port,
details::convertPrecision(desc.getPrecision()),
ngraph::PartialShape(desc.getDims()));
}
}
if (should_be_hold) {
to_hold.push_back(return_node);
}
for (auto && kvp : meta_data)
return_node->get_rt_info()[kvp.first] = std::make_shared<::ngraph::VariantWrapper<std::string>>(kvp.second);
return_node->set_friendly_name(node->getName());
return return_node;
};
ngraph::NodeVector nodes;
nodes.reserve(graph.graphNodes.size());
for (auto &node : graph.graphNodes) { // important: graph.graphNodes are in topological order
nodes.emplace_back(create_ngraph_node(node));
node2layer[node] = nodes.back();
}
auto holder = results[0];
for (auto &node : to_hold) {
holder->add_control_dependency(node);
}
auto function = std::make_shared<ngraph::Function>(results, params, graph._name);
InferenceEngine::CNNNetwork net(function);
return net;
}
} // namespace MKLDNNPlugin
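The ngraph-based dump built above backs the executable network's runtime graph report; a hedged usage sketch from the application side (public Inference Engine 2021 API, arbitrary file names):
#include <inference_engine.hpp>
// Retrieve the executed (runtime) graph and serialize it to IR-like XML/BIN files.
void dumpExecGraph(InferenceEngine::ExecutableNetwork &execNet) {
    InferenceEngine::CNNNetwork execGraph = execNet.GetExecGraphInfo();
    execGraph.serialize("exec_graph.xml", "exec_graph.bin");
}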

View File

@ -11,9 +11,6 @@
namespace MKLDNNPlugin {
void dump_graph_as_dot(const MKLDNNGraph &graph, std::ostream &out);
InferenceEngine::CNNNetwork dump_graph_as_ie_net(const MKLDNNGraph &graph);
InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph);
} // namespace MKLDNNPlugin

File diff suppressed because it is too large

View File

@ -19,36 +19,26 @@ public:
void ApplyImplSpecificGraphOptimizations(MKLDNNGraph& graph);
private:
void MergeGroupConvolution(MKLDNNGraph& graph);
void MergeTwoEqualScaleShifts(MKLDNNGraph& graph);
void FuseConvolutionAndActivation(MKLDNNGraph &graph);
void FuseConvolutionAndBias(MKLDNNGraph &graph);
void FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &graph);
void FuseMultiplyAndAdd(MKLDNNGraph &graph);
void FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph);
void FuseConvolutionAndDepthwise(MKLDNNGraph &graph);
void FuseConvolutionAndSimpleOperationThroughMaxPool(MKLDNNGraph &graph);
void FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph);
void FuseConvolutionAndDWConvolution(MKLDNNGraph &graph);
void FuseConvolutionAndQuantize(MKLDNNGraph &graph);
void FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph);
void FusePoolingAndQuantize(MKLDNNGraph &graph);
void FuseBatchNormWithScale(MKLDNNGraph& graph);
void FusePoolingAndFakeQuantize(MKLDNNGraph &graph);
void FuseConvolutionSumAndConvolutionSumActivation(MKLDNNGraph &graph);
void FuseMVNAndSimpleOperation(MKLDNNGraph &graph);
void FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph);
void FuseNormalizeAndSimpleOperation(MKLDNNGraph &graph);
void RemoveIdentityOperator(MKLDNNGraph& graph);
void FuseNormalizeL2AndSimpleOperation(MKLDNNGraph &graph);
void RemoveIOScaleShifts(MKLDNNGraph& graph);
void DropDoubleReorders(MKLDNNGraph& graph);
void DropConvertReorder(MKLDNNGraph& graph);
void AddConvertToReorder(MKLDNNGraph &graph);
void FuseConvolutionAndZeroPoints(MKLDNNGraph &graph);
void FuseBroadcastAndEltwise(MKLDNNGraph &graph);
void FuseEltwiseAndSimple(MKLDNNGraph &graph);
void FuseScaleShiftAndQuantize(MKLDNNGraph &graph);
void FuseClampAndQuantize(MKLDNNGraph &graph);
void MergePermuteAndReorder(MKLDNNGraph &graph);
bool IsOneOf(Type type, std::vector<Type> types);
bool IsOneOf(EltwiseOpType alg, std::vector<EltwiseOpType> algs);
void FuseMulAddAndFakeQuantize(MKLDNNGraph &graph);
void FuseClampAndFakeQuantize(MKLDNNGraph &graph);
void MergeTransposeAndReorder(MKLDNNGraph &graph);
void removeEdge(MKLDNNGraph &graph, MKLDNNEdgePtr& edge);
};

View File

@ -20,7 +20,8 @@
#include "nodes/common/cpu_memcpy.h"
#include "mkldnn_async_infer_request.h"
#include <debug.h>
#include "utils/general_utils.h"
#include "utils/cpu_utils.hpp"
MKLDNNPlugin::MKLDNNInferRequest::MKLDNNInferRequest(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs,
@ -103,33 +104,14 @@ void MKLDNNPlugin::MKLDNNInferRequest::PushInputData() {
IE_THROW() << "Input blobs map contains not registered during IInferencePlugin::LoadNetwork blob with name " << input.first;
}
auto inPrec = input.second->getTensorDesc().getPrecision();
if (graph->hasMeanImageFor(input.first) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) {
inPrec = InferenceEngine::Precision::FP32;
} else {
inPrec = normalizeToSupportedPrecision(inPrec);
}
switch (inPrec) {
// these precisions are supported by mkldnn, so we push the blob directly
case InferenceEngine::Precision::I8:
case InferenceEngine::Precision::I32:
case InferenceEngine::Precision::BF16:
case InferenceEngine::Precision::FP32: {
break;
}
// these precisions are supported by mkldnn, so we push the blob directly
// BUT if a mean image exists, we convert the blob and send FP32
case InferenceEngine::Precision::U8:
case InferenceEngine::Precision::BOOL: {
if (graph->hasMeanImageFor(input.first))
inPrec = InferenceEngine::Precision::FP32;
break;
}
// these precisions are unsupported by mkldnn, so we convert the blob and send I32
case InferenceEngine::Precision::U16:
case InferenceEngine::Precision::I16:
case InferenceEngine::Precision::I64:
case InferenceEngine::Precision::U64: {
inPrec = InferenceEngine::Precision::I32;
break;
}
default:
IE_THROW() << "Unsupported input precision " << input.second->getTensorDesc().getPrecision();
if (inPrec == InferenceEngine::Precision::UNSPECIFIED) {
IE_THROW() << "Unsupported input precision " << input.second->getTensorDesc().getPrecision();
}
// User can initialize input via setBlob API using tensorDesc with default (ANY) layout.
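The removed precision switch is assumed to be folded into normalizeToSupportedPrecision from utils/cpu_utils.hpp; a hedged sketch of the mapping, reconstructed from the deleted branches (the real helper may differ):
#include <ie_precision.hpp>
// Sketch only: supported precisions pass through, 16/64-bit integer types collapse
// to I32, and anything else becomes UNSPECIFIED so that the caller above can throw.
inline InferenceEngine::Precision normalizeToSupportedPrecisionSketch(InferenceEngine::Precision prec) {
    switch (prec) {
        case InferenceEngine::Precision::I8:
        case InferenceEngine::Precision::U8:
        case InferenceEngine::Precision::BOOL:
        case InferenceEngine::Precision::I32:
        case InferenceEngine::Precision::BF16:
        case InferenceEngine::Precision::FP32:
            return prec;                                    // pushed to mkldnn as is
        case InferenceEngine::Precision::U16:
        case InferenceEngine::Precision::I16:
        case InferenceEngine::Precision::I64:
        case InferenceEngine::Precision::U64:
            return InferenceEngine::Precision::I32;         // converted before pushing
        default:
            return InferenceEngine::Precision::UNSPECIFIED; // rejected by the caller
    }
}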
@ -246,7 +228,6 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std::
}
InferenceEngine::TensorDesc desc = blobs[name]->getTensorDesc();
InferenceEngine::Precision originPrecision = blobs[name]->getTensorDesc().getPrecision();
if (_networkInputs.find(name) != _networkInputs.end()) {
InferenceEngine::Layout l = _networkInputs[name]->getLayout();
InferenceEngine::Precision p = _networkInputs[name]->getPrecision();
@ -257,7 +238,7 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std::
_inputs[name] = make_blob_with_precision(desc);
_inputs[name]->allocate();
if (desc.getPrecision() == originPrecision &&
if (blobs[name]->getTensorDesc() == desc &&
graph->_meanImages.find(name) == graph->_meanImages.end() && !graph->getProperty().batchLimit) {
externalPtr[name] = _inputs[name]->buffer();
}
@ -274,7 +255,8 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std::
return data;
}
InferenceEngine::TensorDesc desc = blobs[name]->getTensorDesc();
InferenceEngine::TensorDesc desc = _networkOutputs[name]->getTensorDesc();
desc.setPrecision(normalizeToSupportedPrecision(desc.getPrecision()));
// WA: need to avoid exception thrown when we compare blocking desc in SetBlob
// in situation if we push output blobs as inputs for next network (in Hetero plugin)
@ -285,7 +267,7 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std::
_outputs[name] = make_blob_with_precision(desc);
_outputs[name]->allocate();
if (desc.getPrecision() == InferenceEngine::Precision::FP32 && !graph->getProperty().batchLimit) {
if (blobs[name]->getTensorDesc() == desc && !graph->getProperty().batchLimit) {
externalPtr[name] = _outputs[name]->buffer();
}
data = _outputs[name];
@ -351,7 +333,12 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In
IE_THROW(ParameterMismatch) << "Failed to set input blob. Blocking descriptor mismatch.";
}
if (data->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32 &&
InferenceEngine::BlobMap blobs;
graph->getInputBlobs(blobs);
if (blobs.find(name) == blobs.end())
IE_THROW() << "MKLDNN graph doesn't contain input node with name: " << name;
if (data->getTensorDesc() == blobs.at(name)->getTensorDesc() &&
graph->_meanImages.find(name) == graph->_meanImages.end() && !graph->getProperty().batchLimit) {
externalPtr[name] = data->buffer();
} else if (externalPtr.find(name) != externalPtr.end()) {
@ -382,7 +369,13 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In
foundOutput->getTensorDesc().getBlockingDesc() != data->getTensorDesc().getBlockingDesc()) {
IE_THROW(ParameterMismatch) << "Failed to set output blob. Blocking descriptor mismatch.";
}
if (data->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32 &&
InferenceEngine::BlobMap blobs;
graph->getOutputBlobs(blobs);
if (blobs.find(name) == blobs.end())
IE_THROW() << "MKLDNN graph doesn't contain output node with name: " << name;
if (data->getTensorDesc() == blobs.at(name)->getTensorDesc() &&
!graph->getProperty().batchLimit) {
externalPtr[name] = data->buffer();
} else if (externalPtr.find(name) != externalPtr.end()) {
@ -398,8 +391,8 @@ static inline void changeEdgePtr(const MKLDNNPlugin::MKLDNNEdgePtr &edge, void *
void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() {
for (auto& it : externalPtr) {
auto input = graph->inputNodes.find(it.first);
if (input != graph->inputNodes.end()) {
auto input = graph->inputNodesMap.find(it.first);
if (input != graph->inputNodesMap.end()) {
if (input->second->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
continue;
// Input cannot be in-place with other primitives
@ -432,9 +425,9 @@ void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() {
}
MKLDNNNodePtr output;
for (auto& out : graph->outputNodes) {
if (out->getName() == "out_" + it.first) {
output = out;
for (auto& out : graph->outputNodesMap) {
if (out.first == it.first) {
output = out.second;
break;
}
}
@ -493,4 +486,4 @@ void MKLDNNPlugin::MKLDNNInferRequest::ThrowIfCanceled() const {
if (_asyncRequest != nullptr) {
_asyncRequest->ThrowIfCanceled();
}
}
}

View File

@ -489,8 +489,8 @@ static const std::map<int, std::vector<mkldnn::memory::format_tag>> form_tags_by
mkldnn::memory::format_tag::aBCde4c8b2c,
}}, {6, { // Popular
mkldnn::memory::format_tag::abcdef, // plain
mkldnn::memory::format_tag::acbdef, // permuted
mkldnn::memory::format_tag::defcab, // permuted
mkldnn::memory::format_tag::acbdef, // permute
mkldnn::memory::format_tag::defcab, // permute
mkldnn::memory::format_tag::aBcdef16b, // blocked 16c
mkldnn::memory::format_tag::aBCdef16b16c,
@ -565,18 +565,46 @@ bool MKLDNNMemoryDesc::isSame(mkldnn::memory::format_tag fmt) const {
auto refStrides = refDesc.data.format_desc.blocking.strides;
std::vector<size_t> actualOrder(desc.data.ndims);
std::iota(actualOrder.begin(), actualOrder.end(), 0);
std::sort(actualOrder.begin(), actualOrder.end(),
[&actualStrides] (size_t ind_l, size_t ind_r) {
return actualStrides[ind_l] > actualStrides[ind_r];
});
{
const auto dims = desc.dims();
std::vector<size_t> total_block_per_dim(dims.size(), 1);
const auto &blk_desc = desc.data.format_desc.blocking;
for (int i = 0; i < blk_desc.inner_nblks; i++) {
total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i];
}
std::vector<size_t> outer_block_dims(std::begin(dims), std::begin(dims) + dims.size());
for (size_t i = 0; i < outer_block_dims.size(); i++) {
outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]);
}
std::iota(actualOrder.begin(), actualOrder.end(), 0);
std::sort(actualOrder.begin(), actualOrder.end(),
[&actualStrides, &outer_block_dims] (size_t ind_l, size_t ind_r) {
return (actualStrides[ind_l] > actualStrides[ind_r]) ||
(actualStrides[ind_l] == actualStrides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]);
});
}
std::vector<size_t> refOrder(refDesc.data.ndims);
std::iota(refOrder.begin(), refOrder.end(), 0);
std::sort(refOrder.begin(), refOrder.end(),
[&refStrides] (size_t ind_l, size_t ind_r) {
return refStrides[ind_l] > refStrides[ind_r];
});
{
const auto dims = refDesc.dims();
std::vector<size_t> total_block_per_dim(dims.size(), 1);
const auto &blk_desc = refDesc.data.format_desc.blocking;
for (int i = 0; i < blk_desc.inner_nblks; i++) {
total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i];
}
std::vector<size_t> outer_block_dims(std::begin(dims), std::begin(dims) + dims.size());
for (size_t i = 0; i < outer_block_dims.size(); i++) {
outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]);
}
std::iota(refOrder.begin(), refOrder.end(), 0);
std::sort(refOrder.begin(), refOrder.end(),
[&refStrides, &outer_block_dims] (size_t ind_l, size_t ind_r) {
return (refStrides[ind_l] > refStrides[ind_r]) ||
(refStrides[ind_l] == refStrides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]);
});
}
if (actualOrder != refOrder) {
return false;
@ -682,14 +710,6 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
const size_t inner_ndims = blk_desc.inner_nblks;
const size_t total_ndims = outer_ndims + inner_ndims;
// order of outer dims. In case of IOhw_ will be {1, 0, 2, 3}
std::vector<size_t> outer_order(outer_ndims);
std::iota(outer_order.begin(), outer_order.end(), 0);
std::sort(outer_order.begin(), outer_order.end(),
[&blk_desc] (size_t ind_l, size_t ind_r) {
return blk_desc.strides[ind_l] > blk_desc.strides[ind_r];
});
// strides of inner dims. In case of 4i16o4i will be {64, 4, 1}
std::vector<size_t> inner_strides(inner_ndims, 1);
for (size_t i = 1; i < blk_desc.inner_nblks; i++) {
@ -701,6 +721,19 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
for (int i = 0; i < inner_ndims; i++) {
total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i];
}
std::vector<size_t> outer_block_dims(std::begin(dims), std::begin(dims) + outer_ndims);
for (size_t i = 0; i < outer_block_dims.size(); i++) {
outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]);
}
// order of outer dims. In case of IOhw_ will be {1, 0, 2, 3}
std::vector<size_t> outer_order(outer_ndims);
std::iota(outer_order.begin(), outer_order.end(), 0);
std::sort(outer_order.begin(), outer_order.end(),
[&blk_desc, &outer_block_dims] (size_t ind_l, size_t ind_r) {
return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) ||
(blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]);
});
// IE blocked order
// [new_outer_order] U [inner_idxs]
@ -721,7 +754,7 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks,
ie_blk_dims.end() - blk_desc.inner_nblks);
std::transform(outer_order.begin(), outer_order.end(), ie_blk_dims.begin(),
[&] (size_t i) { return div_up(dims[i], total_block_per_dim[i]); });
[&] (size_t i) { return outer_block_dims[i]; });
// IE offset padded to data. Same as for oneDNN
SizeVector ie_blk_offset_to_data {desc.data.padded_offsets, desc.data.padded_offsets + desc.data.ndims};
@ -742,7 +775,7 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
MKLDNNMemory::convertToIePrec(desc.data_type()),
SizeVector {begin(dims), end(dims)},
ie_blk_desc };
// TODO: BLOCKED is the most common layout which covers all other permuted layout like NHWC.
// TODO: BLOCKED is the most common layout which covers all other permute layout like NHWC.
// But for some cases we have to specify it more correctly.. may be.. or just keep
// auto detected layout in constructor of TensorDesc.
return res;
@ -809,7 +842,7 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
is_descending_strides &= (ie_strides[i-1] >= ie_strides[i]);
}
// TODO: That's a strong constraint and can be mitigated. IE::TensorDesc allows to permute blocked dims
// TODO: That's a strong constraint and can be mitigated. IE::TensorDesc allows to transpose blocked dims
// and maybe we can achieve the correct "descending strides" form which allows conversion.
if (!is_descending_strides)
IE_THROW() << "Unsupported case for conversion";

View File

@ -13,12 +13,11 @@
#include <cstdint>
#include <unordered_map>
#include <nodes/mkldnn_batchnorm_node.h>
#include <nodes/mkldnn_concat_node.h>
#include <nodes/mkldnn_conv_node.h>
#include <nodes/mkldnn_deconv_node.h>
#include <nodes/mkldnn_eltwise_node.h>
#include <nodes/mkldnn_gemm_node.h>
#include <nodes/mkldnn_matmul_node.h>
#include <nodes/mkldnn_fullyconnected_node.h>
#include <nodes/mkldnn_generic_node.h>
#include <nodes/mkldnn_input_node.h>
@ -30,7 +29,7 @@
#include <nodes/mkldnn_tile_node.h>
#include <nodes/mkldnn_split_node.h>
#include <nodes/mkldnn_pad_node.h>
#include <nodes/mkldnn_permute_node.h>
#include <nodes/mkldnn_transpose_node.h>
#include <nodes/mkldnn_memory_node.hpp>
#include <nodes/mkldnn_mvn_node.h>
#include <nodes/mkldnn_normalize_node.h>
@ -41,6 +40,8 @@
#include <nodes/mkldnn_depth_to_space_node.h>
#include <nodes/mkldnn_space_to_depth_node.h>
#include <nodes/mkldnn_strided_slice_node.h>
#include <nodes/mkldnn_reference_node.h>
#include <nodes/mkldnn_fake_quantize_node.h>
#include <mkldnn_types.h>
#include <dnnl_types.h>
#include "mkldnn_extension_utils.h"
@ -49,6 +50,10 @@
#include "mkldnn_debug.h"
#include "utils/rt_info/memory_formats_attribute.hpp"
#include <ie_ngraph_utils.hpp>
#include "utils/general_utils.h"
#include "utils/cpu_utils.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace openvino;
@ -56,94 +61,120 @@ using namespace openvino;
using namespace InferenceEngine::details;
namespace MKLDNNPlugin {
static const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl = {
{ "Unknown", Unknown },
{ "Input", Input },
{ "Const", Input },
{ "Output", Output },
{ "Reorder", Reorder },
{ "Constant", Input },
{ "Parameter", Input },
{ "Result", Output },
{ "Convolution", Convolution },
{ "ReLU", Eltwise },
{ "GELU", Eltwise },
{ "ELU", Eltwise },
{ "GroupConvolution", Convolution },
{ "MatMul", MatMul },
{ "FullyConnected", FullyConnected },
{ "MaxPool", Pooling },
{ "AvgPool", Pooling },
{ "Add", Eltwise },
{ "Subtract", Eltwise },
{ "Multiply", Eltwise },
{ "Divide", Eltwise },
{ "SquaredDifference", Eltwise },
{ "Maximum", Eltwise },
{ "Minimum", Eltwise },
{ "Mod", Eltwise },
{ "FloorMod", Eltwise },
{ "Power", Eltwise },
{ "PowerStatic", Eltwise },
{ "Equal", Eltwise },
{ "NotEqual", Eltwise },
{ "Greater", Eltwise },
{ "GreaterEqual", Eltwise },
{ "Less", Eltwise },
{ "LessEqual", Eltwise },
{ "LogicalAnd", Eltwise },
{ "LogicalOr", Eltwise },
{ "LogicalXor", Eltwise },
{ "LogicalNot", Eltwise },
{ "Relu", Eltwise },
{ "LeakyRelu", Eltwise },
{ "Gelu", Eltwise },
{ "Elu", Eltwise },
{ "Tanh", Eltwise },
{ "Sigmoid", Eltwise },
{ "Logistic", Eltwise },
{ "TanH", Eltwise },
{ "ReLU6", Eltwise },
{ "Exp", Eltwise },
{ "Not", Eltwise },
{ "Activation", Eltwise },
{ "Abs", Eltwise },
{ "Sqrt", Eltwise },
{ "Clamp", Eltwise },
{ "Swish", Eltwise },
{ "Exp", Eltwise },
{ "SwishCPU", Eltwise },
{ "HSwish", Eltwise },
{ "Mish", Eltwise },
{ "HSigmoid", Eltwise },
{ "Round", Eltwise },
{ "ScaleShift", Eltwise },
{ "PReLU", Eltwise },
{ "PRelu", Eltwise },
{ "Erf", Eltwise },
{ "SoftPlus", Eltwise },
{ "Norm", Lrn },
{ "LRN", Lrn },
{ "Pooling", Pooling },
{ "FullyConnected", FullyConnected },
{ "InnerProduct", FullyConnected },
{ "Gemm", Gemm },
{ "Softmax", SoftMax },
{ "SoftMax", SoftMax },
{ "Split", Split },
{ "Slice", Split },
{ "Concat", Concatenation },
{ "Deconvolution", Deconvolution },
{ "Eltwise", Eltwise },
{ "Mod", Eltwise },
{ "Power", Eltwise },
{ "Reshape", Reshape },
{ "Squeeze", Reshape },
{ "Unsqueeze", Reshape },
{ "Softmax", Softmax },
{ "Reorder", Reorder },
{ "BatchToSpace", BatchToSpace },
{ "SpaceToBatch", SpaceToBatch },
{ "DepthToSpace", DepthToSpace },
{ "SpaceToDepth", SpaceToDepth },
{ "Roll", Roll },
{ "LRN", Lrn },
{ "Split", Split },
{ "VariadicSplit", Split },
{ "Concat", Concatenation },
{ "ConvolutionBackpropData", Deconvolution },
{ "GroupConvolutionBackpropData", Deconvolution },
{ "StridedSlice", StridedSlice },
{ "Tile", Tile },
{ "SimplerNMS", SimplerNMS },
{ "ROIAlign", ROIAlign },
{ "ROIPooling", ROIPooling },
{ "BatchNormalization", BatchNormalization },
{ "DepthToSpace", DepthToSpace },
{ "Flatten", Flatten },
{ "PSROIPooling", PSROIPooling },
{ "DeformablePSROIPooling", PSROIPooling },
{ "Pad", Pad },
{ "Permute", Permute },
{ "SpaceToDepth", SpaceToDepth },
{ "StridedSlice", StridedSlice },
{ "Copy", Copy },
{ "Transpose", Transpose },
{ "LSTMCell", RNNCell },
{ "GRUCell", RNNCell },
{ "RNNCell", RNNCell },
{ "LSTMSequence", RNNSeq },
{ "GRUSequence", RNNSeq },
{ "RNNSequence", RNNSeq },
{ "Quantize", Quantize },
{ "FakeQuantize", Quantize },
{ "FakeQuantize", FakeQuantize },
{ "BinaryConvolution", BinaryConvolution },
{ "DeformableConvolution", DeformableConvolution },
{ "TensorIterator", TensorIterator },
{ "Loop", TensorIterator },
{ "MemoryInput", MemoryInput}, // for construction from name ctor, arbitrary name is used
{ "Memory", MemoryOutput }, // for construction from layer ctor
{ "ReadValue", MemoryInput}, // for construction from name ctor, arbitrary name is used
{ "Assign", MemoryOutput }, // for construction from layer ctor
{ "Convert", Convert },
{ "MVN", MVN},
{ "Normalize", Normalize},
{ "NormalizeL2", NormalizeL2},
{ "ScatterUpdate", ScatterUpdate},
{ "ScatterElementsUpdate", ScatterElementsUpdate},
{ "ScatterNDUpdate", ScatterNDUpdate},
{ "Interpolate", Interpolate},
{ "ReduceAnd", ReduceAnd},
{ "ReduceL1", ReduceL1},
{ "ReduceL2", ReduceL2},
{ "ReduceLogSum", ReduceLogSum},
{ "ReduceLogSumExp", ReduceLogSumExp},
{ "ReduceMax", ReduceMax},
{ "ReduceMean", ReduceMean},
{ "ReduceMin", ReduceMin},
{ "ReduceOr", ReduceOr},
{ "ReduceProd", ReduceProd},
{ "ReduceSum", ReduceSum},
{ "ReduceSumSquare", ReduceSumSquare},
{ "Erf", Eltwise },
{ "Roll", Roll },
{ "ReduceL1", Reduce},
{ "ReduceL2", Reduce},
{ "ReduceLogicalAnd", Reduce},
{ "ReduceLogicalOr", Reduce},
{ "ReduceMax", Reduce},
{ "ReduceMean", Reduce},
{ "ReduceMin", Reduce},
{ "ReduceProd", Reduce},
{ "ReduceSum", Reduce},
{ "ReduceLogSum", Reduce},
{ "ReduceLogSumExp", Reduce},
{ "ReduceSumSquare", Reduce},
{ "Broadcast", Broadcast},
{ "EmbeddingSegmentsSum", EmbeddingSegmentsSum},
{ "EmbeddingBagPackedSum", EmbeddingBagPackedSum},
{ "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum},
{ "Gather", Gather},
{ "GatherElements", GatherElements},
{ "GatherND", GatherND},
{ "OneHot", OneHot},
{ "RegionYolo", RegionYolo},
{ "Select", Select}
};
Type TypeFromName(const std::string type) {
@ -162,44 +193,65 @@ MKLDNNNode::NodesFactory & MKLDNNNode::factory() {
return factoryInstance;
}
MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &w_cache)
MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache)
: selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown),
weightCache(w_cache), cnnLayer(layer), engine(eng), name(layer->name), typeStr(layer->type),
type(TypeFromName(layer->type)), profiling(layer->name) {
if (!layer->outData.empty()) {
for (const auto& outData : layer->outData) {
outDims.emplace_back(outData->getDims());
weightCache(w_cache), engine(eng), name(op->get_friendly_name()), typeStr(op->get_type_name()),
type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) {
algorithm = Algorithm::Undefined;
fusingPort = -1;
const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name();
for (size_t i = 0; i < op->get_input_size(); i++) {
if (op->get_input_partial_shape(i).is_dynamic())
IE_THROW() << errorPrefix << " has dynamic input shape on " << i << " port, but CPU plug-in supports only static shape";
}
for (size_t i = 0; i < op->get_output_size(); i++) {
if (op->get_output_partial_shape(i).is_dynamic())
IE_THROW() << errorPrefix << " has dynamic output shape on " << i << " port, but CPU plug-in supports only static shape";
}
for (size_t i = 0; i < op->get_input_size(); i++) {
const auto &shape = op->get_input_shape(i);
inDims.emplace_back(ngraph::is_scalar(shape) ? ngraph::Shape{1} : shape);
originalInputPrecisions.emplace_back(details::convertPrecision(op->get_input_element_type(i)));
}
if (typeStr != "Result" && typeStr != "Assign") {
if (op->get_output_size() == 0) {
IE_THROW() << "Node with type '" << typeStr << "' and name '" << name << "' does not have any outputs.";
}
} else {
if (!(CaselessEq<std::string>()(layer->type, "memory") ||
CaselessEq<std::string>()(layer->type, "memoryinput") ||
CaselessEq<std::string>()(layer->type, "output") ||
CaselessEq<std::string>()(layer->type, "reorder") ||
CaselessEq<std::string>()(layer->type, "convert"))) {
IE_THROW() << "Inappropriate layer type: " << layer->type << " name: " << layer->name;
for (size_t i = 0; i < op->get_output_size(); i++) {
const auto &shape = op->get_output_shape(i);
outDims.emplace_back(ngraph::is_scalar(shape) ? ngraph::Shape{1} : shape);
originalOutputPrecisions.emplace_back(details::convertPrecision(op->get_output_element_type(i)));
}
}
for (const auto& inData : layer->insData) {
inDims.emplace_back(inData.lock()->getDims());
const auto& rtInfo = op->get_rt_info();
if (rtInfo.count("originalLayersNames")) {
originalLayers = getRTInfoValue(rtInfo, "originalLayersNames");
}
if (layer->params.find("PrimitivesPriority") != layer->params.end()) {
std::istringstream stream(layer->params["PrimitivesPriority"]);
if (originalLayers.empty()) {
addOriginalLayer(name);
}
auto primitivesPriority = getPrimitivesPriorityValue(op);
if (!primitivesPriority.empty()) {
std::istringstream stream(primitivesPriority);
std::string str;
while (getline(stream, str, ',')) {
if (str.substr(0, 4) != "cpu:")
continue;
implPriorities.push_back(parse_impl_name(str));
if (implPriorities[implPriorities.size() - 1] == impl_desc_type::unknown &&
str != "cpu:unknown")
str != "cpu:unknown")
IE_THROW() << "Unsupported CPU implementation " << str << " for node " << getName();
}
}
auto ngraphNode = layer->getNode();
if (ngraphNode != nullptr) {
std::string inputMemoryFormats = ngraph::getMLKDNNInputMemoryFormats(ngraphNode);
if (op != nullptr) {
std::string inputMemoryFormats = ngraph::getMLKDNNInputMemoryFormats(op);
if (!inputMemoryFormats.empty()) {
std::istringstream stream(inputMemoryFormats);
std::string str;
@ -210,7 +262,7 @@ MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::
}
}
std::string outputMemoryFormats = ngraph::getMLKDNNOutputMemoryFormats(ngraphNode);
std::string outputMemoryFormats = ngraph::getMLKDNNOutputMemoryFormats(op);
if (!outputMemoryFormats.empty()) {
std::istringstream stream(outputMemoryFormats);
std::string str;
@ -223,6 +275,13 @@ MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::
}
}
MKLDNNNode::MKLDNNNode(const std::string& type, const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache)
: selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown),
weightCache(w_cache), engine(eng), name(name), typeStr(type),
type(TypeFromName(type)), profiling(name) {
// TODO [NM]: What about filling inDims and outDims?
}
void MKLDNNNode::addEdge(const MKLDNNEdgeWeakPtr& edge) {
auto edgePtr = edge.lock();
if (!edgePtr)
@ -669,67 +728,6 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) {
selectedPD->getConfig() = rightConfig;
}
InferenceEngine::Blob::Ptr MKLDNNNode::createInternalBlob(InferenceEngine::SizeVector dims, bool weights, bool isGrouped) {
auto checkSize = [](size_t dst_size, size_t src_size) {
if (dst_size < src_size) {
IE_THROW() << "Cannot create internal buffer. Buffer can be overrun.";
}
};
auto * wLayer = dynamic_cast<InferenceEngine::WeightableLayer*>(getCnnLayer().get());
if (wLayer == nullptr)
IE_THROW() << "Cannot get weightable layer for node " << getName() << ".";
InferenceEngine::Blob::Ptr blb = weights ? wLayer->_weights : wLayer->_biases;
if (blb == nullptr)
IE_THROW() << "Cannot get internal blob layer for node " << getName() << ".";
auto intLayout = getWeightsLayoutByDims(dims, isGrouped);
InferenceEngine::TensorDesc desc(blb->getTensorDesc().getPrecision(), dims, intLayout);
auto fillInternalBlob = [&](char *data, size_t intBuffSize) {
size_t offset = blb->byteSize();
checkSize(intBuffSize, offset);
cpu_memcpy_s(data, intBuffSize, blb->buffer(), blb->byteSize());
data += blb->byteSize();
for (const auto &merged : getMergeWith()) {
wLayer = dynamic_cast<InferenceEngine::WeightableLayer*>(merged->getCnnLayer().get());
if (wLayer == nullptr)
IE_THROW() << "Cannot convert merged weightable layer for node "
<< getName() << ".";
blb = weights ? wLayer->_weights : wLayer->_biases;
if (blb == nullptr)
IE_THROW() << "Cannot get internal blob layer for node " << getName() << ".";
offset += blb->byteSize();
checkSize(intBuffSize, offset);
cpu_memcpy_s(data, intBuffSize, blb->buffer(), blb->byteSize());
data += blb->byteSize();
}
};
Blob::Ptr internalBlob;
if (blb->getTensorDesc().getPrecision() == Precision::BIN) {
internalBlob = InferenceEngine::make_shared_blob<int8_t>(desc);
} else if (blb->getTensorDesc().getPrecision() == Precision::I8) {
internalBlob = InferenceEngine::make_shared_blob<int8_t>(desc);
} else if (blb->getTensorDesc().getPrecision() == Precision::I32) {
internalBlob = InferenceEngine::make_shared_blob<int32_t>(desc);
} else if (blb->getTensorDesc().getPrecision() == Precision::BF16) {
internalBlob = InferenceEngine::make_shared_blob<int16_t>(desc);
} else {
internalBlob = InferenceEngine::make_shared_blob<float>(desc);
}
internalBlob->allocate();
char *data = internalBlob->buffer();
size_t intBuffSize = internalBlob->byteSize();
fillInternalBlob(data, intBuffSize);
return internalBlob;
}
void MKLDNNNode::prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::primitive_desc_iterator& itpd) {
for (size_t i = 0; i < getChildEdges().size(); i++) {
auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr();
@ -837,18 +835,17 @@ MKLDNNNode::ConstantType MKLDNNNode::checkConstant(LOOK look, std::vector<MKLDNN
return constant;
}
void MKLDNNNode::addOriginalLayer(const InferenceEngine::CNNLayerPtr &layer) {
if (!layer) return;
void MKLDNNNode::addOriginalLayer(const std::string& layerName) {
if (layerName.empty()) return;
if (originalLayers.empty()) {
originalLayers = layer->name;
originalLayers = layerName;
} else {
originalLayers += "," + layer->name;
originalLayers += "," + layerName;
}
}
void MKLDNNNode::cleanup() {
internalBlobs.clear();
cnnLayer.reset();
for (auto it : fusedWith) {
it->cleanup();
@ -1185,18 +1182,54 @@ InferenceEngine::Precision MKLDNNNode::getRuntimePrecision() const {
return runtimePrecision;
}
MKLDNNNode* MKLDNNNode::NodesFactory::create(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng,
MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache) {
MKLDNNNode *newNode = nullptr;
std::unique_ptr<MKLDNNNode> ol(createNodeIfRegistered(MKLDNNPlugin, Generic, layer, eng, w_cache));
if (ol != nullptr && ol->created(extMgr))
newNode = ol.release();
if (newNode == nullptr) {
std::unique_ptr<MKLDNNNode> ol(createNodeIfRegistered(MKLDNNPlugin, TypeFromName(layer->type), layer, eng, w_cache));
std::string errorMessage;
try {
std::unique_ptr<MKLDNNNode> ol(createNodeIfRegistered(MKLDNNPlugin, Generic, op, eng, w_cache));
if (ol != nullptr && ol->created(extMgr))
newNode = ol.release();
} catch (const InferenceEngine::Exception& ex) {
IE_SUPPRESS_DEPRECATED_START
if (ex.getStatus() != NOT_IMPLEMENTED) {
throw;
} else {
errorMessage += getExceptionDescWithoutStatus(ex);
}
IE_SUPPRESS_DEPRECATED_END
}
if (newNode == nullptr) {
try {
std::unique_ptr<MKLDNNNode> ol(createNodeIfRegistered(MKLDNNPlugin, TypeFromName(op->get_type_name()), op, eng, w_cache));
if (ol != nullptr && ol->created(extMgr))
newNode = ol.release();
} catch (const InferenceEngine::Exception& ex) {
IE_SUPPRESS_DEPRECATED_START
if (ex.getStatus() != NOT_IMPLEMENTED) {
throw;
} else {
errorMessage += getExceptionDescWithoutStatus(ex);
}
IE_SUPPRESS_DEPRECATED_END
}
}
if (newNode == nullptr) {
try {
std::unique_ptr<MKLDNNNode> ol(new MKLDNNReferenceNode(op, eng, w_cache, errorMessage));
if (ol != nullptr && ol->created(extMgr))
newNode = ol.release();
} catch (const InferenceEngine::Exception& ex) {
IE_SUPPRESS_DEPRECATED_START
if (ex.getStatus() != NOT_IMPLEMENTED) {
throw;
} else {
errorMessage += getExceptionDescWithoutStatus(ex);
}
IE_SUPPRESS_DEPRECATED_END
}
}
// WA-start : TI node requires all attributes to construct internal subgraph
@ -1206,8 +1239,75 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const InferenceEngine::CNNLayerPtr&
ti->setExtManager(extMgr);
// WA-end
if (!newNode)
IE_THROW() << "Unsupported primitive of type: " << layer->type << " name: " << layer->name;
if (!newNode) {
std::string errorDetails;
if (!errorMessage.empty()) {
errorDetails = "\nDetails: \n" + errorMessage;
}
IE_THROW() << "Unsupported operation of type: " << op->get_type_name() << " name: " << op->get_friendly_name() << errorDetails;
}
return newNode;
}
bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const {
size_t fusingPort = 0;
for (size_t i = (parentNode == nullptr ? 1 : 0); i < getParentEdges().size(); i++) {
MKLDNNNode *node = getParentEdgeAt(i)->getParent().get();
if (node == nullptr) {
IE_THROW() << "Cannot get parent node for " << getName() << " on " << i << " port";
}
if (node == parentNode) {
fusingPort = i;
continue;
}
if (!node->isConstant() || node->getType() != Input) {
return false;
}
}
const auto isBroadcastableToDataInput = [&]() {
const auto dataShape = getParentEdgeAt(fusingPort)->getDims().ToSizeVector();
for (size_t i = 0; i < getParentEdges().size(); i++) {
if (i == fusingPort)
continue;
auto weightShape = getParentEdgeAt(i)->getDims().ToSizeVector();
// [NM] TODO: PRelu is not broadcastable
// WA: [1,32,46,46], [32] -> [1,32,46,46], [1, 32, 1, 1]
if (getAlgorithm() == EltwisePrelu && weightShape.size() == 1 && weightShape.back() != 1) {
auto newWeightShape = std::vector<size_t>(dataShape.size(), 1);
newWeightShape[1] = weightShape[0];
weightShape = newWeightShape;
}
if (!isPerTensorOrPerChannelBroadcastable(dataShape, weightShape))
return false;
}
return true;
};
const auto isConvertablePowerStatic = [&]() {
if (getAlgorithm() == EltwisePowerStatic) {
const auto eltwise = dynamic_cast<const MKLDNNEltwiseNode *>(this);
if (!eltwise) {
IE_THROW() << "Cannot cast " << getName() << " to MKLDNNEltwiseNode";
}
return eltwise->getAlpha() == 1.0f;
}
return false;
};
return (one_of(getAlgorithm(), EltwiseAdd, EltwiseMultiply, EltwiseSubtract, EltwiseDivide, EltwisePrelu, EltwiseMulAdd) && isBroadcastableToDataInput())
|| isConvertablePowerStatic();
}
bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const {
if (node->getType() == FakeQuantize) {
return node->getAlgorithm() != FQBinarization;
} else if (node->getType() == Eltwise) {
return one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh,
EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu) ||
node->canBePerformedAsScaleShift(this);
}
return false;
}
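isPerTensorOrPerChannelBroadcastable comes from utils/cpu_utils.hpp and is not shown in this diff; a hedged sketch of the rule it is assumed to implement (the weights must collapse to a scalar or to the channel axis), consistent with the PRelu reshape WA above:
#include <algorithm>
#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Sketch: true if weightDims broadcast per-tensor (single element) or per-channel
// (all elements on axis 1) against dataDims.
bool isPerTensorOrPerChannelBroadcastableSketch(const std::vector<size_t> &dataDims,
                                                const std::vector<size_t> &weightDims) {
    if (weightDims.size() > dataDims.size())
        return false;
    const size_t totalElements = std::accumulate(weightDims.begin(), weightDims.end(),
                                                 static_cast<size_t>(1), std::multiplies<size_t>());
    if (totalElements == 1)
        return true;                                   // per-tensor (scalar) case
    // per-channel case: right-align the weights and require all data on axis 1
    std::vector<size_t> aligned(dataDims.size(), 1);
    std::copy(weightDims.rbegin(), weightDims.rend(), aligned.rbegin());
    for (size_t i = 0; i < aligned.size(); i++) {
        if (i == 1)
            continue;
        if (aligned[i] != 1)
            return false;
    }
    return dataDims.size() > 1 && aligned[1] == dataDims[1];
}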

View File

@ -11,7 +11,6 @@
#include <cassert>
#include <algorithm>
#include <caseless.hpp>
#include <ie_common.h>
#include "mkldnn_dims.h"
#include "mkldnn_memory.h"
#include "mkldnn_edge.h"
@ -23,13 +22,19 @@
#include "mkldnn_weights_cache.hpp"
#include "mkldnn.hpp"
#include <openvino/itt.hpp>
#include "utils/ngraph_utils.hpp"
#include <ngraph/ops.hpp>
#include <ngraph/node.hpp>
#include <ie_precision.hpp>
#include <nodes/common/tensor_desc_creator.h>
#include "cpu_types.h"
namespace MKLDNNPlugin {
using MKLDNNNodePtr = std::shared_ptr<MKLDNNNode>;
using MKLDNNNodeWeakPtr = std::weak_ptr<MKLDNNNode>;
// TODO [NM]: move into separate header
enum Type {
Unknown,
Generic,
@ -38,57 +43,54 @@ enum Type {
Output,
Convolution,
Deconvolution,
Activation,
Depthwise,
Lrn,
Pooling,
FullyConnected,
SoftMax,
Softmax,
Split,
Concatenation,
Eltwise,
Gemm,
MatMul,
Reshape,
Tile,
SimplerNMS,
ROIAlign,
ROIPooling,
BatchNormalization,
PSROIPooling,
BatchToSpace,
DepthToSpace,
Flatten,
Pad,
Permute,
Transpose,
SpaceToBatch,
SpaceToDepth,
StridedSlice,
Copy,
MemoryOutput,
MemoryInput,
RNNCell,
RNNSeq,
Quantize,
FakeQuantize,
BinaryConvolution,
DeformableConvolution,
TensorIterator,
Convert,
MVN,
Normalize,
NormalizeL2,
ScatterUpdate,
ScatterElementsUpdate,
ScatterNDUpdate,
Interpolate,
ReduceAnd,
ReduceL1,
ReduceL2,
ReduceLogSum,
ReduceLogSumExp,
ReduceMax,
ReduceMean,
ReduceMin,
ReduceOr,
ReduceProd,
ReduceSum,
ReduceSumSquare,
Roll
Reduce,
Broadcast,
EmbeddingSegmentsSum,
EmbeddingBagPackedSum,
EmbeddingBagOffsetsSum,
Gather,
GatherElements,
GatherND,
OneHot,
RegionYolo,
Select,
Roll,
Reference,
};
Type TypeFromName(const std::string type);
@ -107,50 +109,44 @@ static std::string NameFromType(Type type) {
return "Convolution";
case Deconvolution:
return "Deconvolution";
case Activation:
return "Activation";
case Lrn:
return "Lrn";
case Pooling:
return "Pooling";
case FullyConnected:
return "FullyConnected";
case Gemm:
return "Gemm";
case SoftMax:
return "SoftMax";
case MatMul:
return "MatMul";
case Softmax:
return "Softmax";
case Split:
return "Split";
case Concatenation:
return "Concatenation";
case Depthwise:
return "Depthwise";
case StridedSlice:
return "StridedSlice";
case Reshape:
return "Reshape";
case Tile:
return "Tile";
case SimplerNMS:
return "SimplerNMS";
case ROIAlign:
return "ROIAlign";
case ROIPooling:
return "ROIPooling";
case BatchNormalization:
return "BatchNormalization";
case PSROIPooling:
return "PSROIPooling";
case DepthToSpace:
return "DepthToSpace";
case Flatten:
return "Flatten";
case BatchToSpace:
return "BatchToSpace";
case Pad:
return "Pad";
case Permute:
return "Permute";
case Transpose:
return "Transpose";
case SpaceToDepth:
return "SpaceToDepth";
case StridedSlice:
return "StridedSlice";
case Copy:
return "Copy";
case SpaceToBatch:
return "SpaceToBatch";
case MemoryOutput:
return "MemoryOutput";
case MemoryInput:
@ -161,8 +157,8 @@ static std::string NameFromType(Type type) {
return "RNNCell";
case Eltwise:
return "Eltwise";
case Quantize:
return "Quantize";
case FakeQuantize:
return "FakeQuantize";
case BinaryConvolution:
return "BinaryConvolution";
case DeformableConvolution:
@ -173,8 +169,8 @@ static std::string NameFromType(Type type) {
return "TensorIterator";
case Convert:
return "Convert";
case Normalize:
return "Normalize";
case NormalizeL2:
return "NormalizeL2";
case ScatterUpdate:
return "ScatterUpdate";
case ScatterElementsUpdate:
@ -183,30 +179,28 @@ static std::string NameFromType(Type type) {
return "ScatterNDUpdate";
case Interpolate:
return "Interpolate";
case ReduceAnd:
return "ReduceAnd";
case ReduceL1:
return "ReduceL1";
case ReduceL2:
return "ReduceL2";
case ReduceLogSum:
return "ReduceLogSum";
case ReduceLogSumExp:
return "ReduceLogSumExp";
case ReduceMax:
return "ReduceMax";
case ReduceMean:
return "ReduceMean";
case ReduceMin:
return "ReduceMin";
case ReduceOr:
return "ReduceOr";
case ReduceProd:
return "ReduceProd";
case ReduceSum:
return "ReduceSum";
case ReduceSumSquare:
return "ReduceSumSquare";
case Reduce:
return "Reduce";
case Broadcast:
return "Broadcast";
case EmbeddingSegmentsSum:
return "EmbeddingSegmentsSum";
case EmbeddingBagPackedSum:
return "EmbeddingBagPackedSum";
case EmbeddingBagOffsetsSum:
return "EmbeddingBagPackedSum";
case Gather:
return "Gather";
case GatherElements:
return "GatherElements";
case GatherND:
return "GatherND";
case OneHot:
return "OneHot";
case RegionYolo:
return "RegionYolo";
case Select:
return "Select";
case Roll:
return "Roll";
default:
@ -269,6 +263,31 @@ private:
std::vector<mkldnn::memory::format_tag> outputLayouts;
};
class DataConfigurator {
public:
DataConfigurator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType, InferenceEngine::Precision prc, const InferenceEngine::SizeVector& shape,
bool constant = false, int inplace = -1) :
tensorDescCreator(getTensorDescCreator(tensorDescType)), prc(prc), shape(shape), constant(constant), inplace(inplace) {}
DataConfigurator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType, InferenceEngine::Precision prc = InferenceEngine::Precision::UNSPECIFIED,
bool constant = false, int inplace = -1) :
tensorDescCreator(getTensorDescCreator(tensorDescType)), prc(prc), shape({}), constant(constant), inplace(inplace) {}
const MKLDNNPlugin::TensorDescCreator::CreatorConstPtr tensorDescCreator;
const InferenceEngine::Precision prc = InferenceEngine::Precision::UNSPECIFIED;
const InferenceEngine::SizeVector shape;
const bool constant = false;
const int inplace = -1;
private:
static MKLDNNPlugin::TensorDescCreator::CreatorConstPtr getTensorDescCreator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType) {
auto& creators = MKLDNNPlugin::TensorDescCreator::getCommonCreators();
if (creators.find(tensorDescType) == creators.end()) {
IE_THROW() << "Cannot find tensor descriptor creator";
}
return creators.at(tensorDescType);
}
};
class MKLDNNNode : public InferenceEngine::details::no_copy {
public:
template<typename T, int N>
@ -343,8 +362,35 @@ public:
bool isFusedWith(Type type) const;
void fuseWith(const MKLDNNNodePtr &fuse) {
fusedWith.push_back(fuse);
void addFusedNode(const MKLDNNNodePtr &fusingNode) {
fusedWith.push_back(fusingNode);
}
virtual void fuseInto(MKLDNNNodePtr& parentNode) {
// The graph supports fusing only consecutive nodes, and some graph logic needs to know through which input port a node was fused into its parent.
for (int i = 0; i < getParentEdges().size(); i++) {
if (getParentEdgesAtPort(i)[0]->getParent().get() == parentNode.get()) {
setFusingPort(i);
break;
}
}
auto parentFusedNodes = parentNode->getFusedWith();
if (getFusingPort() < 0 && !parentFusedNodes.empty()) {
for (int i = 0; i < getParentEdges().size(); i++) {
if (getParentEdgesAtPort(i)[0]->getParent().get() == parentFusedNodes[parentFusedNodes.size() - 1].get()) {
setFusingPort(i);
break;
}
}
}
if (getFusingPort() == -1) {
IE_THROW() << "Cannot determine fusing port between nodes: " << parentNode->getName() << " and " << getName();
}
parentNode->addFusedNode(getParentEdgesAtPort(getFusingPort())[0]->getChild());
parentNode->addOriginalLayer(getOriginalLayers());
}
void clearFusedWith() {
@ -355,8 +401,6 @@ public:
mergedWith.push_back(merge);
}
void addOriginalLayer(const InferenceEngine::CNNLayerPtr &layer);
const std::vector <MKLDNNNodePtr> &getMergeWith() {
return mergedWith;
}
@ -365,10 +409,20 @@ public:
return fusedWith;
}
int getFusingPort() const {
return fusingPort;
}
void setFusingPort(int fusingPort) {
this->fusingPort = fusingPort;
}
const std::string getName() const {
return name;
}
void addOriginalLayer(const std::string& layerName);
const std::string getOriginalLayers() const {
return originalLayers;
}
@ -377,10 +431,6 @@ public:
return type;
}
const InferenceEngine::CNNLayerPtr &getCnnLayer() const {
return cnnLayer;
}
const std::vector<PrimitiveDescInfo>& getSupportedPrimitiveDescriptors() const {
return supportedPrimitiveDescriptors;
}
@ -493,15 +543,6 @@ public:
IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
}
static void invertVectorCopyUtoI(const InferenceEngine::PropertyVector<unsigned int>& src, std::vector<ptrdiff_t>& dst) {
dst.clear();
for (int i = 1; i <= src.size(); i++) {
dst.push_back(static_cast<ptrdiff_t>(src[src.size() - i]));
}
}
std::vector<MKLDNNDims> inDims;
int getExecIndex() const {
return execIndex;
}
@ -510,6 +551,10 @@ public:
return typeStr;
}
void setTypeStr(const std::string &typeStr) {
this->typeStr = typeStr;
}
virtual size_t descInputNumbers(MKLDNNDescriptor desc) {
return desc.inputNumbers();
}
@ -532,9 +577,72 @@ public:
*/
virtual InferenceEngine::Precision getRuntimePrecision() const;
const std::vector<InferenceEngine::Precision>& getOriginalInputPrecisions() const {
return originalInputPrecisions;
}
const std::vector<InferenceEngine::Precision>& getOriginalOutputPrecisions() const {
return originalOutputPrecisions;
}
InferenceEngine::Precision getOriginalInputPrecisionAtPort(size_t port) const {
if (originalInputPrecisions.size() <= port) {
IE_THROW() << "Incorrect input port number for node " << getName();
}
return originalInputPrecisions[port];
}
InferenceEngine::Precision getOriginalOutputPrecisionAtPort(size_t port) const {
if (originalOutputPrecisions.size() <= port) {
IE_THROW() << "Incorrect output port number for node " << getName();
}
return originalOutputPrecisions[port];
}
void setOriginalInputPrecisionAtPort(size_t port, InferenceEngine::Precision precision) {
if (originalInputPrecisions.size() <= port) {
IE_THROW() << "Incorrect input port number for node " << getName();
}
originalInputPrecisions[port] = precision;
}
void setOriginalOutputPrecisionAtPort(size_t port, InferenceEngine::Precision precision) {
if (originalOutputPrecisions.size() <= port) {
IE_THROW() << "Incorrect output port number for node " << getName();
}
originalOutputPrecisions[port] = precision;
}
void addOriginalInputPrecision(InferenceEngine::Precision precision) {
originalInputPrecisions.push_back(precision);
}
void addOriginalOutputPrecision(InferenceEngine::Precision precision) {
originalOutputPrecisions.push_back(precision);
}
size_t getOriginalInputsNumber() const {
return originalInputPrecisions.size();
}
size_t getOriginalOutputsNumber() const {
return originalOutputPrecisions.size();
}
Algorithm getAlgorithm() const {
return algorithm;
}
void setAlgorithm(Algorithm alg) {
algorithm = alg;
}
virtual bool canFuse(const MKLDNNNodePtr& node) const {
return false;
}
protected:
// TODO: It is necessary only in order to avoid modifications of cnnLayers and original topology
std::vector<MKLDNNDims> outDims;
bool canBePerformedAsScaleShift(const MKLDNNNode *parentNode = nullptr) const;
bool canFuseSimpleOperation(const MKLDNNNodePtr& node) const;
void setType(Type type) {
this->type = type;
}
@ -559,6 +667,9 @@ protected:
GetPrimitiveMemoryFormatFunc;
std::vector<GetPrimitiveMemoryFormatFunc> internalBlobDesc;
std::vector<MKLDNNDims> inDims;
std::vector<MKLDNNDims> outDims;
std::vector <MKLDNNNodePtr> fusedWith;
std::vector <MKLDNNNodePtr> mergedWith;
std::vector <impl_desc_type> implPriorities;
@ -567,7 +678,8 @@ protected:
std::string originalLayers; // contains names of the original layers separated by comma
MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache);
MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache);
MKLDNNNode(const std::string& type, const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache);
int selectedPrimitiveDescriptorIndex = -1;
bool permanent = false;
@ -589,6 +701,8 @@ protected:
InferenceEngine::Blob::Ptr ext_scales;
MKLDNNWeightsSharing::Ptr weightCache;
Algorithm algorithm = Algorithm::Undefined;
friend class MKLDNNEdge;
friend class MKLDNNGraph;
friend class MKLDNNGraphOptimizer;
@ -604,8 +718,6 @@ protected:
virtual std::vector<mkldnn::memory::format_tag> getAvailableFormatsForDims(const MKLDNNDims& dims) const;
int batchToProcess();
InferenceEngine::Blob::Ptr createInternalBlob(InferenceEngine::SizeVector dims, bool weights, bool is_grouped = false);
InferenceEngine::Layout getWeightsLayoutByDims(InferenceEngine::SizeVector dims, bool isGrouped);
/**
@ -620,15 +732,62 @@ protected:
*/
virtual std::vector<InferenceEngine::Precision> getOutputPrecisions() const;
void addSupportedPrimDesc(const std::vector<DataConfigurator>& inDataConfigurators,
const std::vector<DataConfigurator>& outDataConfigurators,
impl_desc_type implType,
bool dynBatchSupport = false) {
auto fill_port = [] (const DataConfigurator& dataConfigurator, const InferenceEngine::SizeVector& dims,
InferenceEngine::Precision prc, std::vector<InferenceEngine::DataConfig>& port) -> bool {
// To simplify node initialization logic we simply don't add a config in case the target shape is not supported by the tensorDescCreator.
// This should be suitable for the majority of scenarios since almost all nodes add the `ncsp` tensorDescCreator which supports any shape rank.
if (dims.size() < dataConfigurator.tensorDescCreator->getMinimalRank())
return false;
InferenceEngine::DataConfig dataConfig;
dataConfig.inPlace = dataConfigurator.inplace;
dataConfig.constant = dataConfigurator.constant;
dataConfig.desc = dataConfigurator.tensorDescCreator->createDesc(prc, dims);
port.push_back(dataConfig);
return true;
};
InferenceEngine::LayerConfig config;
for (size_t i = 0; i < inDataConfigurators.size(); i++) {
auto dims = inDataConfigurators[i].shape.empty() ? getParentEdgesAtPort(i)[0]->getDims().ToSizeVector() : inDataConfigurators[i].shape;
auto prc = inDataConfigurators[i].prc == InferenceEngine::Precision::UNSPECIFIED ? getOriginalInputPrecisionAtPort(i)
: inDataConfigurators[i].prc;
if (!fill_port(inDataConfigurators[i], dims, prc, config.inConfs))
return;
}
for (size_t i = 0; i < outDataConfigurators.size(); i++) {
auto dims = outDataConfigurators[i].shape.empty() ? getChildEdgesAtPort(i)[0]->getDims().ToSizeVector() : outDataConfigurators[i].shape;
auto prc = outDataConfigurators[i].prc == InferenceEngine::Precision::UNSPECIFIED ? getOriginalOutputPrecisionAtPort(i)
: outDataConfigurators[i].prc;
if (!fill_port(outDataConfigurators[i], dims, prc, config.outConfs))
return;
}
config.dynBatchSupport = dynBatchSupport;
supportedPrimitiveDescriptors.push_back({config, implType});
}
private:
std::vector<MKLDNNEdgeWeakPtr> parentEdges;
std::vector<MKLDNNEdgeWeakPtr> childEdges;
InferenceEngine::CNNLayerPtr cnnLayer;
std::vector<InferenceEngine::Precision> originalInputPrecisions;
std::vector<InferenceEngine::Precision> originalOutputPrecisions;
int fusingPort;
mkldnn::engine engine;
std::string name;
const std::string typeStr;
std::string typeStr;
Type type;
int execIndex = -1;
@ -660,21 +819,21 @@ private:
};
class MKLDNNNode::NodesFactory : public openvino::cc::Factory<Type,
MKLDNNNode*(const InferenceEngine::CNNLayerPtr&,
MKLDNNNode*(const std::shared_ptr<ngraph::Node>& op,
const mkldnn::engine &,
MKLDNNWeightsSharing::Ptr &)> {
public:
NodesFactory()
: Factory("NodesFactory") {}
MKLDNNNode* create(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng,
MKLDNNNode* create(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache);
};
template<typename MKLDNNNodeType>
struct MKLDNNNodeImpl : public MKLDNNNodeType {
MKLDNNNodeImpl(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNodeType(layer, eng, cache) {
MKLDNNNodeImpl(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNodeType(op, eng, cache) {
MKLDNNNodeType::perfCounters().template buildClassCounters<MKLDNNNodeType>(NameFromType(MKLDNNNodeType::getType()));
}
};
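For illustration, a hedged usage sketch of the new addSupportedPrimDesc / DataConfigurator helpers; the node class and its initSupportedPrimitiveDescriptors body below are hypothetical, only the helper API itself comes from this diff:
// Hypothetical node: one plain (ncsp) FP32 input and output, reference implementation.
void MKLDNNFooNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;
    addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, InferenceEngine::Precision::FP32}},
                         {{TensorDescCreatorTypes::ncsp, InferenceEngine::Precision::FP32}},
                         impl_desc_type::ref_any,
                         /*dynBatchSupport=*/false);
}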

View File

@ -8,7 +8,6 @@
#include "mkldnn_weights_cache.hpp"
#include "mkldnn_itt.h"
#include <legacy/net_pass.h>
#include <threading/ie_executor_manager.hpp>
#include <memory>
#include <ie_plugin_config.hpp>
@ -16,19 +15,8 @@
#include <tuple>
#include <ie_system_conf.h>
#include <nodes/list.hpp>
#include <legacy/ie_util_internal.hpp>
#include <legacy/graph_transformer.h>
#include <ie_ngraph_utils.hpp>
#include <legacy/convert_function_to_cnn_network.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/convert_prior_to_ie_prior.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/reshape_fully_connected.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/convert_nms_5_to_legacy.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/convert_interpolate_to_interp_or_resample.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.hpp>
#include <legacy/ngraph_ops/fully_connected.hpp>
#include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
#include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
@ -62,6 +50,8 @@
#include <transformations/op_conversions/log_softmax_decomposition.hpp>
#include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
#include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp>
#include <transformations/op_conversions/convert_previous_nms_to_nms_5.hpp>
#include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp>
#include <transformations/convert_precision.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>
@ -86,8 +76,13 @@
#include <low_precision/multiply_to_group_convolution.hpp>
#include <low_precision/network_helper.hpp>
#include <ie_algorithm.hpp>
#include <ngraph/pass/visualize_tree.hpp>
#include "nodes/mkldnn_mvn_node.h"
#include "nodes/mkldnn_quantize_node.h"
#include "nodes/mkldnn_fake_quantize_node.h"
#include "ngraph_transformations/convert_to_cpu_specific_opset.hpp"
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
# ifdef _WIN32
@ -127,8 +122,6 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
}
// WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass
manager.register_pass<ngraph::pass::ConvertPriorBox>();
manager.register_pass<ngraph::pass::ConvertNMS5ToLegacyMatcher>();
manager.register_pass<ngraph::pass::CommonOptimizations>();
manager.register_pass<ngraph::pass::ConvertRNNSequenceToTensorIterator>();
manager.register_pass<ngraph::pass::ConvertGRUSequenceToTensorIterator>();
@ -141,6 +134,11 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
manager.register_pass<ngraph::pass::LSTMCellDecomposition>();
manager.register_pass<ngraph::pass::GRUCellDecomposition>();
manager.register_pass<ngraph::pass::RNNCellDecomposition>();
manager.register_pass<ngraph::pass::ConvertNMS1ToNMS5>();
manager.register_pass<ngraph::pass::ConvertNMS3ToNMS5>();
manager.register_pass<ngraph::pass::ConvertNMS4ToNMS5>();
manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>();
manager.register_pass<ngraph::pass::ConstantFolding>();
std::vector<std::pair<ngraph::element::Type, ngraph::element::Type>> convert_precision_list{
{ngraph::element::i64, ngraph::element::i32},
@ -155,6 +153,10 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
{ngraph::element::u4, ngraph::element::u8},
};
// In case BF16 is not supported by the target CPU we explicitly convert it to FP32
if (!with_cpu_x86_avx512_core())
convert_precision_list.push_back({ngraph::element::bf16, ngraph::element::f32});
for (auto &precision : convert_precision_list) {
manager.register_pass<ngraph::pass::ConvertPrecision>(precision.first, precision.second);
}
@ -171,12 +173,6 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
node->input_value(0).get_shape().size() == node->get_output_shape(0).size();
});
// Disable FC reshaping for 3D case
pass_config->set_callback<ngraph::pass::ReshapeFullyConnected>(
[](const_node_ptr &node) -> bool {
return node->input_value(0).get_shape().size() == 3ul;
});
pass_config->set_callback<ngraph::pass::ConvertBatchToSpace,
ngraph::pass::ConvertSpaceToBatch>(
[](const_node_ptr &node) -> bool {
@ -260,7 +256,8 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
pass_config->set_callback<ngraph::pass::MVN6Decomposition>(
[](const_node_ptr &node) -> bool {
return MKLDNNMVNNode::checkAxesSuitability(node);
std::string errorMessage;
return MKLDNNMVNNode::isSupportedOperation(node, errorMessage);
});
pass_config->set_callback<ngraph::pass::SoftmaxFusion>(
@ -279,7 +276,6 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
pass_config->disable<ngraph::pass::HSigmoidDecomposition>();
pass_config->disable<ngraph::pass::ConvertMod>();
pass_config->disable<ngraph::pass::LogSoftmaxDecomposition>();
pass_config->disable<ngraph::pass::ConvertInterpolateToInterpOrResampleMatcher>();
pass_config->disable<ngraph::pass::WeightsDequantizeToFakeQuantize>();
pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
@ -325,57 +321,35 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
transformer.transform(nGraphFunc);
}
bool has_fake_quantize = ::ngraph::op::util::has_op_with_type<ngraph::op::FakeQuantize>(nGraphFunc);
ngraph::pass::Manager postLPTPassManager;
postLPTPassManager.register_pass<ngraph::pass::FakeQuantizeDecomposition>();
postLPTPassManager.register_pass<ngraph::pass::UnrollTensorIterator>();
ngraph::pass::Manager legacyManager;
legacyManager.register_pass<ngraph::pass::FakeQuantizeDecomposition>();
legacyManager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
legacyManager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
// not legacy actually, but it should be the last transformation in the transformation pipeline
legacyManager.register_pass<ngraph::pass::UnrollTensorIterator>();
auto legacyPassConfig = legacyManager.get_pass_config();
legacyPassConfig->disable<ngraph::pass::ConvertStridedSliceToCropMatcher>();
legacyPassConfig->set_callback<ngraph::pass::FakeQuantizeDecomposition>([](const_node_ptr &node) -> bool {
return !MKLDNNQuantizeNode::isNeedToDecompose(node);
postLPTPassManager.get_pass_config()->set_callback<ngraph::pass::FakeQuantizeDecomposition>([](const_node_ptr &node) -> bool {
std::string errMsg;
return MKLDNNFakeQuantizeNode::isSupportedOperation(node, errMsg);
});
legacyPassConfig->set_callback<ngraph::pass::AddMultiplyFusion>([](const_node_ptr &node) -> bool {
postLPTPassManager.get_pass_config()->set_callback<ngraph::pass::AddMultiplyFusion>([](const_node_ptr &node) -> bool {
if (auto mul_op = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node)) {
auto add_op = std::dynamic_pointer_cast<const ngraph::opset1::Add>(mul_op->get_input_node_shared_ptr(0));
auto constant = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(mul_op->get_input_node_shared_ptr(1));
bool is_dequantization = mul_op->get_rt_info().count("DEQUANTIZATION") != 0;
if (add_op && constant && is_dequantization) {
return ngraph::is_type<ngraph::opset1::Convolution>(add_op->get_input_node_shared_ptr(0)) ||
ngraph::is_type<ngraph::opset1::GroupConvolution>(add_op->get_input_node_shared_ptr(0)) ||
ngraph::is_type<ngraph::opset1::MatMul>(add_op->get_input_node_shared_ptr(0));
ngraph::is_type<ngraph::opset1::GroupConvolution>(add_op->get_input_node_shared_ptr(0)) ||
ngraph::is_type<ngraph::opset1::MatMul>(add_op->get_input_node_shared_ptr(0));
}
}
return false;
});
legacyPassConfig->set_callback<ngraph::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
postLPTPassManager.get_pass_config()->set_callback<ngraph::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
// The UnrollTI transformation is disabled by default and is turned on by the LowLatency transformation
return node->get_rt_info().count("UNROLL_TI") == 0;
});
legacyManager.run_passes(nGraphFunc);
postLPTPassManager.run_passes(nGraphFunc);
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork");
clonedNetwork = CNNNetwork(InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, clonedNetwork, has_fake_quantize));
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "ConvertIOPrecision");
// WA: after conversion to CNNNetwork user precision can redefine input/output precisions
// so we need to apply additional precision conversion but only for inputs and outputs
for (auto & precision : convert_precision_list) {
NetPass::ConvertIOPrecision(clonedNetwork,
InferenceEngine::details::convertPrecision(precision.first),
InferenceEngine::details::convertPrecision(precision.second));
}
ConvertToCPUSpecificOpset(nGraphFunc);
}
InferenceEngine::ExecutableNetworkInternal::Ptr
@ -411,34 +385,9 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
conf.batchLimit = static_cast<int>(network.getBatchSize());
}
CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(network);
CNNNetwork clonedNetwork = InferenceEngine::details::cloneNetwork(network);
bool is_transformed = false;
if (clonedNetwork.getFunction()) {
Transformation(clonedNetwork, conf);
is_transformed = true;
}
IE_SUPPRESS_DEPRECATED_START
auto icnnnet = static_cast<ICNNNetwork::Ptr>(clonedNetwork);
IE_SUPPRESS_DEPRECATED_END
auto implNetwork = std::dynamic_pointer_cast<details::CNNNetworkImpl>(icnnnet);
if (implNetwork) {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "CNNNet_based_ConstFolding");
// valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network
ConstTransformer transformator(implNetwork.get());
transformator.fullTrim();
if (!is_transformed) {
InferenceEngine::CNNNetwork implNetworkWrapper(implNetwork);
NetPass::ConvertPrecision(implNetworkWrapper, Precision::I64, Precision::I32);
NetPass::ConvertPrecision(implNetworkWrapper, Precision::U64, Precision::I32);
NetPass::ConvertPrecision(implNetworkWrapper, Precision::U32, Precision::I32);
NetPass::ConvertPrecision(implNetworkWrapper, Precision::FP64, Precision::FP32);
NetPass::ConvertPrecision(implNetworkWrapper, Precision::FP16, Precision::FP32);
NetPass::ConvertPrecision(implNetworkWrapper, Precision::BOOL, Precision::U8);
NetPass::ConvertPrecision(implNetworkWrapper, Precision::U16, Precision::I32);
NetPass::ConvertPrecision(implNetworkWrapper, Precision::I16, Precision::I32);
}
}
Transformation(clonedNetwork, conf);
return std::make_shared<MKLDNNExecNetwork>(clonedNetwork, conf, extensionManager, weightsSharing);
}
@ -540,6 +489,7 @@ void Engine::AddExtension(InferenceEngine::IExtensionPtr extension) {
QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::map<std::string, std::string>& config) const {
QueryNetworkResult res;
MKLDNNWeightsSharing::Ptr fake_w_cache;
auto function = network.getFunction();
if (function != nullptr) {
@ -556,21 +506,22 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma
conf.batchLimit = static_cast<int>(network.getBatchSize());
}
auto clonedNetwork = InferenceEngine::cloneNetwork(network);
auto clonedNetwork = InferenceEngine::details::cloneNetwork(network);
auto ops = clonedNetwork.getFunction()->get_ordered_ops();
Transformation(clonedNetwork, conf);
std::unordered_set<std::string> supported;
std::unordered_set<std::string> unsupported;
for (details::CNNNetworkIterator itLayer{clonedNetwork}; itLayer != details::CNNNetworkIterator(); itLayer++) {
for (auto op : ops) {
auto layerIsSupported = [&] {
std::unique_ptr<MKLDNNNode> ptr;
try {
ptr.reset(MKLDNNNode::factory().create(*itLayer, {mkldnn::engine::kind::cpu, 0}, extensionManager, fake_w_cache));
ptr.reset(MKLDNNNode::factory().create(op, {mkldnn::engine::kind::cpu, 0}, extensionManager, fake_w_cache));
} catch (InferenceEngine::Exception&) {
return false;
return false;
}
return true;
} ();
for (auto&& fusedLayerName : ngraph::getFusedNamesVector((*itLayer)->getNode())) {
for (auto&& fusedLayerName : ngraph::getFusedNamesVector(op)) {
if (InferenceEngine::details::contains(originalOps, fusedLayerName)) {
if (layerIsSupported) {
supported.emplace(fusedLayerName);
@ -614,17 +565,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma
res.supportedLayersMap.emplace(layerName, GetName());
}
} else {
details::CNNNetworkIterator i(network);
while (i != details::CNNNetworkIterator()) {
try {
mkldnn::engine eng(mkldnn::engine(mkldnn::engine::kind::cpu, 0));
// if we can create and have not thrown exception, then layer is supported
std::unique_ptr <MKLDNNNode>(MKLDNNNode::factory().create(*i, eng, extensionManager, fake_w_cache));
res.supportedLayersMap.insert({ (*i)->name, GetName() });
} catch (InferenceEngine::Exception&) {
}
i++;
}
IE_THROW() << "CPU plug-in does not support models that are not ngraph-based!";
}
return res;

View File

@ -0,0 +1,98 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "convert_broadcast_to_tiles.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ConvertBroadcastToTiles, "ConvertBroadcastToTiles", 0);
MKLDNNPlugin::ConvertBroadcastToTiles::ConvertBroadcastToTiles() {
auto broadcast = ngraph::pattern::wrap_type<ngraph::opset1::Broadcast>();
ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) {
auto broadcast = std::dynamic_pointer_cast<ngraph::opset1::Broadcast>(m.get_match_root());
if (!broadcast) {
return false;
}
auto data_node = broadcast->input_value(0);
if (data_node.get_partial_shape().is_dynamic()) {
return false;
}
auto shape_node = std::dynamic_pointer_cast<ngraph::opset1::Constant>(broadcast->input_value(1).get_node_shared_ptr());
auto axes_node = std::dynamic_pointer_cast<ngraph::opset1::Constant>(broadcast->input_value(2).get_node_shared_ptr());
if (!shape_node || !axes_node) return false;
auto output_shape = shape_node->cast_vector<int64_t>();
auto input_shape = data_node.get_shape();
int64_t cur_dim_id = output_shape.size() - 1;
size_t dims_count = output_shape.size();
auto last_node = data_node;
ngraph::NodeVector new_ops;
// If input_shape and output_shape differ, insert a Reshape to align the shapes
if (input_shape.size() != dims_count) {
if (input_shape.size() > dims_count) {
return false;
}
ngraph::Shape shape;
auto broadcast_type = broadcast->get_broadcast_spec();
if (broadcast_type == ngraph::op::AutoBroadcastType::NUMPY) {
shape = input_shape;
for (size_t i = 0; i < (dims_count - input_shape.size()); ++i) {
shape.insert(shape.begin(), 1);
}
} else if (broadcast_type == ngraph::op::AutoBroadcastType::NONE) {
auto axes = axes_node->cast_vector<int64_t>();
shape.assign(output_shape.size(), 1);
for (size_t i = 0; i < input_shape.size(); ++i) {
shape[axes[i]] = input_shape[i];
}
} else {
return false;
}
auto shape_const = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{shape.size()}, shape);
auto reshape = std::make_shared<ngraph::opset1::Reshape>(data_node, shape_const, true);
new_ops.push_back(reshape);
last_node = reshape;
input_shape = shape;
}
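// For example, broadcasting data of shape {3, 1} to output shape {2, 3, 4} with NUMPY rules
// first reshapes the data to {1, 3, 1} above, and the loop below derives Tile repeats {2, 1, 4},
// so Tile({1, 3, 1}, {2, 1, 4}) produces the requested {2, 3, 4} output.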
std::vector<int64_t> dims(dims_count, 1);
auto input_shape_it = input_shape.rbegin();
auto output_shape_it = output_shape.rbegin();
while (output_shape_it != output_shape.rend() && input_shape_it != input_shape.rend()) {
int64_t in_dim = *input_shape_it, out_dim = *output_shape_it;
if (in_dim != out_dim) {
if (in_dim != 1) {
return false;
}
dims[cur_dim_id] = out_dim;
}
--cur_dim_id;
++output_shape_it;
++input_shape_it;
}
auto const_node = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{dims_count}, dims);
auto tile = register_new_node<ngraph::opset1::Tile>(last_node, const_node);
new_ops.push_back(tile);
tile->set_friendly_name(broadcast->get_friendly_name());
ngraph::copy_runtime_info(broadcast, new_ops);
ngraph::replace_node(broadcast, tile);
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(broadcast, "ConvertBroadcastToTiles");
this->register_matcher(m, callback);
}

View File

@ -0,0 +1,17 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace MKLDNNPlugin {
class ConvertBroadcastToTiles: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertBroadcastToTiles();
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,251 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "convert_matmul_to_fc_or_gemm.hpp"
#include "op/fully_connected.hpp"
#include <numeric>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <transformations/utils/utils.hpp>
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ConvertMatMulToFC, "ConvertMatMulToFC", 0);
MKLDNNPlugin::ConvertMatMulToFC::ConvertMatMulToFC() {
auto matmul = ngraph::pattern::wrap_type<ngraph::opset1::MatMul>({ngraph::pattern::any_input(ngraph::pattern::has_static_shape()),
ngraph::pattern::any_input(ngraph::pattern::has_static_shape())},
ngraph::pattern::has_static_shape());
ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) {
auto matmul = std::dynamic_pointer_cast<ngraph::opset1::MatMul>(m.get_match_root());
if (!matmul) {
return false;
}
auto input_a = matmul->input(0).get_source_output();
auto input_b = matmul->input(1).get_source_output();
auto shape_a = input_a.get_shape();
auto shape_b = input_b.get_shape();
auto output_shape = matmul->get_shape();
// Transformation to FC is not supported for 1D second input
if (shape_b.size() == 1) {
return false;
}
/*
* The get_aligned_shapes function aligns the two input shapes to the same rank and
* the same batch dimensions (the last two dimensions are not compared).
* It also checks that the batch dimensions are compatible; for example,
* the shapes [2, 32, 64] and [3, 64, 64] will raise an exception.
*/
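// For example, shapes [2, 3, 64] and [64, 64] are aligned to [2, 3, 64] and [2, 64, 64].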
auto get_aligned_shapes = [shape_a, shape_b, &matmul]() -> std::pair<ngraph::Shape, ngraph::Shape> {
ngraph::Shape shape_a_aligned(shape_a), shape_b_aligned(shape_b);
size_t max_size = std::max(shape_a_aligned.size(), shape_b_aligned.size());
for (size_t i = 0, cnt = max_size - shape_a_aligned.size(); i < cnt; ++i)
shape_a_aligned.insert(shape_a_aligned.begin(), 1);
for (size_t i = 0, cnt = max_size - shape_b_aligned.size(); i < cnt; ++i)
shape_b_aligned.insert(shape_b_aligned.begin(), 1);
if (matmul->get_transpose_a() && shape_a.size() != 1) {
std::swap(*(shape_a_aligned.end() - 1), *(shape_a_aligned.end() - 2));
}
if (matmul->get_transpose_b()) {
std::swap(*(shape_b_aligned.end() - 1), *(shape_b_aligned.end() - 2));
}
for (size_t i = 0; i < max_size - 2; ++i) {
if (shape_a_aligned[i] != shape_b_aligned[i] && shape_a_aligned[i] > 1 && shape_b_aligned[i] > 1) {
std::ostringstream stream;
stream << "Shapes can't be aligned: " << shape_a_aligned << " " << shape_b_aligned;
throw ngraph::ngraph_error(stream.str());
}
size_t max_value = std::max(shape_a_aligned[i], shape_b_aligned[i]);
shape_a_aligned[i] = shape_b_aligned[i] = max_value;
}
return {shape_a_aligned, shape_b_aligned};
};
/*
* The create_transpose function returns a Transpose operation that replaces the transpose_a or
* transpose_b attribute with an explicit operation. The transpose order has the same length as
* the output shape of the given node and is filled with an increasing sequence starting from 0,
* with the last two dimensions swapped. For example, for length = 4 the order is [0, 1, 3, 2],
* which emulates the transpose_a or transpose_b attribute.
*/
auto create_transpose = [this](ngraph::Output<ngraph::Node> node, const std::string& transpose_name) -> std::shared_ptr<ngraph::Node> {
ngraph::Shape output_shape = node.get_node_shared_ptr()->get_shape();
std::vector<size_t> transpose_order(output_shape.size());
std::iota(transpose_order.begin(), transpose_order.end(), 0);
std::swap(*(transpose_order.end() - 1), *(transpose_order.end() - 2));
auto transpose = ngraph::pass::MatcherPass::register_new_node<ngraph::opset1::Transpose>(
node, ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{transpose_order.size()}, transpose_order));
transpose->set_friendly_name(transpose_name);
return transpose;
};
// fc_input_a and fc_input_b are the final inputs that will be passed to the FullyConnected or Gemm operations,
// so whenever a new operation is inserted on top of the MatMul inputs, fc_input_a and fc_input_b
// must be kept up to date.
auto fc_input_a = input_a, fc_input_b = input_b;
// vector of new nGraph operations
ngraph::NodeVector new_ops;
// If the second input is a Constant (or FakeQuantize) operation and its shape, ignoring dimensions equal to 1,
// has rank <= 2, MatMul is replaced with a FullyConnected operation.
// Otherwise MatMul is replaced with Gemm.
if ((std::dynamic_pointer_cast<ngraph::opset1::Constant>(fc_input_b.get_node_shared_ptr()) ||
std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(fc_input_b.get_node_shared_ptr())) &&
std::count_if(shape_b.begin(), shape_b.end(), [](size_t x) { return x != 1; }) <= 2) {
ngraph::Shape shape_a_aligned, shape_b_aligned;
std::tie(shape_a_aligned, shape_b_aligned) = get_aligned_shapes();
if (shape_a_aligned.size() < 2 || shape_b_aligned.size() < 2) {
throw ngraph::ngraph_error("MatMul " + matmul->get_friendly_name() + " shapes are inconsistent.");
}
// Transferring from MatMul representation: [B, I, K] * [B, K, O] = [B, I, O]
// to FullyConnected representation: [I, K] * [K, O] = [I, O]
size_t K = *(shape_a_aligned.end() - 1);
ngraph::Shape B(shape_a_aligned.begin(), shape_a_aligned.end() - 2);
// Weights normalization
if (!matmul->get_transpose_b()) {
fc_input_b = create_transpose(fc_input_b, matmul->get_friendly_name() + "/transpose_b");
new_ops.push_back(fc_input_b.get_node_shared_ptr());
}
if (shape_b.size() != 2) {
auto reshape_shape =
ngraph::opset1::Constant::create<int64_t>(ngraph::element::i64, ngraph::Shape{2}, {-1ll, static_cast<int64_t>(K)});
fc_input_b = std::make_shared<ngraph::opset1::Reshape>(fc_input_b, reshape_shape, true);
new_ops.push_back(fc_input_b.get_node_shared_ptr());
}
// Input normalization
if (matmul->get_transpose_a() && shape_a.size() != 1) {
fc_input_a = create_transpose(fc_input_a, matmul->get_friendly_name() + "/transpose_a");
new_ops.push_back(fc_input_a.get_node_shared_ptr());
}
// Create FullyConnected
auto fc = std::make_shared<MKLDNNPlugin::FullyConnectedNode>(fc_input_a, fc_input_b, output_shape, matmul->output(0).get_element_type());
fc->set_friendly_name(matmul->get_friendly_name());
new_ops.push_back(fc);
ngraph::copy_runtime_info(matmul, new_ops);
ngraph::replace_node(matmul, fc);
return true;
}
return false;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(matmul, "ConvertMatMulToFC");
this->register_matcher(m, callback);
}
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ConvertMatMulToGemm, "ConvertMatMulToGemm", 0);
MKLDNNPlugin::ConvertMatMulToGemm::ConvertMatMulToGemm() {
auto matmul = ngraph::pattern::wrap_type<ngraph::opset1::MatMul>({ngraph::pattern::any_input(ngraph::pattern::has_static_shape()),
ngraph::pattern::any_input(ngraph::pattern::has_static_shape())},
ngraph::pattern::has_static_shape());
ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) {
auto matmul = std::dynamic_pointer_cast<ngraph::opset1::MatMul>(m.get_match_root());
if (!matmul) {
return false;
}
auto input_a = matmul->input(0).get_source_output();
auto input_b = matmul->input(1).get_source_output();
auto shape_a = input_a.get_shape();
auto shape_b = input_b.get_shape();
auto output_shape = matmul->get_shape();
auto fc_input_a = input_a, fc_input_b = input_b;
ngraph::NodeVector new_ops;
if (shape_a.size() == 1) {
// If the first input is a 1D tensor, it is unsqueezed to a 2D tensor (row vector)
// by adding an axis with size 1 at ROW_INDEX_DIM, to the left of the shape.
// For example, {S} is reshaped to {1, S}.
fc_input_a = std::make_shared<ngraph::opset1::Unsqueeze>(fc_input_a,
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0}));
shape_a = fc_input_a.get_shape();
new_ops.push_back(fc_input_a.get_node_shared_ptr());
// For 1D inputs transpose flag is expected to always act like `false`
matmul->set_transpose_a(false);
}
if (shape_b.size() == 1) {
// If the second input is a 1D tensor, it is unsqueezed to a 2D tensor (column vector)
// by adding an axis with size 1 at COL_INDEX_DIM, to the right of the shape.
// For example, {S} is reshaped to {S, 1}.
fc_input_b = std::make_shared<ngraph::opset1::Unsqueeze>(fc_input_b,
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1}));
shape_b = fc_input_b.get_shape();
new_ops.push_back(fc_input_b.get_node_shared_ptr());
// For 1D inputs transpose flag is expected to always act like `false`
matmul->set_transpose_b(false);
}
// WA for IE: Gemm must have inputs of the same rank.
// If the ranks of the input arguments are still different,
// the smaller tensor is unsqueezed from the left side of the shape
// by the necessary number of axes so that both shapes have the same rank.
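// For example, with shape_a = {5, 3, 4} and shape_b = {4, 6}, the second input is reshaped
// to {1, 4, 6} so that both Gemm inputs have rank 3.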
if (shape_a.size() < shape_b.size()) {
// Reshape first input (fc_input_a)
ngraph::Shape reshape_shape(shape_b.size() - shape_a.size(), 1);
reshape_shape.insert(reshape_shape.end(), shape_a.begin(), shape_a.end());
fc_input_a = ngraph::op::util::reshapeTo(fc_input_a, reshape_shape);
new_ops.push_back(fc_input_a.get_node_shared_ptr());
} else if (shape_b.size() < shape_a.size()) {
// Reshape second input (fc_input_b)
ngraph::Shape reshape_shape(shape_a.size() - shape_b.size(), 1);
reshape_shape.insert(reshape_shape.end(), shape_b.begin(), shape_b.end());
fc_input_b = ngraph::op::util::reshapeTo(fc_input_b, reshape_shape);
new_ops.push_back(fc_input_b.get_node_shared_ptr());
}
auto gemm = matmul->copy_with_new_inputs({ fc_input_a, fc_input_b });
new_ops.push_back(gemm);
if (gemm->get_shape() != output_shape) {
// This case is possible when one of the inputs is 1D (which is not supported by the GEMM operation),
// so an additional Reshape operation is inserted to preserve the output shape.
std::shared_ptr<ngraph::Node> reshape_output;
if (output_shape.size() == 0) {
std::vector<int64_t> dim_indices(gemm->get_shape().size());
std::iota(dim_indices.begin(), dim_indices.end(), 0);
reshape_output = std::make_shared<ngraph::opset1::Squeeze>(gemm,
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{dim_indices.size()}, dim_indices));
} else {
reshape_output = ngraph::op::util::reshapeTo(gemm, output_shape);
}
new_ops.push_back(reshape_output);
gemm->set_friendly_name(matmul->get_friendly_name() + "/gemm");
reshape_output->set_friendly_name(matmul->get_friendly_name());
ngraph::copy_runtime_info(matmul, new_ops);
ngraph::replace_node(matmul, reshape_output);
} else {
gemm->set_friendly_name(matmul->get_friendly_name());
ngraph::copy_runtime_info(matmul, new_ops);
ngraph::replace_node(matmul, gemm);
}
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(matmul, "ConvertMatMulToGemm");
this->register_matcher(m, callback);
}

View File

@ -0,0 +1,23 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace MKLDNNPlugin {
class ConvertMatMulToFC: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertMatMulToFC();
};
class ConvertMatMulToGemm: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertMatMulToGemm();
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,95 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "convert_tile_to_seq_tiles.hpp"
#include <memory>
#include <vector>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ConvertTileToSeqTiles, "ConvertTileToSeqTiles", 0);
MKLDNNPlugin::ConvertTileToSeqTiles::ConvertTileToSeqTiles() {
auto tile = ngraph::pattern::wrap_type<ngraph::opset1::Tile>({ngraph::pattern::any_input(ngraph::pattern::has_static_rank()),
ngraph::pattern::wrap_type<ngraph::opset1::Constant>()});
ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher& m) {
auto tile = std::dynamic_pointer_cast<ngraph::opset1::Tile>(m.get_match_root());
if (!tile) {
return false;
}
auto tiles_node = std::dynamic_pointer_cast<ngraph::opset1::Constant>(tile->input_value(1).get_node_shared_ptr());
if (!tiles_node) return false;
auto tiles = tiles_node->cast_vector<int64_t>();
auto input_shape_rank = static_cast<size_t>(tile->get_input_partial_shape(0).rank().get_length());
int64_t cur_dim_id = tiles.size() - 1;
if (static_cast<int64_t>(tiles.size()) != input_shape_rank) return false;
auto last_node = tile->input_value(0);
auto friendly_name = tile->get_friendly_name();
int num_of_tile_dims = 0;
for (auto t : tiles) {
if (t != 1) {
num_of_tile_dims++;
}
}
if (num_of_tile_dims == 0) {
auto outputs = tile->get_output_target_inputs(0);
for (const auto &out : outputs) {
if (std::dynamic_pointer_cast<ngraph::opset1::Result>(out.get_node()->shared_from_this())) {
return false;
}
}
ngraph::replace_node(tile, {last_node});
return true;
}
// A sequence of Tile operations is generated when more than one axis is tiled,
// because the IE Tile operation supports tiling along only one axis.
// To keep op names unique, the IE-specific delimiter ':' is used.
// Original frameworks do not use this delimiter in names, so a newly generated
// name like "original_name:_1" is guaranteed not to clash with existing names.
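// For example, repeats {2, 1, 3} on a 3D input produce two Tile ops: the first with repeats
// {1, 1, 3} and the second with repeats {2, 1, 1}; the last Tile keeps the original friendly name.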
if (num_of_tile_dims > 1) {
friendly_name += ":";
}
ngraph::NodeVector new_ops;
auto tiles_it = tiles.rbegin();
while (tiles_it != tiles.rend()) {
int64_t tile_dim = *tiles_it;
if (tile_dim != 1) {
std::vector<int64_t> dims(input_shape_rank, 1);
dims[cur_dim_id] = tile_dim;
auto const_node = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{input_shape_rank}, dims);
auto new_tile = std::make_shared<ngraph::opset1::Tile>(last_node, const_node);
new_tile->set_friendly_name(friendly_name);
friendly_name += "_" + std::to_string(cur_dim_id);
new_ops.push_back(new_tile);
last_node = new_tile;
}
--cur_dim_id;
++tiles_it;
}
last_node.get_node_shared_ptr()->set_friendly_name(tile->get_friendly_name());
ngraph::copy_runtime_info(tile, new_ops);
ngraph::replace_node(tile, {last_node});
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(tile, "ConvertTileToSeqTiles");
this->register_matcher(m, callback);
}

View File

@ -0,0 +1,17 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace MKLDNNPlugin {
class ConvertTileToSeqTiles: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertTileToSeqTiles();
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,49 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <ngraph/pass/constant_folding.hpp>
#include "convert_matmul_to_fc_or_gemm.hpp"
#include "fc_bias_fusion.hpp"
#include "reshape_fc_fusion.hpp"
#include "reshape_fully_connected.hpp"
#include "convert_broadcast_to_tiles.hpp"
#include "convert_tile_to_seq_tiles.hpp"
#include "reshape_1d_ops.hpp"
#include "convert_to_power_static.hpp"
#include "convert_to_leaky_relu.hpp"
#include "convert_to_swish_cpu.hpp"
#include "reshape_prelu.hpp"
#include "rnn_sequences_optimization.hpp"
namespace MKLDNNPlugin {
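// ConvertToCPUSpecificOpset runs after the common and low precision transformations and lowers
// the remaining ngraph operations to CPU plugin specific ones (FullyConnected, PowerStatic,
// LeakyRelu, SwishCPU, etc.).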
inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphFunc) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::ConstantFolding>();
manager.register_pass<Reshape1DConvolution>();
manager.register_pass<Reshape1DGroupConvolution>();
manager.register_pass<Reshape1DAvgPool>();
manager.register_pass<Reshape1DMaxPool>();
manager.register_pass<ConvertBroadcastToTiles>();
manager.register_pass<ConvertTileToSeqTiles>();
manager.register_pass<ConvertMatMulToFC>();
manager.register_pass<ConvertMatMulToGemm>();
manager.register_pass<FullyConnectedBiasFusion>();
manager.register_pass<ReshapeFullyConnected>();
manager.register_pass<ConvertToPowerStatic>();
manager.register_pass<ConvertToLeakyRelu>();
manager.register_pass<ReshapePRelu>();
manager.register_pass<ConvertToSwishCPU>();
manager.register_pass<OptimizeGRUSequenceTransposes>();
manager.register_pass<OptimizeLSTMSequenceTransposes>();
manager.register_pass<OptimizeRNNSequenceTransposes>();
if (!ngraph::op::util::has_op_with_type<ngraph::op::FakeQuantize>(nGraphFunc)) {
manager.register_pass<ReshapeFullyConnectedFusion>();
}
manager.register_pass<ngraph::pass::ConstantFolding>();
manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
manager.run_passes(nGraphFunc);
}
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,38 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "convert_to_leaky_relu.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include "op/leaky_relu.hpp"
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ConvertToLeakyRelu, "ConvertToLeakyRelu", 0);
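// Replaces a PRelu whose slope input is a single-element Constant with the CPU specific
// LeakyRelu operation; PRelu with a per-channel slope is left untouched.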
MKLDNNPlugin::ConvertToLeakyRelu::ConvertToLeakyRelu() {
auto prelu = ngraph::pattern::wrap_type<ngraph::opset1::PRelu>({ngraph::pattern::any_input(ngraph::pattern::has_static_shape()),
ngraph::pattern::any_input(ngraph::pattern::has_static_shape())});
ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) {
auto prelu = std::dynamic_pointer_cast<ngraph::opset1::PRelu>(m.get_match_root());
if (!prelu) {
return false;
}
auto slopeNode = std::dynamic_pointer_cast<ngraph::opset1::Constant>(prelu->get_input_node_shared_ptr(1));
if (slopeNode != nullptr && ngraph::shape_size(prelu->get_input_shape(1)) == 1) {
const float slope = slopeNode->cast_vector<float>()[0];
const auto leakyRelu = std::make_shared<MKLDNNPlugin::LeakyReluNode>(prelu->input(0).get_source_output(), slope,
prelu->output(0).get_element_type());
leakyRelu->set_friendly_name(prelu->get_friendly_name());
ngraph::copy_runtime_info(prelu, leakyRelu);
ngraph::replace_node(prelu, leakyRelu);
return true;
}
return false;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(prelu, "ConvertToLeakyRelu");
this->register_matcher(m, callback);
}

View File

@ -0,0 +1,17 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace MKLDNNPlugin {
class ConvertToLeakyRelu: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertToLeakyRelu();
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,131 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "convert_to_power_static.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset4.hpp>
#include <ngraph/opsets/opset6.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/pattern/op/or.hpp>
#include "op/power_static.hpp"
#include "op/fully_connected.hpp"
#include "utils/general_utils.h"
int getConstPort(const std::shared_ptr<ngraph::Node> &node) {
const auto const1 = std::dynamic_pointer_cast<ngraph::opset1::Constant>(node->get_input_node_shared_ptr(0));
const auto const2 = std::dynamic_pointer_cast<ngraph::opset1::Constant>(node->get_input_node_shared_ptr(1));
int constPort = -1;
if (const2) {
constPort = 1;
} else if (const1) {
constPort = 0;
}
return constPort;
}
template <class BaseOp>
bool isConvertableToPowerStatic(const std::shared_ptr<BaseOp> &node) {
const int constPort = getConstPort(node);
if ((!node->get_input_element_type(0).is_real() && !node->get_input_element_type(1).is_real()) || !node->get_output_element_type(0).is_real() ||
constPort == -1) {
return false;
}
const int nonConstPort = 1 - constPort;
const auto constNode = std::dynamic_pointer_cast<ngraph::opset1::Constant>(node->get_input_node_shared_ptr(constPort));
return ngraph::shape_size(node->get_input_shape(constPort)) == 1 &&
node->get_input_shape(nonConstPort).size() >= node->get_input_shape(constPort).size() &&
!MKLDNNPlugin::one_of(node->get_input_node_shared_ptr(nonConstPort)->get_type_info(), ngraph::opset1::NormalizeL2::type_info,
ngraph::opset4::Interpolate::type_info,
ngraph::opset1::Convolution::type_info,
ngraph::opset1::GroupConvolution::type_info,
ngraph::opset1::ConvolutionBackpropData::type_info,
ngraph::opset1::GroupConvolutionBackpropData::type_info,
MKLDNNPlugin::FullyConnectedNode::type_info,
ngraph::op::v0::MVN::type_info,
ngraph::opset6::MVN::type_info);
}
template <>
bool isConvertableToPowerStatic(const std::shared_ptr<ngraph::opset1::Power> &node) {
return std::dynamic_pointer_cast<ngraph::opset1::Constant>(node->get_input_node_shared_ptr(1)) != nullptr &&
node->get_input_shape(0).size() >= node->get_input_shape(1).size() && ngraph::shape_size(node->get_input_shape(1)) == 1;
}
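// Assuming PowerStatic computes (scale * x + shift) ^ power element-wise, the conversions below map:
//   Power(x, c)    -> PowerStatic(power = c, scale = 1, shift = 0)
//   Add(x, c)      -> PowerStatic(power = 1, scale = 1, shift = c)
//   Subtract(x, c) -> PowerStatic(power = 1, scale = 1, shift = -c)  (scale = -1, shift = c when the constant is the first input)
//   Multiply(x, c) -> PowerStatic(power = 1, scale = c, shift = 0)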
template <class BaseOp>
std::shared_ptr<ngraph::Node> convert(const std::shared_ptr<BaseOp> &node) {
const int constPort = getConstPort(node);
const int nonConstPort = 1 - constPort;
std::shared_ptr<ngraph::opset1::Constant> powerNode = std::dynamic_pointer_cast<ngraph::opset1::Constant>(node->get_input_node_shared_ptr(constPort));
const float value = powerNode->cast_vector<float>()[0];
if (std::is_same<BaseOp, ngraph::opset1::Power>::value) {
return std::make_shared<MKLDNNPlugin::PowerStaticNode>(node->input(nonConstPort).get_source_output(), value, 1.0f, 0.0f,
node->output(0).get_element_type());
} else if (std::is_same<BaseOp, ngraph::opset1::Add>::value) {
return std::make_shared<MKLDNNPlugin::PowerStaticNode>(node->input(nonConstPort).get_source_output(), 1.0f, 1.0f, value,
node->output(0).get_element_type());
} else if (std::is_same<BaseOp, ngraph::opset1::Subtract>::value) {
float scale = 1.0f;
float shift = value;
if (constPort == 0) {
scale *= -1.0f;
} else {
shift *= -1.0f;
}
return std::make_shared<MKLDNNPlugin::PowerStaticNode>(node->input(nonConstPort).get_source_output(), 1.0f, scale, shift,
node->output(0).get_element_type());
} else if (std::is_same<BaseOp, ngraph::opset1::Multiply>::value) {
return std::make_shared<MKLDNNPlugin::PowerStaticNode>(node->input(nonConstPort).get_source_output(), 1.f, value, 0.0f,
node->output(0).get_element_type());
} else {
throw ngraph::ngraph_error("ConvertToPowerStatic: op type is not supported");
}
}
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ConvertToPowerStatic, "ConvertToPowerStatic", 0);
MKLDNNPlugin::ConvertToPowerStatic::ConvertToPowerStatic() {
ngraph::OutputVector twoInputs = {ngraph::pattern::any_input(ngraph::pattern::has_static_shape()),
ngraph::pattern::any_input(ngraph::pattern::has_static_shape())};
auto power = ngraph::pattern::wrap_type<ngraph::opset1::Power>(twoInputs);
auto add = ngraph::pattern::wrap_type<ngraph::opset1::Add>(twoInputs);
auto sub = ngraph::pattern::wrap_type<ngraph::opset1::Subtract>(twoInputs);
auto mult = ngraph::pattern::wrap_type<ngraph::opset1::Multiply>(twoInputs);
const auto candidate = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{power, add, sub, mult});
ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher &m) {
auto node = m.get_match_root();
std::shared_ptr<ngraph::Node> toReplace = node;
if (auto power = std::dynamic_pointer_cast<ngraph::opset1::Power>(node)) {
if (!isConvertableToPowerStatic(power))
return false;
toReplace = convert(power);
} else if (auto add = std::dynamic_pointer_cast<ngraph::opset1::Add>(node)) {
if (!isConvertableToPowerStatic(add))
return false;
toReplace = convert(add);
} else if (auto sub = std::dynamic_pointer_cast<ngraph::opset1::Subtract>(node)) {
if (!isConvertableToPowerStatic(sub))
return false;
toReplace = convert(sub);
} else if (auto mult = std::dynamic_pointer_cast<ngraph::opset1::Multiply>(node)) {
if (!isConvertableToPowerStatic(mult))
return false;
toReplace = convert(mult);
} else {
throw ngraph::ngraph_error("ConvertToPowerStatic: op type is not supported");
}
toReplace->set_friendly_name(node->get_friendly_name());
ngraph::copy_runtime_info(node, toReplace);
ngraph::replace_node(node, toReplace);
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(candidate, "ConvertToPowerStatic");
this->register_matcher(m, callback);
}

View File

@ -0,0 +1,17 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace MKLDNNPlugin {
class ConvertToPowerStatic: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertToPowerStatic();
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,41 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "convert_to_swish_cpu.hpp"
#include <ngraph/opsets/opset4.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include "op/swish_cpu.hpp"
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ConvertToSwishCPU, "ConvertToSwishCPU", 0);
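// Replaces opset4::Swish with the CPU specific SwishNode, folding an optional single-element
// beta Constant into the node's alpha attribute (default 1.0).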
MKLDNNPlugin::ConvertToSwishCPU::ConvertToSwishCPU() {
auto swish = ngraph::pattern::wrap_type<ngraph::opset4::Swish>();
ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher& m) {
auto swish = std::dynamic_pointer_cast<ngraph::opset4::Swish> (m.get_match_root());
if (!swish) {
return false;
}
float beta_value = 1.0;
if (swish->input_values().size() == 2) {
auto beta = std::dynamic_pointer_cast<ngraph::opset4::Constant>(swish->get_input_node_shared_ptr(1));
if (!beta || ngraph::shape_size(swish->get_input_shape(1)) != 1) {
return false;
}
beta_value = beta->cast_vector<float>()[0];
}
auto swish_cpu = std::make_shared<MKLDNNPlugin::SwishNode>(swish->input(0).get_source_output(), beta_value);
swish_cpu->set_friendly_name(swish->get_friendly_name());
ngraph::copy_runtime_info(swish, swish_cpu);
ngraph::replace_node(swish, swish_cpu);
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(swish, "ConvertToSwishCPU");
this->register_matcher(m, callback);
}

View File

@ -0,0 +1,17 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace MKLDNNPlugin {
class ConvertToSwishCPU: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertToSwishCPU();
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,70 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "fc_bias_fusion.hpp"
#include "op/fully_connected.hpp"
#include <numeric>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::FullyConnectedBiasFusion, "FullyConnectedBiasFusion", 0);
MKLDNNPlugin::FullyConnectedBiasFusion::FullyConnectedBiasFusion() {
auto m_fc = ngraph::pattern::wrap_type<MKLDNNPlugin::FullyConnectedNode>([](ngraph::Output<ngraph::Node> output) {
return ngraph::pattern::consumers_count(1)(output) && ngraph::pattern::has_static_shape()(output);
});
auto m_bias = ngraph::pattern::any_input();
auto m_add = ngraph::pattern::wrap_type<ngraph::opset1::Add>({m_fc, m_bias});
ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
auto & pattern_to_output = m.get_pattern_value_map();
auto add = pattern_to_output[m_add].get_node_shared_ptr();
auto bias = pattern_to_output[m_bias].get_node_shared_ptr();
auto fc = std::dynamic_pointer_cast<MKLDNNPlugin::FullyConnectedNode>(pattern_to_output[m_fc].get_node_shared_ptr());
if (!fc) {
return false;
}
if (auto bcast = std::dynamic_pointer_cast<ngraph::opset1::Broadcast>(bias)) {
bias = bcast->input_value(0).get_node_shared_ptr();
}
if (!std::dynamic_pointer_cast<ngraph::opset1::Constant>(bias)) {
return false;
}
ngraph::Shape bias_shape(bias->get_shape());
ngraph::Shape output_shape(fc->get_shape());
size_t bias_size = std::accumulate(bias_shape.begin(), bias_shape.end(), size_t{1}, std::multiplies<int64_t>());
if (bias_shape.empty() || bias_shape.back() != output_shape.back() || bias_shape.back() != bias_size) {
return false;
}
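// For example, an FC output of shape {N, 768} can be fused with a bias of shape {768} or {1, 768};
// a multi-dimensional bias is flattened to 1D below before it is attached to the new FC node.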
ngraph::NodeVector new_ops;
std::shared_ptr<ngraph::Node> final_bias = bias;
if (bias->get_shape().size() >= 2) {
final_bias = std::make_shared<ngraph::opset1::Reshape>(final_bias, ngraph::opset1::Constant::create(ngraph::element::i64,
ngraph::Shape{1}, {-1}), true);
new_ops.push_back(final_bias);
}
auto new_fc = std::make_shared<MKLDNNPlugin::FullyConnectedNode>(fc->input(0).get_source_output(),
fc->input(1).get_source_output(),
final_bias,
fc->get_shape(),
fc->get_output_type());
new_ops.push_back(new_fc);
new_fc->set_friendly_name(add->get_friendly_name());
ngraph::copy_runtime_info({fc, add}, new_ops);
ngraph::replace_node(add, new_fc);
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(m_add, "FullyConnectedBiasFusion");
this->register_matcher(m, callback);
}

View File

@ -0,0 +1,17 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace MKLDNNPlugin {
class FullyConnectedBiasFusion : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
FullyConnectedBiasFusion();
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,45 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "fully_connected.hpp"
constexpr ngraph::NodeTypeInfo MKLDNNPlugin::FullyConnectedNode::type_info;
MKLDNNPlugin::FullyConnectedNode::FullyConnectedNode(const ngraph::Output<Node>& A,
const ngraph::Output<Node>& B,
const ngraph::Shape& output_shape,
const ngraph::element::Type output_type)
: Op({A, B}), m_output_shape(output_shape), m_output_type(output_type) {
constructor_validate_and_infer_types();
}
MKLDNNPlugin::FullyConnectedNode::FullyConnectedNode(const ngraph::Output<Node>& A,
const ngraph::Output<Node>& B,
const ngraph::Output<Node>& C,
const ngraph::Shape& output_shape,
const ngraph::element::Type output_type)
: Op({A, B, C}), m_output_shape(output_shape), m_output_type(output_type) {
constructor_validate_and_infer_types();
}
std::shared_ptr<ngraph::Node> MKLDNNPlugin::FullyConnectedNode::clone_with_new_inputs(const ngraph::OutputVector& new_args) const {
check_new_args_count(this, new_args);
if (new_args.size() == 2) {
return std::make_shared<MKLDNNPlugin::FullyConnectedNode>(new_args.at(0), new_args.at(1), m_output_shape, m_output_type);
} else if (new_args.size() == 3) {
return std::make_shared<MKLDNNPlugin::FullyConnectedNode>(new_args.at(0), new_args.at(1), new_args.at(2), m_output_shape, m_output_type);
}
throw ngraph::ngraph_error("Unsupported number of arguments for FullyConnected operation");
}
void MKLDNNPlugin::FullyConnectedNode::validate_and_infer_types() {
m_output_size = m_output_shape.back();
set_output_type(0, m_output_type == ngraph::element::undefined ? input_value(0).get_element_type() : m_output_type, m_output_shape);
}
bool MKLDNNPlugin::FullyConnectedNode::visit_attributes(ngraph::AttributeVisitor &visitor) {
visitor.on_attribute("out-size", m_output_size);
return true;
}

View File

@ -0,0 +1,47 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/node.hpp>
#include <ngraph/op/op.hpp>
#include <ngraph/op/util/fused_op.hpp>
namespace MKLDNNPlugin {
class FullyConnectedNode : public ngraph::op::Op {
public:
static constexpr ngraph::NodeTypeInfo type_info{"FullyConnected", 0};
const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; }
FullyConnectedNode() = default;
FullyConnectedNode(const ngraph::Output<Node> &A,
const ngraph::Output<Node> &B,
const ngraph::Shape &output_shape,
const ngraph::element::Type output_type = ngraph::element::undefined);
FullyConnectedNode(const ngraph::Output<Node> &A,
const ngraph::Output<Node> &B,
const ngraph::Output<Node> &C,
const ngraph::Shape &output_shape,
const ngraph::element::Type output_type = ngraph::element::undefined);
bool visit_attributes(ngraph::AttributeVisitor &visitor) override;
void validate_and_infer_types() override;
std::shared_ptr<Node> clone_with_new_inputs(const ngraph::OutputVector& new_args) const override;
size_t get_out_size() const { return m_output_size; }
ngraph::element::Type get_output_type() const { return m_output_type; }
private:
size_t m_output_size = 0;
ngraph::Shape m_output_shape = {};
ngraph::element::Type m_output_type;
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,31 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "leaky_relu.hpp"
constexpr ngraph::NodeTypeInfo MKLDNNPlugin::LeakyReluNode::type_info;
MKLDNNPlugin::LeakyReluNode::LeakyReluNode(const ngraph::Output<ngraph::Node> &data,
const float &negative_slope,
const ngraph::element::Type output_type)
: Op({data}), m_negative_slope(negative_slope), m_output_type(output_type) {
constructor_validate_and_infer_types();
}
std::shared_ptr<ngraph::Node> MKLDNNPlugin::LeakyReluNode::clone_with_new_inputs(const ngraph::OutputVector& new_args) const {
check_new_args_count(this, new_args);
return std::make_shared<MKLDNNPlugin::LeakyReluNode>(new_args.at(0), m_negative_slope, m_output_type);
}
void MKLDNNPlugin::LeakyReluNode::validate_and_infer_types() {
set_output_type(
0,
m_output_type == ngraph::element::undefined ? get_input_element_type(0) : m_output_type,
get_input_partial_shape(0));
}
bool MKLDNNPlugin::LeakyReluNode::visit_attributes(ngraph::AttributeVisitor &visitor) {
visitor.on_attribute("negative_slope", m_negative_slope);
return true;
}

View File

@ -0,0 +1,33 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/op/op.hpp>
namespace MKLDNNPlugin {
class LeakyReluNode : public ngraph::op::Op {
public:
static constexpr ngraph::NodeTypeInfo type_info{"LeakyRelu", 0};
const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; }
LeakyReluNode(const ngraph::Output<ngraph::Node> &data, const float &negative_slope, const ngraph::element::Type output_type);
void validate_and_infer_types() override;
bool visit_attributes(ngraph::AttributeVisitor &visitor) override;
std::shared_ptr<ngraph::Node> clone_with_new_inputs(const ngraph::OutputVector &new_args) const override;
float get_slope() { return m_negative_slope; }
ngraph::element::Type get_output_type() const { return m_output_type; }
private:
float m_negative_slope;
ngraph::element::Type m_output_type;
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,35 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "power_static.hpp"
constexpr ngraph::NodeTypeInfo MKLDNNPlugin::PowerStaticNode::type_info;
MKLDNNPlugin::PowerStaticNode::PowerStaticNode(const ngraph::Output<Node> &data,
const float &power,
const float &scale,
const float &shift,
const ngraph::element::Type output_type)
: Op({data}), scale(scale), power(power), shift(shift), m_output_type(output_type) {
constructor_validate_and_infer_types();
}
std::shared_ptr<ngraph::Node> MKLDNNPlugin::PowerStaticNode::clone_with_new_inputs(const ngraph::OutputVector &new_args) const {
if (new_args.size() != 1) {
throw ngraph::ngraph_error("Incorrect number of new arguments");
}
return std::make_shared<MKLDNNPlugin::PowerStaticNode>(new_args.at(0), this->power, this->scale, this->shift, this->m_output_type);
}
void MKLDNNPlugin::PowerStaticNode::validate_and_infer_types() {
set_output_type(0, m_output_type == ngraph::element::undefined ? get_input_element_type(0) : m_output_type, get_input_partial_shape(0));
}
bool MKLDNNPlugin::PowerStaticNode::visit_attributes(ngraph::AttributeVisitor &visitor) {
visitor.on_attribute("scale", scale);
visitor.on_attribute("power", power);
visitor.on_attribute("shift", shift);
return true;
}

View File

@ -0,0 +1,34 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/op/op.hpp>
namespace MKLDNNPlugin {
class PowerStaticNode : public ngraph::op::Op {
public:
static constexpr ngraph::NodeTypeInfo type_info{"PowerStatic", 0};
const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; }
PowerStaticNode(const ngraph::Output<ngraph::Node> &data, const float &power, const float &scale, const float &shift,
const ngraph::element::Type output_type = ngraph::element::undefined);
void validate_and_infer_types() override;
bool visit_attributes(ngraph::AttributeVisitor &visitor) override;
std::shared_ptr<ngraph::Node> clone_with_new_inputs(const ngraph::OutputVector &new_args) const override;
float get_power() const { return power; }
float get_scale() const { return scale; }
float get_shift() const { return shift; }
private:
float scale, power, shift;
ngraph::element::Type m_output_type;
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,31 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "swish_cpu.hpp"
constexpr ngraph::NodeTypeInfo MKLDNNPlugin::SwishNode::type_info;
MKLDNNPlugin::SwishNode::SwishNode(const ngraph::Output<ngraph::Node> & input, const float alpha)
: Op({input}), m_alpha(alpha) {
constructor_validate_and_infer_types();
}
std::shared_ptr<ngraph::Node> MKLDNNPlugin::SwishNode::clone_with_new_inputs(const ngraph::OutputVector& new_args) const {
check_new_args_count(this, new_args);
return std::make_shared<MKLDNNPlugin::SwishNode>(new_args.at(0), m_alpha);
}
bool MKLDNNPlugin::SwishNode::visit_attributes(ngraph::AttributeVisitor& visitor) {
visitor.on_attribute("alpha", m_alpha);
return true;
}
void MKLDNNPlugin::SwishNode::validate_and_infer_types() {
set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
}
float MKLDNNPlugin::SwishNode::get_alpha() const {
return m_alpha;
}

View File

@ -0,0 +1,27 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/op/op.hpp>
namespace MKLDNNPlugin {
class SwishNode : public ngraph::op::Op {
public:
static constexpr ngraph::NodeTypeInfo type_info{"SwishCPU", 0};
const ngraph::NodeTypeInfo &get_type_info() const override { return type_info; }
explicit SwishNode(const ngraph::Output<Node> &input, float alpha = 1.0);
void validate_and_infer_types() override;
bool visit_attributes(ngraph::AttributeVisitor& visitor) override;
std::shared_ptr<ngraph::Node> clone_with_new_inputs(const ngraph::OutputVector &new_args) const override;
float get_alpha() const;
protected:
float m_alpha;
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,175 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "reshape_1d_ops.hpp"
#include <memory>
#include <vector>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph_ops/type_relaxed.hpp>
#include "transformations/utils/utils.hpp"
template <class BaseOp>
std::shared_ptr<ngraph::Node> convert(const ngraph::Output<ngraph::Node> & data, std::shared_ptr<BaseOp> node, ngraph::NodeVector &new_ops) {
auto new_strides = node->get_strides();
auto new_dilations = node->get_dilations();
auto new_pads_begin = node->get_pads_begin();
auto new_pad_end = node->get_pads_end();
new_strides.insert(new_strides.begin(), 1);
new_dilations.insert(new_dilations.begin(), 1);
new_pads_begin.insert(new_pads_begin.begin(), 0);
new_pad_end.insert(new_pad_end.begin(), 0);
ngraph::Shape new_weights_shape(node->input_value(1).get_shape());
new_weights_shape.insert(new_weights_shape.begin() + new_weights_shape.size() - 1, 1);
auto weights = ngraph::op::util::reshapeTo(node->input_value(1), new_weights_shape);
new_ops.push_back(weights);
if (std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(node)) {
return std::make_shared<ngraph::op::TypeRelaxed<BaseOp>>(std::vector<ngraph::element::Type>{ngraph::element::f32, ngraph::element::f32},
std::vector<ngraph::element::Type>{ngraph::element::f32},
ngraph::op::TemporaryReplaceOutputType(data, ngraph::element::f32).get(),
ngraph::op::TemporaryReplaceOutputType(weights, ngraph::element::f32).get(),
new_strides,
new_pads_begin,
new_pad_end,
new_dilations,
node->get_auto_pad());
} else {
return std::make_shared<BaseOp>(data,
weights,
new_strides,
new_pads_begin,
new_pad_end,
new_dilations,
node->get_auto_pad());
}
}
template <>
std::shared_ptr<ngraph::Node> convert(const ngraph::Output<ngraph::Node> & data, std::shared_ptr<ngraph::opset1::MaxPool> node, ngraph::NodeVector & new_ops) {
auto new_strides = node->get_strides();
auto new_pads_begin = node->get_pads_begin();
auto new_pad_end = node->get_pads_end();
auto new_kernel = node->get_kernel();
new_strides.insert(new_strides.begin(), 1);
new_pads_begin.insert(new_pads_begin.begin(), 0);
new_pad_end.insert(new_pad_end.begin(), 0);
new_kernel.insert(new_kernel.begin(), 1);
return std::make_shared<ngraph::opset1::MaxPool>(data,
new_strides,
new_pads_begin,
new_pad_end,
new_kernel,
node->get_rounding_type(),
node->get_auto_pad());
}
template <>
std::shared_ptr<ngraph::Node> convert(const ngraph::Output<ngraph::Node> & data, std::shared_ptr<ngraph::opset1::AvgPool> node, ngraph::NodeVector & new_ops) {
// Update Pooling attributes with additional dimension
auto new_strides = node->get_strides();
auto new_pads_begin = node->get_pads_begin();
auto new_pad_end = node->get_pads_end();
auto new_kernel = node->get_kernel();
new_strides.insert(new_strides.begin(), 1);
new_pads_begin.insert(new_pads_begin.begin(), 0);
new_pad_end.insert(new_pad_end.begin(), 0);
new_kernel.insert(new_kernel.begin(), 1);
return std::make_shared<ngraph::opset1::AvgPool>(data,
new_strides,
new_pads_begin,
new_pad_end,
new_kernel,
node->get_exclude_pad(),
node->get_rounding_type(),
node->get_auto_pad());
}
ngraph::matcher_pass_callback get_callback() {
return [](ngraph::pattern::Matcher& m) {
auto node = m.get_match_root();
if (node->input(0).get_partial_shape().rank().get_length() != 3) {
return false;
}
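// For example, a 1D convolution over an input of shape {N, C, W} is rewritten as
// Reshape -> {N, C, 1, W}, a 2D convolution with kernel {1, k}, and a Reshape back to the original output shape.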
// Insert H dimension equal to 1
auto input_shape = node->input(0).get_shape();
auto output_shape = node->output(0).get_shape();
input_shape.insert(input_shape.begin() + 2, 1);
ngraph::NodeVector new_ops;
// Reshape(input_shape)->Op->Reshape(output_shape)
ngraph::Output<ngraph::Node> last = ngraph::op::util::reshapeTo(node->input_value(0), input_shape);
last.get_node_shared_ptr()->set_friendly_name(node->get_friendly_name() + "/reshape_begin");
new_ops.push_back(last.get_node_shared_ptr());
if (auto conv = std::dynamic_pointer_cast<ngraph::opset1::Convolution>(node)) {
last = convert(last, conv, new_ops);
} else if (auto group_conv = std::dynamic_pointer_cast<ngraph::opset1::GroupConvolution>(node)) {
last = convert(last, group_conv, new_ops);
} else if (auto max_pool = std::dynamic_pointer_cast<ngraph::opset1::MaxPool>(node)) {
last = convert(last, max_pool, new_ops);
} else if (auto avg_pool = std::dynamic_pointer_cast<ngraph::opset1::AvgPool>(node)) {
last = convert(last, avg_pool, new_ops);
} else {
throw ngraph::ngraph_error("Reshape1DOps: op type is not supported");
}
last.get_node_shared_ptr()->set_friendly_name(node->get_friendly_name() + "/new");
new_ops.push_back(last.get_node_shared_ptr());
last = ngraph::op::util::reshapeTo(last, output_shape);
last.get_node_shared_ptr()->set_friendly_name(node->get_friendly_name());
new_ops.push_back(last.get_node_shared_ptr());
ngraph::copy_runtime_info(node, new_ops);
node->output(0).replace(last);
return true;
};
}
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::Reshape1DConvolution, "Reshape1DConvolution", 0);
MKLDNNPlugin::Reshape1DConvolution::Reshape1DConvolution() {
auto conv = ngraph::pattern::wrap_type<ngraph::opset1::Convolution>(ngraph::pattern::has_static_shape());
auto m = std::make_shared<ngraph::pattern::Matcher>(conv, "Reshape1DConvolution");
this->register_matcher(m, get_callback());
}
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::Reshape1DGroupConvolution, "Reshape1DGroupConvolution", 0);
MKLDNNPlugin::Reshape1DGroupConvolution::Reshape1DGroupConvolution() {
auto group_conv = ngraph::pattern::wrap_type<ngraph::opset1::GroupConvolution>(ngraph::pattern::has_static_shape());
auto m = std::make_shared<ngraph::pattern::Matcher>(group_conv, "Reshape1DGroupConvolution");
this->register_matcher(m, get_callback());
}
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::Reshape1DAvgPool, "Reshape1DAvgPool", 0);
MKLDNNPlugin::Reshape1DAvgPool::Reshape1DAvgPool() {
auto pool = ngraph::pattern::wrap_type<ngraph::opset1::AvgPool>(ngraph::pattern::has_static_shape());
auto m = std::make_shared<ngraph::pattern::Matcher>(pool, "Reshape1DAvgPool");
this->register_matcher(m, get_callback());
}
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::Reshape1DMaxPool, "Reshape1DMaxPool", 0);
MKLDNNPlugin::Reshape1DMaxPool::Reshape1DMaxPool() {
auto pool = ngraph::pattern::wrap_type<ngraph::opset1::MaxPool>(ngraph::pattern::has_static_shape());
auto m = std::make_shared<ngraph::pattern::Matcher>(pool, "Reshape1DMaxPool");
this->register_matcher(m, get_callback());
}

View File

@ -0,0 +1,35 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace MKLDNNPlugin {
class Reshape1DConvolution: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
Reshape1DConvolution();
};
class Reshape1DGroupConvolution: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
Reshape1DGroupConvolution();
};
class Reshape1DAvgPool: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
Reshape1DAvgPool();
};
class Reshape1DMaxPool: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
Reshape1DMaxPool();
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,80 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "reshape_fc_fusion.hpp"
#include "op/fully_connected.hpp"
#include <numeric>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/pattern/op/or.hpp>
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ReshapeFullyConnectedFusion, "ReshapeFullyConnectedFusion", 0);
MKLDNNPlugin::ReshapeFullyConnectedFusion::ReshapeFullyConnectedFusion() {
auto m_reshape = ngraph::pattern::wrap_type<ngraph::opset1::Reshape>(ngraph::pattern::has_static_shape());
ngraph::OutputVector twoInputs = {m_reshape, ngraph::pattern::any_input()};
ngraph::OutputVector threeInputs = {m_reshape, ngraph::pattern::any_input(), ngraph::pattern::any_input()};
auto fcTwoInputs = ngraph::pattern::wrap_type<MKLDNNPlugin::FullyConnectedNode>(twoInputs, ngraph::pattern::has_static_shape());
auto fcThreeInputs = ngraph::pattern::wrap_type<MKLDNNPlugin::FullyConnectedNode>(threeInputs, ngraph::pattern::has_static_shape());
const auto fcTwoOrThreeInputs = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{fcTwoInputs, fcThreeInputs});
ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher &m) {
auto fc = std::dynamic_pointer_cast<MKLDNNPlugin::FullyConnectedNode>(m.get_match_root());
auto reshape = std::dynamic_pointer_cast<ngraph::opset1::Reshape>(fc->get_input_node_shared_ptr(0));
// Check that Reshape reshapes 4D tensor to 2D or input shape = output shape
auto shape_in = reshape->input_value(0).get_shape();
auto shape_out = reshape->get_shape();
if (!((shape_in.size() == 4 && reshape->get_shape().size() == 2) || (shape_in == shape_out && !shape_in.empty()))) {
return false;
}
// Check that Weights [O, C*H*W] are consistent with Input [N, C, H, W]
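// Example (assumed values): Input [1, 3, 224, 224] flattened by the Reshape to [1, 150528]
// is only fusable when the weights have shape [O, 150528].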
auto shape_w = fc->input_value(1).get_shape();
if (shape_in[0] != shape_out[0] || std::accumulate(shape_in.begin() + 1, shape_in.end(), size_t{1}, std::multiplies<size_t>()) != shape_w[1]) {
return false;
}
ngraph::NodeVector new_ops;
auto weightInput = fc->input(1).get_source_output();
ngraph::Shape newWeightsShape;
const auto outShape = fc->get_shape();
if (shape_in.size() == 3) {
newWeightsShape = ngraph::Shape({outShape[2], shape_in[2]});
} else {
newWeightsShape.push_back(outShape[1]);
for (int i = 1; i < shape_in.size(); i++)
newWeightsShape.push_back(shape_in[i]);
}
if (newWeightsShape != weightInput.get_shape()) {
auto newShape = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{newWeightsShape.size()}, newWeightsShape);
weightInput = std::make_shared<ngraph::opset1::Reshape>(weightInput, newShape, true);
new_ops.push_back(weightInput.get_node_shared_ptr());
}
std::shared_ptr<ngraph::Node> new_fc;
if (fc->get_input_size() == 2) {
new_fc = std::make_shared<MKLDNNPlugin::FullyConnectedNode>(reshape->input_value(0),
weightInput,
outShape,
fc->output(0).get_element_type());
} else if (fc->get_input_size() == 3) {
new_fc = std::make_shared<MKLDNNPlugin::FullyConnectedNode>(reshape->input_value(0),
weightInput,
fc->input_value(2),
outShape,
fc->output(0).get_element_type());
}
new_ops.push_back(new_fc);
new_fc->set_friendly_name(fc->get_friendly_name());
ngraph::copy_runtime_info({reshape, fc}, new_ops);
ngraph::replace_node(fc, new_fc);
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(fcTwoOrThreeInputs, "ReshapeFullyConnectedFusion");
register_matcher(m, callback);
}

View File

@ -0,0 +1,17 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace MKLDNNPlugin {
class ReshapeFullyConnectedFusion : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ReshapeFullyConnectedFusion();
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,84 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "reshape_fully_connected.hpp"
#include "op/fully_connected.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <transformations/utils/utils.hpp>
#include <ngraph/pattern/op/or.hpp>
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ReshapeFullyConnected, "ReshapeFullyConnected", 0);
MKLDNNPlugin::ReshapeFullyConnected::ReshapeFullyConnected() {
ngraph::OutputVector twoInputs = {ngraph::pattern::any_input(ngraph::pattern::has_static_shape()), ngraph::pattern::any_input()};
ngraph::OutputVector threeInputs = {ngraph::pattern::any_input(ngraph::pattern::has_static_shape()), ngraph::pattern::any_input(),
ngraph::pattern::any_input()};
auto fcTwoInputs = ngraph::pattern::wrap_type<MKLDNNPlugin::FullyConnectedNode>(twoInputs, ngraph::pattern::has_static_shape());
auto fcThreeInputs = ngraph::pattern::wrap_type<MKLDNNPlugin::FullyConnectedNode>(threeInputs, ngraph::pattern::has_static_shape());
const auto fcTwoOrThreeInputs = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{fcTwoInputs, fcThreeInputs});
ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) {
auto fc = std::dynamic_pointer_cast<MKLDNNPlugin::FullyConnectedNode> (m.get_match_root());
if (!fc || transformation_callback(fc)) {
return false;
}
auto input_shape = fc->input_value(0).get_shape();
auto output_shape = fc->get_shape();
if (input_shape.size() == 2) {
return false;
}
ngraph::NodeVector new_ops;
std::vector<int64_t> reshape_shape{-1, static_cast<int64_t>(input_shape.back())};
auto reshape = std::make_shared<ngraph::opset1::Reshape>(fc->input_value(0),
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{2}, reshape_shape), true);
new_ops.push_back(reshape);
reshape->set_friendly_name(fc->get_friendly_name() + "/Reshape");
// Calculate output shape for new FullyConnected layer
// [I, K] * [O, K] = [I, O]
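// Worked example (assumed shapes): input [2, 3, 10] is reshaped with {-1, 10} to [6, 10];
// with weights of shape [5, 10] the new FullyConnected yields [6, 5], which is reshaped back to [2, 3, 5] below.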
auto I = reshape->get_shape()[0];
auto O = fc->input_value(1).get_shape()[0];
ngraph::Shape output_shape_new{I, O};
std::shared_ptr<ngraph::Node> fc_new;
if (fc->get_input_size() == 2) {
fc_new = std::make_shared<MKLDNNPlugin::FullyConnectedNode>(reshape,
fc->input_value(1),
output_shape_new,
fc->get_output_type());
} else if (fc->get_input_size() == 3) {
fc_new = std::make_shared<MKLDNNPlugin::FullyConnectedNode>(reshape,
fc->input_value(1),
fc->input_value(2),
output_shape_new,
fc->get_output_type());
}
new_ops.push_back(fc_new);
if (output_shape != output_shape_new) {
auto reshape_output = ngraph::op::util::reshapeTo(fc_new, output_shape);
new_ops.push_back(reshape_output);
reshape_output->set_friendly_name(fc->get_friendly_name());
fc_new->set_friendly_name(fc->get_friendly_name() + "/FC");
ngraph::copy_runtime_info(fc, new_ops);
ngraph::replace_node(fc, reshape_output);
} else {
fc_new->set_friendly_name(fc->get_friendly_name());
ngraph::copy_runtime_info(fc, new_ops);
ngraph::replace_node(fc, fc_new);
}
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(fcTwoOrThreeInputs, "ReshapeFullyConnected");
this->register_matcher(m, callback);
}

View File

@ -0,0 +1,25 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
/*
* Description:
 * ReshapeFullyConnected transformation detects FullyConnected operations
 * whose input rank is greater than 2 and inserts Reshape operations before
 * and after each such FullyConnected operation. This transformation is
 * required because of IE restrictions.
*/
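/*
 * Illustrative shape flow (assumed shapes):
 *     input [N, T, K] -> Reshape -> [N*T, K] -> FullyConnected (weights [O, K]) -> [N*T, O] -> Reshape -> [N, T, O]
 */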
namespace MKLDNNPlugin {
class ReshapeFullyConnected: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ReshapeFullyConnected();
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,35 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "reshape_prelu.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include "transformations/utils/utils.hpp"
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ReshapePRelu, "ReshapePRelu", 0);
MKLDNNPlugin::ReshapePRelu::ReshapePRelu() {
auto prelu = ngraph::pattern::wrap_type<ngraph::opset1::PRelu>({ngraph::pattern::any_input(ngraph::pattern::has_static_shape()),
ngraph::pattern::any_input(ngraph::pattern::has_static_shape())});
ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) {
auto prelu = std::dynamic_pointer_cast<ngraph::opset1::PRelu>(m.get_match_root());
if (!prelu || ngraph::shape_size(prelu->get_input_shape(1)) == 1 || prelu->get_input_shape(1).size() != 1) {
return false;
}
ngraph::Shape new_shape(prelu->input_value(0).get_shape().size(), 1);
new_shape[new_shape.size() > 1 ? 1 : 0] = prelu->input_value(1).get_shape()[0];
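// Example (illustrative): a slope of shape [C] applied to a 4D input is reshaped to [1, C, 1, 1]
// so it broadcasts over the channel dimension only.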
auto slope = ngraph::op::util::reshapeTo(prelu->input_value(1), new_shape);
auto new_prelu = std::make_shared<ngraph::opset1::PRelu>(prelu->input(0).get_source_output(), slope);
new_prelu->set_friendly_name(prelu->get_friendly_name());
ngraph::copy_runtime_info(prelu, new_prelu);
ngraph::replace_node(prelu, new_prelu);
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(prelu, "ReshapePRelu");
this->register_matcher(m, callback);
}

View File

@ -0,0 +1,17 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace MKLDNNPlugin {
class ReshapePRelu: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ReshapePRelu();
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,153 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "rnn_sequences_optimization.hpp"
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/pattern/op/or.hpp>
#include <transformations/utils/utils.hpp>
#include <ngraph/variant.hpp>
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::OptimizeGRUSequenceTransposes, "OptimizeGRUSequenceTransposes", 0);
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::OptimizeLSTMSequenceTransposes, "OptimizeLSTMSequenceTransposes", 0);
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::OptimizeRNNSequenceTransposes, "OptimizeRNNSequenceTransposes", 0);
namespace {
int64_t getSeqAxis(const std::shared_ptr<ngraph::Node>& sequenceOp) {
// Optimization.
// Plug-ins support a seqAxis attribute (value 1 or 0) for Seq ops, but the attribute is not part of the
// spec, so the TI to Sequences transformation has to insert Transpose layers before and after each Seq op.
// These additional Transpose layers hurt performance, so we try to detect the pattern
// Transpose(axis_order={1,0,2}) -> Seq -> Transpose(axis_order={2,1,0,3})
// and replace the unnecessary Transpose ops with Reshapes, recording seqAxis = 0 in the runtime info
// to transfer the value of the attribute to plug-ins.
// todo: specify seqAxis attribute for Sequence ops.
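// Example (illustrative): a model that keeps its recurrent data sequence-first, e.g. [T, N, C] (assumed
// shape), only gets the Transpose pair to satisfy the batch-first layout required by the spec; once the
// pattern is matched the Transposes degrade into cheap Reshapes and the plug-in consumes the data as-is.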
int64_t seqAxis = 1; // default
const auto& target_inputs = sequenceOp->output(0).get_target_inputs();
if (target_inputs.size() == 1) {
const auto& transpose_before = std::dynamic_pointer_cast<ngraph::op::v1::Transpose>(sequenceOp->input_value(0).get_node_shared_ptr());
const auto& transpose_after = std::dynamic_pointer_cast<ngraph::op::v1::Transpose>(target_inputs.begin()->get_node()->shared_from_this());
if (transpose_after != nullptr && transpose_before != nullptr) {
auto order_before = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(
transpose_before->input_value(1).get_node_shared_ptr());
auto order_after = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(
transpose_after->input_value(1).get_node_shared_ptr());
if (order_before != nullptr && order_after != nullptr) {
auto order_before_values = order_before->cast_vector<int64_t>();
auto order_after_values = order_after->cast_vector<int64_t>();
std::vector<int64_t> order_ref_before = {1, 0, 2};
std::vector<int64_t> order_ref_after = {2, 1, 0, 3};
if (order_before_values == order_ref_before && order_after_values == order_ref_after) {
seqAxis = 0;
}
}
}
}
return seqAxis;
}
bool transform(const std::shared_ptr<ngraph::Node>& sequenceOp) {
// Detect pattern: Transpose_before -> Seq -> Transpose_after
auto seqAxis = getSeqAxis(sequenceOp);
if (seqAxis == 0) {
ngraph::Output<ngraph::Node> in_0 = sequenceOp->get_input_source_output(0).get_node_shared_ptr()->get_input_source_output(0);
auto newInShape = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{3}, sequenceOp->get_input_shape(0));
auto reshape1 = std::make_shared<ngraph::op::v1::Reshape>(in_0, newInShape, false);
ngraph::replace_node(sequenceOp->get_input_node_shared_ptr(0), {reshape1->output(0)});
const auto &gruTargetInputs = sequenceOp->output(0).get_target_inputs();
if (gruTargetInputs.empty())
return false;
auto transposeAfter = gruTargetInputs.begin()->get_node()->shared_from_this();
auto newOutShape = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{4}, transposeAfter->get_output_shape(0));
auto reshape2 = std::make_shared<ngraph::op::v1::Reshape>(sequenceOp->output(0), newOutShape, false);
reshape2->set_friendly_name(transposeAfter->get_friendly_name());
ngraph::replace_node(transposeAfter, {reshape2->output(0)});
} else {
auto originShape = sequenceOp->get_output_shape(0);
const auto targetInputs = sequenceOp->get_output_target_inputs(0);
if (targetInputs.empty()) {
return false;
}
auto seqOut = targetInputs.begin()->get_node()->shared_from_this();
auto tncShape = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{3}, {originShape[2], originShape[0], originShape[3]});
auto reshape1 = std::make_shared<ngraph::op::v1::Reshape>(sequenceOp->output(0), tncShape, false);
auto order = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{3}, {1, 0, 2});
auto transpose = std::make_shared<ngraph::op::v1::Transpose>(reshape1->output(0), order);
auto ndtcShape = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{4}, originShape);
auto reshape2 = std::make_shared<ngraph::op::v1::Reshape>(transpose->output(0), ndtcShape, false);
reshape2->set_friendly_name(sequenceOp->get_friendly_name()+".0");
ngraph::insert_new_node_between(sequenceOp, seqOut, reshape2);
}
sequenceOp->get_rt_info()["seqAxis"] = std::make_shared<ngraph::VariantWrapper<int64_t>>(seqAxis);
return true;
}
} // namespace
MKLDNNPlugin::OptimizeGRUSequenceTransposes::OptimizeGRUSequenceTransposes() {
ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher &m) {
auto gruSequence = std::dynamic_pointer_cast<ngraph::op::v5::GRUSequence>(m.get_match_root());
if (!gruSequence) {
return false;
}
// Bidirectional cases are not supported
if (gruSequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL)
return false;
return transform(gruSequence);
};
auto gruSequenceNgraph = ngraph::pattern::wrap_type<ngraph::op::v5::GRUSequence>();
auto m = std::make_shared<ngraph::pattern::Matcher>(gruSequenceNgraph, "OptimizeGRUSequenceTransposes");
this->register_matcher(m, callback);
}
MKLDNNPlugin::OptimizeRNNSequenceTransposes::OptimizeRNNSequenceTransposes() {
ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher &m) {
auto rnnSequence = std::dynamic_pointer_cast<ngraph::op::v5::RNNSequence>(m.get_match_root());
if (!rnnSequence) {
return false;
}
// Bidirectional cases are not supported
if (rnnSequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL)
return false;
return transform(rnnSequence);
};
auto rnnSequenceNgraph = ngraph::pattern::wrap_type<ngraph::op::v5::RNNSequence>();
auto m = std::make_shared<ngraph::pattern::Matcher>(rnnSequenceNgraph, "OptimizeRNNSequenceTransposes");
this->register_matcher(m, callback);
}
MKLDNNPlugin::OptimizeLSTMSequenceTransposes::OptimizeLSTMSequenceTransposes() {
ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher &m) {
auto lstmSequence = std::dynamic_pointer_cast<ngraph::op::v5::LSTMSequence>(m.get_match_root());
if (!lstmSequence) {
return false;
}
// Bidirectional cases are not supported
if (lstmSequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL)
return false;
return transform(lstmSequence);
};
auto lstmSequenceNgraph_0 = ngraph::pattern::wrap_type<ngraph::op::v0::LSTMSequence>();
auto lstmSequenceNgraph_5 = ngraph::pattern::wrap_type<ngraph::op::v5::LSTMSequence>();
const auto lstmSeqInputs = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{lstmSequenceNgraph_0, lstmSequenceNgraph_5});
auto m = std::make_shared<ngraph::pattern::Matcher>(lstmSeqInputs, "OptimizeLSTMSequenceTransposes");
this->register_matcher(m, callback);
}

View File

@ -0,0 +1,29 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace MKLDNNPlugin {
class OptimizeGRUSequenceTransposes : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
OptimizeGRUSequenceTransposes();
};
class OptimizeLSTMSequenceTransposes : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
OptimizeLSTMSequenceTransposes();
};
class OptimizeRNNSequenceTransposes : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
OptimizeRNNSequenceTransposes();
};
} // namespace MKLDNNPlugin

View File

@ -1,55 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "base.hpp"
#include "argmax_imp.hpp"
#include <string>
#include <vector>
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class ArgMaxImpl: public ExtLayerBase {
public:
explicit ArgMaxImpl(const CNNLayer* layer) {
try {
if (layer->insData.size() != 1 || layer->outData.empty())
IE_THROW() << "Incorrect number of input/output edges!";
conf.out_max_val_ = layer->GetParamAsBool("out_max_val", false);
conf.top_k_ = layer->GetParamAsInt("top_k");
conf.has_axis_ = (layer->params.find("axis") != layer->params.end());
conf.axis_index_ = conf.has_axis_ ?
std::stoi(layer->params.at("axis")) :0;
addConfig(layer, {DataConfigurator(ConfLayout::PLN, Precision::FP32)}, {DataConfigurator(ConfLayout::PLN, Precision::FP32)});
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
}
}
StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
ResponseDesc *resp) noexcept override {
SizeVector in_dims = inputs[0]->getTensorDesc().getDims();
float* src_data = inputs[0]->buffer();
float* dst_data = outputs[0]->buffer();
XARCH::arg_max_execute(src_data, dst_data, in_dims, conf);
return OK;
}
private:
argmax_conf conf;
};
REG_FACTORY_FOR(ArgMaxImpl, ArgMax);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine

View File

@ -1,417 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "argmax_imp.hpp"
#include <cstring>
#include <algorithm>
#include <string>
#include <vector>
#include <cmath>
#include <utility>
#include <functional>
#include <ie_parallel.hpp>
#if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
#include <immintrin.h>
#include "nodes/common/uni_simd.h"
#endif
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
namespace XARCH {
using Shape = std::vector<size_t>;
#if defined(HAVE_AVX512F)
constexpr int count_vec = 32;
#elif defined(HAVE_SSE) || defined(HAVE_AVX2)
constexpr int count_vec = 16;
#endif
inline int count(Shape dims, size_t start_ind, size_t end_ind) {
size_t count = 1;
for (size_t i = start_ind; i < end_ind; i++)
count *= dims[i];
return static_cast<int>(count);
}
inline int count(Shape dims, size_t start_ind = 0) {
return count(dims, start_ind, dims.size());
}
template <bool out_max_val>
void argmax_one_class_has_axis(const float* src_data, float* dst_data, Shape in_dims, argmax_conf& conf) {
const auto axis_index_ = conf.axis_index_;
int axis_ = (axis_index_ < 0) ? axis_index_ + static_cast<int>(in_dims.size()) : axis_index_;
const int dim = static_cast<int>(in_dims[axis_]);
int before_num = count(in_dims, 0, axis_);
int after_num = count(in_dims, axis_ + 1, in_dims.size());
int first_index = 0;
#if defined(HAVE_AVX512F)
const int block_size = 16;
typedef __m512 vec_type_f;
typedef __m512i vec_type_i;
typedef __mmask16 vmask_type;
#elif defined(HAVE_AVX2)
const int block_size = 8;
typedef __m256 vec_type_f;
typedef __m256i vec_type_i;
typedef __m256 vmask_type;
#elif defined(HAVE_SSE)
const int block_size = 4;
typedef __m128 vec_type_f;
typedef __m128i vec_type_i;
typedef __m128 vmask_type;
#endif
#if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
parallel_for2d(before_num, after_num / block_size, [&](int i0, int ib1) {
int s_index = i0 * dim * after_num + ib1 * block_size;
vec_type_f vmax_val = _mm_uni_loadu_ps(src_data + s_index);
vec_type_i vindex_max_val = _mm_uni_setzero_si();
for (int i2 = 1; i2 < dim; i2++) {
s_index += after_num;
vec_type_f vsrc = _mm_uni_loadu_ps(src_data + s_index);
vmask_type vmask = _mm_uni_cmpgt_ps(vsrc, vmax_val);
vmax_val = _mm_uni_blendv_ps(vmax_val, vsrc, vmask);
if (!out_max_val) {
vec_type_i vindex_cur_val = _mm_uni_set1_epi32(i2);
#if defined(HAVE_AVX512F)
vindex_max_val = _mm512_mask_blend_epi32(vmask, vindex_max_val, vindex_cur_val);
#else
vindex_max_val = _mm_uni_blendv_epi8(vindex_max_val, vindex_cur_val, _mm_uni_castps_si(vmask));
#endif
}
}
if (!out_max_val) {
vec_type_f vindex_max_val_fp32 = _mm_uni_cvtepi32_ps(vindex_max_val);
_mm_uni_storeu_ps(dst_data + i0 * after_num + ib1 * block_size, vindex_max_val_fp32);
} else {
_mm_uni_storeu_ps(dst_data + i0 * after_num + ib1 * block_size, vmax_val);
}
});
first_index = after_num / block_size * block_size;
#endif
int rest = after_num - first_index;
parallel_for2d(before_num, rest, [&](int i0, int i1) {
int index_max_val = 0;
int s_index = i0 * dim * after_num + first_index + i1;
float max_val = src_data[s_index];
for (int i2 = 1; i2 < dim; i2++) {
s_index += after_num;
if (src_data[s_index] > max_val) {
max_val = src_data[s_index];
if (!out_max_val) {
index_max_val = i2;
}
}
}
if (!out_max_val)
dst_data[i0 * after_num + first_index + i1] = static_cast<float>(index_max_val);
else
dst_data[i0 * after_num + first_index + i1] = max_val;
});
}
template <bool out_max_val>
void argmax_one_class(const float* src_data, float* dst_data, Shape in_dims) {
const int dim = count(in_dims, 1);
int before_num = in_dims[0];
parallel_for(before_num, [&](int i0) {
int index_max_val = 0;
int s_index = i0 * dim;
float max_val = src_data[s_index];
for (int i1 = 1; i1 < dim; i1++) {
s_index++;
if (src_data[s_index] > max_val) {
max_val = src_data[s_index];
index_max_val = i1;
}
}
if (!out_max_val) {
dst_data[i0] = static_cast<float>(index_max_val);
} else {
dst_data[i0 * 2] = static_cast<float>(index_max_val);
dst_data[i0 * 2 + 1] = max_val;
}
});
}
template <bool out_max_val>
void argmax_many_classes_has_axis(const float* src_data, float* dst_data, Shape in_dims, argmax_conf& conf) {
const auto axis_index_ = conf.axis_index_;
const auto top_k_ = conf.top_k_;
int axis_ = (axis_index_ < 0) ? axis_index_ + static_cast<int>(in_dims.size()) : axis_index_;
const int dim = static_cast<int>(in_dims[axis_]);
int before_num = count(in_dims, 0, axis_);
int after_num = count(in_dims, axis_ + 1, in_dims.size());
int first_index = 0;
#if defined(HAVE_AVX512F)
const int block_size = 16;
typedef __m512 vec_type_f;
typedef __m512i vec_type_i;
typedef __mmask16 vmask_type;
#elif defined(HAVE_AVX2)
const int block_size = 8;
typedef __m256 vec_type_f;
typedef __m256i vec_type_i;
typedef __m256 vmask_type;
#elif defined(HAVE_SSE)
const int block_size = 4;
typedef __m128 vec_type_f;
typedef __m128i vec_type_i;
typedef __m128 vmask_type;
#endif
#if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
if (top_k_ < count_vec) {
parallel_for2d(before_num, after_num / block_size, [&](int i0, int ib1) {
#if defined(HAVE_AVX512F)
const int N = 32;
vec_type_f vmax_values[N];
vec_type_i vmax_indexes[N];
#else
const int N = 16;
vec_type_f vmax_values[N];
vec_type_i vmax_indexes[N];
#endif
vec_type_f vtmp;
vec_type_i vtmp_indexes;
vmask_type vmask;
int s_index = i0 * dim * after_num + ib1 * block_size;
auto vswap_func = [&](int index1, int index2) {
vtmp = vmax_values[index1];
vmax_values[index1] = _mm_uni_blendv_ps(vmax_values[index1], vmax_values[index2], vmask);
vmax_values[index2] = _mm_uni_blendv_ps(vmax_values[index2], vtmp, vmask);
if (!out_max_val) {
vtmp_indexes = vmax_indexes[index1];
#if defined(HAVE_AVX512F)
vmax_indexes[index1] = _mm512_mask_blend_epi32(vmask, vmax_indexes[index1], vmax_indexes[index2]);
vmax_indexes[index2] = _mm512_mask_blend_epi32(vmask, vmax_indexes[index2], vtmp_indexes);
#else
vmax_indexes[index1] = _mm_uni_blendv_epi8(vmax_indexes[index1], vmax_indexes[index2], _mm_uni_castps_si(vmask));
vmax_indexes[index2] = _mm_uni_blendv_epi8(vmax_indexes[index2], vtmp_indexes, _mm_uni_castps_si(vmask));
#endif
}
};
for (int i2 = 0; i2 < top_k_; i2++) {
vmax_values[i2] = _mm_uni_loadu_ps(src_data + s_index);
if (!out_max_val) {
vmax_indexes[i2] = _mm_uni_set1_epi32(i2);
}
s_index += after_num;
}
for (int i2 = 0; i2 < top_k_ - 1; i2++) {
for (int i3 = top_k_ - 1; i3 > i2; i3--) {
vmask = _mm_uni_cmpgt_ps(vmax_values[i3], vmax_values[i3 - 1]);
#if defined(HAVE_AVX512F)
if (vmask) {
vswap_func(i3, i3 - 1);
}
#else
int swap = _mm_uni_movemask_ps(vmask);
if (swap) {
vswap_func(i3, i3 - 1);
}
#endif
}
}
for (int i2 = top_k_; i2 < dim; i2++) {
vmax_values[top_k_] = _mm_uni_loadu_ps(src_data + s_index);
if (!out_max_val) {
vmax_indexes[top_k_] = _mm_uni_set1_epi32(i2);
}
for (int i3 = top_k_; i3 > 0; i3--) {
vmask = _mm_uni_cmpgt_ps(vmax_values[i3], vmax_values[i3 - 1]);
#if defined(HAVE_AVX512F)
if (vmask) {
vswap_func(i3, i3 - 1);
} else {
break;
}
#else
int swap = _mm_uni_movemask_ps(vmask);
if (swap) {
vswap_func(i3, i3 - 1);
} else {
break;
}
#endif
}
s_index += after_num;
}
for (int i2 = 0; i2 < top_k_; i2++) {
if (!out_max_val) {
_mm_uni_storeu_ps(dst_data + (i0 * top_k_ + i2) * after_num + ib1 * block_size,
_mm_uni_cvtepi32_ps(vmax_indexes[i2]));
} else {
_mm_uni_storeu_ps(dst_data + (i0 * top_k_ + i2) * after_num + ib1 * block_size, vmax_values[i2]);
}
}
});
first_index = after_num / block_size * block_size;
}
#endif
int rest = after_num - first_index;
parallel_for2d(before_num, rest, [&](int i0, int i1) {
std::vector<float> max_values(top_k_ + 1);
std::vector<int> max_indexes(top_k_ + 1);
float tmp_value;
int tmp_index;
int s_index = i0 * dim * after_num + first_index + i1;
auto swap_func = [&](int index1, int index2) {
tmp_value = max_values[index1];
max_values[index1] = max_values[index2];
max_values[index2] = tmp_value;
if (!out_max_val) {
tmp_index = max_indexes[index1];
max_indexes[index1] = max_indexes[index2];
max_indexes[index2] = tmp_index;
}
};
for (int i2 = 0; i2 < top_k_; i2++) {
max_values[i2] = src_data[s_index];
if (!out_max_val) {
max_indexes[i2] = i2;
}
s_index += after_num;
}
for (int i2 = 0; i2 < top_k_ - 1; i2++) {
for (int i3 = top_k_ - 1; i3 > i2; i3--) {
if (max_values[i3] > max_values[i3 - 1]) {
swap_func(i3, i3 - 1);
}
}
}
for (int i2 = top_k_; i2 < dim; i2++) {
max_values[top_k_] = src_data[s_index];
if (!out_max_val) {
max_indexes[top_k_] = i2;
}
for (int i3 = top_k_; i3 > 0; i3--) {
if (max_values[i3] > max_values[i3 - 1]) {
swap_func(i3, i3 - 1);
} else {
break;
}
}
s_index += after_num;
}
for (int i2 = 0; i2 < top_k_; i2++) {
if (!out_max_val) {
dst_data[i0 * top_k_ * after_num + i2 * after_num + first_index + i1] = static_cast<float>(max_indexes[i2]);
} else {
dst_data[i0 * top_k_ * after_num + i2 * after_num + first_index + i1] = max_values[i2];
}
}
});
}
template <bool out_max_val>
void argmax_many_classes(const float* src_data, float* dst_data, Shape in_dims, argmax_conf& conf) {
const int dim = count(in_dims, 1);
auto top_k_ = conf.top_k_;
int before_num = in_dims[0];
parallel_for(before_num, [&](int i0) {
std::vector<float> max_values(top_k_ + 1);
std::vector<int> max_indexes(top_k_ + 1);
float tmp_value;
int tmp_index;
int s_index = i0 * dim;
auto swap_func = [&](int index1, int index2) {
tmp_value = max_values[index1];
max_values[index1] = max_values[index2];
max_values[index2] = tmp_value;
tmp_index = max_indexes[index1];
max_indexes[index1] = max_indexes[index2];
max_indexes[index2] = tmp_index;
};
for (int i2 = 0; i2 < top_k_; i2++) {
max_values[i2] = src_data[s_index];
max_indexes[i2] = i2;
s_index++;
}
for (int i2 = 0; i2 < top_k_ - 1; i2++) {
for (int i3 = top_k_ - 1; i3 > i2; i3--) {
if (max_values[i3] > max_values[i3 - 1]) {
swap_func(i3, i3 - 1);
}
}
}
for (int i2 = top_k_; i2 < dim; i2++) {
max_values[top_k_] = src_data[s_index];
max_indexes[top_k_] = i2;
for (int i3 = top_k_; i3 > 0; i3--) {
if (max_values[i3] > max_values[i3 - 1]) {
swap_func(i3, i3 - 1);
} else {
break;
}
}
s_index++;
}
for (int i2 = 0; i2 < top_k_; i2++) {
if (!out_max_val) {
dst_data[i0 * top_k_ + i2] = static_cast<float>(max_indexes[i2]);
} else {
dst_data[i0 * 2 * top_k_ + i2] = static_cast<float>(max_indexes[i2]);
dst_data[i0 * 2 * top_k_ + top_k_ + i2] = max_values[i2];
}
}
});
}
void arg_max_execute(const float* input, float *output, std::vector<size_t> dims, argmax_conf& conf) {
Shape in_dims = dims;
const float* src_data = input;
float* dst_data = output;
auto top_k_ = conf.top_k_;
auto has_axis_ = conf.has_axis_;
auto out_max_val_ = conf.out_max_val_;
if (top_k_ == 1) {
if (has_axis_) {
if (out_max_val_) {
argmax_one_class_has_axis<true>(src_data, dst_data, in_dims, conf);
} else {
argmax_one_class_has_axis<false>(src_data, dst_data, in_dims, conf);
}
} else {
if (out_max_val_) {
argmax_one_class<true>(src_data, dst_data, in_dims);
} else {
argmax_one_class<false>(src_data, dst_data, in_dims);
}
}
} else {
if (has_axis_) {
if (out_max_val_) {
argmax_many_classes_has_axis<true>(src_data, dst_data, in_dims, conf);
} else {
argmax_many_classes_has_axis<false>(src_data, dst_data, in_dims, conf);
}
} else {
if (out_max_val_) {
argmax_many_classes<true>(src_data, dst_data, in_dims, conf);
} else {
argmax_many_classes<false>(src_data, dst_data, in_dims, conf);
}
}
}
}
} // namespace XARCH
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine

View File

@ -1,27 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <cstddef>
#include <vector>
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
struct argmax_conf {
bool out_max_val_;
int top_k_;
bool has_axis_;
int axis_index_;
};
namespace XARCH {
void arg_max_execute(const float* inputs, float *outputs, std::vector<size_t> dims, argmax_conf& conf);
} // namespace XARCH
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine

View File

@ -5,8 +5,11 @@
#pragma once
#include <ie_iextension.h>
#include <legacy/ie_util_internal.hpp>
#include "nodes/list.hpp"
#include "common/tensor_desc_creator.h"
#include "ngraph/descriptor/tensor.hpp"
#include <ie_ngraph_utils.hpp>
#include "cpu_types.h"
#include <string>
#include <vector>
@ -53,99 +56,76 @@ public:
}
protected:
enum class ConfLayout { ANY, PLN, BLK8, BLK16 };
MKLDNNPlugin::Algorithm getAlgorithm() const {
return algorithm;
}
MKLDNNPlugin::Algorithm algorithm;
class DataConfigurator {
public:
explicit DataConfigurator(ConfLayout l):
layout(l) {}
DataConfigurator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType, Precision prc = Precision::UNSPECIFIED, bool constant = false, int inplace = -1) :
tensorDescCreator(getTensorDescCreator(tensorDescType)), prc(prc), constant(constant), inplace(inplace) {}
DataConfigurator(ConfLayout l, bool constant, int inplace = -1, Precision::ePrecision prc = Precision::UNSPECIFIED):
layout(l), constant(constant), inplace(inplace), prc(prc) {}
DataConfigurator(const MKLDNNPlugin::TensorDescCreator::CreatorConstPtr& tensorDescCreator, Precision prc = Precision::UNSPECIFIED,
bool constant = false, int inplace = -1) : tensorDescCreator(tensorDescCreator), prc(prc), constant(constant), inplace(inplace) {}
DataConfigurator(ConfLayout l, Precision::ePrecision prc):
layout(l), prc(prc) {}
ConfLayout layout;
bool constant = false;
int inplace = -1;
Precision::ePrecision prc = Precision::UNSPECIFIED; // by default use the layer precision
const MKLDNNPlugin::TensorDescCreator::CreatorConstPtr tensorDescCreator;
const bool constant = false;
const int inplace = -1;
const Precision prc = Precision::UNSPECIFIED; // By default ngraph node precision is used
private:
static MKLDNNPlugin::TensorDescCreator::CreatorConstPtr getTensorDescCreator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType) {
auto& creators = MKLDNNPlugin::TensorDescCreator::getCommonCreators();
if (creators.find(tensorDescType) == creators.end()) {
IE_THROW() << "Cannot find tensor descriptor creator";
}
return creators.at(tensorDescType);
}
};
void addConfig(const CNNLayer* layer, std::vector<DataConfigurator> in_l,
std::vector<DataConfigurator> out_l, bool dynBatchSupport = false) {
void addConfig(const std::shared_ptr<ngraph::Node>& op,
const std::vector<DataConfigurator>& inDataConfigurators,
const std::vector<DataConfigurator>& outDataConfigurators,
bool dynBatchSupport = false) {
LayerConfig config;
if (in_l.size() != layer->insData.size())
IE_THROW() << "Incorrect number of input edges for layer " << layer->name << ". Expected " << layer->insData.size()
<< " but layout specification provided for " << in_l.size();
if (out_l.size() != layer->outData.size())
IE_THROW() << "Incorrect number of output edges for layer " << layer->name << ". Expected " << layer->outData.size()
<< " but layout specification provided for " << out_l.size();
if (inDataConfigurators.size() != op->get_input_size())
IE_THROW() << "Cannot add config for operation " << op->get_friendly_name() << ". Incorrect number of inputs: " <<
"expected: " << op->get_input_size() << ", provided: " << inDataConfigurators.size();
if (outDataConfigurators.size() != op->get_output_size())
IE_THROW() << "Cannot add config for operation " << op->get_friendly_name() << ". Incorrect number of outputs: " <<
"expected: " << op->get_output_size() << ", provided: " << outDataConfigurators.size();
// Fill tensor parameters into config
auto fill_port = [] (std::vector<DataConfig>& port, DataConfigurator conf, const DataPtr& data) {
auto div_up = [](const int a, const int b) -> int {
if (!b)
return 0;
return (a + b - 1) / b;
};
if (!data) IE_THROW() << "Cannot get input data!";
auto fill_port = [] (const DataConfigurator& dataConfigurator, const ngraph::descriptor::Tensor& tensor, std::vector<DataConfig>& port) -> bool {
// In order to simplify particular node initialization logic we simply don't add a config if the target shape is not supported by the tensorDescCreator.
// This should be suitable for the majority of scenarios since almost all nodes add an `ncsp` tensorDescCreator, which supports any shape rank.
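// Example (hypothetical): if a creator's minimal supported rank exceeded the rank of a node's 2D tensor,
// the whole config would simply be skipped instead of throwing, while the node's `ncsp` config (added by
// a separate addConfig call) would still be registered.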
if (tensor.get_shape().size() < dataConfigurator.tensorDescCreator->getMinimalRank())
return false;
auto precision = dataConfigurator.prc != Precision::UNSPECIFIED ? dataConfigurator.prc : details::convertPrecision(tensor.get_element_type());
DataConfig dataConfig;
dataConfig.inPlace = conf.inplace;
dataConfig.constant = conf.constant;
dataConfig.inPlace = dataConfigurator.inplace;
dataConfig.constant = dataConfigurator.constant;
dataConfig.desc = dataConfigurator.tensorDescCreator->createDesc(precision, tensor.get_shape());
const TensorDesc& data_desc = data->getTensorDesc();
const SizeVector& data_dims = data_desc.getDims();
std::vector<size_t> blocks = data_dims;
std::vector<size_t> order(blocks.size());
for (size_t i = 0; i < order.size(); i++) order[i] = i;
const bool isInt8 = (data->getPrecision() == Precision::I8 || data->getPrecision() == Precision::U8);
if (conf.layout == ConfLayout::BLK8 || conf.layout == ConfLayout::BLK16) {
if (data_dims.size() < 4 || data_dims.size() > 5)
IE_THROW() << "Inapplicable blocking layout."
<< "Tensor should be 4D or 5D.";
int blk_size = conf.layout == ConfLayout::BLK8 ? 8 : 16;
// Blocking through Channel dimension. Like [nChwXc]
order.push_back(1);
blocks[1] = div_up(blocks[1], blk_size);
blocks.push_back(blk_size);
} else if (isInt8) {
if (data_dims.size() == 4) {
order = {0, 2, 3, 1};
blocks = {data_dims[0], data_dims[2], data_dims[3], data_dims[1]};
} else if (data_dims.size() == 5) {
order = {0, 2, 3, 4, 1};
blocks = {data_dims[0], data_dims[2], data_dims[3], data_dims[4], data_dims[1]};
} // otherwise keep the original plain format
conf.layout = ConfLayout::PLN;
}
InferenceEngine::Precision precision = (conf.prc == Precision::UNSPECIFIED) ? data_desc.getPrecision() : Precision(conf.prc);
if (conf.layout == ConfLayout::ANY) {
dataConfig.desc = TensorDesc(precision, data_dims, InferenceEngine::Layout::ANY);
} else {
dataConfig.desc = TensorDesc(precision, data_dims, {blocks, order});
}
port.push_back(dataConfig);
return true;
};
for (size_t i = 0; i < in_l.size(); i++)
fill_port(config.inConfs, in_l[i], layer->insData[i].lock());
for (size_t i = 0; i < inDataConfigurators.size(); i++)
if (!fill_port(inDataConfigurators[i], op->get_input_tensor(i), config.inConfs))
return;
for (size_t i = 0; i < out_l.size(); i++)
fill_port(config.outConfs, out_l[i], layer->outData[i]);
for (size_t i = 0; i < outDataConfigurators.size(); i++)
if (!fill_port(outDataConfigurators[i], op->get_output_tensor(i), config.outConfs))
return;
config.dynBatchSupport = dynBatchSupport;
confs.push_back(config);
}
std::string errorMsg;
std::vector<LayerConfig> confs;
};
@ -153,20 +133,22 @@ protected:
template <class IMPL>
class ImplFactory : public ILayerImplFactory {
public:
explicit ImplFactory(const CNNLayer *layer) {
cnnLayer = InferenceEngine::clonelayer(*layer);
cnnLayer->_fusedWith = layer->_fusedWith;
cnnLayer->insData = layer->insData;
cnnLayer->outData = layer->outData;
}
explicit ImplFactory(const std::shared_ptr<ngraph::Node>& op) : ngraphOp(op) {}
// First implementation has more priority than next
StatusCode getImplementations(std::vector<ILayerImpl::Ptr>& impls, ResponseDesc *resp) noexcept override {
impls.push_back(ILayerImpl::Ptr(new IMPL(cnnLayer.get())));
try {
impls.push_back(ILayerImpl::Ptr(new IMPL(ngraphOp)));
} catch (const InferenceEngine::Exception& ex) {
strncpy(resp->msg, ex.what(), sizeof(resp->msg) - 1);
IE_SUPPRESS_DEPRECATED_START
return ex.getStatus() != OK ? ex.getStatus() : GENERAL_ERROR;
IE_SUPPRESS_DEPRECATED_END
}
return OK;
}
protected:
InferenceEngine::CNNLayerPtr cnnLayer;
const std::shared_ptr<ngraph::Node> ngraphOp;
};
#define REG_FACTORY_FOR(__prim, __type) \

View File

@ -1,244 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "base.hpp"
#include "ie_parallel.hpp"
#include <cmath>
#include <string>
#include <vector>
#include <set>
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class BatchToSpaceImpl: public ExtLayerBase {
public:
explicit BatchToSpaceImpl(const CNNLayer *layer) {
try {
const auto batchToSpaceLayer = dynamic_cast<const BatchToSpaceLayer*>(layer);
if (!batchToSpaceLayer)
IE_THROW() << "BatchToSpace layer with name '" << layer->name << "' isn't instance of BatchToSpaceLayer class";
if (batchToSpaceLayer->insData.size() != 4)
IE_THROW() << "BatchToSpace layer with name '" << batchToSpaceLayer->name << "' has incorrect number of input edges";
if (batchToSpaceLayer->outData.size() != 1)
IE_THROW() << "BatchToSpace layer with name '" << batchToSpaceLayer->name << "' has incorrect number of output edges";
auto data = batchToSpaceLayer->insData[0].lock();
if (!data)
IE_THROW() << "BatchToSpace layer with name '" << batchToSpaceLayer->name << "' has nullable input data";
inDims = data->getTensorDesc().getDims();
if (inDims.size() < 4)
IE_THROW() << "BatchToSpace layer with name '" << batchToSpaceLayer->name << "' doesn't support dimensions with rank less than 4";
if (inDims.size() > 5)
IE_THROW() << "BatchToSpace layer with name '" << batchToSpaceLayer->name << "' doesn't support dimensions with rank greater than 5";
outDims = batchToSpaceLayer->outData[0]->getTensorDesc().getDims();
if (inDims.size() != outDims.size())
IE_THROW() << "BatchToSpace layer with name '" << batchToSpaceLayer->name << "' has incorrect number of input/output dimensions";
const auto precision = data->getTensorDesc().getPrecision();
const std::set<size_t> supported_precision_sizes = {1, 2, 4, 8};
if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end())
IE_THROW() << "BatchToSpace layer with name '" << batchToSpaceLayer->name << "' has unsupported precision: " << precision.name();
blockShapeIn = batchToSpaceLayer->_block_shape;
cropsBeginIn = batchToSpaceLayer->_crops_begin;
auto createConfig = [&](Layout layout) {
LayerConfig config;
// TODO: remove Const layers
for (int i = 0; i < batchToSpaceLayer->insData.size(); i++) {
auto inData = batchToSpaceLayer->insData[i].lock();
if (!inData)
IE_THROW() << "BatchToSpace layer with name '" << batchToSpaceLayer->name << "' has nullable input data";
DataConfig inConfig;
if (i == 0)
inConfig.desc = TensorDesc(precision, inData->getTensorDesc().getDims(), layout);
else
inConfig.desc = TensorDesc(inData->getPrecision(), inData->getTensorDesc().getDims(), inData->getTensorDesc().getLayout());
config.inConfs.push_back(inConfig);
}
DataConfig outConfig;
outConfig.desc = TensorDesc(precision, outDims, layout);
config.outConfs.push_back(outConfig);
config.dynBatchSupport = false;
confs.push_back(config);
};
createConfig(inDims.size() == 4 ? NHWC : NDHWC);
createConfig(TensorDesc::getLayoutByDims(inDims));
std::vector<std::pair<ConfLayout, ConfLayout>> blockConfs { };
if (inDims[1] % 8 == 0) blockConfs.push_back({ConfLayout::BLK8, ConfLayout::BLK8});
if (inDims[1] % 16 == 0) blockConfs.push_back({ConfLayout::BLK16, ConfLayout::BLK16});
for (auto conf : blockConfs) {
addConfig(layer, {DataConfigurator(conf.first, precision),
DataConfigurator(ConfLayout::PLN, batchToSpaceLayer->insData[1].lock()->getPrecision()),
DataConfigurator(ConfLayout::PLN, batchToSpaceLayer->insData[2].lock()->getPrecision()),
DataConfigurator(ConfLayout::PLN, batchToSpaceLayer->insData[3].lock()->getPrecision())},
{DataConfigurator(conf.second, precision)});
}
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
}
}
StatusCode execute(std::vector<Blob::Ptr> &inputs, std::vector<Blob::Ptr> &outputs, ResponseDesc *resp) noexcept override {
switch (inputs[0]->getTensorDesc().getPrecision().size()) {
case 1: batchToSpaceKernel<PrecisionTrait<Precision::U8>::value_type> (inputs, outputs); break;
case 2: batchToSpaceKernel<PrecisionTrait<Precision::U16>::value_type>(inputs, outputs); break;
case 4: batchToSpaceKernel<PrecisionTrait<Precision::I32>::value_type>(inputs, outputs); break;
default: {
if (resp) {
std::string errorMsg = "BatchToSpace layer does not support precision '"
+ std::string(inputs[0]->getTensorDesc().getPrecision().name()) + "'";
errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
return GENERAL_ERROR;
}
}
}
return OK;
}
private:
std::vector<size_t> getShape5D(const SizeVector &shape) {
std::vector<size_t> shape5D(5, 1);
for (int i = 0; i < 2; i++) {
shape5D[i] = shape[i];
shape5D[4 - i] = shape[shape.size() - 1 - i];
}
shape5D[2] = shape.size() == 5 ? shape[2] : shape5D[2];
return shape5D;
}
template<typename T>
void batchToSpaceKernel(std::vector<Blob::Ptr> &inputs, std::vector<Blob::Ptr> &outputs) noexcept {
const T *srcData = inputs[0]->cbuffer().as<const T *>() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
T *dstData = outputs[0]->buffer().as<T *>() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
const auto layout = inputs[0]->getTensorDesc().getLayout();
const bool blocked = layout != NCHW && layout != NCDHW && layout != NHWC && layout != NDHWC;
const auto dimsSize = inDims.size();
auto inShape5D = getShape5D(inDims);
auto outShape5D = getShape5D(outDims);
auto blockShape = getShape5D(blockShapeIn);
if (layout == NHWC || layout == NDHWC) {
inShape5D.push_back(inShape5D[1]);
inShape5D.erase(inShape5D.begin() + 1);
outShape5D.push_back(outShape5D[1]);
outShape5D.erase(outShape5D.begin() + 1);
blockShape.push_back(blockShape[1]);
blockShape.erase(blockShape.begin() + 1);
}
const size_t blockSize = blocked ? outputs[0]->getTensorDesc().getBlockingDesc().getBlockDims().back() : 1lu;
const size_t blockCountInput = inputs[0]->getTensorDesc().getBlockingDesc().getBlockDims()[1];
const size_t blockCountOutput = outputs[0]->getTensorDesc().getBlockingDesc().getBlockDims()[1];
const auto blockRemainder = inShape5D[1] % blockSize;
const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder;
const size_t inSpatialStep = inShape5D[2] * inShape5D[3] * inShape5D[4];
const size_t inBatchStep = (blocked ? blockSize * blockCountInput : inShape5D[1]) * inSpatialStep;
const size_t outSpatialStep = outShape5D[2] * outShape5D[3] * outShape5D[4];
const size_t outBatchStep = (blocked ? blockSize * blockCountOutput : outShape5D[1]) * outSpatialStep;
size_t channels = (inShape5D[1] / blockSize);
channels = channels == 0 ? 1 : channels;
const size_t workAmount = inShape5D[0] * channels;
parallel_nt(0, [&](const int ithr, const int nthr) {
size_t start(0lu), end(0lu);
splitter(workAmount, nthr, ithr, start, end);
std::vector<size_t> indxStart(2, 0);
std::vector<size_t> indxEnd(2, 0);
parallel_it_init(start, indxStart[0], inShape5D[0], indxStart[1], channels);
parallel_it_init((end - 1), indxEnd[0], inShape5D[0], indxEnd[1], channels);
std::vector<int64_t> oAdd(5, 1);
std::vector<size_t> begin(5, 0);
std::vector<size_t> finish(5, 1);
for (size_t i0 = indxStart[0]; i0 < indxEnd[0] + 1; ++i0) {
int64_t bIdx = i0 / outShape5D[0];
const size_t srcIdx0 = i0 * inBatchStep;
const size_t dstIdx0 = (i0 - (bIdx * outShape5D[0])) * outBatchStep;
oAdd[4] = bIdx % blockShapeIn[dimsSize - 1] - cropsBeginIn[dimsSize - 1];
bIdx /= blockShapeIn[dimsSize - 1];
oAdd[3] = bIdx % blockShapeIn[dimsSize - 2] - cropsBeginIn[dimsSize - 2];
bIdx /= blockShapeIn[dimsSize - 2];
oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - cropsBeginIn[2] : 0lu;
bIdx = dimsSize == 5 ? bIdx / blockShapeIn[2] : bIdx;
oAdd[1] = bIdx % blockShapeIn[1] - cropsBeginIn[1];
if (layout == NHWC || layout == NDHWC) {
oAdd.push_back(oAdd[1]);
oAdd.erase(oAdd.begin() + 1);
}
begin[1] = (blockShape[1] - 1 - oAdd[1]) / blockShape[1] / blockSize;
finish[1] = (outShape5D[1] - 1 - oAdd[1]) / blockShape[1] / blockSize;
begin[2] = (blockShape[2] - 1 - oAdd[2]) / blockShape[2];
finish[2] = (outShape5D[2] - 1 - oAdd[2]) / blockShape[2];
begin[3] = (blockShape[3] - 1 - oAdd[3]) / blockShape[3];
finish[3] = (outShape5D[3] - 1 - oAdd[3]) / blockShape[3];
begin[4] = (blockShape[4] - 1 - oAdd[4]) / blockShape[4];
finish[4] = (outShape5D[4] - 1 - oAdd[4]) / blockShape[4];
const int64_t addTmpOC = blocked ? 0lu : oAdd[1];
const int64_t addTmpOc = blocked ? oAdd[1] : 0lu;
indxStart[1] = begin[1] > indxStart[1] ? begin[1] : indxStart[1];
const size_t lastI1 = i0 == indxEnd[0] ? (indxEnd[1] > finish[1] ? finish[1] : indxEnd[1]) : finish[1];
for (; indxStart[1] < lastI1 + 1; ++indxStart[1]) {
const size_t block = indxStart[1] == finish[1] ? lastBlock : blockSize;
const int64_t tmpOC = indxStart[1] * blockShape[1] + addTmpOC;
const size_t srcIdx1 = srcIdx0 + indxStart[1] * inSpatialStep * blockSize;
const size_t dstIdx1 = dstIdx0 + tmpOC * outSpatialStep * blockSize;
const size_t itEnd = blocked ? ((block - 1) * blockShape[1] + oAdd[1]) / blockSize : 0lu;
for (size_t i2 = begin[2]; i2 < finish[2] + 1; ++i2) {
const int64_t tmpOd = i2 * blockShape[2] + oAdd[2];
const size_t srcIdx2 = srcIdx1 + i2 * inShape5D[3] * inShape5D[4] * blockSize;
const size_t dstIdx2 = dstIdx1 + tmpOd * outShape5D[3] * outShape5D[4] * blockSize;
for (size_t i3 = begin[3]; i3 < finish[3] + 1; ++i3) {
const int64_t tmpOh = i3 * blockShape[3] + oAdd[3];
const size_t srcIdx3 = srcIdx2 + i3 * inShape5D[4] * blockSize;
const size_t dstIdx3 = dstIdx2 + tmpOh * outShape5D[4] * blockSize;
for (size_t i4 = begin[4]; i4 < finish[4] + 1; ++i4) {
const int64_t tmpOw = i4 * blockShape[4] + oAdd[4];
const size_t srcIdx4 = srcIdx3 + i4 * blockSize;
const size_t dstIdx4 = dstIdx3 + tmpOw * blockSize;
for (size_t it = 0; it < itEnd + 1; ++it) {
const size_t i5Begin = it == 0 ? 0 : (it * blockSize - 1 - oAdd[1]) / blockShape[1] + 1;
const size_t i5End = it == itEnd ? (block - 1) : ((it + 1) * blockSize - 1 - oAdd[1]) / blockShape[1];
for (size_t i5 = i5Begin; i5 < i5End + 1; ++i5) {
const int64_t tmpOc = i5 * blockShape[1] + addTmpOc;
const size_t srcIdx5 = srcIdx4 + i5;
const size_t dstIdx5 = dstIdx4 + it * outSpatialStep * blockSize + (tmpOc - it * blockSize);
dstData[dstIdx5] = srcData[srcIdx5];
}
}
}
}
}
}
indxStart[1] = 0lu;
}
});
}
SizeVector inDims;
SizeVector outDims;
std::vector<size_t> blockShapeIn;
std::vector<size_t> cropsBeginIn;
};
REG_FACTORY_FOR(BatchToSpaceImpl, BatchToSpace);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine

View File

@ -1,135 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "base.hpp"
#include <cmath>
#include <string>
#include <vector>
#include <cassert>
#include "ie_parallel.hpp"
#include "common/cpu_memcpy.h"
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class BroadcastImpl: public ExtLayerBase {
public:
explicit BroadcastImpl(const CNNLayer* layer) {
try {
if (layer->insData.empty() || layer->outData.empty())
IE_THROW() << layer->name << " Incorrect number of input/output edges!";
if (layer->insData.size() != 2)
IE_THROW() << layer->name << " Incorrect number of input edges!";
SizeVector shape_dims = layer->insData[BROADCAST_SHAPE].lock()->getTensorDesc().getDims();
if (shape_dims.size() > 1)
IE_THROW() << layer->name << " Shape vector should be 1 dimension";
LayerConfig config;
DataConfig dataConfig, shapeConfig;
Precision dataPrecision = layer->insData[BROADCAST_INPUT].lock()->getTensorDesc().getPrecision();
const SizeVector& data_dims = layer->insData[BROADCAST_INPUT].lock()->getTensorDesc().getDims();
dataConfig.desc = TensorDesc(dataPrecision, data_dims,
layer->insData[BROADCAST_INPUT].lock()->getTensorDesc().getLayout());
config.inConfs.push_back(dataConfig);
shapeConfig.desc = TensorDesc(layer->insData[BROADCAST_SHAPE].lock()->getTensorDesc().getPrecision(),
shape_dims, TensorDesc::getLayoutByDims(shape_dims));
config.inConfs.push_back(shapeConfig);
DataConfig outConfig;
const SizeVector& out_dims = layer->outData[0]->getTensorDesc().getDims();
outConfig.desc = TensorDesc(dataPrecision, out_dims, layer->outData[0]->getTensorDesc().getLayout());
config.outConfs.push_back(outConfig);
config.dynBatchSupport = false;
confs.push_back(config);
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
}
}
StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
size_t shape_size = (inputs[BROADCAST_SHAPE]->getTensorDesc().getDims())[0];
SizeVector dst_dims = outputs[0]->getTensorDesc().getDims();
SizeVector src_dims = inputs[BROADCAST_INPUT]->getTensorDesc().getDims();
SizeVector srcStrides = inputs[BROADCAST_INPUT]->getTensorDesc().getBlockingDesc().getStrides();
size_t data_size = inputs[BROADCAST_INPUT]->getTensorDesc().getPrecision().size();
if (!src_dims.size())
src_dims = SizeVector(1, 1);
if (!srcStrides.size())
srcStrides = SizeVector(1, 1);
if (dst_dims.size() != shape_size) {
if (resp) {
std::string errorMsg = "Output tensor dimension mismatch";
errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
}
return PARAMETER_MISMATCH;
}
if (src_dims.size() > dst_dims.size()) {
if (resp) {
std::string errorMsg = "Output tensor dimension is smaller then input tensor dimension";
errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
}
return PARAMETER_MISMATCH;
}
InferenceEngine::SizeVector dstStrides = outputs[0]->getTensorDesc().getBlockingDesc().getStrides();
InferenceEngine::SizeVector src_aligned(dst_dims.size());
InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size());
size_t prefix_size = dst_dims.size() - src_dims.size();
for (size_t i = 0; i < dst_dims.size(); i++) {
if (i < prefix_size) {
src_aligned[i] = 1;
srcStrides_aligned[i] = srcStrides[0];
} else {
src_aligned[i] = src_dims[i - prefix_size];
srcStrides_aligned[i] = srcStrides[i - prefix_size];
}
}
size_t work_amount_dst = dstStrides[0] * dst_dims[0];
const uint8_t *src_data = inputs[BROADCAST_INPUT]->cbuffer().as<const uint8_t *>() +
inputs[BROADCAST_INPUT]->getTensorDesc().getBlockingDesc().getOffsetPadding();
uint8_t* dst_data = outputs[0]->cbuffer().as<uint8_t *>() +
outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
parallel_nt(0, [&](const int ithr, const int nthr) {
size_t i, src_idx, start = 0, end = 0;
SizeVector counters(dst_dims.size(), 0);
splitter(work_amount_dst, nthr, ithr, start, end);
for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) {
counters[j] = i % dst_dims[j];
i /= dst_dims[j];
}
for (size_t iwork = start * data_size; iwork < end * data_size; iwork += data_size) {
for (i = 0, src_idx = 0; i < dst_dims.size(); ++i)
src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0;
cpu_memcpy(&dst_data[iwork], &src_data[src_idx * data_size], data_size);
for (int j = dst_dims.size() - 1; j >= 0; j--) {
counters[j] = (counters[j] + 1) % dst_dims[j];
if (counters[j] != 0) break;
}
}
});
return OK;
}
private:
const size_t BROADCAST_INPUT = 0;
const size_t BROADCAST_SHAPE = 1;
};
REG_FACTORY_FOR(BroadcastImpl, Broadcast);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine

View File

@ -12,55 +12,72 @@
#include <algorithm>
#include <limits>
#include "ie_parallel.hpp"
#include <ngraph/opsets/opset3.hpp>
using namespace MKLDNNPlugin;
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class BucketizeImpl : public ExtLayerBase {
public:
explicit BucketizeImpl(const CNNLayer* layer) {
bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (layer->insData.size() != 2 || layer->outData.size() != 1) {
IE_THROW() << layer->name << " Incorrect number of input/output edges!";
const auto bucketsize = std::dynamic_pointer_cast<const ngraph::opset3::Bucketize>(op);
if (!bucketsize) {
errorMessage = "Only opset3 Bucketize operation is supported";
return false;
}
} catch (...) {
return false;
}
return true;
}
std::string errorPrefix;
public:
explicit BucketizeImpl(const std::shared_ptr<ngraph::Node>& op) {
try {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
errorPrefix = "Bucketize layer with name '" + op->get_friendly_name() + "' ";
const auto bucketsize = std::dynamic_pointer_cast<const ngraph::opset3::Bucketize>(op);
if (op->get_input_size() != 2 || op->get_output_size() != 1) {
IE_THROW() << errorPrefix << " has incorrect number of input/output edges!";
}
// check one attribute
with_right = layer->GetParamAsBool("with_right_bound");
auto input = layer->insData[INPUT_TENSOR_PORT].lock();
if (!input) {
IE_THROW() << "Missing input for " << layer->name << " layer";
}
auto boundaries = layer->insData[INPUT_BINS_PORT].lock();
if (!boundaries) {
IE_THROW() << "Missing boundaries input for " << layer->name << " layer";
}
with_right = bucketsize->get_with_right_bound();
// check precisions for input and output tensors
input_precision = input->getTensorDesc().getPrecision();
input_precision = details::convertPrecision(op->get_input_element_type(INPUT_TENSOR_PORT));
if (input_precision != Precision::FP32 && input_precision != Precision::I32 &&
input_precision != Precision::I64) {
input_precision = Precision::FP32;
}
boundaries_precision = boundaries->getTensorDesc().getPrecision();
boundaries_precision = details::convertPrecision(op->get_input_element_type(INPUT_BINS_PORT));
if (boundaries_precision != Precision::FP32 && boundaries_precision != Precision::I32 &&
boundaries_precision != Precision::I64) {
boundaries_precision = Precision::FP32;
}
output_precision = layer->outData[OUTPUT_TENSOR_PORT]->getTensorDesc().getPrecision();
output_precision = details::convertPrecision(op->get_output_element_type(OUTPUT_TENSOR_PORT));
if (output_precision != Precision::I32 && output_precision != Precision::I64) {
output_precision = Precision::I32;
}
// check dimensions of input tensors
SizeVector input_tensor_dims = input->getTensorDesc().getDims();
SizeVector input_tensor_dims = op->get_input_shape(INPUT_TENSOR_PORT);
if (input_tensor_dims.size() < 1) {
IE_THROW() << layer->name << " Incorrect dimensions of the input.";
IE_THROW() << errorPrefix << " has incorrect dimensions of the input.";
}
SizeVector input_bin_dims = boundaries->getTensorDesc().getDims();
SizeVector input_bin_dims = op->get_input_shape(INPUT_BINS_PORT);
if (input_bin_dims.size() != 1) {
IE_THROW() << layer->name << " Incorrect dimensions of the boundaries tensor.";
IE_THROW() << errorPrefix << " has incorrect dimensions of the boundaries tensor.";
}
if (input_bin_dims[0] != 0) {
with_bins = true;
@ -69,9 +86,9 @@ public:
num_values = std::accumulate(input_tensor_dims.begin(), input_tensor_dims.end(), 1, std::multiplies<size_t>());
addConfig(layer,
{ DataConfigurator(ConfLayout::PLN, input_precision), DataConfigurator(ConfLayout::PLN, boundaries_precision) },
{ DataConfigurator(ConfLayout::PLN, output_precision) });
addConfig(op, {{TensorDescCreatorTypes::ncsp, input_precision},
{TensorDescCreatorTypes::ncsp, boundaries_precision}},
{{TensorDescCreatorTypes::ncsp, output_precision}});
}
catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();

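The Bucketize rewrite above follows the constructor pattern used throughout this migration: validate the ngraph node in isSupportedOperation() first, and report unsupported cases through a NotImplemented exception so the caller can fall back to another implementation. A minimal sketch of that control flow, using hypothetical stand-in types (Node, MyOp, NotImplementedError are illustrative, not the real IE/ngraph classes):

#include <memory>
#include <stdexcept>
#include <string>

struct Node { virtual ~Node() = default; };   // stand-in for ngraph::Node
struct MyOp : Node {};                        // stand-in for the supported opset type

struct NotImplementedError : std::runtime_error {  // stand-in for IE_THROW(NotImplemented)
    using std::runtime_error::runtime_error;
};

class MyLayerImpl {
public:
    static bool isSupportedOperation(const std::shared_ptr<const Node>& op,
                                     std::string& errorMessage) noexcept {
        if (!std::dynamic_pointer_cast<const MyOp>(op)) {
            errorMessage = "Only MyOp is supported";
            return false;
        }
        return true;
    }

    explicit MyLayerImpl(const std::shared_ptr<Node>& op) {
        std::string errorMessage;
        if (!isSupportedOperation(op, errorMessage))
            throw NotImplementedError(errorMessage);  // caller can try a different implementation
        // ...read attributes from the op and register port configurations here...
    }
};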
View File

@ -4,45 +4,68 @@
#include "base.hpp"
#include "ie_parallel.hpp"
#include <ngraph/op/ctc_greedy_decoder.hpp>
#include <nodes/common/tensor_desc_creator.h>
#include <vector>
#include <string>
#include <vector>
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
using MKLDNNPlugin::TensorDescCreatorTypes;
class CTCGreedyDecoderImpl: public ExtLayerBase {
public:
explicit CTCGreedyDecoderImpl(const CNNLayer* layer) : mergeRepeated_(true) {
std::string errPrefix = "CTCGreedyDecoder layer with name '" + layer->name + "' ";
if (layer->insData.size() != 2)
IE_THROW() << errPrefix << "has invalid number of input edges: " << layer->insData.size();
if (layer->outData.size() != 1)
IE_THROW() << errPrefix << "has invalid number of outputs edges: " << layer->outData.size();
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
auto greedyDecOp = ngraph::as_type_ptr<const ngraph::op::v0::CTCGreedyDecoder>(op);
if (!greedyDecOp) {
errorMessage = "Node is not an instance of the CTCGreedyDecoder operation from operation set v0.";
return false;
}
} catch (...) {
return false;
}
auto inData = layer->insData[DATA_INDEX].lock();
auto sequenceLenData = layer->insData[SEQUENCE_LENGTH_INDEX].lock();
if (!inData || !sequenceLenData)
IE_THROW() << errPrefix << "has nullable inputs.";
if (inData->getTensorDesc().getDims()[0] != sequenceLenData->getTensorDesc().getDims()[0] &&
inData->getTensorDesc().getDims()[1] != sequenceLenData->getTensorDesc().getDims()[1])
IE_THROW() << errPrefix << "has invalid input shapes.";
if (inData->getTensorDesc().getPrecision() != Precision::FP32 &&
inData->getTensorDesc().getPrecision() != Precision::BF16)
IE_THROW() << errPrefix << "has unsupported 'data' input precision: " << inData->getTensorDesc().getPrecision();
if (sequenceLenData->getTensorDesc().getPrecision() != Precision::FP32 &&
inData->getTensorDesc().getPrecision() != Precision::BF16)
IE_THROW() << errPrefix << "has unsupported 'sequence_length' input precision: " << sequenceLenData->getTensorDesc().getPrecision();
return true;
}
std::vector<DataConfigurator> inputConfigs{{ConfLayout::PLN, Precision::FP32}, {ConfLayout::PLN, Precision::FP32}};
std::vector<DataConfigurator> outputConfigs{{ConfLayout::PLN, Precision::FP32}};
addConfig(layer, inputConfigs, outputConfigs);
explicit CTCGreedyDecoderImpl(const std::shared_ptr<ngraph::Node>& op) : mergeRepeated_(true) {
try {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
if (layer->CheckParamPresence("ctc_merge_repeated")) {
mergeRepeated_ = layer->GetParamAsBool("ctc_merge_repeated");
} else if (layer->CheckParamPresence("merge_repeated")) {
mergeRepeated_ = layer->GetParamAsBool("merge_repeated", true);
std::string errPrefix = "CTCGreedyDecoder layer with name '" + op->get_friendly_name() + "' ";
if (op->get_input_size() != 2)
IE_THROW() << errPrefix << "has invalid number of input edges: " << op->get_input_size();
if (op->get_output_size() != 1)
IE_THROW() << errPrefix << "has invalid number of outputs edges: " << op->get_output_size();
if (op->get_input_shape(DATA_INDEX)[0] != op->get_input_shape(SEQUENCE_LENGTH_INDEX)[0] &&
op->get_input_shape(DATA_INDEX)[1] != op->get_input_shape(SEQUENCE_LENGTH_INDEX)[1])
IE_THROW() << errPrefix << "has invalid input shapes.";
Precision inDataPrecision = details::convertPrecision(op->get_input_element_type(DATA_INDEX));
if (inDataPrecision != Precision::FP32 && inDataPrecision != Precision::BF16)
IE_THROW() << errPrefix << "has unsupported 'data' input precision: " << inDataPrecision;
Precision seqLenPrecision = details::convertPrecision(op->get_input_element_type(SEQUENCE_LENGTH_INDEX));
if (seqLenPrecision != Precision::FP32 && seqLenPrecision != Precision::BF16)
IE_THROW() << errPrefix << "has unsupported 'sequence_length' input precision: " << seqLenPrecision;
auto greedyDecOp = ngraph::as_type_ptr<const ngraph::op::v0::CTCGreedyDecoder>(op);
mergeRepeated_ = greedyDecOp->get_ctc_merge_repeated();
addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32},
{TensorDescCreatorTypes::ncsp, Precision::FP32}},
{{TensorDescCreatorTypes::ncsp, Precision::FP32}});
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
throw;
}
}

View File

@ -4,51 +4,81 @@
#include "base.hpp"
#include "ie_parallel.hpp"
#include <ngraph/op/ctc_greedy_decoder_seq_len.hpp>
#include <nodes/common/tensor_desc_creator.h>
#include <vector>
#include <string>
#include <vector>
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
using MKLDNNPlugin::TensorDescCreatorTypes;
class CTCGreedyDecoderSeqLenImpl: public ExtLayerBase {
public:
explicit CTCGreedyDecoderSeqLenImpl(const CNNLayer* layer) : mergeRepeated_(true) {
errPrefix = "CTCGreedyDecoderSeqLen layer with name '" + layer->name + "' ";
if (layer->insData.size() < 2 || layer->insData.size() > 3)
IE_THROW() << errPrefix << "has invalid number of input edges: " << layer->insData.size();
if (layer->outData.size() != 2)
IE_THROW() << errPrefix << "has invalid number of outputs edges: " << layer->outData.size();
auto inData = layer->insData[DATA_INDEX].lock();
auto sequenceLenData = layer->insData[SEQUENCE_LENGTH_INDEX].lock();
if (!inData || !sequenceLenData)
IE_THROW() << errPrefix << "has nullable inputs.";
if (inData->getTensorDesc().getDims()[0] != sequenceLenData->getTensorDesc().getDims()[0])
IE_THROW() << errPrefix << "has invalid input shapes.";
if (inData->getTensorDesc().getPrecision() != Precision::FP32 &&
inData->getTensorDesc().getPrecision() != Precision::BF16)
IE_THROW() << errPrefix << "has unsupported 'data' input precision: " << inData->getTensorDesc().getPrecision();
if (sequenceLenData->getTensorDesc().getPrecision() != Precision::I32 &&
sequenceLenData->getTensorDesc().getPrecision() != Precision::I64)
IE_THROW() << errPrefix << "has unsupported 'sequence_length' input precision: " << sequenceLenData->getTensorDesc().getPrecision();
std::vector<DataConfigurator> inputConfigs{{ConfLayout::PLN, Precision::FP32}, {ConfLayout::PLN, Precision::I32}};
if (layer->insData.size() > BLANK_INDEX) {
auto blankIndexData = layer->insData[BLANK_INDEX].lock();
if (!blankIndexData)
IE_THROW() << errPrefix << "has nullable inputs.";
if (blankIndexData->getTensorDesc().getPrecision() != Precision::I32 &&
blankIndexData->getTensorDesc().getPrecision() != Precision::I64)
IE_THROW() << errPrefix << "has unsupported 'blank_index' input precision: " << blankIndexData->getTensorDesc().getPrecision();
inputConfigs.push_back({ConfLayout::PLN, Precision::I32});
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
auto greedyDecOp = ngraph::as_type_ptr<const ngraph::op::v6::CTCGreedyDecoderSeqLen>(op);
if (!greedyDecOp) {
errorMessage = "Node is not an instance of the CTCGreedyDecoderSeqLen operation from operation set v6.";
return false;
}
} catch (...) {
return false;
}
std::vector<DataConfigurator> outputConfigs{{ConfLayout::PLN, Precision::I32}, {ConfLayout::PLN, Precision::I32}};
addConfig(layer, inputConfigs, outputConfigs);
mergeRepeated_ = layer->GetParamAsBool("merge_repeated", true);
return true;
}
explicit CTCGreedyDecoderSeqLenImpl(const std::shared_ptr<ngraph::Node>& op) : mergeRepeated_(true) {
try {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
std::string errPrefix = "CTCGreedyDecoderSeqLen layer with name '" + op->get_friendly_name() + "' ";
if (op->get_input_size() < 2 || op->get_input_size() > 3)
IE_THROW() << errPrefix << "has invalid number of input edges: " << op->get_input_size();
if (op->get_output_size() != 2)
IE_THROW() << errPrefix << "has invalid number of outputs edges: " << op->get_output_size();
if (op->get_input_shape(DATA_INDEX)[0] != op->get_input_shape(SEQUENCE_LENGTH_INDEX)[0])
IE_THROW() << errPrefix << "has invalid input shapes.";
Precision inDataPrecision = details::convertPrecision(op->get_input_element_type(DATA_INDEX));
if (inDataPrecision != Precision::FP32 && inDataPrecision != Precision::BF16)
IE_THROW() << errPrefix << "has unsupported 'data' input precision: " << inDataPrecision;
Precision seqLenPrecision = details::convertPrecision(op->get_input_element_type(SEQUENCE_LENGTH_INDEX));
if (seqLenPrecision != Precision::I32 && seqLenPrecision != Precision::I64)
IE_THROW() << errPrefix << "has unsupported 'sequence_length' input precision: " << seqLenPrecision;
auto greedyDecOp = ngraph::as_type_ptr<const ngraph::op::v6::CTCGreedyDecoderSeqLen>(op);
mergeRepeated_ = greedyDecOp->get_merge_repeated();
if (op->get_input_size() == BLANK_INDEX) {
addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32},
{TensorDescCreatorTypes::ncsp, Precision::I32}},
{{TensorDescCreatorTypes::ncsp, Precision::I32},
{TensorDescCreatorTypes::ncsp, Precision::I32}});
} else {
Precision blIdxPrecision = details::convertPrecision(op->get_input_element_type(BLANK_INDEX));
if (blIdxPrecision != Precision::I32 && blIdxPrecision != Precision::I64)
IE_THROW() << errPrefix << "has unsupported 'blank_index' input precision: " << blIdxPrecision;
addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32},
{TensorDescCreatorTypes::ncsp, Precision::I32},
{TensorDescCreatorTypes::ncsp, Precision::I32}},
{{TensorDescCreatorTypes::ncsp, Precision::I32},
{TensorDescCreatorTypes::ncsp, Precision::I32}});
}
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
throw;
}
}
StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,

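CTCGreedyDecoderSeqLen above shows how an optional input changes the registered port configuration: a third (blank_index) entry is added only when the node actually has three inputs. A small sketch of that conditional construction with simplified stand-in types (Prec and PortConfig are illustrative, not the plugin's DataConfigurator):

#include <cstddef>
#include <vector>

enum class Prec { FP32, I32 };
struct PortConfig { Prec precision; };

// Builds the input-side configuration: logits and sequence_length are always
// present; blank_index is registered only when the node provides it.
std::vector<PortConfig> makeInputConfigs(size_t inputCount) {
    std::vector<PortConfig> in{{Prec::FP32},   // logits
                               {Prec::I32}};   // sequence_length
    if (inputCount > 2)
        in.push_back({Prec::I32});             // optional blank_index
    return in;
}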
View File

@ -4,6 +4,8 @@
#include "base.hpp"
#include "ie_parallel.hpp"
#include <ngraph/op/ctc_loss.hpp>
#include <nodes/common/tensor_desc_creator.h>
#include <cmath>
@ -12,46 +14,52 @@ namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
using MKLDNNPlugin::TensorDescCreatorTypes;
class CTCLossImpl : public ExtLayerBase {
public:
explicit CTCLossImpl(const CNNLayer* layer) {
_logPrefix = std::string("CTCLoss layer with name '") + layer->name + "'";
if (layer->insData.size() != 4 && layer->insData.size() != 5)
IE_THROW() << _logPrefix << " has invalid inputs number.";
_ctcMergeRepeated = layer->GetParamAsBool("ctc_merge_repeated", true);
_preprocessCollapseRepeated = layer->GetParamAsBool("preprocess_collapse_repeated", false);
_unique = layer->GetParamAsBool("unique", false);
auto logitsData = layer->insData[0].lock();
if (logitsData == nullptr)
IE_THROW() << _logPrefix << " has nullable logits data";
LayerConfig config;
config.inConfs.resize(layer->insData.size());
config.inConfs[0].desc = TensorDesc(Precision::FP32,
logitsData->getTensorDesc().getDims(),
TensorDesc::getLayoutByDims(logitsData->getTensorDesc().getDims()));
auto intPrecision = Precision::I32;
for (int i = 1; i < layer->insData.size(); i++) {
auto data = layer->insData[i].lock();
if (data == nullptr)
IE_THROW() << _logPrefix << " has nullable input data at " << i;
config.inConfs[i].desc = TensorDesc(intPrecision,
data->getTensorDesc().getDims(),
TensorDesc::getLayoutByDims(data->getTensorDesc().getDims()));
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
auto ctcLossOp = ngraph::as_type_ptr<const ngraph::op::v4::CTCLoss>(op);
if (!ctcLossOp) {
errorMessage = "Node is not an instance of the CTCLoss operation from operation set v4.";
return false;
}
} catch (...) {
return false;
}
DataConfig outConfig;
auto& outDims = layer->outData[0]->getTensorDesc().getDims();
outConfig.desc = TensorDesc(Precision::FP32,
outDims,
TensorDesc::getLayoutByDims(outDims));
config.outConfs.push_back(outConfig);
config.dynBatchSupport = false;
return true;
}
confs.push_back(config);
explicit CTCLossImpl(const std::shared_ptr<ngraph::Node>& op) {
try {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
_logPrefix = std::string("CTCLoss layer with name '") + op->get_friendly_name() + "'";
if (op->get_input_size() != 4 && op->get_input_size() != 5)
IE_THROW() << _logPrefix << " has invalid inputs number.";
auto ctcLossOp = ngraph::as_type_ptr<const ngraph::op::v4::CTCLoss>(op);
_ctcMergeRepeated = ctcLossOp->get_ctc_merge_repeated();
_preprocessCollapseRepeated = ctcLossOp->get_preprocess_collapse_repeated();
_unique = ctcLossOp->get_unique();
std::vector<DataConfigurator> inDataConfigurators;
inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::FP32});
for (int i = 1; i < op->get_input_size(); i++) {
inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::I32});
}
addConfig(op, inDataConfigurators,
{{TensorDescCreatorTypes::ncsp, Precision::FP32}});
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
throw;
}
}
StatusCode execute(std::vector<Blob::Ptr>& inputs,

View File

@ -9,11 +9,16 @@
#include <vector>
#include "ie_parallel.hpp"
#include "ie_precision.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ie_ngraph_utils.hpp>
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
using MKLDNNPlugin::TensorDescCreatorTypes;
class CumSumImpl: public ExtLayerBase {
enum { CUM_SUM_DATA, AXIS, numOfInputs };
bool exclusive;
@ -22,71 +27,67 @@ class CumSumImpl: public ExtLayerBase {
size_t axis = 0;
std::vector<size_t> shape;
public:
explicit CumSumImpl(const CNNLayer* layer) {
bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
layerName = layer->name;
if ((layer->insData.size() != numOfInputs && layer->insData.size() != (numOfInputs - 1)) || layer->outData.size() != 1)
const auto cumsum = std::dynamic_pointer_cast<const ngraph::opset3::CumSum>(op);
if (!cumsum) {
errorMessage = "Only opset3 CumSum operation is supported";
return false;
}
} catch (...) {
return false;
}
return true;
}
public:
explicit CumSumImpl(const std::shared_ptr<ngraph::Node>& op) {
try {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
layerName = op->get_friendly_name();
if ((op->get_input_size() != numOfInputs && op->get_input_size() != (numOfInputs - 1)) || op->get_output_size() != 1)
IE_THROW() << "CumSum layer with name '" << layerName << "' has incorrect number of input/output edges!";
const auto &dataTensor = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc();
const auto &dataShape = dataTensor.getDims();
const auto &dataShape = op->get_input_shape(CUM_SUM_DATA);
if (dataShape.size() < 1) {
IE_THROW() << "CumSum layer with name '" << layerName << "' doesn't support 'data' input tensor with rank: " << dataShape.size();
}
numOfDims = dataShape.size();
exclusive = layer->GetParamAsBool("exclusive", false);
reverse = layer->GetParamAsBool("reverse", false);
const auto cumsum = std::dynamic_pointer_cast<const ngraph::opset3::CumSum>(op);
exclusive = cumsum->is_exclusive();
reverse = cumsum->is_reverse();
const auto& dataPrecision = dataTensor.getPrecision();
auto dataPrecision = details::convertPrecision(cumsum->get_input_element_type(CUM_SUM_DATA));
if (dataPrecision != Precision::I8 && dataPrecision != Precision::U8 && dataPrecision != Precision::I16 && dataPrecision != Precision::I32 &&
dataPrecision != Precision::FP32 && dataPrecision != Precision::I64 && dataPrecision != Precision::U64 && dataPrecision != Precision::BF16)
IE_THROW() << "CumSum layer with name '" << layerName << "' has unsupported 'data' input precision: " << dataPrecision.name();
if (layer->insData.size() == numOfInputs) {
const auto& axisTensor = layer->insData[AXIS].lock()->getTensorDesc();
const auto& axisTensorPrec = layer->insData[AXIS].lock()->getTensorDesc().getPrecision();
if (cumsum->get_input_size() == numOfInputs) {
const auto& axisTensorPrec = details::convertPrecision(cumsum->get_input_element_type(AXIS));
if (axisTensorPrec != Precision::I32 && axisTensorPrec != Precision::I64)
IE_THROW() << "CumSum layer with name '" << layerName << "' has unsupported 'axis' input precision: " << axisTensorPrec.name();
const auto axisTensorRank = axisTensor.getDims().size();
if (axisTensorRank != 0)
IE_THROW() << "CumSum layer with name '" << layerName << "' doesn't support 'axis' input tensor with rank: " << axisTensorRank;
if (!ngraph::is_scalar(cumsum->get_input_shape(AXIS)))
IE_THROW() << "CumSum layer with name '" << layerName << "' doesn't support 'axis' input tensor with non scalar rank";
}
if (dataShape != layer->outData[0]->getTensorDesc().getDims())
if (dataShape != cumsum->get_output_shape(0))
IE_THROW() << "CumSum layer with name '" << layerName << "' has different 'data' input and output dimensions";
shape = dataShape;
LayerConfig config;
for (size_t i = 0; i < layer->insData.size(); i++) {
DataConfig inConfig;
inConfig.inPlace = -1;
inConfig.constant = false;
Precision inPrecision = i == 1 ? Precision(Precision::I32) : layer->insData[i].lock()->getTensorDesc().getPrecision();
if (inPrecision == Precision::BF16)
inPrecision = Precision::FP32;
const SizeVector& inDims = layer->insData[i].lock()->getTensorDesc().getDims();
inConfig.desc = TensorDesc(inPrecision, inDims, InferenceEngine::TensorDesc::getLayoutByDims(inDims));
config.inConfs.push_back(inConfig);
}
DataConfig outConfig;
outConfig.inPlace = -1;
outConfig.constant = false;
Precision outPrecision = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc().getPrecision();
if (outPrecision == Precision::BF16)
outPrecision = Precision::FP32;
const SizeVector& outDims = layer->outData[0]->getTensorDesc().getDims();
outConfig.desc = TensorDesc(outPrecision, outDims, InferenceEngine::TensorDesc::getLayoutByDims(outDims));
config.outConfs.push_back(outConfig);
config.dynBatchSupport = false;
confs.push_back(config);
std::vector<DataConfigurator> inDataConfigurators;
if (dataPrecision == Precision::BF16)
dataPrecision = Precision::FP32;
inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, dataPrecision});
if (op->get_input_size() > 1)
inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::I32});
addConfig(op, inDataConfigurators, {{TensorDescCreatorTypes::ncsp, dataPrecision}});
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
}
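CumSum above also shows the precision fallback applied in several of these reference implementations: bf16 data is registered as fp32 in the port configuration, presumably because the reference kernel computes in fp32. A one-line sketch of that normalization (the helper name is hypothetical):

enum class Prec { FP32, BF16, I32, I64 };

// bf16 tensors are declared as fp32 in the layer configuration; every other
// precision passes through unchanged.
inline Prec normalizeForReferenceKernel(Prec p) {
    return (p == Prec::BF16) ? Prec::FP32 : p;
}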

View File

@ -10,12 +10,17 @@
#include <string>
#include <utility>
#include <algorithm>
#include "caseless.hpp"
#include "ie_parallel.hpp"
#include "common/tensor_desc_creator.h"
#include <ngraph/op/detection_output.hpp>
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
using MKLDNNPlugin::TensorDescCreatorTypes;
template <typename T>
static bool SortScorePairDescend(const std::pair<float, T>& pair1,
const std::pair<float, T>& pair2) {
@ -24,98 +29,95 @@ static bool SortScorePairDescend(const std::pair<float, T>& pair1,
class DetectionOutputImpl: public ExtLayerBase {
public:
explicit DetectionOutputImpl(const CNNLayer* layer) {
bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (layer->insData.size() != 3 && layer->insData.size() != 5)
IE_THROW() << "Incorrect number of input edges for layer " << layer->name;
if (layer->outData.empty())
IE_THROW() << "Incorrect number of output edges for layer " << layer->name;
auto doOp = ngraph::as_type_ptr<const ngraph::op::v0::DetectionOutput>(op);
if (!doOp) {
errorMessage = "Node is not an instance of the DetectionOutput from the operations set v0.";
return false;
}
if (!details::CaselessEq<std::string>()(doOp->get_attrs().code_type, "caffe.PriorBoxParameter.CENTER_SIZE") &&
!details::CaselessEq<std::string>()(doOp->get_attrs().code_type, "caffe.PriorBoxParameter.CORNER")) {
errorMessage = "Unsupported code_type attribute.";
return false;
}
} catch (...) {
return false;
}
return true;
}
_num_classes = layer->GetParamAsInt("num_classes");
_background_label_id = layer->GetParamAsInt("background_label_id", 0);
_top_k = layer->GetParamAsInt("top_k", -1);
_variance_encoded_in_target = layer->GetParamAsBool("variance_encoded_in_target", false);
_keep_top_k = layer->GetParamAsInt("keep_top_k", -1);
_nms_threshold = layer->GetParamAsFloat("nms_threshold");
_confidence_threshold = layer->GetParamAsFloat("confidence_threshold", -FLT_MAX);
_share_location = layer->GetParamAsBool("share_location", true);
_clip_before_nms = layer->GetParamAsBool("clip_before_nms", false) ||
layer->GetParamAsBool("clip", false); // for backward compatibility
_clip_after_nms = layer->GetParamAsBool("clip_after_nms", false);
_decrease_label_id = layer->GetParamAsBool("decrease_label_id", false);
_normalized = layer->GetParamAsBool("normalized", true);
_image_height = layer->GetParamAsInt("input_height", 1);
_image_width = layer->GetParamAsInt("input_width", 1);
explicit DetectionOutputImpl(const std::shared_ptr<ngraph::Node>& op) {
try {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
if (op->get_input_size() != 3 && op->get_input_size() != 5)
IE_THROW() << "Invalid number of input edges.";
if (op->get_output_size() != 1)
IE_THROW() << "Invalid number of output edges.";
auto doOp = ngraph::as_type_ptr<const ngraph::op::v0::DetectionOutput>(op);
auto attributes = doOp->get_attrs();
_num_classes = attributes.num_classes;
_background_label_id = attributes.background_label_id;
_top_k = attributes.top_k;
_variance_encoded_in_target = attributes.variance_encoded_in_target;
_keep_top_k = attributes.keep_top_k[0];
_nms_threshold = attributes.nms_threshold;
_confidence_threshold = attributes.confidence_threshold;
_share_location = attributes.share_location;
_clip_before_nms = attributes.clip_before_nms;
_clip_after_nms = attributes.clip_after_nms;
_decrease_label_id = attributes.decrease_label_id;
_normalized = attributes.normalized;
_image_height = attributes.input_height;
_image_width = attributes.input_width;
_prior_size = _normalized ? 4 : 5;
_offset = _normalized ? 0 : 1;
_num_loc_classes = _share_location ? 1 : _num_classes;
with_add_box_pred = layer->insData.size() == 5;
_objectness_score = layer->GetParamAsFloat("objectness_score", 0.0f);
with_add_box_pred = op->get_input_size() == 5;
_objectness_score = attributes.objectness_score;
std::string code_type_str = layer->GetParamAsString("code_type", "caffe.PriorBoxParameter.CORNER");
_code_type = (code_type_str == "caffe.PriorBoxParameter.CENTER_SIZE" ? CodeType::CENTER_SIZE
: CodeType::CORNER);
_code_type = (details::CaselessEq<std::string>()(attributes.code_type, "caffe.PriorBoxParameter.CENTER_SIZE") ?
CodeType::CENTER_SIZE : CodeType::CORNER);
_num_priors = static_cast<int>(layer->insData[idx_priors].lock()->getDims().back() / _prior_size);
_priors_batches = layer->insData[idx_priors].lock()->getDims().front() != 1;
_num_priors = static_cast<int>(op->get_input_shape(idx_priors).back() / _prior_size);
_priors_batches = op->get_input_shape(idx_priors).front() != 1;
if (_num_priors * _num_loc_classes * 4 != static_cast<int>(layer->insData[idx_location].lock()->getDims()[1]))
if (_num_priors * _num_loc_classes * 4 != static_cast<int>(op->get_input_shape(idx_location)[1]))
IE_THROW() << "Number of priors must match number of location predictions ("
<< _num_priors * _num_loc_classes * 4 << " vs "
<< layer->insData[idx_location].lock()->getDims()[1] << ")";
<< op->get_input_shape(idx_location)[1] << ")";
if (_num_priors * _num_classes != static_cast<int>(layer->insData[idx_confidence].lock()->getTensorDesc().getDims().back()))
if (_num_priors * _num_classes != static_cast<int>(op->get_input_shape(idx_confidence).back()))
IE_THROW() << "Number of priors must match number of confidence predictions.";
if (_decrease_label_id && _background_label_id != 0)
IE_THROW() << "Cannot use decrease_label_id and background_label_id parameter simultaneously.";
_num = static_cast<int>(layer->insData[idx_confidence].lock()->getTensorDesc().getDims()[0]);
_num = static_cast<int>(op->get_input_shape(idx_confidence)[0]);
InferenceEngine::SizeVector bboxes_size{static_cast<size_t>(_num),
static_cast<size_t>(_num_classes),
static_cast<size_t>(_num_priors),
4};
_decoded_bboxes = InferenceEngine::make_shared_blob<float>({Precision::FP32, bboxes_size, NCHW});
_decoded_bboxes->allocate();
_decoded_bboxes.resize(_num * _num_classes * _num_priors * 4);
_buffer.resize(_num * _num_classes * _num_priors);
_indices.resize(_num * _num_classes * _num_priors);
_detections_count.resize(_num * _num_classes);
_bbox_sizes.resize(_num * _num_classes * _num_priors);
_num_priors_actual.resize(_num);
InferenceEngine::SizeVector buf_size{static_cast<size_t>(_num),
static_cast<size_t>(_num_classes),
static_cast<size_t>(_num_priors)};
_buffer = InferenceEngine::make_shared_blob<int>({Precision::I32, buf_size, {buf_size, {0, 1, 2}}});
_buffer->allocate();
const auto &confSize = op->get_input_shape(idx_confidence);
_reordered_conf.resize(std::accumulate(confSize.begin(), confSize.end(), 1, std::multiplies<size_t>()));
InferenceEngine::SizeVector indices_size{static_cast<size_t>(_num),
static_cast<size_t>(_num_classes),
static_cast<size_t>(_num_priors)};
_indices = InferenceEngine::make_shared_blob<int>(
{Precision::I32, indices_size, {indices_size, {0, 1, 2}}});
_indices->allocate();
InferenceEngine::SizeVector detections_size{static_cast<size_t>((size_t)(_num) * _num_classes)};
_detections_count = InferenceEngine::make_shared_blob<int>({Precision::I32, detections_size, C});
_detections_count->allocate();
const InferenceEngine::SizeVector &conf_size = layer->insData[idx_confidence].lock()->getTensorDesc().getDims();
_reordered_conf = InferenceEngine::make_shared_blob<float>({Precision::FP32, conf_size, ANY});
_reordered_conf->allocate();
InferenceEngine::SizeVector decoded_bboxes_size{static_cast<size_t>(_num),
static_cast<size_t>(_num_priors),
static_cast<size_t>(_num_classes)};
_bbox_sizes = InferenceEngine::make_shared_blob<float>(
{Precision::FP32, decoded_bboxes_size, {decoded_bboxes_size, {0, 1, 2}}});
_bbox_sizes->allocate();
InferenceEngine::SizeVector num_priors_actual_size{static_cast<size_t>(_num)};
_num_priors_actual = InferenceEngine::make_shared_blob<int>({Precision::I32, num_priors_actual_size, C});
_num_priors_actual->allocate();
std::vector<DataConfigurator> in_data_conf(layer->insData.size(), DataConfigurator(ConfLayout::PLN, Precision::FP32));
addConfig(layer, in_data_conf, {DataConfigurator(ConfLayout::PLN, Precision::FP32)});
std::vector<DataConfigurator> inDataConfigurators(op->get_input_size(), {TensorDescCreatorTypes::ncsp, Precision::FP32});
addConfig(op, inDataConfigurators,
{{TensorDescCreatorTypes::ncsp, Precision::FP32}});
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
throw;
}
}
@ -131,13 +133,13 @@ public:
const int N = inputs[idx_confidence]->getTensorDesc().getDims()[0];
float *decoded_bboxes_data = _decoded_bboxes->buffer().as<float *>();
float *reordered_conf_data = _reordered_conf->buffer().as<float *>();
float *bbox_sizes_data = _bbox_sizes->buffer().as<float *>();
int *detections_data = _detections_count->buffer().as<int *>();
int *buffer_data = _buffer->buffer().as<int *>();
int *indices_data = _indices->buffer().as<int *>();
int *num_priors_actual = _num_priors_actual->buffer().as<int *>();
float *decoded_bboxes_data = _decoded_bboxes.data();
float *reordered_conf_data = _reordered_conf.data();
float *bbox_sizes_data = _bbox_sizes.data();
int *detections_data = _detections_count.data();
int *buffer_data = _buffer.data();
int *indices_data = _indices.data();
int *num_priors_actual = _num_priors_actual.data();
for (int n = 0; n < N; ++n) {
const float *ppriors = prior_data;
@ -396,13 +398,13 @@ private:
void nms_mx(const float *conf_data, const float *bboxes, const float *sizes,
int *buffer, int *indices, int *detections, int num_priors_actual);
InferenceEngine::Blob::Ptr _decoded_bboxes;
InferenceEngine::Blob::Ptr _buffer;
InferenceEngine::Blob::Ptr _indices;
InferenceEngine::Blob::Ptr _detections_count;
InferenceEngine::Blob::Ptr _reordered_conf;
InferenceEngine::Blob::Ptr _bbox_sizes;
InferenceEngine::Blob::Ptr _num_priors_actual;
std::vector<float> _decoded_bboxes;
std::vector<int> _buffer;
std::vector<int> _indices;
std::vector<int> _detections_count;
std::vector<float> _reordered_conf;
std::vector<float> _bbox_sizes;
std::vector<int> _num_priors_actual;
};
struct ConfidenceComparator {

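The DetectionOutput changes above replace the pre-allocated Blob::Ptr scratch buffers with plain std::vector members: sizing happens once in the constructor, and raw pointers come from data() instead of buffer().as<T*>(). A condensed sketch of the resulting ownership model (names are illustrative):

#include <cstddef>
#include <vector>

// Scratch storage for one inference: the vectors own their memory, so no explicit
// allocate() call or Blob bookkeeping is needed.
struct DetectionOutputScratch {
    std::vector<float> decoded_bboxes;
    std::vector<int>   indices;
    std::vector<int>   detections_count;

    DetectionOutputScratch(size_t num, size_t classes, size_t priors)
        : decoded_bboxes(num * classes * priors * 4),  // 4 coordinates per box
          indices(num * classes * priors),
          detections_count(num * classes) {}
};

// In execute(), raw pointers are then taken the same way the patch does:
//   float* bboxes = scratch.decoded_bboxes.data();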
View File

@ -12,6 +12,8 @@
#include <utility>
#include <algorithm>
#include "ie_parallel.hpp"
#include "common/tensor_desc_creator.h"
#include <ngraph/op/experimental_detectron_detection_output.hpp>
namespace {
@ -44,6 +46,8 @@ namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
using MKLDNNPlugin::TensorDescCreatorTypes;
static
void refine_boxes(const float* boxes, const float* deltas, const float* weights, const float* scores,
float* refined_boxes, float* refined_boxes_areas, float* refined_scores,
@ -235,46 +239,46 @@ private:
const int OUTPUT_SCORES {2};
public:
explicit ExperimentalDetectronDetectionOutputImpl(const CNNLayer* layer) {
bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
score_threshold_ = layer->GetParamAsFloat("score_threshold");
nms_threshold_ = layer->GetParamAsFloat("nms_threshold");
max_delta_log_wh_ = layer->GetParamAsFloat("max_delta_log_wh");
classes_num_ = layer->GetParamAsInt("num_classes");
max_detections_per_class_ = layer->GetParamAsInt("post_nms_count");
max_detections_per_image_ = layer->GetParamAsInt("max_detections_per_image");
class_agnostic_box_regression_ = layer->GetParamAsBool("class_agnostic_box_regression", false);
deltas_weights_ = layer->GetParamAsFloats("deltas_weights");
LayerConfig config;
for (auto in : layer->insData) {
auto in_ = in.lock();
auto dims = in_->getTensorDesc().getDims();
DataConfig data;
data.desc = TensorDesc(Precision::FP32, dims, in_->getTensorDesc().getLayoutByDims(dims));
config.inConfs.push_back(data);
auto doOp = ngraph::as_type_ptr<const ngraph::op::v6::ExperimentalDetectronDetectionOutput>(op);
if (!doOp) {
errorMessage = "Node is not an instance of the ExperimentalDetectronDetectionOutput from the operations set v6.";
return false;
}
} catch (...) {
return false;
}
return true;
}
auto dimsB = layer->outData[OUTPUT_BOXES]->getTensorDesc().getDims();
DataConfig dataB;
dataB.desc = TensorDesc(Precision::FP32, dimsB,
layer->outData[OUTPUT_BOXES]->getTensorDesc().getLayoutByDims(dimsB));
config.outConfs.push_back(dataB);
auto dimsC = layer->outData[OUTPUT_CLASSES]->getTensorDesc().getDims();
DataConfig dataC;
dataC.desc = TensorDesc(Precision::I32, dimsC,
layer->outData[OUTPUT_BOXES]->getTensorDesc().getLayoutByDims(dimsC));
config.outConfs.push_back(dataC);
auto dimsS = layer->outData[OUTPUT_SCORES]->getTensorDesc().getDims();
DataConfig dataS;
dataS.desc = TensorDesc(Precision::FP32, dimsS,
layer->outData[OUTPUT_BOXES]->getTensorDesc().getLayoutByDims(dimsS));
config.outConfs.push_back(dataS);
config.dynBatchSupport = false;
confs.push_back(config);
explicit ExperimentalDetectronDetectionOutputImpl(const std::shared_ptr<ngraph::Node>& op) {
try {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
auto doOp = ngraph::as_type_ptr<const ngraph::op::v6::ExperimentalDetectronDetectionOutput>(op);
auto attributes = doOp->get_attrs();
score_threshold_ = attributes.score_threshold;
nms_threshold_ = attributes.nms_threshold;
max_delta_log_wh_ = attributes.max_delta_log_wh;
classes_num_ = attributes.num_classes;
max_detections_per_class_ = attributes.post_nms_count;
max_detections_per_image_ = attributes.max_detections_per_image;
class_agnostic_box_regression_ = attributes.class_agnostic_box_regression;
deltas_weights_ = attributes.deltas_weights;
std::vector<DataConfigurator> inDataConfigurators(op->get_input_size(), {TensorDescCreatorTypes::ncsp, Precision::FP32});
addConfig(op, inDataConfigurators,
{{TensorDescCreatorTypes::ncsp, Precision::FP32},
{TensorDescCreatorTypes::ncsp, Precision::I32},
{TensorDescCreatorTypes::ncsp, Precision::FP32}});
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
throw;
}
}
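ExperimentalDetectronDetectionOutput above illustrates the attribute-access change common to these conversions: instead of parsing string parameters with GetParamAsFloat/GetParamAsInt/GetParamAsBool, the implementation copies typed fields directly from the operation's attribute structure. A schematic sketch with stand-in types (the struct below is illustrative, not the real ngraph attrs layout):

#include <vector>

// Stand-in for the typed attribute struct carried by the ngraph operation.
struct DetectionOutputAttrs {
    float score_threshold = 0.f;
    float nms_threshold = 0.f;
    int   num_classes = 0;
    bool  class_agnostic_box_regression = false;
    std::vector<float> deltas_weights;
};

// The node implementation simply copies the typed fields; no string parsing
// and no default-value handling in the plugin.
struct DetectionOutputNode {
    float score_threshold_;
    float nms_threshold_;
    int   classes_num_;
    bool  class_agnostic_box_regression_;
    std::vector<float> deltas_weights_;

    explicit DetectionOutputNode(const DetectionOutputAttrs& a)
        : score_threshold_(a.score_threshold),
          nms_threshold_(a.nms_threshold),
          classes_num_(a.num_classes),
          class_agnostic_box_regression_(a.class_agnostic_box_regression),
          deltas_weights_(a.deltas_weights) {}
};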

View File

@ -1,247 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "embedding_bag_sum.hpp"
#include "ie_parallel.hpp"
#include <vector>
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class EmbeddingBagOffsetsSumImpl: public MKLDNNEmbeddingBagSum {
public:
explicit EmbeddingBagOffsetsSumImpl(const CNNLayer* layer) :
MKLDNNEmbeddingBagSum(layer, 3lu, 1lu, 4lu, 3lu) {
auto indicesData = layer->insData[INDICES_IDX].lock();
if (indicesData == nullptr)
IE_THROW() << "'" << layer->name << "' layer has nullable indices data.";
if (indicesData->getTensorDesc().getDims().size() != 1)
IE_THROW() << "'" << layer->name << "' layer has indices data with invalid shape.";
auto offsetsData = layer->insData[OFFSETS_IDX].lock();
if (offsetsData == nullptr)
IE_THROW() << "'" << layer->name << "' layer has invalid offsets data.";
if (offsetsData->getTensorDesc().getDims().size() != 1)
IE_THROW() << "'" << layer->name << "' layer's offsets data has invalid shape.";
_indicesLen = indicesData->getTensorDesc().getDims()[0];
_offsetsLen = offsetsData->getTensorDesc().getDims()[0];
}
StatusCode execute(
std::vector<Blob::Ptr>& inputs,
std::vector<Blob::Ptr>& outputs,
ResponseDesc* resp) noexcept override {
switch (inputs[0]->getTensorDesc().getPrecision()) {
case Precision::FP32: {
return processData<PrecisionTrait<Precision::FP32>::value_type>(inputs, outputs, resp);
}
case Precision::I8: {
return processData<PrecisionTrait<Precision::I8>::value_type>(inputs, outputs, resp);
}
case Precision::U8: {
return processData<PrecisionTrait<Precision::U8>::value_type>(inputs, outputs, resp);
}
case Precision::I32: {
return processData<PrecisionTrait<Precision::I32>::value_type>(inputs, outputs, resp);
}
default: {
if (resp) {
std::string errorMsg = "EmbeddingBagSum layer does not support embedding table precision '"
+ std::string(inputs[0]->getTensorDesc().getPrecision().name()) + "'";
errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
}
return GENERAL_ERROR;
}
}
}
protected:
template<typename T>
StatusCode processData(
std::vector<Blob::Ptr>& inputs,
std::vector<Blob::Ptr>& outputs,
ResponseDesc* resp) noexcept {
switch (inputs[1]->getTensorDesc().getPrecision()) {
case Precision::I32: {
return processData<T, PrecisionTrait<Precision::I32>::value_type>(inputs, outputs, resp);
}
case Precision::I64: {
return processData<T, PrecisionTrait<Precision::I64>::value_type>(inputs, outputs, resp);
}
case Precision::U64: {
return processData<T, PrecisionTrait<Precision::U64>::value_type>(inputs, outputs, resp);
}
default: {
if (resp) {
std::string errorMsg = "EmbeddingBagSum layer does not support indices precision '"
+ std::string(inputs[1]->getTensorDesc().getPrecision().name()) + "'";
errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
}
return GENERAL_ERROR;
}
}
}
template<typename T, typename I>
StatusCode processData(
std::vector<Blob::Ptr>& inputs,
std::vector<Blob::Ptr>& outputs,
ResponseDesc* resp) noexcept {
std::string errorMsg;
std::string msgPrefix = std::string("Layer EmbeddingBagOffsetsSum with name '") + _layerName + "' ";
const T* srcData = inputs[0]->cbuffer().as<const T*>() +
inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
T* dstData = outputs[0]->buffer().as<T*>() +
outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
const I* indicesData = inputs[INDICES_IDX]->cbuffer().as<const I*>();
const I* offsetsData = inputs[OFFSETS_IDX]->cbuffer().as<const I*>();
int64_t defaultIndex = -1;
if (inputs.size() > DEFAULT_INDEX_IDX) {
defaultIndex = (int64_t)inputs[DEFAULT_INDEX_IDX]->cbuffer().as<const I*>()[0];
if (defaultIndex < 0 || defaultIndex >= _indicesLen) {
std::string msg = "Invalid default index: " + std::to_string(defaultIndex);
msg.copy(resp->msg, sizeof(resp->msg) - 1);
return GENERAL_ERROR;
}
}
const T* weightsData = nullptr;
if (_withWeights)
weightsData = inputs[PER_SAMPLE_WEIGHTS_IDX]->cbuffer().as<const T*>();
const auto& inDataDims = inputs[0]->getTensorDesc().getDims();
const size_t OUTPUT_BAGS_NUM = outputs[0]->getTensorDesc().getDims()[0];
std::function<void(size_t, const I*&, size_t&, size_t&, bool&)> get_idx =
[&](size_t embIndex, const I*& indicesRef, size_t& outSize, size_t& weightsIdx, bool& withWeights) {
if (embIndex >= _offsetsLen) {
errorMsg = msgPrefix + "has invalid embedding bag index.";
return;
}
if (offsetsData[embIndex] >= _indicesLen) {
errorMsg = msgPrefix + ". Offset value exceeds indices size in the model.\noffset: "
+ std::to_string(offsetsData[embIndex]) + "; indices size: " + std::to_string(_indicesLen);
return;
}
indicesRef = nullptr;
outSize = 0lu;
withWeights = _withWeights;
if (embIndex == _offsetsLen - 1lu)
outSize = _indicesLen - offsetsData[embIndex];
else
outSize = offsetsData[embIndex + 1lu] - offsetsData[embIndex];
if (outSize != 0lu) {
indicesRef = indicesData + offsetsData[embIndex];
} else {
// Empty or default bag
withWeights = false;
if (defaultIndex >= 0) {
indicesRef = reinterpret_cast<I*>(&defaultIndex);
outSize = 1lu;
}
return;
}
if (withWeights)
weightsIdx = offsetsData[embIndex];
};
auto threadBody = [&](const int ithr, const int nthr) {
size_t start(0lu), end(0lu);
splitter(OUTPUT_BAGS_NUM, nthr, ithr, start, end);
if (start >= end)
return;
size_t indicesSize = 0lu;
const I* indices = nullptr;
size_t weightsIdx = 0lu;
bool withWeights = _withWeights;
for (size_t obi = start; obi < end; obi++) {
size_t dstIndex = obi * _embDepth;
get_idx(obi, indices, indicesSize, weightsIdx, withWeights);
if (indices != nullptr) {
withWeights = withWeights & _withWeights;
size_t inIdx = 0lu;
if (indices[inIdx] >= inDataDims[0]) {
errorMsg = msgPrefix + "has invalid embedding bag index: " + std::to_string(indices[inIdx]);
return;
}
size_t srcIndex = indices[inIdx] * _embDepth;
if (withWeights) {
for (size_t i = 0lu; i < _embDepth; i++) {
dstData[dstIndex + i] = srcData[srcIndex + i] * weightsData[weightsIdx];
}
weightsIdx++;
} else {
for (size_t i = 0lu; i < _embDepth; i++) {
dstData[dstIndex + i] = srcData[srcIndex + i];
}
}
for (inIdx = 1lu; inIdx < indicesSize; inIdx++) {
if (indices[inIdx] >= inDataDims[0]) {
errorMsg = msgPrefix + "has invalid embedding bag index: " + std::to_string(indices[inIdx]);
return;
}
size_t srcIndex = indices[inIdx] * _embDepth;
if (withWeights) {
for (size_t i = 0lu; i < _embDepth; i++) {
dstData[dstIndex + i] += srcData[srcIndex + i] * weightsData[weightsIdx];
}
weightsIdx++;
} else {
for (size_t i = 0lu; i < _embDepth; i++) {
dstData[dstIndex + i] += srcData[srcIndex + i];
}
}
}
} else {
for (size_t i = 0lu; i < _embDepth; i++) {
dstData[dstIndex + i] = 0;
}
}
}
};
parallel_nt(0, threadBody);
if (!errorMsg.empty()) {
errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
return GENERAL_ERROR;
}
return OK;
}
void initFromInputs(std::vector<Blob::Ptr>& inputs) override {
}
void getIndices(size_t embIndex, const size_t*& indices, size_t& size, size_t& weightsIdx, bool& withWeights) override {
}
const size_t OFFSETS_IDX = 2lu;
size_t _indicesLen;
size_t _offsetsLen;
};
REG_FACTORY_FOR(EmbeddingBagOffsetsSumImpl, EmbeddingBagOffsetsSum);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
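The deleted kernel above parallelizes over output bags with parallel_nt plus splitter: each thread receives a contiguous [start, end) slice of bags and processes it independently, which is why the per-thread body simply returns when start >= end. A dependency-free sketch of that static partitioning (split_range is a hypothetical helper assumed to behave like the splitter used above):

#include <algorithm>
#include <cstddef>

// Splits n work items as evenly as possible across nthr threads and returns
// thread ithr's half-open range [start, end); the first (n % nthr) threads get
// one extra item each.
inline void split_range(size_t n, int nthr, int ithr, size_t& start, size_t& end) {
    const size_t chunk = n / static_cast<size_t>(nthr);
    const size_t remainder = n % static_cast<size_t>(nthr);
    const size_t i = static_cast<size_t>(ithr);
    start = i * chunk + std::min(i, remainder);
    end = start + chunk + (i < remainder ? 1 : 0);
}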

View File

@ -1,67 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "embedding_bag_sum.hpp"
#include "common/cpu_memcpy.h"
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class EmbeddingBagPackedSumImpl: public MKLDNNEmbeddingBagSum {
public:
explicit EmbeddingBagPackedSumImpl(const CNNLayer* layer) :
MKLDNNEmbeddingBagSum(layer, 2lu, 1lu, 2lu, 3lu) {
auto indicesData = layer->insData[INDICES_IDX].lock();
if (indicesData == nullptr)
IE_THROW() << "'" << layer->name << "' layer has nullable indices data.";
if (indicesData->getTensorDesc().getDims().size() != 2)
IE_THROW() << "'" << layer->name << "' layer has indices data with invalid shape.";
_indices = std::vector<std::vector<size_t>>(
indicesData->getTensorDesc().getDims()[0],
std::vector<size_t>(indicesData->getTensorDesc().getDims()[1], 0lu));
}
void initFromInputs(std::vector<Blob::Ptr>& inputs) override {
// Initialize indices
const size_t bagsNum = inputs[INDICES_IDX]->getTensorDesc().getDims()[0];
const size_t batch = inputs[INDICES_IDX]->getTensorDesc().getDims()[1];
if (inputs[INDICES_IDX]->getTensorDesc().getPrecision().size() == sizeof(INT32)) {
const INT32* src = inputs[INDICES_IDX]->cbuffer().as<const INT32*>();
for (size_t i = 0lu; i < bagsNum; i++) {
size_t ibn = i * batch;
for (size_t j = 0lu; j < batch; j++) {
_indices[i][j] = static_cast<size_t>(src[ibn + j]);
}
}
} else if (inputs[INDICES_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) {
const UINT64* src = inputs[INDICES_IDX]->cbuffer().as<const UINT64*>();
for (size_t i = 0lu; i < bagsNum; i++) {
cpu_memcpy(_indices[i].data(), src + i * batch, batch * sizeof(UINT64));
}
}
}
void getIndices(size_t embIndex, const size_t*& indices, size_t& size, size_t& weightsIdx, bool& withWeights) override {
if (embIndex >= _indices.size())
IE_THROW() << "Invalid embedding bag index.";
withWeights = true;
indices = _indices[embIndex].data();
size = _indices[0].size();
weightsIdx = embIndex * _indices[0].size();
}
protected:
std::vector<std::vector<size_t>> _indices;
};
REG_FACTORY_FOR(EmbeddingBagPackedSumImpl, EmbeddingBagPackedSum);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine

View File

@ -1,209 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "embedding_bag_sum.hpp"
#include "ie_parallel.hpp"
#include "list.hpp"
#include <set>
#include <string>
#include <vector>
using namespace InferenceEngine;
using namespace InferenceEngine::Extensions::Cpu;
const std::set<size_t> MKLDNNEmbeddingBagSum::_supportedIndicesTypeSize = {sizeof(INT32), sizeof(INT64)};
MKLDNNEmbeddingBagSum::MKLDNNEmbeddingBagSum(
const CNNLayer* layer,
size_t requiredInputNum,
size_t indicesIdx,
size_t perSampleWeightsIdx,
size_t defaultIndexIdx,
const std::set<Precision>& supportedPrecisions) :
INDICES_IDX(indicesIdx),
PER_SAMPLE_WEIGHTS_IDX(perSampleWeightsIdx),
DEFAULT_INDEX_IDX(defaultIndexIdx) {
try {
std::string logPrefix = std::string("Layer EmbeddingBagSum with name '") + layer->name + "' ";
if (layer->insData.size() < requiredInputNum || layer->outData.size() != 1)
IE_THROW() << logPrefix << "has incorrect number of input or output edges!";
_layerName = layer->name;
auto inData = layer->insData[0].lock();
auto indicesData = layer->insData[INDICES_IDX].lock();
if (inData == nullptr || indicesData == nullptr)
IE_THROW() << logPrefix << "has nullable input data.";
auto dataPrecision = inData->getTensorDesc().getPrecision();
if (dataPrecision == Precision::BF16)
dataPrecision = Precision::FP32;
if (!supportedPrecisions.empty()) {
if (supportedPrecisions.find(dataPrecision) == supportedPrecisions.end())
IE_THROW() << logPrefix << "has unsupported precision: " << dataPrecision.name();
} else {
static const std::set<Precision> defaultSupportedPrecisions =
{Precision::FP32, Precision::I8, Precision::U8, Precision::I32};
if (defaultSupportedPrecisions.find(dataPrecision) == defaultSupportedPrecisions.end())
IE_THROW() << logPrefix << "has unsupported precision: " << dataPrecision.name();
}
if (layer->insData.size() > PER_SAMPLE_WEIGHTS_IDX)
_withWeights = true;
if (_withWeights) {
auto weightsData = layer->insData[PER_SAMPLE_WEIGHTS_IDX].lock();
if (weightsData == nullptr)
IE_THROW() << logPrefix << "has nullable weights data";
if (weightsData->getTensorDesc().getDims() != indicesData->getTensorDesc().getDims())
IE_THROW() << logPrefix << "must have equal shapes for indices and per_sample_weights inputs.";
}
LayerConfig config;
config.inConfs.resize(layer->insData.size());
for (int i = 0; i < layer->insData.size(); i++) {
auto data = layer->insData[i].lock();
if (data == nullptr)
IE_THROW() << logPrefix << "has nullable input data";
auto prc = data->getTensorDesc().getPrecision();
if (prc == Precision::BF16)
prc = Precision::FP32;
config.inConfs[i].desc = TensorDesc(prc,
data->getTensorDesc().getDims(),
TensorDesc::getLayoutByDims(data->getTensorDesc().getDims()));
}
DataConfig outConfig;
auto& outDims = layer->outData[0]->getTensorDesc().getDims();
outConfig.desc = TensorDesc(dataPrecision,
outDims,
TensorDesc::getLayoutByDims(outDims));
config.outConfs.push_back(outConfig);
config.dynBatchSupport = false;
confs.push_back(config);
const auto& inDataDims = inData->getTensorDesc().getDims();
_embDepth = 1lu;
for (size_t i = 1lu; i < inDataDims.size(); i++) {
_embDepth *= inDataDims[i];
}
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
}
}
StatusCode MKLDNNEmbeddingBagSum::execute(
std::vector<Blob::Ptr>& inputs,
std::vector<Blob::Ptr>& outputs,
ResponseDesc *resp) noexcept {
switch (inputs[0]->getTensorDesc().getPrecision()) {
case Precision::FP32: {
processData<PrecisionTrait<Precision::FP32>::value_type>(inputs, outputs);
break;
}
case Precision::I8: {
processData<PrecisionTrait<Precision::I8>::value_type>(inputs, outputs);
break;
}
case Precision::U8: {
processData<PrecisionTrait<Precision::U8>::value_type>(inputs, outputs);
break;
}
case Precision::I32: {
processData<PrecisionTrait<Precision::I32>::value_type>(inputs, outputs);
break;
}
default: {
if (resp) {
std::string errorMsg = "EmbeddingBagSum layer does not support precision '"
+ std::string(inputs[0]->getTensorDesc().getPrecision().name()) + "'";
errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
}
return GENERAL_ERROR;
}
}
return OK;
}
template<typename T>
void MKLDNNEmbeddingBagSum::processData(
std::vector<Blob::Ptr>& inputs,
std::vector<Blob::Ptr>& outputs) noexcept {
const T* srcData = inputs[0]->cbuffer().as<const T*>() +
inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
T* dstData = outputs[0]->buffer().as<T*>() +
outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
const T* weightsData = nullptr;
if (_withWeights)
weightsData = inputs[PER_SAMPLE_WEIGHTS_IDX]->cbuffer().as<const T*>();
initFromInputs(inputs);
const auto& inDataDims = inputs[0]->getTensorDesc().getDims();
const size_t outputBagsNum = outputs[0]->getTensorDesc().getDims()[0];
auto threadBody = [&](const int ithr, const int nthr) {
size_t start(0lu), end(0lu);
splitter(outputBagsNum, nthr, ithr, start, end);
if (start >= end)
return;
size_t indicesSize = 0lu;
const size_t* indices = nullptr;
size_t weightsIdx = 0lu;
bool withWeights = _withWeights;
for (size_t obi = start; obi < end; obi++) {
size_t dstIndex = obi * _embDepth;
getIndices(obi, indices, indicesSize, weightsIdx, withWeights);
if (indices != nullptr) {
withWeights = withWeights & _withWeights;
size_t inIdx = 0lu;
if (indices[inIdx] >= inDataDims[0])
IE_THROW() << "EmbeddingBagSum layer '" << _layerName
<< "' has invalid embedding bag index: " << indices[inIdx];
size_t srcIndex = indices[inIdx] * _embDepth;
if (withWeights) {
for (size_t i = 0lu; i < _embDepth; i++) {
dstData[dstIndex + i] = srcData[srcIndex + i] * weightsData[weightsIdx];
}
weightsIdx++;
} else {
for (size_t i = 0lu; i < _embDepth; i++) {
dstData[dstIndex + i] = srcData[srcIndex + i];
}
}
for (inIdx = 1lu; inIdx < indicesSize; inIdx++) {
if (indices[inIdx] >= inDataDims[0])
IE_THROW() << "EmbeddingBagSum layer '" << _layerName
<< "' has invalid embedding bag index: " << indices[inIdx];
size_t srcIndex = indices[inIdx] * _embDepth;
if (withWeights) {
for (size_t i = 0lu; i < _embDepth; i++) {
dstData[dstIndex + i] += srcData[srcIndex + i] * weightsData[weightsIdx];
}
weightsIdx++;
} else {
for (size_t i = 0lu; i < _embDepth; i++) {
dstData[dstIndex + i] += srcData[srcIndex + i];
}
}
}
} else {
for (size_t i = 0lu; i < _embDepth; i++) {
dstData[dstIndex + i] = 0;
}
}
}
};
parallel_nt(0, threadBody);
}

View File

@ -1,63 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "base.hpp"
#include <memory>
#include <set>
#include <vector>
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class MKLDNNEmbeddingBagSum : public ExtLayerBase {
public:
MKLDNNEmbeddingBagSum(
const CNNLayer* layer,
size_t requiredInputsNum,
size_t indicesIdx,
size_t perSampleWeightsIdx,
size_t defaultIndexIdx,
const std::set<Precision>& supportedPrecisions = {});
StatusCode execute(
std::vector<Blob::Ptr>& inputs,
std::vector<Blob::Ptr>& outputs,
ResponseDesc *resp) noexcept override;
protected:
virtual void initFromInputs(std::vector<Blob::Ptr>& inputs) = 0;
virtual void getIndices(
size_t embIndex,
const size_t*& indicesRef,
size_t& size,
size_t& weightsIdx,
bool& withWeights) = 0;
template<typename T>
void processData(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs) noexcept;
std::set<Precision> _supportedPrecisions;
const size_t INDICES_IDX;
const size_t PER_SAMPLE_WEIGHTS_IDX;
const size_t DEFAULT_INDEX_IDX;
bool _withWeights = false;
size_t _embDepth = 0;
std::string _layerName;
using INT32 = PrecisionTrait<Precision::I32>::value_type;
using INT64 = PrecisionTrait<Precision::I64>::value_type;
using UINT64 = PrecisionTrait<Precision::U64>::value_type;
static const std::set<size_t> _supportedIndicesTypeSize;
};
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine

View File

@ -1,134 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "embedding_bag_sum.hpp"
#include "common/cpu_memcpy.h"
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class EmbeddingSegmentsSumImpl: public MKLDNNEmbeddingBagSum {
public:
explicit EmbeddingSegmentsSumImpl(const CNNLayer* layer) :
MKLDNNEmbeddingBagSum(layer, 4lu, 1lu, 5lu, 4lu) {
std::string errPrefix = std::string("EmbeddingSegmentsSum layer with name '") + _layerName + "' ";
auto indicesData = layer->insData[INDICES_IDX].lock();
if (indicesData == nullptr)
IE_THROW() << errPrefix << "has nullable indices data.";
if (indicesData->getTensorDesc().getDims().size() != 1)
IE_THROW() << errPrefix << "has indices data with invalid shape: "
<< indicesData->getTensorDesc().getDims().size();
auto segmentIdData = layer->insData[SEGMENT_ID_IDX].lock();
if (segmentIdData == nullptr)
IE_THROW() << errPrefix << "has invalid segmentID data.";
if (segmentIdData->getTensorDesc().getDims().size() != 1)
IE_THROW() << errPrefix << "has invalid segmentID data shape: "
<< segmentIdData->getTensorDesc().getDims().size();
auto numSegmentData = layer->insData[NUM_SEGMENTS_IDX].lock();
if (numSegmentData == nullptr)
IE_THROW() << errPrefix << "has nullable numSegmentID data.";
if (_supportedIndicesTypeSize.find(indicesData->getTensorDesc().getPrecision().size())
== _supportedIndicesTypeSize.end()
|| _supportedIndicesTypeSize.find(segmentIdData->getTensorDesc().getPrecision().size())
== _supportedIndicesTypeSize.end()
|| _supportedIndicesTypeSize.find(numSegmentData->getTensorDesc().getPrecision().size())
== _supportedIndicesTypeSize.end())
IE_THROW() << errPrefix << "has unsupported input data type.";
_indices = std::vector<size_t>(indicesData->getTensorDesc().getDims()[0], 0lu);
_segmentIds = std::vector<size_t>(segmentIdData->getTensorDesc().getDims()[0], 0lu);
}
void initFromInputs(std::vector<Blob::Ptr>& inputs) override {
// Initialize indices
if (inputs[INDICES_IDX]->getTensorDesc().getPrecision().size() == sizeof(INT32)) {
const INT32* src = inputs[INDICES_IDX]->cbuffer().as<const INT32*>();
for (size_t i = 0lu; i < inputs[INDICES_IDX]->size(); i++)
_indices[i] = static_cast<size_t>(src[i]);
} else if (inputs[INDICES_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) {
const UINT64* src = inputs[INDICES_IDX]->cbuffer().as<const UINT64*>();
cpu_memcpy(_indices.data(), src, inputs[INDICES_IDX]->byteSize());
}
// Initialize segments ids
if (inputs[SEGMENT_ID_IDX]->getTensorDesc().getPrecision().size() == sizeof(INT32)) {
const INT32* src = inputs[SEGMENT_ID_IDX]->cbuffer().as<const INT32*>();
for (size_t i = 0lu; i < inputs[SEGMENT_ID_IDX]->size(); i++)
_segmentIds[i] = static_cast<size_t>(src[i]);
} else if (inputs[SEGMENT_ID_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) {
const UINT64* src = inputs[SEGMENT_ID_IDX]->cbuffer().as<const UINT64*>();
cpu_memcpy(_segmentIds.data(), src, inputs[SEGMENT_ID_IDX]->byteSize());
}
if (inputs.size() > NUM_SEGMENTS_IDX) {
if (inputs[NUM_SEGMENTS_IDX]->getTensorDesc().getPrecision().size() == sizeof(INT32)) {
const INT32* src = inputs[NUM_SEGMENTS_IDX]->cbuffer().as<const INT32*>();
_numSegments = static_cast<size_t>(*src);
} else if (inputs[NUM_SEGMENTS_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) {
const INT64* src = inputs[NUM_SEGMENTS_IDX]->cbuffer().as<const INT64*>();
_numSegments = *src;
}
}
// Initialize default index
_defaultIndices.clear();
if (inputs.size() > DEFAULT_INDEX_IDX) {
if (inputs[DEFAULT_INDEX_IDX]->getTensorDesc().getPrecision().size() == sizeof(INT32)) {
const INT32* src = inputs[DEFAULT_INDEX_IDX]->cbuffer().as<const INT32*>();
_defaultIndices.push_back(static_cast<size_t>(*src));
} else if (inputs[DEFAULT_INDEX_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) {
const INT64* src = inputs[DEFAULT_INDEX_IDX]->cbuffer().as<const INT64*>();
_defaultIndices.push_back(*src);
}
}
}
void getIndices(size_t embIndex, const size_t*& indices, size_t& size, size_t& weightsIdx, bool& withWeight) override {
if (embIndex >= _numSegments)
IE_THROW() << "Invalid embedding bag index.";
indices = nullptr;
size = 0lu;
withWeight = true;
for (size_t si = 0; si < _indices.size(); si++) {
if (_segmentIds[si] == embIndex) {
size++;
if (indices == nullptr) {
indices = _indices.data() + si;
weightsIdx = si;
}
}
}
// Empty bag
if (size == 0) {
size = 1lu;
withWeight = false;
if (_defaultIndices.size() == 1lu)
indices = _defaultIndices.data();
return;
}
}
protected:
const size_t SEGMENT_ID_IDX = 2lu;
const size_t NUM_SEGMENTS_IDX = 3lu;
size_t _numSegments = 0lu;
std::vector<size_t> _indices;
std::vector<size_t> _segmentIds;
std::vector<size_t> _defaultIndices;
};
REG_FACTORY_FOR(EmbeddingSegmentsSumImpl, EmbeddingSegmentsSum);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
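The deleted EmbeddingSegmentsSum code above keeps indices as size_t internally and fills them from either an int32 or an int64 input buffer, dispatching on the element size of the input precision. A self-contained sketch of that widening load (a plain loop is used for both cases instead of cpu_memcpy; the helper name is illustrative):

#include <cstddef>
#include <cstdint>
#include <vector>

// Copies `count` indices from a raw int32 or int64 buffer into size_t storage,
// selecting the source type by its element size, much like the deleted code does
// with getPrecision().size().
inline void loadIndices(std::vector<size_t>& dst, const void* src,
                        size_t count, size_t elemSize) {
    dst.resize(count);
    if (elemSize == sizeof(int32_t)) {
        const int32_t* p = static_cast<const int32_t*>(src);
        for (size_t i = 0; i < count; ++i)
            dst[i] = static_cast<size_t>(p[i]);
    } else if (elemSize == sizeof(int64_t)) {
        const int64_t* p = static_cast<const int64_t*>(src);
        for (size_t i = 0; i < count; ++i)
            dst[i] = static_cast<size_t>(p[i]);
    }
}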

View File

@ -10,6 +10,9 @@
#include <cstring>
#include <string>
#include <cmath>
#include <ngraph/opsets/opset3.hpp>
using namespace MKLDNNPlugin;
namespace InferenceEngine {
namespace Extensions {
@ -267,41 +270,65 @@ private:
}
};
ExtractImagePatchesImpl::ExtractImagePatchesImpl(const CNNLayer* layer) {
bool ExtractImagePatchesImpl::isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
std::string errorPrefix = std::string("Layer ") + layer->type + " with name '" + layer->name + "' ";
if (details::CaselessEq<std::string>()("ExtractImagePatchesLayer", layer->type))
IE_THROW() << errorPrefix << "is not an instance of ExtractImagePatchesLayer class";
const auto extImgPatcher = std::dynamic_pointer_cast<const ngraph::opset3::ExtractImagePatches>(op);
if (!extImgPatcher) {
errorMessage = "Only opset3 ExtractImagePatches operation is supported";
return false;
}
const auto padValue = extImgPatcher->get_auto_pad();
if (!one_of(padValue, ngraph::op::PadType::VALID, ngraph::op::PadType::SAME_LOWER, ngraph::op::PadType::SAME_UPPER)) {
errorMessage = "Does not support pad type: " + ngraph::as_string(padValue);
return false;
}
if (!everyone_is(2, extImgPatcher->get_sizes().size(), extImgPatcher->get_strides().size(), extImgPatcher->get_rates().size())) {
errorMessage = "Doesn't support 'sizes', 'strides', 'rates', attributes with rank != 2";
return false;
}
} catch (...) {
return false;
}
return true;
}
if (layer->insData.size() != 1 || layer->outData.size() != 1)
IE_THROW() << errorPrefix << "has incorrect number of input or output edges!"
<< " Input: " << layer->insData.size() << "; Output: " << layer->outData.size();
ExtractImagePatchesImpl::ExtractImagePatchesImpl(const std::shared_ptr<ngraph::Node>& op) {
try {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
auto inData = layer->insData[0].lock();
if (inData == nullptr)
IE_THROW() << errorPrefix << "has nullable input data";
errorPrefix = "ExtractImagePatches layer with name '" + op->get_friendly_name() + "' ";
const auto extImgPatcher = std::dynamic_pointer_cast<const ngraph::opset3::ExtractImagePatches>(op);
if (inData->getTensorDesc().getDims().size() != 4)
IE_THROW() << errorPrefix << "must have 4D input tensor. Actual: " << inData->getTensorDesc().getDims().size();
if (op->get_input_size() != 1 || op->get_output_size() != 1)
IE_THROW() << errorPrefix << "has incorrect number of input or output edges!"
<< " Input: " << op->get_input_size() << "; Output: " << op->get_output_size();
if (layer->outData[0]->getTensorDesc().getDims().size() != 4)
IE_THROW() << errorPrefix << "must have 4D output tensor. Actual: " << layer->outData[0]->getTensorDesc().getDims().size();
if (op->get_input_shape(0).size() != 4)
IE_THROW() << errorPrefix << "must have 4D input tensor. Actual: " << op->get_input_shape(0).size();
if (inData->getLayout() != NCHW)
IE_THROW() << errorPrefix << "has unsupported layout: " << inData->getLayout();
if (op->get_output_shape(0).size() != 4)
IE_THROW() << errorPrefix << "must have 4D output tensor. Actual: " << op->get_output_shape(0).size();
const auto precision = inData->getTensorDesc().getPrecision();
if (_supported_precisions_sizes.find(precision.size()) == _supported_precisions_sizes.end())
IE_THROW() << errorPrefix << "has unsupported precision: " << precision.name();
const auto precision = details::convertPrecision(op->get_input_element_type(0));
if (_supported_precisions_sizes.find(precision.size()) == _supported_precisions_sizes.end())
IE_THROW() << errorPrefix << "has unsupported precision: " << precision.name();
auto ksizes = extImgPatcher->get_sizes();
auto strides = extImgPatcher->get_strides();
auto rates = extImgPatcher->get_rates();
if (extImgPatcher->get_auto_pad() == ngraph::op::PadType::VALID) {
_auto_pad = ExtImgPatcherPadType::VALID;
} else if (extImgPatcher->get_auto_pad() == ngraph::op::PadType::SAME_LOWER) {
_auto_pad = ExtImgPatcherPadType::SAME_LOWER;
} else if (extImgPatcher->get_auto_pad() == ngraph::op::PadType::SAME_UPPER) {
_auto_pad = ExtImgPatcherPadType::SAME_UPPER;
} else {
IE_THROW() << errorPrefix << "has unsupported pad type: " << extImgPatcher->get_auto_pad();
}
auto ksizes = layer->GetParamAsUInts("sizes");
auto strides = layer->GetParamAsUInts("strides");
auto rates = layer->GetParamAsUInts("rates");
std::string auto_pad = layer->GetParamAsString("auto_pad");
if (!CaselessEq<std::string>()(auto_pad, "valid")
&& !CaselessEq<std::string>()(auto_pad, "same_upper")
&& !CaselessEq<std::string>()(auto_pad, "same_lower"))
IE_THROW() << errorPrefix << "has unsupported auto_pad value: " << auto_pad;
if (ksizes.size() != 2 || strides.size() != 2 || rates.size() != 2)
IE_THROW() << errorPrefix << "must have the following attributes with shape {2}: sizes, strides, rates.";
_ksizes.clear();
@ -323,12 +350,12 @@ ExtractImagePatchesImpl::ExtractImagePatchesImpl(const CNNLayer* layer) {
_rates.push_back(static_cast<size_t>(x));
}
SizeVector in_dims = inData->getTensorDesc().getDims();
SizeVector in_dims = op->get_input_shape(0);
_pad_left = 0;
_pad_top = 0;
jit_extract_image_patches_params jpp;
jpp.need_padding = false;
if (!CaselessEq<std::string>()(auto_pad, "valid")) {
if (_auto_pad != ExtImgPatcherPadType::VALID) {
const size_t iheight = in_dims[2];
const size_t iwidth = in_dims[3];
const int64_t ihStep = _ksizes[0] + (_rates[0] - 1) * (_ksizes[0] - 1);
@ -338,9 +365,9 @@ ExtractImagePatchesImpl::ExtractImagePatchesImpl(const CNNLayer* layer) {
int64_t PH = (std::ceil(1.f * iheight/_strides[0]) - 1) * _strides[0] + ihStep - iheight;
int64_t increment_sign = 0;
if (CaselessEq<std::string>()(auto_pad, "same_lower")) {
if (_auto_pad == ExtImgPatcherPadType::SAME_LOWER) {
increment_sign = 1;
} else if (CaselessEq<std::string>()(auto_pad, "same_upper")) {
} else if (_auto_pad == ExtImgPatcherPadType::SAME_UPPER) {
increment_sign = -1;
}
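// PH above is the standard "same" padding total along H: with an effective (dilated) window of
// ihStep pixels and stride _strides[0], ceil(iheight / stride) window positions are kept, so
// (positions - 1) * stride + ihStep - iheight extra pixels are required. The analogous width
// computation and the split of the total between the two borders sit in the elided part of this
// hunk; increment_sign records the SAME_LOWER / SAME_UPPER choice used for that split.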
@ -355,14 +382,14 @@ ExtractImagePatchesImpl::ExtractImagePatchesImpl(const CNNLayer* layer) {
}
jpp.IW = in_dims[3];
SizeVector out_dims = layer->outData[0]->getTensorDesc().getDims();
SizeVector out_dims = op->get_output_shape(0);
jpp.OH = out_dims[2];
jpp.OW = out_dims[3];
jpp.KH = _ksizes[0];
jpp.KW = _ksizes[1];
jpp.SH = _strides[0];
jpp.SW = _strides[1];
jpp.dtype_size = layer->insData.front().lock()->getPrecision().size();
jpp.dtype_size = precision.size();
jpp.block_size = 1;
if (mayiuse(x64::avx512_common)) {
@ -379,26 +406,13 @@ ExtractImagePatchesImpl::ExtractImagePatchesImpl(const CNNLayer* layer) {
if (extract_image_patches_kernel)
extract_image_patches_kernel->create_ker();
LayerConfig config;
DataConfig inConfig;
inConfig.desc = inData->getTensorDesc();
config.inConfs.push_back(inConfig);
DataConfig outConfig;
outConfig.desc = layer->outData[0]->getTensorDesc();
outConfig.desc.setPrecision(inConfig.desc.getPrecision());
outConfig.desc.setLayout(inConfig.desc.getLayout());
config.outConfs.push_back(outConfig);
config.dynBatchSupport = false;
confs.push_back(config);
addConfig(op, {{TensorDescCreatorTypes::ncsp, precision}},
{{TensorDescCreatorTypes::ncsp, precision}});
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
}
}
StatusCode ExtractImagePatchesImpl::execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept {
const char *src_data = inputs[0]->cbuffer().as<const char *>() +
inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();

View File

@ -42,10 +42,17 @@ struct jit_uni_extract_image_patches_kernel {
class ExtractImagePatchesImpl : public ExtLayerBase {
public:
explicit ExtractImagePatchesImpl(const CNNLayer*);
explicit ExtractImagePatchesImpl(const std::shared_ptr<ngraph::Node>& op);
StatusCode execute(std::vector<Blob::Ptr>&, std::vector<Blob::Ptr>&, ResponseDesc*) noexcept override;
bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept;
private:
enum class ExtImgPatcherPadType {
VALID,
SAME_LOWER,
SAME_UPPER
};
std::vector<size_t> _ksizes;
std::vector<size_t> _strides;
std::vector<size_t> _rates;
@ -53,6 +60,10 @@ private:
size_t _pad_top;
std::shared_ptr<jit_uni_extract_image_patches_kernel> extract_image_patches_kernel;
static const std::set<size_t> _supported_precisions_sizes;
ExtImgPatcherPadType _auto_pad;
std::string errorPrefix;
};
REG_FACTORY_FOR(ExtractImagePatchesImpl, ExtractImagePatches);

View File

@ -1,124 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "base.hpp"
#include <cmath>
#include <string>
#include <vector>
#include <cassert>
#include "ie_parallel.hpp"
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class FillImpl: public ExtLayerBase {
public:
explicit FillImpl(const CNNLayer* layer) {
try {
if (layer->insData.empty() || layer->outData.empty())
IE_THROW() << layer->name << " Incorrect number of input/output edges!";
if (layer->insData.size() != 2)
IE_THROW() << layer->name << " Incorrect number of input edges!";
SizeVector fill_dims = layer->insData[FILL_DIMS].lock()->getTensorDesc().getDims();
if (fill_dims.size() > 1)
IE_THROW() << layer->name << " Fill dimensions vector should be 1 dimension";
SizeVector value_dims = layer->insData[FILL_VALUE].lock()->getTensorDesc().getDims();
if (value_dims.size() > 1)
IE_THROW() << layer->name << " Value scalar should have 1 dimension";
if (!(layer->insData[FILL_VALUE].lock()->getTensorDesc().getPrecision() == Precision::I32 &&
layer->outData[0]->getTensorDesc().getPrecision() == Precision::I32) &&
!(layer->insData[FILL_VALUE].lock()->getTensorDesc().getPrecision() == Precision::FP32 &&
layer->outData[0]->getTensorDesc().getPrecision() == Precision::FP32)) {
addConfig(layer, { DataConfigurator(ConfLayout::PLN, Precision::I32), DataConfigurator(ConfLayout::PLN, Precision::FP32) },
{ DataConfigurator(ConfLayout::PLN, Precision::FP32) });
} else {
addConfig(layer, { DataConfigurator(ConfLayout::PLN, Precision::I32), DataConfigurator(ConfLayout::PLN) },
{ DataConfigurator(ConfLayout::PLN) });
}
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
}
}
StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
int32_t* fill_dims = inputs[FILL_DIMS]->cbuffer().as<int32_t *>() +
inputs[FILL_DIMS]->getTensorDesc().getBlockingDesc().getOffsetPadding();
size_t fill_size = inputs[FILL_DIMS]->getTensorDesc().getDims()[0];
SizeVector dst_dims = outputs[0]->getTensorDesc().getDims();
if (dst_dims.size() != fill_size) {
if (resp) {
std::string errorMsg = "Output tensor dimension mismatch";
errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
}
return PARAMETER_MISMATCH;
}
size_t work_amount_dst = 1;
for (size_t i = 0; i < dst_dims.size(); i++) {
work_amount_dst *= fill_dims[i];
if (static_cast<int>(dst_dims[i]) != fill_dims[i]) {
if (resp) {
std::string errorMsg = "Output tensor dimension size mismatch";
errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
}
return PARAMETER_MISMATCH;
}
}
switch (outputs[0]->getTensorDesc().getPrecision()) {
case Precision::FP32: {
float* dst_data = outputs[0]->cbuffer().as<float *>() +
outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
float value = (inputs[FILL_VALUE]->cbuffer().as<float *>() +
inputs[FILL_VALUE]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];
parallel_nt(0, [&](const int ithr, const int nthr) {
size_t start = 0, end = 0;
splitter(work_amount_dst, nthr, ithr, start, end);
std::fill_n(dst_data + start, end - start, value);
});
}
break;
case Precision::I32: {
int32_t* dst_data = outputs[0]->cbuffer().as<int32_t *>() +
outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
int32_t value = (inputs[FILL_VALUE]->cbuffer().as<int32_t *>() +
inputs[FILL_VALUE]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];
parallel_nt(0, [&](const int ithr, const int nthr) {
size_t start = 0, end = 0;
splitter(work_amount_dst, nthr, ithr, start, end);
std::fill_n(dst_data + start, end - start, value);
});
return OK;
}
break;
default:
if (resp) {
std::string errorMsg = "Incorrect output precision. Only FP32 and I32 are supported!";
errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
}
return GENERAL_ERROR;
}
return OK;
}
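// Example of the removed Fill behaviour: with FILL_DIMS = {2, 3} (I32) and FILL_VALUE = 5.f
// (FP32), the 2x3 FP32 output is filled with 5.f in parallel chunks; the loop above only
// validates that the requested dims match the output tensor dims and accumulates the work amount.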
private:
const size_t FILL_DIMS = 0;
const size_t FILL_VALUE = 1;
};
REG_FACTORY_FOR(FillImpl, Fill);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine

View File

@ -1,154 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "base.hpp"
#include <cmath>
#include <string>
#include <vector>
#include <cassert>
#include <algorithm>
#include <limits>
#include "ie_parallel.hpp"
#include "common/cpu_memcpy.h"
#include "common/fp16_utils.h"
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class GatherImpl: public ExtLayerBase {
public:
explicit GatherImpl(const CNNLayer* layer) {
try {
if (layer->insData.size() != 2 || layer->outData.empty())
IE_THROW() << layer->name << " Incorrect number of input/output edges!";
Precision inIdxPrecision = layer->insData[GATHER_INDEXES].lock()->getTensorDesc().getPrecision();
if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32 && inIdxPrecision != Precision::FP16)
inIdxPrecision = Precision::I32;
axis = layer->GetParamAsInt("axis");
const SizeVector& dictionary_dims = layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getDims();
if (dictionary_dims.size() == 0)
IE_THROW() << layer->name << " Incorrect input parameters dimension!";
// Dictionary must be at least rank axis + 1
IE_ASSERT(-static_cast<int>(dictionary_dims.size()) <= axis && axis < static_cast<int>(dictionary_dims.size()))
<< layer->name << " Incorrect input parameters dimensions and axis number!";
if (axis < 0)
axis += dictionary_dims.size();
// Find number of dictionaries, index range and data length
for (int i = 0; i < axis; i++)
numDictionaries *= dictionary_dims[i];
indexRange = dictionary_dims[axis];
for (size_t i = axis + 1; i < dictionary_dims.size(); i++)
dataLength *= dictionary_dims[i];
if (dataLength == 0)
IE_THROW() << layer->name << " Incorrect input parameters dimension!";
LayerConfig config;
DataConfig dataConfigIdx, dataConfigDct;
Precision dataPrecision = layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getPrecision();
dataConfigDct.desc = TensorDesc(dataPrecision, dictionary_dims,
layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getLayoutByDims(dictionary_dims));
config.inConfs.push_back(dataConfigDct);
const SizeVector& indexes_dims = layer->insData[GATHER_INDEXES].lock()->getTensorDesc().getDims();
dataConfigIdx.desc = TensorDesc(inIdxPrecision, indexes_dims,
layer->insData[GATHER_INDEXES].lock()->getTensorDesc().getLayout());
config.inConfs.push_back(dataConfigIdx);
DataConfig dataConfigOut;
const SizeVector& out_dims = layer->outData[0]->getTensorDesc().getDims();
dataConfigOut.desc = TensorDesc(dataPrecision, out_dims,
layer->outData[0]->getTensorDesc().getLayoutByDims(out_dims));
config.outConfs.push_back(dataConfigOut);
config.dynBatchSupport = false;
confs.push_back(config);
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
}
}
struct f32toUi32 {
inline unsigned int operator()(const float value) {
return static_cast<unsigned int>(value);
}
};
struct f16toUi32 {
inline unsigned int operator()(const ie_fp16 value) {
return static_cast<unsigned int>(f16tof32(value));
}
};
struct i32toUi32 {
inline unsigned int operator()(const int32_t value) {
return static_cast<unsigned int>(value);
}
};
StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
switch (inputs[GATHER_INDEXES]->getTensorDesc().getPrecision()) {
case Precision::FP32:
gather<float, f32toUi32>(inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]);
break;
case Precision::FP16:
gather<ie_fp16, f16toUi32>(inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]);
break;
case Precision::I32:
gather<int32_t, i32toUi32>(inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]);
break;
default:
return GENERAL_ERROR;
}
return OK;
}
private:
template <typename index_t, class Conversion>
void gather(Blob::Ptr indexes, Blob::Ptr dictionary, Blob::Ptr output) {
size_t src_indexSize = indexes->size();
const index_t *src_index = indexes->cbuffer().as<const index_t *>() + indexes->getTensorDesc().getBlockingDesc().getOffsetPadding();
const uint8_t *src_dataDict = dictionary->cbuffer().as<const uint8_t *>() + dictionary->getTensorDesc().getBlockingDesc().getOffsetPadding();
uint8_t *dst_data = output->cbuffer().as<uint8_t*>() + output->getTensorDesc().getBlockingDesc().getOffsetPadding();
size_t len = dataLength * dictionary->getTensorDesc().getPrecision().size();
parallel_for(src_indexSize, [&](size_t i) {
unsigned int idx = Conversion()(src_index[i]);
// Index clipping
if (idx < indexRange) {
// Copying data to destination from Dictionary
for (size_t j = 0; j < numDictionaries; j++) {
cpu_memcpy_s(&dst_data[len * (i + j * src_indexSize)],
output->byteSize() - (len * (i + j * src_indexSize)),
&src_dataDict[len * (idx + j * indexRange)],
len);
}
} else {
for (size_t j = 0; j < numDictionaries; j++) {
memset(&dst_data[len * (i + j * src_indexSize)], 0, len);
}
}
});
}
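// Addressing used above, for illustration: a dictionary of dims {2, 4, 5} gathered along
// axis = 1 gives numDictionaries = 2, indexRange = 4, dataLength = 5, so for each index value
// idx the len = 5 * sizeof(type) bytes at dict[j][idx][*] are copied to out[j][i][*] for
// j = 0..numDictionaries-1; out-of-range indices produce zero-filled blocks rather than being
// clamped.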
int axis = 0;
size_t numDictionaries = 1;
size_t indexRange = 0;
size_t dataLength = 1;
const size_t GATHER_DICTIONARY = 0;
const size_t GATHER_INDEXES = 1;
};
REG_FACTORY_FOR(GatherImpl, Gather);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine

View File

@ -1,149 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "base.hpp"
#include <string>
#include <vector>
#include "ie_parallel.hpp"
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class GatherElementsImpl: public ExtLayerBase {
public:
explicit GatherElementsImpl(const CNNLayer* layer) : strideAx1Diff_(0) {
errorPrefix_ = std::string("Layer GatherElements with name '") + layer->name + "'";
if (layer->insData.size() != 2 || layer->outData.size() != 1)
IE_THROW() << errorPrefix_ << " has invalid number of input/output edges.";
auto inputData = layer->insData[dataIndex_].lock();
auto indices = layer->insData[indicesIndex_].lock();
if (!inputData || !indices)
IE_THROW() << errorPrefix_ << " has nullable inputs.";
const auto& dataDims = inputData->getTensorDesc().getDims();
const auto& indicesDims = indices->getTensorDesc().getDims();
if (dataDims.size() != indicesDims.size())
IE_THROW() << errorPrefix_ << " has invalid input shapes. Inputs 'Data' and 'Indices' must have equal ranks.";
Precision dataPrecision = inputData->getTensorDesc().getPrecision();
if (dataPrecision.size() != sizeof(PrecisionTrait<Precision::I32>::value_type) &&
dataPrecision.size() != sizeof(PrecisionTrait<Precision::I16>::value_type) &&
dataPrecision.size() != sizeof(PrecisionTrait<Precision::I8>::value_type)) {
IE_THROW() << errorPrefix_ << " has unsupported 'inputData' input precision: " << dataPrecision;
}
Precision indicesPrecision = indices->getTensorDesc().getPrecision();
if (indicesPrecision != Precision::I32) {
IE_THROW() << errorPrefix_ << " has unsupported 'indices' input precision: " << indicesPrecision;
}
dataTypeSize_ = dataPrecision.size();
int axis = layer->GetParamAsInt("axis");
if (axis < 0)
axis += dataDims.size();
if (axis < 0 || axis >= static_cast<int>(dataDims.size()))
IE_THROW() << errorPrefix_ << " has invalid axis attribute: " << axis;
axis_ = axis;
auto& outputData = layer->outData[0];
strideAxDst_ = outputData->getTensorDesc().getBlockingDesc().getStrides()[axis_];
dstAxDim_ = outputData->getTensorDesc().getDims()[axis_];
if (axis_ > 0) {
strideAx1Diff_ = inputData->getTensorDesc().getBlockingDesc().getStrides()[axis_ - 1] -
outputData->getTensorDesc().getBlockingDesc().getStrides()[axis_ - 1];
}
LayerConfig config;
DataConfig dataConfig, indicesConfig, outConfig;
dataConfig.desc = TensorDesc(dataPrecision, dataDims,
inputData->getTensorDesc().getLayoutByDims(dataDims));
config.inConfs.push_back(dataConfig);
indicesConfig.desc = TensorDesc(Precision::I32, indicesDims,
indices->getTensorDesc().getLayoutByDims(indicesDims));
config.inConfs.push_back(indicesConfig);
const auto& outDims = outputData->getTensorDesc().getDims();
outConfig.desc = TensorDesc(dataPrecision, outDims,
outputData->getTensorDesc().getLayoutByDims(outDims));
config.outConfs.push_back(outConfig);
config.dynBatchSupport = false;
confs.push_back(config);
}
StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
switch (dataTypeSize_) {
case sizeof(PrecisionTrait<Precision::I32>::value_type):
return directExecution<PrecisionTrait<Precision::I32>::value_type>(inputs, outputs, resp);
case sizeof(PrecisionTrait<Precision::I16>::value_type):
return directExecution<PrecisionTrait<Precision::I16>::value_type>(inputs, outputs, resp);
case sizeof(PrecisionTrait<Precision::I8>::value_type):
return directExecution<PrecisionTrait<Precision::I8>::value_type>(inputs, outputs, resp);
default:
std::string errMsg = errorPrefix_ + " has inputData input with unsupported precision: " +
inputs[dataIndex_]->getTensorDesc().getPrecision().name();
errMsg.copy(resp->msg, sizeof(resp->msg) - 1);
return GENERAL_ERROR;
}
}
protected:
template <typename dataType>
StatusCode directExecution(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept {
const dataType* srcData = inputs[dataIndex_]->cbuffer().as<const dataType*>() +
inputs[dataIndex_]->getTensorDesc().getBlockingDesc().getOffsetPadding();
const int* indices = inputs[indicesIndex_]->cbuffer().as<const int*>() +
inputs[indicesIndex_]->getTensorDesc().getBlockingDesc().getOffsetPadding();
dataType* dstData = outputs[0]->buffer().as<dataType*>() +
outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
const int outSize = outputs[0]->size();
auto threadBody = [&](const int ithr, const int nthr) {
int start(0lu), end(0lu);
splitter(outSize, nthr, ithr, start, end);
if (start >= end)
return;
int axStrideIt = start % strideAxDst_;
int dstAxIdx = (start / strideAxDst_) % dstAxDim_;
int dstShift0 = (start / strideAxDst_ / dstAxDim_) * strideAx1Diff_;
for (size_t o = start; o < end; o++, axStrideIt++) {
if (axStrideIt == strideAxDst_) {
axStrideIt = 0;
dstAxIdx++;
if (dstAxIdx == dstAxDim_) {
dstAxIdx = 0;
dstShift0 += strideAx1Diff_;
}
}
dstData[o] = srcData[o + dstShift0 + (indices[o] - dstAxIdx) * strideAxDst_];
}
};
parallel_nt(0, threadBody);
return OK;
}
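// The offset arithmetic above maps an output linear index o = (outer, axIdx, inner) to the
// source element (outer, indices[o], inner): the axis coordinate is swapped via
// (indices[o] - dstAxIdx) * strideAxDst_ (the inner dims match, so the axis stride is shared),
// and dstShift0 compensates once per outer index for the data/output stride difference before
// the axis (strideAx1Diff_).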
const size_t dataIndex_ = 0;
const size_t indicesIndex_ = 1;
size_t axis_;
size_t dataTypeSize_;
int strideAxDst_;
int dstAxDim_;
int strideAx1Diff_;
std::string errorPrefix_;
};
REG_FACTORY_FOR(GatherElementsImpl, GatherElements);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine

View File

@ -1,230 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "base.hpp"
#include <string>
#include <vector>
#include "ie_parallel.hpp"
#include "common/cpu_memcpy.h"
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class GatherNDImpl: public ExtLayerBase {
public:
explicit GatherNDImpl(const CNNLayer* layer) {
_errorPrefix = std::string("Layer GatherND with name '") + layer->name + "'";
if (layer->insData.size() != 2 || layer->outData.size() != 1)
IE_THROW() << _errorPrefix << " has invalid number of input/output edges.";
auto data = layer->insData[_dataIndex].lock();
auto indices = layer->insData[_indicesIndex].lock();
if (!data || !indices)
IE_THROW() << _errorPrefix << " has nullable inputs.";
Precision dataPrecision = data->getTensorDesc().getPrecision();
if (dataPrecision.size() != sizeof(PrecisionTrait<Precision::I32>::value_type) &&
dataPrecision.size() != sizeof(PrecisionTrait<Precision::I16>::value_type) &&
dataPrecision.size() != sizeof(PrecisionTrait<Precision::I8>::value_type)) {
IE_THROW() << _errorPrefix << " has unsupported 'data' input precision: " << dataPrecision;
}
Precision indicesPrecision = indices->getTensorDesc().getPrecision();
if (indicesPrecision != Precision::I32 &&
indicesPrecision != Precision::I16 && indicesPrecision != Precision::U16 &&
indicesPrecision != Precision::I8 && indicesPrecision != Precision::U8) {
IE_THROW() << _errorPrefix << " has unsupported 'indices' input precision: " << indicesPrecision;
}
_dataTypeSize = dataPrecision.size();
const auto& dataDims = data->getTensorDesc().getDims();
const auto& indicesDims = indices->getTensorDesc().getDims();
_batchDims = layer->GetParamAsInt("batch_dims", 0);
if (_batchDims >= std::min(dataDims.size(), indicesDims.size()))
IE_THROW() << _errorPrefix << " has invalid batch_dims attribute: " << _batchDims;
_batchNum = 1lu;
for (size_t i = 0; i < _batchDims; i++) {
_batchNum *= indicesDims[i];
}
_sliceRank = indicesDims[indicesDims.size() - 1];
_dataRank = dataDims.size() - _batchDims;
if (_sliceRank > _dataRank)
IE_THROW() << _errorPrefix << " has invalid inputs shapes.";
_blockSize = 1;
for (size_t i = _sliceRank + _batchDims; i < dataDims.size(); i++) {
_blockSize *= dataDims[i];
}
_batchStep = 1;
for (size_t i = _batchDims; i < dataDims.size(); i++) {
_batchStep *= dataDims[i];
}
LayerConfig config;
DataConfig dataConfig, indicesConfig, outConfig;
dataConfig.desc = TensorDesc(dataPrecision, dataDims,
data->getTensorDesc().getLayoutByDims(dataDims));
config.inConfs.push_back(dataConfig);
indicesConfig.desc = TensorDesc(Precision::I32, indicesDims,
indices->getTensorDesc().getLayoutByDims(indicesDims));
config.inConfs.push_back(indicesConfig);
const auto& outDims = layer->outData[0]->getTensorDesc().getDims();
outConfig.desc = TensorDesc(dataPrecision, outDims,
layer->outData[0]->getTensorDesc().getLayoutByDims(outDims));
config.outConfs.push_back(outConfig);
config.dynBatchSupport = false;
confs.push_back(config);
}
StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
if (_blockSize > 1) {
gatherBlocks(inputs, outputs, resp);
} else {
switch (_dataTypeSize) {
case sizeof(PrecisionTrait<Precision::I32>::value_type):
gatherElementwise<PrecisionTrait<Precision::I32>::value_type>(inputs, outputs, resp);
break;
case sizeof(PrecisionTrait<Precision::I16>::value_type):
gatherElementwise<PrecisionTrait<Precision::I16>::value_type>(inputs, outputs, resp);
break;
case sizeof(PrecisionTrait<Precision::I8>::value_type):
gatherElementwise<PrecisionTrait<Precision::I8>::value_type>(inputs, outputs, resp);
break;
default:
std::string errMsg = _errorPrefix + " has data input with unsupported precision: " +
inputs[_dataIndex]->getTensorDesc().getPrecision().name();
errMsg.copy(resp->msg, sizeof(resp->msg) - 1);
return GENERAL_ERROR;
}
}
return OK;
}
protected:
template <typename dataType>
void gatherElementwise(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept {
const dataType* srcData = inputs[_dataIndex]->cbuffer().as<const dataType*>() +
inputs[_dataIndex]->getTensorDesc().getBlockingDesc().getOffsetPadding();
const int* indices = inputs[_indicesIndex]->cbuffer().as<const int*>() +
inputs[_indicesIndex]->getTensorDesc().getBlockingDesc().getOffsetPadding();
dataType* dstData = outputs[0]->buffer().as<dataType*>() +
outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
const size_t* srcMultipliers = inputs[_dataIndex]->getTensorDesc().getBlockingDesc().getStrides().data() + _batchDims;
const size_t cycles = outputs[0]->byteSize() / (sizeof(dataType) * _batchNum);
const size_t CS = cycles * _sliceRank;
const size_t CB = cycles * _blockSize;
const size_t workAmount = _batchNum * cycles;
auto threadBody = [&](const int ithr, const int nthr) {
size_t start(0lu), end(0lu);
splitter(workAmount, nthr, ithr, start, end);
if (start >= end)
return;
size_t bStart = start / cycles;
size_t cStart = start % cycles;
size_t workCounter = start;
const dataType* shiftedSrcData = srcData + bStart * _batchStep;
const int* shiftedIndices = indices + bStart * CS + cStart * _sliceRank;
dataType* shiftedDstData = dstData + bStart * CB + cStart * _blockSize;
for (size_t b = bStart; b < _batchNum; b++) {
for (size_t j = cStart; j < cycles; j++) {
size_t dataIdx = 0lu;
for (size_t i = 0lu; i < _sliceRank; i++)
dataIdx += srcMultipliers[i] * shiftedIndices[i];
shiftedDstData[0] = shiftedSrcData[dataIdx];
shiftedDstData++;
shiftedIndices += _sliceRank;
if (++workCounter == end) {
return;
}
}
cStart = 0lu;
shiftedSrcData += _batchStep;
}
};
parallel_nt(0, threadBody);
}
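// dataIdx above is the dot product of one _sliceRank-long tuple from 'indices' with the data
// strides taken after the batch dims (srcMultipliers), i.e. the linear offset of the addressed
// element within the current batch. This elementwise path is used when the gathered slice is a
// single scalar (_blockSize == 1); gatherBlocks() below copies dataStep bytes per tuple for
// larger trailing slices.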
void gatherBlocks(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept {
const uint8_t* srcData = inputs[_dataIndex]->cbuffer().as<const uint8_t*>() +
inputs[_dataIndex]->getTensorDesc().getBlockingDesc().getOffsetPadding();
const int* indices = inputs[_indicesIndex]->cbuffer().as<const int*>() +
inputs[_indicesIndex]->getTensorDesc().getBlockingDesc().getOffsetPadding();
uint8_t* dstData = outputs[0]->buffer().as<uint8_t*>() +
outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
std::vector<size_t> srcMultipliers(_sliceRank);
for (size_t i = 0; i < _sliceRank ; i++)
srcMultipliers[i] = _dataTypeSize * inputs[_dataIndex]->getTensorDesc().getBlockingDesc().getStrides()[i + _batchDims];
const size_t batchStep = _batchStep * _dataTypeSize;
const size_t dataStep = _blockSize * _dataTypeSize;
const size_t cycles = outputs[0]->byteSize() / (dataStep * _batchNum);
const size_t CS = cycles * _sliceRank;
const size_t CB = cycles * dataStep;
const size_t workAmount = _batchNum * cycles;
auto threadBody = [&](const int ithr, const int nthr) {
size_t start(0lu), end(0lu);
splitter(workAmount, nthr, ithr, start, end);
if (start >= end)
return;
size_t bStart = start / cycles;
size_t cStart = start % cycles;
size_t workCounter = start;
const uint8_t* shiftedSrcData = srcData + bStart * batchStep;
const int* shiftedIndices = indices + bStart * CS + cStart * _sliceRank;
uint8_t* shiftedDstData = dstData + bStart * CB + cStart * dataStep;
for (size_t b = bStart; b < _batchNum; b++) {
for (size_t j = cStart; j < cycles; j++) {
size_t dataIdx = 0lu;
for (size_t i = 0; i < _sliceRank ; i++)
dataIdx += srcMultipliers[i] * shiftedIndices[i];
cpu_memcpy(shiftedDstData, &(shiftedSrcData[dataIdx]), dataStep);
shiftedDstData += dataStep;
shiftedIndices += _sliceRank;
if (++workCounter == end) {
return;
}
}
cStart = 0;
shiftedSrcData += batchStep;
}
};
parallel_nt(0, threadBody);
}
size_t _dataRank;
size_t _sliceRank;
size_t _blockSize;
size_t _batchDims;
size_t _batchNum;
size_t _batchStep;
size_t _dataTypeSize;
const size_t _dataIndex = 0;
const size_t _indicesIndex = 1;
std::string _errorPrefix;
};
REG_FACTORY_FOR(GatherNDImpl, GatherND);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine

View File

@ -3,6 +3,9 @@
//
#include "base.hpp"
#include <ngraph/op/gather_tree.hpp>
#include <nodes/common/tensor_desc_creator.h>
#include <utils/general_utils.h>
#include <cmath>
#include <limits>
@ -17,45 +20,71 @@ namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
using MKLDNNPlugin::TensorDescCreatorTypes;
class GatherTreeImpl: public ExtLayerBase {
public:
explicit GatherTreeImpl(const CNNLayer* layer) {
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (layer->insData.empty() || layer->outData.empty())
IE_THROW() << layer->name << " Incorrect number of input/output edges.";
auto gatherElementsOp = ngraph::as_type_ptr<const ngraph::op::v1::GatherTree>(op);
if (!gatherElementsOp) {
errorMessage = "Node is not an instance of the GatherTree operation from operation set v1.";
return false;
}
if (layer->insData.size() != 4)
IE_THROW() << layer->name << " Incorrect number of input edges.";
if (layer->outData.size() != 1)
IE_THROW() << layer->name << " Incorrect number of output edges.";
precision = layer->insData[GATHER_TREE_STEP_IDX].lock()->getTensorDesc().getPrecision();
if (precision != Precision::FP32 && precision != Precision::I32)
precision = Precision::FP32;
if (layer->insData[GATHER_TREE_PARENT_IDX].lock()->getTensorDesc().getPrecision() != precision ||
layer->insData[GATHER_TREE_MAX_SEQ_LEN].lock()->getTensorDesc().getPrecision() != precision ||
layer->insData[GATHER_TREE_END_TOKEN].lock()->getTensorDesc().getPrecision() != precision ||
layer->outData[0]->getTensorDesc().getPrecision() != precision)
IE_THROW() << layer->name << " Incorrect input/output data tensor precision. Should be the same.";
if (layer->insData[GATHER_TREE_STEP_IDX].lock()->getTensorDesc().getDims().size() != 3)
IE_THROW() << layer->name << " step_idx vector should be 3 dimension";
if (layer->insData[GATHER_TREE_PARENT_IDX].lock()->getTensorDesc().getDims().size() != 3)
IE_THROW() << layer->name << " parent_idx vector should be 3 dimension";
if (layer->insData[GATHER_TREE_MAX_SEQ_LEN].lock()->getTensorDesc().getDims().size() != 1)
IE_THROW() << layer->name << " max_seq_len vector should be 1 dimension";
if (layer->insData[GATHER_TREE_END_TOKEN].lock()->getTensorDesc().getDims().size() != 1)
IE_THROW() << layer->name << " end_token should be 1 dimension";
addConfig(layer, { DataConfigurator(ConfLayout::PLN, precision), DataConfigurator(ConfLayout::PLN, precision),
DataConfigurator(ConfLayout::PLN, precision), DataConfigurator(ConfLayout::PLN, precision) },
{ DataConfigurator(ConfLayout::PLN, precision) });
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
auto precision = op->get_input_element_type(GATHER_TREE_STEP_IDX);
if (!MKLDNNPlugin::one_of(precision, ngraph::element::f32, ngraph::element::i32))
precision = ngraph::element::f32;
if (op->get_input_element_type(GATHER_TREE_PARENT_IDX) != precision ||
op->get_input_element_type(GATHER_TREE_MAX_SEQ_LEN) != precision ||
op->get_input_element_type(GATHER_TREE_END_TOKEN) != precision ||
op->get_output_element_type(0) != precision) {
errorMessage = "Node has incorrect input/output data precision. Must be the same.";
return false;
}
} catch (...) {
return false;
}
return true;
}
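// The static check above is reused by the constructor below: an op that fails it is rejected
// with a NotImplemented exception rather than a hard error.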
explicit GatherTreeImpl(const std::shared_ptr<ngraph::Node>& op) {
try {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
std::string errorPrefix = std::string("Node GatherTree with name '") + op->get_friendly_name() + "'";
if (op->get_input_size() != 4)
IE_THROW() << errorPrefix << " has incorrect number of input edges.";
if (op->get_output_size() != 1)
IE_THROW() << errorPrefix << " has incorrect number of output edges.";
precision = details::convertPrecision(op->get_input_element_type(GATHER_TREE_STEP_IDX));
if (!MKLDNNPlugin::one_of(precision, Precision::FP32, Precision::I32))
precision = Precision::FP32;
if (op->get_input_shape(GATHER_TREE_STEP_IDX).size() != 3)
IE_THROW() << errorPrefix << " step_idx vector should be 3-dimensional";
if (op->get_input_shape(GATHER_TREE_PARENT_IDX).size() != 3)
IE_THROW() << errorPrefix << " parent_idx vector should be 3-dimensional";
if (op->get_input_shape(GATHER_TREE_MAX_SEQ_LEN).size() != 1)
IE_THROW() << errorPrefix << " max_seq_len vector should be 1-dimensional";
if (op->get_input_shape(GATHER_TREE_END_TOKEN).size() != 0)
IE_THROW() << errorPrefix << " end_token should be a scalar";
addConfig(op, {{TensorDescCreatorTypes::ncsp, precision},
{TensorDescCreatorTypes::ncsp, precision},
{TensorDescCreatorTypes::ncsp, precision},
{TensorDescCreatorTypes::ncsp, precision}},
{{TensorDescCreatorTypes::ncsp, precision}});
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
throw;
}
}
StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
if (precision == Precision::FP32)
@ -140,10 +169,10 @@ public:
}
private:
const size_t GATHER_TREE_STEP_IDX = 0;
const size_t GATHER_TREE_PARENT_IDX = 1;
const size_t GATHER_TREE_MAX_SEQ_LEN = 2;
const size_t GATHER_TREE_END_TOKEN = 3;
static const size_t GATHER_TREE_STEP_IDX = 0;
static const size_t GATHER_TREE_PARENT_IDX = 1;
static const size_t GATHER_TREE_MAX_SEQ_LEN = 2;
static const size_t GATHER_TREE_END_TOKEN = 3;
InferenceEngine::Precision precision;
};

View File

@ -8,21 +8,48 @@
#include <string>
#include <vector>
#include "ie_parallel.hpp"
#include <ngraph/opsets/opset1.hpp>
using namespace MKLDNNPlugin;
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class GRNImpl: public ExtLayerBase {
public:
explicit GRNImpl(const CNNLayer* layer) {
bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (layer->insData.size() != 1 || layer->outData.empty())
IE_THROW() << "Incorrect number of input/output edges!";
const auto grn = std::dynamic_pointer_cast<const ngraph::opset1::GRN>(op);
if (!grn) {
errorMessage = "Only opset1 GRN operation is supported";
return false;
}
} catch (...) {
return false;
}
return true;
}
bias = layer->GetParamAsFloat("bias");
std::string errorPrefix;
addConfig(layer, {{ConfLayout::PLN, false, 0, Precision::FP32}}, {{ConfLayout::PLN, false, 0, Precision::FP32}});
public:
explicit GRNImpl(const std::shared_ptr<ngraph::Node>& op) {
try {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
errorPrefix = "GRN layer with name '" + op->get_friendly_name() + "'";
const auto grn = std::dynamic_pointer_cast<const ngraph::opset1::GRN>(op);
if (op->get_input_size() != 1 || op->get_output_size() != 1)
IE_THROW() << errorPrefix << " has incorrect number of input/output edges!";
bias = grn->get_bias();
addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32, false, 0}},
{{TensorDescCreatorTypes::ncsp, Precision::FP32, false, 0}});
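// 'bias' is the stabilizing term of GRN: y[c] = x[c] / sqrt(sum over channels of x^2 + bias),
// computed per spatial position in the execute() implementation (not shown in this hunk).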
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
}

View File

@ -7,12 +7,12 @@
#include <mkldnn_selective_build.h>
#include <ie_iextension.h>
#include <legacy/ie_layers.h>
#include <string>
#include <map>
#include <memory>
#include <algorithm>
#include <ngraph/node.hpp>
namespace InferenceEngine {
@ -43,7 +43,7 @@ public:
namespace Extensions {
namespace Cpu {
using ext_factory = std::function<InferenceEngine::ILayerImplFactory*(const InferenceEngine::CNNLayer*)>;
using ext_factory = std::function<InferenceEngine::ILayerImplFactory*(const std::shared_ptr<ngraph::Node>& op)>;
struct ExtensionsHolder {
std::map<std::string, ext_factory> list;
@ -60,11 +60,11 @@ public:
}
virtual StatusCode
getFactoryFor(ILayerImplFactory*& factory, const CNNLayer* cnnLayer, ResponseDesc* resp) noexcept {
getFactoryFor(ILayerImplFactory*& factory, const std::shared_ptr<ngraph::Node>& op, ResponseDesc* resp) noexcept {
using namespace MKLDNNPlugin;
factory = layersFactory.createNodeIfRegistered(MKLDNNPlugin, cnnLayer->type, cnnLayer);
factory = layersFactory.createNodeIfRegistered(MKLDNNPlugin, op->get_type_name(), op);
if (!factory) {
std::string errorMsg = std::string("Factory for ") + cnnLayer->type + " wasn't found!";
std::string errorMsg = std::string("Factory for ") + op->get_type_name() + " wasn't found!";
errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
return NOT_FOUND;
}
@ -85,7 +85,7 @@ public:
using LayersFactory = openvino::cc::Factory<
std::string,
InferenceEngine::ILayerImplFactory*(const InferenceEngine::CNNLayer*)>;
InferenceEngine::ILayerImplFactory*(const std::shared_ptr<ngraph::Node>& op)>;
LayersFactory layersFactory;

View File

@ -7,11 +7,7 @@
# define MKLDNN_EXTENSION_NODE(__prim, __type)
#endif
MKLDNN_EXTENSION_NODE(EmbeddingBagOffsetsSumImpl, EmbeddingBagOffsetsSum);
MKLDNN_EXTENSION_NODE(EmbeddingBagPackedSumImpl, EmbeddingBagPackedSum);
MKLDNN_EXTENSION_NODE(EmbeddingSegmentsSumImpl, EmbeddingSegmentsSum);
MKLDNN_EXTENSION_NODE(CTCLossImpl, CTCLoss);
MKLDNN_EXTENSION_NODE(PriorBoxImpl, PriorBox);
MKLDNN_EXTENSION_NODE(MathImpl, Abs);
MKLDNN_EXTENSION_NODE(MathImpl, Acos);
MKLDNN_EXTENSION_NODE(MathImpl, Acosh);
@ -38,44 +34,20 @@ MKLDNN_EXTENSION_NODE(ExperimentalDetectronTopKROIsImpl, ExperimentalDetectronTo
MKLDNN_EXTENSION_NODE(ExtractImagePatchesImpl, ExtractImagePatches);
MKLDNN_EXTENSION_NODE(ReverseSequenceImpl, ReverseSequence);
MKLDNN_EXTENSION_NODE(DetectionOutputImpl, DetectionOutput);
MKLDNN_EXTENSION_NODE(ArgMaxImpl, ArgMax);
MKLDNN_EXTENSION_NODE(UnsqueezeImpl, Unsqueeze);
MKLDNN_EXTENSION_NODE(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput);
MKLDNN_EXTENSION_NODE(RegionYoloImpl, RegionYolo);
MKLDNN_EXTENSION_NODE(LogSoftmaxImpl, LogSoftmax);
MKLDNN_EXTENSION_NODE(ReorgYoloImpl, ReorgYolo);
MKLDNN_EXTENSION_NODE(SqueezeImpl, Squeeze);
MKLDNN_EXTENSION_NODE(FillImpl, Fill);
MKLDNN_EXTENSION_NODE(UniqueImpl, Unique);
MKLDNN_EXTENSION_NODE(PSROIPoolingImpl, PSROIPooling);
MKLDNN_EXTENSION_NODE(OneHotImpl, OneHot);
MKLDNN_EXTENSION_NODE(BroadcastImpl, Broadcast);
MKLDNN_EXTENSION_NODE(ExperimentalSparseWeightedReduceImpl, ExperimentalSparseWeightedSum);
MKLDNN_EXTENSION_NODE(SparseToDenseImpl, SparseToDense);
MKLDNN_EXTENSION_NODE(ExperimentalDetectronROIFeatureExtractorImpl, ExperimentalDetectronROIFeatureExtractor);
MKLDNN_EXTENSION_NODE(ONNXCustomProposalImpl, ExperimentalDetectronGenerateProposalsSingleImage);
MKLDNN_EXTENSION_NODE(NonMaxSuppressionImpl, NonMaxSuppression);
MKLDNN_EXTENSION_NODE(ExperimentalDetectronGenerateProposalsSingleImageImpl, ExperimentalDetectronGenerateProposalsSingleImage);
MKLDNN_EXTENSION_NODE(NonMaxSuppressionImpl, NonMaxSuppressionIEInternal);
MKLDNN_EXTENSION_NODE(TopKImpl, TopK);
MKLDNN_EXTENSION_NODE(ShuffleChannelsImpl, ShuffleChannels);
MKLDNN_EXTENSION_NODE(PowerFileImpl, PowerFile);
MKLDNN_EXTENSION_NODE(BatchToSpaceImpl, BatchToSpace);
MKLDNN_EXTENSION_NODE(ExperimentalDetectronPriorGridGeneratorImpl, ExperimentalDetectronPriorGridGenerator);
MKLDNN_EXTENSION_NODE(SimplerNMSImpl, SimplerNMS);
MKLDNN_EXTENSION_NODE(GRNImpl, GRN);
MKLDNN_EXTENSION_NODE(SparseFillEmptyRowsImpl, SparseFillEmptyRows);
MKLDNN_EXTENSION_NODE(BucketizeImpl, Bucketize);
MKLDNN_EXTENSION_NODE(CTCGreedyDecoderImpl, CTCGreedyDecoder);
MKLDNN_EXTENSION_NODE(CTCGreedyDecoderSeqLenImpl, CTCGreedyDecoderSeqLen);
MKLDNN_EXTENSION_NODE(GatherImpl, Gather);
MKLDNN_EXTENSION_NODE(GatherElementsImpl, GatherElements);
MKLDNN_EXTENSION_NODE(GatherNDImpl, GatherND);
MKLDNN_EXTENSION_NODE(ProposalImpl, Proposal);
MKLDNN_EXTENSION_NODE(RangeImpl, Range);
MKLDNN_EXTENSION_NODE(SelectImpl, Select);
MKLDNN_EXTENSION_NODE(GatherTreeImpl, GatherTree);
MKLDNN_EXTENSION_NODE(PriorBoxClusteredImpl, PriorBoxClustered);
MKLDNN_EXTENSION_NODE(SpaceToBatchImpl, SpaceToBatch);
MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentMean);
MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSqrtN);
MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSum);
MKLDNN_EXTENSION_NODE(CumSumImpl, CumSum);

View File

@ -11,30 +11,51 @@
#include <vector>
#include <cassert>
#include "ie_parallel.hpp"
#include <ngraph/opsets/opset5.hpp>
using namespace MKLDNNPlugin;
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class LogSoftmaxImpl: public ExtLayerBase {
public:
explicit LogSoftmaxImpl(const CNNLayer* layer) {
bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (layer->insData.empty() || layer->outData.empty())
IE_THROW() << layer->name << " Incorrect number of input/output edges!";
const auto logSoftMax = std::dynamic_pointer_cast<const ngraph::opset5::LogSoftmax>(op);
if (!logSoftMax) {
errorMessage = "Only opset5 LogSoftmax operation is supported";
return false;
}
} catch (...) {
return false;
}
return true;
}
if (layer->insData.size() != 1)
IE_THROW() << layer->name << " Incorrect number of input edges!";
public:
explicit LogSoftmaxImpl(const std::shared_ptr<ngraph::Node>& op) {
try {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
SizeVector dims = layer->insData[0].lock()->getTensorDesc().getDims();
errorPrefix = "LogSoftmax layer with name '" + op->get_friendly_name() + "'";
const auto logSoftMax = std::dynamic_pointer_cast<const ngraph::opset5::LogSoftmax>(op);
if (op->get_input_size() != 1 || op->get_output_size() != 1)
IE_THROW() << errorPrefix << " has incorrect number of input/output edges!";
SizeVector dims = op->get_input_shape(0);
if (!dims.size())
dims = SizeVector(1, 1);
int axis = layer->GetParamAsInt("axis", -1);
int axis = logSoftMax->get_axis();
if (axis < 0)
axis += dims.size();
if (dims.size() < static_cast<size_t>((size_t)(1) + axis))
IE_THROW() << layer->name << " Incorrect input parameters dimensions and axis number!";
IE_THROW() << errorPrefix << " has incorrect input parameters dimensions and axis number!";
int j;
for (j = dims.size() - 1; j >= 0; j--) {
@ -48,7 +69,8 @@ public:
for (size_t i = (axis + 1); i < dims.size(); i++)
reduced_axis_stride *= dims[i];
addConfig(layer, { { ConfLayout::PLN, false, 0, Precision::FP32 } }, { { ConfLayout::PLN, false, 0, Precision::FP32 } });
addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32}},
{{TensorDescCreatorTypes::ncsp, Precision::FP32}});
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
}
@ -103,6 +125,8 @@ private:
size_t reduced_axis_stride = 1;
size_t axis_step = 1;
bool is_last_dim = false;
std::string errorPrefix;
};
REG_FACTORY_FOR(LogSoftmaxImpl, LogSoftmax);

View File

@ -8,87 +8,67 @@
#include <string>
#include <vector>
#include <cassert>
#include "ie_parallel.hpp"
#include "common/tensor_desc_creator.h"
#include "utils/general_utils.h"
#include <ngraph/ops.hpp>
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class MathImpl: public ExtLayerBase {
static float error_function(float x) {
const float clip_bound = 2.86f;
// Points clip_bound and -clip_bound are extremums for this polynom
// So in order to provide better accuracy comparing to std::erf we have to clip input range
if (x > clip_bound)
return 1;
if (x < -clip_bound)
return -1;
using MKLDNNPlugin::TensorDescCreatorTypes;
// A polynomial approximation of the error function
const float erfNumerator[4] = { 90.0260162353515625f, 2232.00537109375f,
7003.3251953125f, 55592.30078125f };
const float erfDenominator[5] = { 33.56171417236328125f, 521.35797119140625f,
4594.32373046875f, 22629.0f, 49267.39453125f };
float polynom = 9.60497379302978515625f;
float x2 = x * x;
for (float c : erfNumerator) {
polynom = polynom * x2 + c;
class MathImpl: public ExtLayerBase {
public:
bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (initializers.find(op->get_type_info()) == initializers.end()) {
errorMessage = "Unsupported Math layer type.";
return false;
}
if (MKLDNNPlugin::one_of(op->get_type_info(),
ngraph::op::v0::HardSigmoid::type_info,
ngraph::op::v0::Selu::type_info)) {
auto firstConst = ngraph::as_type_ptr<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
auto secondConst = ngraph::as_type_ptr<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(2));
if (!firstConst || !secondConst) {
errorMessage = "Constant expected as the second and third inputs.";
return false;
}
}
} catch (...) {
return false;
}
x *= polynom;
polynom = 1.0f;
for (float c : erfDenominator) {
polynom = polynom * x2 + c;
}
return x / polynom;
return true;
}
public:
explicit MathImpl(const CNNLayer* layer) {
explicit MathImpl(const std::shared_ptr<ngraph::Node>& op) :
alpha(0.f), beta(0.f), gamma(0.f) {
try {
if (layer->insData.empty() || layer->outData.empty())
IE_THROW() << layer->name << " Incorrect number of input/output edges!";
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
if (layer->insData.size() != 1)
IE_THROW() << layer->name << " Incorrect number of input edges!";
initializers[op->get_type_info()](op, *this);
if (layer->insData[0].lock()->getTensorDesc().getDims() != layer->outData[0]->getTensorDesc().getDims())
IE_THROW() << layer->name << " Incorrect number of input/output dimensions!";
alpha = layer->GetParamAsFloat("alpha", 0.0f);
beta = layer->GetParamAsFloat("beta", 0.0f);
gamma = layer->GetParamAsFloat("gamma", 0.0f);
std::string math_func = layer->type;
if (math_func == "Erf") mathFunction = Math::Erf;
else if (math_func == "Abs") mathFunction = Math::Abs;
else if (math_func == "Acos") mathFunction = Math::Acos;
else if (math_func == "Acosh") mathFunction = Math::Acosh;
else if (math_func == "Asin") mathFunction = Math::Asin;
else if (math_func == "Asinh") mathFunction = Math::Asinh;
else if (math_func == "Atan") mathFunction = Math::Atan;
else if (math_func == "Atanh") mathFunction = Math::Atanh;
else if (math_func == "Ceil") mathFunction = Math::Ceil;
else if (math_func == "Ceiling") mathFunction = Math::Ceil;
else if (math_func == "Cos") mathFunction = Math::Cos;
else if (math_func == "Cosh") mathFunction = Math::Cosh;
else if (math_func == "Floor") mathFunction = Math::Floor;
else if (math_func == "HardSigmoid") mathFunction = Math::HardSigmoid;
else if (math_func == "Log") mathFunction = Math::Log;
else if (math_func == "Neg") mathFunction = Math::Neg;
else if (math_func == "Reciprocal") mathFunction = Math::Reciprocal;
else if (math_func == "Selu") mathFunction = Math::Selu;
else if (math_func == "Sign") mathFunction = Math::Sign;
else if (math_func == "Sin") mathFunction = Math::Sin;
else if (math_func == "Sinh") mathFunction = Math::Sinh;
else if (math_func == "SoftPlus") mathFunction = Math::SoftPlus;
else if (math_func == "Softsign") mathFunction = Math::Softsign;
else if (math_func == "Tan") mathFunction = Math::Tan;
else
IE_THROW() << layer->name << " Incorrect Math layer type!";
addConfig(layer, {DataConfigurator(ConfLayout::PLN, false, 0, Precision::FP32)}, {DataConfigurator(ConfLayout::PLN, false, 0, Precision::FP32)});
if (MKLDNNPlugin::one_of(op->get_type_info(),
ngraph::op::v0::HardSigmoid::type_info,
ngraph::op::v0::Selu::type_info)) {
addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32},
{TensorDescCreatorTypes::ncsp, Precision::FP32},
{TensorDescCreatorTypes::ncsp, Precision::FP32}},
{{TensorDescCreatorTypes::ncsp, Precision::FP32}});
} else {
addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32}},
{{TensorDescCreatorTypes::ncsp, Precision::FP32}});
}
} catch (InferenceEngine::Exception &ex) {
errorMsg = ex.what();
throw;
}
}
@ -99,90 +79,85 @@ public:
float* dst_data = outputs[0]->cbuffer().as<float *>() +
outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
switch (mathFunction) {
case Math::Erf:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = error_function(src_data[i]);
});
break;
case Math::Abs:
switch (getAlgorithm()) {
case MKLDNNPlugin::MathAbs:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = (std::abs)(src_data[i]);
});
break;
case Math::Acos:
case MKLDNNPlugin::MathAcos:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = acosf(src_data[i]);
});
break;
case Math::Acosh:
case MKLDNNPlugin::MathAcosh:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = acoshf(src_data[i]);
});
break;
case Math::Asin:
case MKLDNNPlugin::MathAsin:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = asinf(src_data[i]);
});
break;
case Math::Asinh:
case MKLDNNPlugin::MathAsinh:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = asinhf(src_data[i]);
});
break;
case Math::Atan:
case MKLDNNPlugin::MathAtan:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = atanf(src_data[i]);
});
break;
case Math::Atanh:
case MKLDNNPlugin::MathAtanh:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = atanhf(src_data[i]);
});
break;
case Math::Ceil:
case MKLDNNPlugin::MathCeiling:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = ceilf(src_data[i]);
});
break;
case Math::Cos:
case MKLDNNPlugin::MathCos:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = cosf(src_data[i]);
});
break;
case Math::Cosh:
case MKLDNNPlugin::MathCosh:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = coshf(src_data[i]);
});
break;
case Math::Floor:
case MKLDNNPlugin::MathFloor:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = floorf(src_data[i]);
});
break;
case Math::HardSigmoid:
case MKLDNNPlugin::MathHardSigmoid:
alpha = (alpha == 0.0f) ? 0.2f : alpha;
beta = (beta == 0.0f) ? 0.5f : beta;
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = (std::max)(0.f, (std::min)(1.f, alpha * src_data[i] + beta));
});
break;
case Math::Log:
case MKLDNNPlugin::MathLog:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = logf(src_data[i]);
});
break;
case Math::Neg:
case MKLDNNPlugin::MathNegative:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = -src_data[i];
});
break;
case Math::Reciprocal:
case MKLDNNPlugin::MathReciprocal:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = 1.0f / src_data[i];
});
break;
case Math::Selu:
case MKLDNNPlugin::MathSelu:
alpha = (alpha == 0.0f) ? 1.67326f : alpha;
gamma = (gamma == 0.0f) ? 1.0507f : gamma;
parallel_for(dataSize, [&](size_t i) {
@ -190,7 +165,7 @@ public:
dst_data[i] = (x > 0.0f) ? (gamma * x) : (gamma * alpha * (exp(x) - 1.0f));
});
break;
case Math::Sign:
case MKLDNNPlugin::MathSign:
parallel_for(dataSize, [&](size_t i) {
if (src_data[i] > 0.0f)
dst_data[i] = 1.0f;
@ -200,28 +175,28 @@ public:
dst_data[i] = 0.0f;
});
break;
case Math::Sin:
case MKLDNNPlugin::MathSin:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = sinf(src_data[i]);
});
break;
case Math::Sinh:
case MKLDNNPlugin::MathSinh:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = sinhf(src_data[i]);
});
break;
case Math::SoftPlus:
case MKLDNNPlugin::MathSoftPlus:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = logf(expf(src_data[i]) + 1);
});
break;
case Math::Softsign:
case MKLDNNPlugin::MathSoftsign:
parallel_for(dataSize, [&](size_t i) {
float x = src_data[i];
dst_data[i] = x / (1.f + (std::abs)(x));
});
break;
case Math::Tan:
case MKLDNNPlugin::MathTan:
parallel_for(dataSize, [&](size_t i) {
dst_data[i] = tanf(src_data[i]);
});
@ -237,38 +212,80 @@ public:
}
private:
enum class Math {
Abs,
Acos,
Acosh,
Asin,
Asinh,
Atan,
Atanh,
Ceil,
Cos,
Cosh,
Erf,
Floor,
HardSigmoid,
Log,
Neg,
Reciprocal,
Selu,
Sign,
Sin,
Sinh,
SoftPlus,
Softsign,
Tan
};
static std::map<const ngraph::DiscreteTypeInfo, std::function<void(const std::shared_ptr<ngraph::Node>&, MathImpl& node)>> initializers;
Math mathFunction = Math::Erf;
float alpha = 0.0f;
float beta = 0.0f;
float gamma = 0.0f;
};
std::map<const ngraph::DiscreteTypeInfo, std::function<void(const std::shared_ptr<ngraph::Node>&, MathImpl& node)>> MathImpl::initializers = {
{ngraph::op::v0::Abs::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathAbs;
}},
{ngraph::op::v0::Acos::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathAcos;
}},
{ngraph::op::v3::Acosh::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathAcosh;
}},
{ngraph::op::v0::Asin::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathAsin;
}},
{ngraph::op::v3::Asinh::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathAsinh;
}},
{ngraph::op::v0::Atan::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathAtan;
}},
{ngraph::op::v0::Ceiling::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathCeiling;
}},
{ngraph::op::v0::Cos::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathCos;
}},
{ngraph::op::v0::Cosh::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathCosh;
}},
{ngraph::op::v0::Floor::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathFloor;
}},
{ngraph::op::v0::HardSigmoid::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathHardSigmoid;
node.alpha = ngraph::as_type_ptr<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1))->cast_vector<float>()[0];
node.beta = ngraph::as_type_ptr<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(2))->cast_vector<float>()[0];
}},
{ngraph::op::v0::Log::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathLog;
}},
{ngraph::op::v0::Negative::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathNegative;
}},
{ngraph::op::v0::Selu::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathSelu;
node.alpha = ngraph::as_type_ptr<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1))->cast_vector<float>()[0];
node.gamma = ngraph::as_type_ptr<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(2))->cast_vector<float>()[0];
}},
{ngraph::op::v0::Sign::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathSign;
}},
{ngraph::op::v0::Sin::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathSin;
}},
{ngraph::op::v0::Sinh::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathSinh;
}},
{ngraph::op::v4::SoftPlus::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathSoftPlus;
}},
{ngraph::op::v0::Tan::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathTan;
}},
{ngraph::op::v3::Atanh::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
node.algorithm = MKLDNNPlugin::MathAtanh;
}}
};
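// Supporting another unary op only needs one more entry of the same shape (hypothetical sketch,
// not part of this change):
//     {ngraph::op::vN::SomeOp::type_info, [](const std::shared_ptr<ngraph::Node>& op, MathImpl& node) {
//         node.algorithm = MKLDNNPlugin::MathSomeOp;
//     }},
// plus the matching case in execute() and a REG_FACTORY_FOR registration below.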
REG_FACTORY_FOR(MathImpl, Abs);
REG_FACTORY_FOR(MathImpl, Acos);
REG_FACTORY_FOR(MathImpl, Acosh);
@ -280,7 +297,6 @@ REG_FACTORY_FOR(MathImpl, Ceil);
REG_FACTORY_FOR(MathImpl, Ceiling);
REG_FACTORY_FOR(MathImpl, Cos);
REG_FACTORY_FOR(MathImpl, Cosh);
REG_FACTORY_FOR(MathImpl, Erf);
REG_FACTORY_FOR(MathImpl, Floor);
REG_FACTORY_FOR(MathImpl, HardSigmoid);
REG_FACTORY_FOR(MathImpl, Log);

View File

@ -0,0 +1,237 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <cmath>
#include <vector>
#include <string>
#include <mkldnn_types.h>
#include "ie_parallel.hpp"
#include "utils/bfloat16.hpp"
#include <mkldnn_selective_build.h>
#include "mkldnn_batch_to_space_node.h"
#include <nodes/common/tensor_desc_creator.h>
#include <ngraph/opsets/opset2.hpp>
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
bool MKLDNNBatchToSpaceNode::isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto batchToSpace = std::dynamic_pointer_cast<const ngraph::opset2::BatchToSpace>(op);
if (!batchToSpace) {
errorMessage = "Only opset2 BatchToSpace operation is supported";
return false;
}
if (std::dynamic_pointer_cast<const ngraph::opset1::Constant>(op->get_input_node_shared_ptr(1)) == nullptr ||
std::dynamic_pointer_cast<const ngraph::opset1::Constant>(op->get_input_node_shared_ptr(2)) == nullptr ||
std::dynamic_pointer_cast<const ngraph::opset1::Constant>(op->get_input_node_shared_ptr(3)) == nullptr) {
errorMessage = "Only constant 'block_shape', 'crops_begin', 'crops_end' are supported";
return false;
}
} catch (...) {
return false;
}
return true;
}
MKLDNNBatchToSpaceNode::MKLDNNBatchToSpaceNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
errorPrefix = "BatchToSpace layer with name '" + op->get_friendly_name() + "'";
if (op->get_input_size() != 4 || op->get_output_size() != 1)
IE_THROW() << errorPrefix << " has incorrect number of input or output edges!";
inDims = op->get_input_shape(0);
outDims = op->get_output_shape(0);
if (inDims.size() < 4 || inDims.size() > 5)
IE_THROW() << errorPrefix << " has unsupported 'data' input rank: " << inDims.size();
if (inDims.size() != outDims.size())
IE_THROW() << errorPrefix << " has incorrect number of input/output dimensions";
blockShapeIn = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(op->get_input_node_shared_ptr(1))->cast_vector<size_t>();
cropsBeginIn = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(op->get_input_node_shared_ptr(2))->cast_vector<size_t>();
}
void MKLDNNBatchToSpaceNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
const auto precision = getOriginalInputPrecisionAtPort(0);
const std::set<size_t> supported_precision_sizes = {1, 2, 4, 8};
if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end())
IE_THROW() << errorPrefix << " has unsupported precision: " << precision.name();
addSupportedPrimDesc({{TensorDescCreatorTypes::nspc, precision},
{TensorDescCreatorTypes::ncsp},
{TensorDescCreatorTypes::ncsp},
{TensorDescCreatorTypes::ncsp}},
{{TensorDescCreatorTypes::nspc, precision}},
impl_desc_type::ref_any);
addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, precision},
{TensorDescCreatorTypes::ncsp},
{TensorDescCreatorTypes::ncsp},
{TensorDescCreatorTypes::ncsp}},
{{TensorDescCreatorTypes::ncsp, precision}},
impl_desc_type::ref_any);
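// Blocked layouts are registered only when the channel count is a multiple of the block size.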
if (inDims[1] % 8 == 0) {
addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp8c, precision},
{TensorDescCreatorTypes::ncsp},
{TensorDescCreatorTypes::ncsp},
{TensorDescCreatorTypes::ncsp}},
{{TensorDescCreatorTypes::nCsp8c, precision}},
impl_desc_type::ref_any);
}
if (inDims[1] % 16 == 0) {
addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp16c, precision},
{TensorDescCreatorTypes::ncsp},
{TensorDescCreatorTypes::ncsp},
{TensorDescCreatorTypes::ncsp}},
{{TensorDescCreatorTypes::nCsp16c, precision}},
impl_desc_type::ref_any);
}
}
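// Normalizes a 4D/5D shape to a canonical 5D form; for 4D inputs the depth dimension is padded with 1.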
static std::vector<size_t> getShape5D(const SizeVector &shape) {
std::vector<size_t> shape5D(5, 1);
for (int i = 0; i < 2; i++) {
shape5D[i] = shape[i];
shape5D[4 - i] = shape[shape.size() - 1 - i];
}
shape5D[2] = shape.size() == 5 ? shape[2] : shape5D[2];
return shape5D;
}
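// Reference BatchToSpace kernel: each input batch index is decomposed into the output batch index and
// per-dimension block offsets (crops_begin already subtracted), and elements are copied to the matching
// output positions; the work is split across threads over the batch and channel-block dimensions.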
template<typename T>
void MKLDNNBatchToSpaceNode::batchToSpaceKernel() {
const auto *srcData = reinterpret_cast<const T *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
auto *dstData = reinterpret_cast<T *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
const auto layout = getParentEdgeAt(0)->getDesc().getLayout();
const bool blocked = layout != NCHW && layout != NCDHW && layout != NHWC && layout != NDHWC;
const auto dimsSize = inDims.size();
auto inShape5D = getShape5D(inDims);
auto outShape5D = getShape5D(outDims);
auto blockShape = getShape5D(blockShapeIn);
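// For channels-last layouts the channel dimension is moved to the end of the 5D shapes
// so that the indexing below follows the nspc order.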
if (layout == NHWC || layout == NDHWC) {
inShape5D.push_back(inShape5D[1]);
inShape5D.erase(inShape5D.begin() + 1);
outShape5D.push_back(outShape5D[1]);
outShape5D.erase(outShape5D.begin() + 1);
blockShape.push_back(blockShape[1]);
blockShape.erase(blockShape.begin() + 1);
}
const size_t blockSize = blocked ? getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims().back() : 1lu;
const size_t blockCountInput = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1];
const size_t blockCountOutput = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1];
const auto blockRemainder = inShape5D[1] % blockSize;
const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder;
const size_t inSpatialStep = inShape5D[2] * inShape5D[3] * inShape5D[4];
const size_t inBatchStep = (blocked ? blockSize * blockCountInput : inShape5D[1]) * inSpatialStep;
const size_t outSpatialStep = outShape5D[2] * outShape5D[3] * outShape5D[4];
const size_t outBatchStep = (blocked ? blockSize * blockCountOutput : outShape5D[1]) * outSpatialStep;
size_t channels = (inShape5D[1] / blockSize);
channels = channels == 0 ? 1 : channels;
const size_t workAmount = inShape5D[0] * channels;
parallel_nt(0, [&](const int ithr, const int nthr) {
size_t start(0lu), end(0lu);
splitter(workAmount, nthr, ithr, start, end);
std::vector<size_t> indxStart(2, 0);
std::vector<size_t> indxEnd(2, 0);
parallel_it_init(start, indxStart[0], inShape5D[0], indxStart[1], channels);
parallel_it_init((end - 1), indxEnd[0], inShape5D[0], indxEnd[1], channels);
std::vector<int64_t> oAdd(5, 1);
std::vector<size_t> begin(5, 0);
std::vector<size_t> finish(5, 1);
for (size_t i0 = indxStart[0]; i0 < indxEnd[0] + 1; ++i0) {
int64_t bIdx = i0 / outShape5D[0];
const size_t srcIdx0 = i0 * inBatchStep;
const size_t dstIdx0 = (i0 - (bIdx * outShape5D[0])) * outBatchStep;
oAdd[4] = bIdx % blockShapeIn[dimsSize - 1] - cropsBeginIn[dimsSize - 1];
bIdx /= blockShapeIn[dimsSize - 1];
oAdd[3] = bIdx % blockShapeIn[dimsSize - 2] - cropsBeginIn[dimsSize - 2];
bIdx /= blockShapeIn[dimsSize - 2];
oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - cropsBeginIn[2] : 0lu;
bIdx = dimsSize == 5 ? bIdx / blockShapeIn[2] : bIdx;
oAdd[1] = bIdx % blockShapeIn[1] - cropsBeginIn[1];
if (layout == NHWC || layout == NDHWC) {
oAdd.push_back(oAdd[1]);
oAdd.erase(oAdd.begin() + 1);
}
begin[1] = (blockShape[1] - 1 - oAdd[1]) / blockShape[1] / blockSize;
finish[1] = (outShape5D[1] - 1 - oAdd[1]) / blockShape[1] / blockSize;
begin[2] = (blockShape[2] - 1 - oAdd[2]) / blockShape[2];
finish[2] = (outShape5D[2] - 1 - oAdd[2]) / blockShape[2];
begin[3] = (blockShape[3] - 1 - oAdd[3]) / blockShape[3];
finish[3] = (outShape5D[3] - 1 - oAdd[3]) / blockShape[3];
begin[4] = (blockShape[4] - 1 - oAdd[4]) / blockShape[4];
finish[4] = (outShape5D[4] - 1 - oAdd[4]) / blockShape[4];
const int64_t addTmpOC = blocked ? 0lu : oAdd[1];
const int64_t addTmpOc = blocked ? oAdd[1] : 0lu;
indxStart[1] = begin[1] > indxStart[1] ? begin[1] : indxStart[1];
const size_t lastI1 = i0 == indxEnd[0] ? (indxEnd[1] > finish[1] ? finish[1] : indxEnd[1]) : finish[1];
for (; indxStart[1] < lastI1 + 1; ++indxStart[1]) {
const size_t block = indxStart[1] == finish[1] ? lastBlock : blockSize;
const int64_t tmpOC = indxStart[1] * blockShape[1] + addTmpOC;
const size_t srcIdx1 = srcIdx0 + indxStart[1] * inSpatialStep * blockSize;
const size_t dstIdx1 = dstIdx0 + tmpOC * outSpatialStep * blockSize;
const size_t itEnd = blocked ? ((block - 1) * blockShape[1] + oAdd[1]) / blockSize : 0lu;
for (size_t i2 = begin[2]; i2 < finish[2] + 1; ++i2) {
const int64_t tmpOd = i2 * blockShape[2] + oAdd[2];
const size_t srcIdx2 = srcIdx1 + i2 * inShape5D[3] * inShape5D[4] * blockSize;
const size_t dstIdx2 = dstIdx1 + tmpOd * outShape5D[3] * outShape5D[4] * blockSize;
for (size_t i3 = begin[3]; i3 < finish[3] + 1; ++i3) {
const int64_t tmpOh = i3 * blockShape[3] + oAdd[3];
const size_t srcIdx3 = srcIdx2 + i3 * inShape5D[4] * blockSize;
const size_t dstIdx3 = dstIdx2 + tmpOh * outShape5D[4] * blockSize;
for (size_t i4 = begin[4]; i4 < finish[4] + 1; ++i4) {
const int64_t tmpOw = i4 * blockShape[4] + oAdd[4];
const size_t srcIdx4 = srcIdx3 + i4 * blockSize;
const size_t dstIdx4 = dstIdx3 + tmpOw * blockSize;
for (size_t it = 0; it < itEnd + 1; ++it) {
const size_t i5Begin = it == 0 ? 0 : (it * blockSize - 1 - oAdd[1]) / blockShape[1] + 1;
const size_t i5End = it == itEnd ? (block - 1) : ((it + 1) * blockSize - 1 - oAdd[1]) / blockShape[1];
for (size_t i5 = i5Begin; i5 < i5End + 1; ++i5) {
const int64_t tmpOc = i5 * blockShape[1] + addTmpOc;
const size_t srcIdx5 = srcIdx4 + i5;
const size_t dstIdx5 =
dstIdx4 + it * outSpatialStep * blockSize + (tmpOc - it * blockSize);
dstData[dstIdx5] = srcData[srcIdx5];
}
}
}
}
}
}
indxStart[1] = 0lu;
}
});
}
void MKLDNNBatchToSpaceNode::execute(mkldnn::stream strm) {
switch (getParentEdgeAt(0)->getDesc().getPrecision().size()) {
case 1: batchToSpaceKernel<PrecisionTrait<Precision::U8>::value_type>(); break;
case 2: batchToSpaceKernel<PrecisionTrait<Precision::U16>::value_type>(); break;
case 4: batchToSpaceKernel<PrecisionTrait<Precision::I32>::value_type>(); break;
default:
IE_THROW() << "BatchToSpace layer does not support precision '" + std::string(getParentEdgeAt(0)->getDesc().getPrecision().name()) + "'";
}
}
bool MKLDNNBatchToSpaceNode::created() const {
return getType() == BatchToSpace;
}
REG_MKLDNN_PRIM_FOR(MKLDNNBatchToSpaceNode, BatchToSpace)

View File

@ -0,0 +1,40 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ie_common.h>
#include <mkldnn_node.h>
#include <string>
#include <memory>
#include <vector>
namespace MKLDNNPlugin {
class MKLDNNBatchToSpaceNode : public MKLDNNNode {
public:
MKLDNNBatchToSpaceNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
~MKLDNNBatchToSpaceNode() override = default;
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override {};
void execute(mkldnn::stream strm) override;
bool created() const override;
static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept;
private:
InferenceEngine::SizeVector inDims;
InferenceEngine::SizeVector outDims;
std::vector<size_t> blockShapeIn;
std::vector<size_t> cropsBeginIn;
std::string errorPrefix;
template<typename T>
void batchToSpaceKernel();
};
} // namespace MKLDNNPlugin

View File

@ -1,281 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "mkldnn_batchnorm_node.h"
#include <mkldnn_extension_utils.h>
#include "common/cpu_memcpy.h"
using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
MKLDNNBatchNormalizationNode::MKLDNNBatchNormalizationNode(const InferenceEngine::CNNLayerPtr& layer,
const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(layer, eng, cache) {
internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
return GetVarianceDesc(primitive_desc_it);
});
internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
return GetMeanDesc(primitive_desc_it);
});
internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
if (!fusedWithScale())
return MKLDNNMemoryDesc();
return GetScaleShiftWeightsDesc(primitive_desc_it);
});
}
void MKLDNNBatchNormalizationNode::getSupportedDescriptors() {
if (!descs.empty())
return;
auto * bnLayer = dynamic_cast<BatchNormalizationLayer*>(getCnnLayer().get());
if (bnLayer == nullptr)
IE_THROW() << "Cannot convert batch normalization layer.";
if (bnLayer->_weights == nullptr || bnLayer->_biases == nullptr) {
IE_THROW() << "Weights/biases are empty for layer: " << bnLayer->name
<< " used in MKLDNN node: " << getName() << "\n"
<< "Use the second argumemt of InferenceEngine::Core::ReadNetwork"
<< " to load them from .bin part of the IR";
}
if (getParentEdges().size() != 1)
IE_THROW() << "Incorrect number of input edges for layer " << getName();
if (!getChildEdges().size())
IE_THROW() << "Incorrect number of output edges for layer " << getName();
eps = bnLayer->epsilon;
size_t variancesSize = MKLDNNDims(bnLayer->_weights->getTensorDesc().getDims()).size();
size_t meansSize = MKLDNNDims(bnLayer->_biases->getTensorDesc().getDims()).size();
if (variancesSize != meansSize && variancesSize != 1)
IE_THROW() << "Incorrect weights and biases sizes!";
internalBlobs.push_back(createInternalBlob(bnLayer->_weights->getTensorDesc().getDims(), true));
internalBlobs.push_back(createInternalBlob(bnLayer->_biases->getTensorDesc().getDims(), false));
auto parentOutDims = getParentEdgeAt(0)->getDims();
if (fusedWith.size() > 1)
IE_THROW() << "BatchNorm fusion is possible with only one layer!";
for (const auto &node : fusedWith) {
auto * scshLayer = dynamic_cast<ScaleShiftLayer*>(node->getCnnLayer().get());
if (scshLayer == nullptr)
IE_THROW() << "Cannot cast to the ScaleShift layer to fuse with BatchNorm.";
size_t C = static_cast<size_t>(getChildEdgeAt(0)->getDims()[1]);
SizeVector mkldnn_weights = {2, C};
TensorDesc desc(scshLayer->_weights->getTensorDesc().getPrecision(), mkldnn_weights, InferenceEngine::NC);
InferenceEngine::TBlob<float>::Ptr internalBlob = InferenceEngine::make_shared_blob<float>(desc);
internalBlob->allocate();
float * data = internalBlob->buffer();
if (data == nullptr)
IE_THROW() << "Cannot get memory!";
InferenceEngine::Blob::Ptr blb = scshLayer->_weights;
if (blb == nullptr)
IE_THROW() << "Cannot get weights blob for node " << getName() << ".";
size_t weightsByteSize = blb->byteSize();
cpu_memcpy_s(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize);
data += blb->size();
blb = scshLayer->_biases;
if (blb == nullptr) {
memset(data, 0, weightsByteSize);
} else {
if (weightsByteSize != blb->byteSize())
IE_THROW() << "ScaleShift has incorrect weights!";
cpu_memcpy_s(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize);
}
internalBlobs.push_back(internalBlob);
}
InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
if (precision != InferenceEngine::Precision::FP32)
precision = InferenceEngine::Precision::FP32;
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
for (auto format : getAvailableFormatsForDims(parentOutDims)) {
MKLDNNMemoryDesc in_candidate(parentOutDims, inputDataType, format);
createDescriptor({in_candidate}, {});
}
}
static MKLDNNMemoryDesc get_bn_mdesc_by_index(const mkldnn::primitive_desc_iterator &primitive_desc, int idx) {
mkldnn_batch_normalization_desc_t *p;
error::wrap_c_api(mkldnn_primitive_desc_query(
primitive_desc.get(), mkldnn::convert_to_c(mkldnn::query::batch_normalization_d), 0, &p),
"could not get a batch-normalization descriptor");
auto bndesc =
(p->flags & mkldnn::convert_to_c(mkldnn::normalization_flags::use_global_stats)) ?
primitive_desc.src_desc(idx) : primitive_desc.dst_desc(idx);
return MKLDNNMemoryDesc {bndesc};
}
MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetVarianceDesc(const mkldnn::primitive_desc &primitive_desc) const {
// TODO: rewrite using stat_desc
return get_bn_mdesc_by_index(primitive_desc, 2);
}
MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetMeanDesc(const mkldnn::primitive_desc &primitive_desc) const {
return get_bn_mdesc_by_index(primitive_desc, 1);
}
MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetScaleShiftWeightsDesc(const mkldnn::primitive_desc &primitive_desc) const {
return MKLDNNMemoryDesc(primitive_desc.weights_desc(0));
}
bool MKLDNNBatchNormalizationNode::created() const {
return getType() == BatchNormalization;
}
void MKLDNNBatchNormalizationNode::createPrimitive() {
if (prim)
return;
auto prim_desc = createPrimitiveDescriptor<batch_normalization_forward::primitive_desc,
batch_normalization_forward::desc>();
prim.reset(new batch_normalization_forward(prim_desc));
auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
const auto &mean = internalBlobMemory[1]->GetPrimitive();
const auto &var = internalBlobMemory[0]->GetPrimitive();
if (convert_to_c(flag) & dnnl_use_scaleshift) {
const auto &sclshft = internalBlobMemory[2]->GetPrimitive();
primArgs = {{DNNL_ARG_SRC, src},
{DNNL_ARG_MEAN, mean},
{DNNL_ARG_VARIANCE, var},
{DNNL_ARG_SCALE_SHIFT, sclshft},
{DNNL_ARG_DST, dst}};
} else {
primArgs = {{DNNL_ARG_SRC, src},
{DNNL_ARG_MEAN, mean},
{DNNL_ARG_VARIANCE, var},
{DNNL_ARG_DST, dst}};
}
}
void MKLDNNBatchNormalizationNode::createDescriptor(const std::vector<InferenceEngine::TensorDesc> &inputDesc,
const std::vector<InferenceEngine::TensorDesc> &outputDesc) {
MKLDNNMemoryDesc inDesc(inputDesc[0]);
if (inDesc.getDims().ndims() == 2) {
// Make it 4D
MKLDNNDims dims = inDesc.getDims();
dims.push_back(1); // H
dims.push_back(1); // W
auto format = memory::format_tag::nchw;
inDesc = MKLDNNMemoryDesc(dims, inDesc.getDataType(), format);
}
flag = normalization_flags::use_global_stats;
if (fusedWithScale())
flag |= normalization_flags::use_scale_shift;
MKLDNNDescriptor desc(std::shared_ptr<batch_normalization_forward::desc>(
new mkldnn::batch_normalization_forward::desc(prop_kind::forward_scoring, inDesc, eps,
flag)));
descs.push_back(desc);
}
void MKLDNNBatchNormalizationNode::initOptimalPrimitiveDescriptor() {
auto selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
IE_THROW() << "Preferable primitive descriptor is not set.";
auto config = selected_pd->getConfig();
if (isInitConfig(config))
return;
if (config.inConfs.size() != 1 || config.outConfs.size() != 1 || (!isUninitTensorDesc(config.inConfs[0].desc) &&
!isUninitTensorDesc(config.outConfs[0].desc) && config.inConfs[0].desc != config.outConfs[0].desc))
IE_THROW() << "Layer " << getName() << " has incorrect selected config!";
if (!isUninitTensorDesc(config.inConfs[0].desc)) {
config.outConfs[0].desc = config.inConfs[0].desc;
} else if (!isUninitTensorDesc(config.outConfs[0].desc)) {
config.inConfs[0].desc = config.outConfs[0].desc;
} else {
config.outConfs[0].desc = config.inConfs[0].desc = getConfiguredInputDesc(config, 0);
}
initDescriptor(config);
}
void MKLDNNBatchNormalizationNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
// BN primitive doesn't support strides
for (auto& desc : descs) {
primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine());
while (static_cast<bool>(itpd)) {
InferenceEngine::LayerConfig config;
config.dynBatchSupport = true;
for (size_t i = 0; i < desc.inputNumbers(); i++) {
InferenceEngine::DataConfig dataConfig;
dataConfig.inPlace = -1;
dataConfig.constant = false;
dataConfig.desc = getSrcMemDesc(itpd, i);
config.inConfs.push_back(dataConfig);
}
for (size_t i = 0; i < desc.outputNumbers(); i++) {
InferenceEngine::DataConfig dataConfig;
dataConfig.inPlace = canBeInPlace() ? 0 : -1;
dataConfig.constant = false;
dataConfig.desc = getDstMemDesc(itpd, i);
config.outConfs.push_back(dataConfig);
}
impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str());
supportedPrimitiveDescriptors.emplace_back(config, impl_type);
if (!itpd.next_impl())
break;
}
}
}
MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it,
size_t idx) {
TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx));
if (getParentEdgeAt(0)->getDims().ndims() == 2 && desc.getLayout() == InferenceEngine::Layout::NCHW) {
desc.reshape(getParentEdgeAt(idx)->getDims().ToSizeVector(), InferenceEngine::Layout::NC);
return MKLDNNMemoryDesc(desc);
}
if (desc.getLayout() == InferenceEngine::Layout::ANY)
return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
getParentEdgeAt(idx)->getDims().ToSizeVector(),
desc.getLayout()));
else
return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
getParentEdgeAt(idx)->getDims().ToSizeVector(),
desc.getBlockingDesc()));
}
MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it,
size_t idx) {
TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx));
if (getParentEdgeAt(0)->getDims().ndims() == 2 && desc.getLayout() == InferenceEngine::Layout::NCHW) {
desc.reshape(getParentEdgeAt(idx)->getDims().ToSizeVector(), InferenceEngine::Layout::NC);
return MKLDNNMemoryDesc(desc);
}
if (desc.getLayout() == InferenceEngine::Layout::ANY)
return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
getChildEdgeAt(idx)->getDims().ToSizeVector(),
desc.getLayout()));
else
return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
getChildEdgeAt(idx)->getDims().ToSizeVector(),
desc.getBlockingDesc()));
}
REG_MKLDNN_PRIM_FOR(MKLDNNBatchNormalizationNode, BatchNormalization);

View File

@ -1,44 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ie_common.h>
#include <mkldnn_node.h>
#include <memory>
#include <string>
#include <vector>
namespace MKLDNNPlugin {
class MKLDNNBatchNormalizationNode : public MKLDNNNode {
public:
MKLDNNBatchNormalizationNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache);
~MKLDNNBatchNormalizationNode() override = default;
void initSupportedPrimitiveDescriptors() override;
void initOptimalPrimitiveDescriptor() override;
void getSupportedDescriptors() override;
void createDescriptor(const std::vector<InferenceEngine::TensorDesc>& inputDesc,
const std::vector<InferenceEngine::TensorDesc>& outputDesc) override;
void createPrimitive() override;
bool created() const override;
bool fusedWithScale() const {return fusedWith.size() == 1 && fusedWith[0]->getType() == Eltwise
&& fusedWith[0]->getCnnLayer()->type == "ScaleShift";}
MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
private:
mkldnn::normalization_flags flag = mkldnn::normalization_flags::none;
float eps = 0.0f;
MKLDNNMemoryDesc GetVarianceDesc(const mkldnn::primitive_desc& primitive_desc) const;
MKLDNNMemoryDesc GetMeanDesc(const mkldnn::primitive_desc& primitive_desc) const;
MKLDNNMemoryDesc GetScaleShiftWeightsDesc(const mkldnn::primitive_desc& primitive_desc) const;
};
} // namespace MKLDNNPlugin

View File

@ -6,20 +6,19 @@
#include "mkldnn_reorder_node.h"
#include "mkldnn_input_node.h"
#include "mkldnn_eltwise_node.h"
#include "mkldnn_quantize_node.h"
#include "mkldnn_fake_quantize_node.h"
#include "mkldnn_conv_node.h"
#include <legacy/ie_layers.h>
#include <string>
#include <vector>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include <legacy/ie_layers_internal.hpp>
#include "ie_parallel.hpp"
#include "cpu/x64/jit_generator.hpp"
#include "cpu/x64/jit_uni_eltwise_injector.hpp"
#include "cpu/x64/jit_uni_depthwise_injector.hpp"
#include "cpu/x64/cpu_isa_traits.hpp"
#include "utils/general_utils.h"
#include <ngraph/opsets/opset1.hpp>
// WA for xbyak.h
#ifdef _WIN32
@ -873,17 +872,52 @@ private:
}
};
MKLDNNBinaryConvolutionNode::MKLDNNBinaryConvolutionNode(const InferenceEngine::CNNLayerPtr& layer,
bool MKLDNNBinaryConvolutionNode::isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto binConv = std::dynamic_pointer_cast<const ngraph::opset1::BinaryConvolution>(op);
if (!binConv) {
errorMessage = "Only opset1 BinaryConvolution operation is supported";
return false;
}
if (binConv->get_mode() != ngraph::op::v1::BinaryConvolution::BinaryConvolutionMode::XNOR_POPCOUNT) {
errorMessage = "Doesn't support mode: " + ngraph::as_string(binConv->get_mode());
return false;
}
} catch (...) {
return false;
}
return true;
}
MKLDNNBinaryConvolutionNode::MKLDNNBinaryConvolutionNode(const std::shared_ptr<ngraph::Node>& op,
const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(layer, eng, cache) {
if (mayiuse(x64::avx512_common)) {
implType = impl_desc_type::jit_avx512;
} else if (mayiuse(x64::avx2)) {
implType = impl_desc_type::jit_avx2;
} else if (mayiuse(x64::sse41)) {
implType = impl_desc_type::jit_sse42;
: MKLDNNNode(op, eng, cache) {
std::string errorMessage;
if (isSupportedOperation(op, errorMessage)) {
errorPrefix = "BinaryConvolution node with name '" + getName() + "' ";
const auto binConv = std::dynamic_pointer_cast<const ngraph::opset1::BinaryConvolution>(op);
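// Read convolution parameters from the ngraph op; dilations are stored in the oneDNN convention (value - 1).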
pad_value = binConv->get_pad_value();
for (int i = 0; i < binConv->get_strides().size(); i++) {
stride.push_back(static_cast<ptrdiff_t>(binConv->get_strides()[i]));
}
for (int i = 0; i < binConv->get_dilations().size(); i++) {
dilation.push_back(static_cast<ptrdiff_t>(binConv->get_dilations()[i]) - 1);
}
paddingL = binConv->get_pads_begin();
paddingR = binConv->get_pads_end();
if (mayiuse(x64::avx512_common)) {
implType = impl_desc_type::jit_avx512;
} else if (mayiuse(x64::avx2)) {
implType = impl_desc_type::jit_avx2;
} else if (mayiuse(x64::sse41)) {
implType = impl_desc_type::jit_sse42;
} else {
implType = impl_desc_type::ref;
}
} else {
implType = impl_desc_type::ref;
IE_THROW(NotImplemented) << errorMessage;
}
}
@ -891,28 +925,17 @@ void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() {
if (!descs.empty())
return;
auto* binConvLayer = dynamic_cast<BinaryConvolutionLayer*>(getCnnLayer().get());
if (binConvLayer == nullptr)
IE_THROW() << "Cannot convert convolution layer.";
std::string errorPrefix = "BinaryConvolution layer with name '" + getName() + "' ";
withBinarization = isFusedWith(Quantize);
withBinarization = isFusedWith(FakeQuantize);
withSum = false;
int expectedInputEdgesNum = 2;
for (int i = 0; i < fusedWith.size(); i++) {
auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
if (eltwiseNode && eltwiseNode->isSum()) {
if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) {
withSum = true;
expectedInputEdgesNum++;
}
}
group = binConvLayer->_group;
if (group != 1) {
IE_THROW() << errorPrefix << "doesn't support parameter group != 1";
}
if (getParentEdges().size() != expectedInputEdgesNum)
IE_THROW() << errorPrefix << "has incorrect number of input edges";
@ -930,21 +953,6 @@ void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() {
if (getChildEdgeAt(0)->getDims().ndims() != 4) {
IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims();
}
if ((getParentEdgeAt(0)->getDims().ndims() < 4) || (getParentEdgeAt(0)->getDims().ndims() > 5)) {
IE_THROW() << "Convolution layer. Unsupported mode. Only 4D and 5D blobs are supported as input.";
}
pad_value = binConvLayer->_pad_value;
invertVectorCopyUtoI(binConvLayer->_stride, stride);
for (int i = 1; i <= binConvLayer->_dilation.size(); i++) {
dilation.push_back(static_cast<int>(binConvLayer->_dilation[binConvLayer->_dilation.size() - i]) - 1);
}
auto allPads = getPaddings(*binConvLayer);
invertVectorCopyUtoI(allPads.begin, paddingL);
invertVectorCopyUtoI(allPads.end, paddingR);
}
void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() {
@ -1077,48 +1085,18 @@ void MKLDNNBinaryConvolutionNode::createPrimitive() {
}
bool MKLDNNBinaryConvolutionNode::canFuse(const MKLDNNNodePtr& node) const {
auto isOneOf = [](EltwiseOpType alg, std::vector<EltwiseOpType> algs) {
for (auto a : algs) {
if (alg == a) {
return true;
}
}
return false;
};
if (implType == impl_desc_type::ref)
return false;
// Binarization have to be last operation in fusing chain
if (isFusedWith(Quantize))
if (isFusedWith(FakeQuantize))
return false;
if (node->getType() == Quantize) {
auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode*>(node.get());
if (quantizeNode == nullptr)
IE_THROW() << "Cannot get quantize node " << node->getName();
return quantizeNode->isBinarization();
} else if (node->getType() == Eltwise) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode*>(node.get());
if (eltwiseNode == nullptr)
IE_THROW() << "Cannot get eltwise node " << node->getName();
// Only one Add operation can be fused since it is implemented via output blob reuse
if (eltwiseNode->isSum()) {
for (auto& fusedNode : fusedWith) {
auto* fusedEltwiseNode = dynamic_cast<MKLDNNEltwiseNode*>(fusedNode.get());
if (fusedEltwiseNode->isSum()) {
return false;
}
}
}
return eltwiseNode->isSum() ||
isOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, SoftRelu,
Tanh, Swish, Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt});
if (node->getType() == FakeQuantize) {
return node->getAlgorithm() == FQBinarization;
} else {
return canFuseSimpleOperation(node);
}
return false;
}
void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
@ -1127,16 +1105,16 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
for (auto &node : fusedWith) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
if (eltwiseNode->isSum())
if (eltwiseNode->isSpecialConvolutionAddFusing())
ops.append_sum(1.0);
else
eltwiseNode->appendPostOps(ops);
continue;
}
auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode *>(node.get());
if (quantizeNode) {
quantizeNode->appendPostOps(ops);
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
fakeQuantizeNode->appendPostOps(ops);
continue;
}

View File

@ -74,7 +74,7 @@ struct jit_uni_bin_conv_kernel {
class MKLDNNBinaryConvolutionNode : public MKLDNNNode {
public:
MKLDNNBinaryConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
MKLDNNBinaryConvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
~MKLDNNBinaryConvolutionNode() override = default;
void getSupportedDescriptors() override;
@ -86,7 +86,11 @@ public:
return false;
}
void setPostOps(mkldnn::primitive_attr &attr);
bool canFuse(const MKLDNNNodePtr& node) const;
bool canFuse(const MKLDNNNodePtr& node) const override;
static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept;
impl_desc_type getImplType() { return implType; }
private:
bool withSum = false;
@ -112,6 +116,8 @@ private:
const std::vector<size_t>& s_str, const std::vector<size_t>& w_str, const std::vector<size_t>& d_str);
void executeReference(const uint8_t* src, const uint8_t* weights, uint8_t* dst,
const std::vector<size_t>& s_str, const std::vector<size_t>& w_str, const std::vector<size_t>& d_str);
std::string errorPrefix;
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,133 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <cmath>
#include <vector>
#include <string>
#include <mkldnn_types.h>
#include "ie_parallel.hpp"
#include "utils/bfloat16.hpp"
#include <mkldnn_selective_build.h>
#include "mkldnn_broadcast_node.h"
#include <nodes/common/tensor_desc_creator.h>
#include <ngraph/opsets/opset1.hpp>
#include "common/cpu_memcpy.h"
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
bool MKLDNNBroadcastNode::isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto broadcast = std::dynamic_pointer_cast<const ngraph::opset1::Broadcast>(op);
if (!broadcast) {
errorMessage = "Only opset1 Broadcast operation is supported";
return false;
}
if (broadcast->get_broadcast_spec() != ngraph::op::AutoBroadcastSpec::NUMPY) {
errorMessage = "Only NUMPY broadcast type is supported";
return false;
}
if (std::dynamic_pointer_cast<const ngraph::opset1::Constant>(broadcast->get_input_node_shared_ptr(BROADCAST_SHAPE)) == nullptr) {
errorMessage = "Only const 'shape' input is supported";
return false;
}
} catch (...) {
return false;
}
return true;
}
MKLDNNBroadcastNode::MKLDNNBroadcastNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
errorPrefix = "Broadcast node with name '" + op->get_friendly_name() + "'";
if (op->get_input_size() != 2 || op->get_output_size() != 1)
IE_THROW() << errorPrefix << " has incorrect number of input/output edges!";
SizeVector shape_dims = op->get_input_shape(BROADCAST_SHAPE);
if (shape_dims.size() > 1)
IE_THROW() << errorPrefix << " has incorrect 'shape' input rank: " << shape_dims.size();
}
void MKLDNNBroadcastNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
Precision prec = getOriginalInputPrecisionAtPort(BROADCAST_INPUT);
addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, prec},
{TensorDescCreatorTypes::ncsp, Precision::I32}},
{{TensorDescCreatorTypes::ncsp, prec}},
impl_desc_type::ref_any);
}
void MKLDNNBroadcastNode::execute(mkldnn::stream strm) {
size_t shape_size = (getParentEdgeAt(BROADCAST_SHAPE)->getDesc().getDims())[0];
SizeVector dst_dims = getChildEdgeAt(0)->getDesc().getDims();
SizeVector src_dims = getParentEdgeAt(BROADCAST_INPUT)->getDesc().getDims();
SizeVector srcStrides = getParentEdgeAt(BROADCAST_INPUT)->getDesc().getBlockingDesc().getStrides();
size_t data_size = getParentEdgeAt(BROADCAST_INPUT)->getDesc().getPrecision().size();
if (!src_dims.size())
src_dims = SizeVector(1, 1);
if (!srcStrides.size())
srcStrides = SizeVector(1, 1);
if (dst_dims.size() != shape_size) {
IE_THROW() << "Output tensor dimension mismatch";
}
if (src_dims.size() > dst_dims.size()) {
IE_THROW() << "Output tensor dimension is smaller then input tensor dimension";
}
InferenceEngine::SizeVector dstStrides = getChildEdgeAt(0)->getDesc().getBlockingDesc().getStrides();
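// Align source dims/strides to the destination rank by left-padding with 1s (NUMPY-style broadcasting).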
InferenceEngine::SizeVector src_aligned(dst_dims.size());
InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size());
size_t prefix_size = dst_dims.size() - src_dims.size();
for (size_t i = 0; i < dst_dims.size(); i++) {
if (i < prefix_size) {
src_aligned[i] = 1;
srcStrides_aligned[i] = srcStrides[0];
} else {
src_aligned[i] = src_dims[i - prefix_size];
srcStrides_aligned[i] = srcStrides[i - prefix_size];
}
}
size_t work_amount_dst = dstStrides[0] * dst_dims[0];
const auto *src_data = reinterpret_cast<const uint8_t *>(getParentEdgeAt(BROADCAST_INPUT)->getMemoryPtr()->GetPtr());
auto *dst_data = reinterpret_cast<uint8_t *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
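// Each thread walks its slice of the flattened output; the source index is derived from the
// per-dimension counters modulo the aligned source dims, and elements are copied byte-wise.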
parallel_nt(0, [&](const int ithr, const int nthr) {
size_t i, src_idx, start = 0, end = 0;
SizeVector counters(dst_dims.size(), 0);
splitter(work_amount_dst, nthr, ithr, start, end);
for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) {
counters[j] = i % dst_dims[j];
i /= dst_dims[j];
}
for (size_t iwork = start * data_size; iwork < end * data_size; iwork += data_size) {
for (i = 0, src_idx = 0; i < dst_dims.size(); ++i)
src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0;
cpu_memcpy(&dst_data[iwork], &src_data[src_idx * data_size], data_size);
for (int j = dst_dims.size() - 1; j >= 0; j--) {
counters[j] = (counters[j] + 1) % dst_dims[j];
if (counters[j] != 0) break;
}
}
});
}
bool MKLDNNBroadcastNode::created() const {
return getType() == Broadcast;
}
REG_MKLDNN_PRIM_FOR(MKLDNNBroadcastNode, Broadcast)

View File

@ -0,0 +1,35 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ie_common.h>
#include <mkldnn_node.h>
#include <string>
#include <memory>
#include <vector>
namespace MKLDNNPlugin {
class MKLDNNBroadcastNode : public MKLDNNNode {
public:
MKLDNNBroadcastNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
~MKLDNNBroadcastNode() override = default;
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override {};
void execute(mkldnn::stream strm) override;
bool created() const override;
static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept;
private:
static const size_t BROADCAST_INPUT = 0;
static const size_t BROADCAST_SHAPE = 1;
std::string errorPrefix;
};
} // namespace MKLDNNPlugin

View File

@ -9,7 +9,6 @@
#include <vector>
#include <mkldnn_extension_utils.h>
#include <legacy/ie_layers.h>
#include "mkldnn.hpp"
#include "mkldnn/iml_type_mapper.h"
#include "mkldnn_dims.h"
@ -17,7 +16,7 @@
#include "mkldnn_memory.h"
#include "ie_parallel.hpp"
#include "mkldnn_conv_node.h"
#include "mkldnn_quantize_node.h"
#include "mkldnn_fake_quantize_node.h"
#include "mkldnn_pooling_node.h"
#include "mkldnn_eltwise_node.h"
#include <limits>
@ -27,21 +26,37 @@ using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
MKLDNNConcatNode::MKLDNNConcatNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(layer, eng, cache) {}
bool MKLDNNConcatNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
auto concatOp = ngraph::as_type_ptr<const ngraph::op::v0::Concat>(op);
if (!concatOp) {
errorMessage = "Node is not an instance of the Concat operation.";
return false;
}
} catch (...) {
return false;
}
return true;
}
MKLDNNConcatNode::MKLDNNConcatNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
auto concatOp = ngraph::as_type_ptr<ngraph::op::v0::Concat>(op);
auto axis = concatOp->get_axis();
if (axis < 0) {
this->axis = concatOp->get_input_shape(0).size() + axis;
} else {
this->axis = axis;
}
}
void MKLDNNConcatNode::getSupportedDescriptors() {
auto * conLayer = dynamic_cast<ConcatLayer*>(getCnnLayer().get());
if (conLayer == nullptr)
IE_THROW() << "Cannot convert concat layer.";
axis = conLayer->_axis;
if (getParentEdges().empty())
IE_THROW() << "Incorrect number of input edges for layer " << getName();
if (getChildEdges().empty())
IE_THROW() << "Incorrect number of output edges for layer " << getName();
auto& firstParentDims = getParentEdgeAt(0)->getDims();
for (size_t i = 1; i < getParentEdges().size(); i++) {
auto& dims = getParentEdgeAt(i)->getDims();
@ -64,10 +79,11 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision();
auto& originInputPrecisions = getOriginalInputPrecisions();
inputPrecision = originInputPrecisions[0];
bool isMixedPrecision = false;
for (int i = 1; i < getCnnLayer()->insData.size(); i++) {
if (getCnnLayer()->insData[0].lock()->getPrecision() != getCnnLayer()->insData[i].lock()->getPrecision()) {
for (int i = 1; i < getOriginalInputsNumber(); i++) {
if (originInputPrecisions[0] != originInputPrecisions[i]) {
isMixedPrecision = true;
break;
}

View File

@ -13,9 +13,10 @@ namespace MKLDNNPlugin {
class MKLDNNConcatNode : public MKLDNNNode {
public:
MKLDNNConcatNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
MKLDNNConcatNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
~MKLDNNConcatNode() override = default;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void initOptimalPrimitiveDescriptor() override;

View File

@ -6,97 +6,123 @@
#include "mkldnn_reorder_node.h"
#include "mkldnn_input_node.h"
#include "mkldnn_eltwise_node.h"
#include "mkldnn_quantize_node.h"
#include "mkldnn_fake_quantize_node.h"
#include "mkldnn_pooling_node.h"
#include "mkldnn_concat_node.h"
#include <legacy/ie_layers.h>
#include <string>
#include <vector>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include <legacy/ie_layers_internal.hpp>
#include <utils/general_utils.h>
#include <ngraph/ops.hpp>
using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
MKLDNNConvolutionNode::MKLDNNConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(layer, eng, cache), withBiases(false), withSum(false), withDWConv(false), isDW(false), isMerged(false),
bool MKLDNNConvolutionNode::isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (!ngraph::is_type<ngraph::op::v1::Convolution>(op) && !ngraph::is_type<ngraph::op::v1::GroupConvolution>(op)) {
errorMessage = "Only opset1 Convolution and GroupConvolution operations are supported";
return false;
}
size_t ndims = op->get_input_shape(0).size();
if ((ndims < 4) || (ndims > 5)) {
errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(ndims);
return false;
}
} catch (...) {
return false;
}
return true;
}
MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(op, eng, cache), withBiases(false), withSum(false), withDWConv(false),
isGrouped(false), dw_conv_oc(0), dw_conv_ih(0), dw_conv_iw(0), dw_conv_in_dt(memory::data_type::undef),
groupNum(1lu), baseInputsNumber(1), eltwisePrecision(Precision::FP32) {
internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(0));
});
internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
if (!withBiases)
return MKLDNNMemoryDesc();
return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(1));
});
auto ws = layer->blobs.find("w-scale");
if (ws != layer->blobs.end()) {
wScale = ws->second;
groupNum(1lu), eltwisePrecision(Precision::FP32) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
// Trying to find oi-scale
if (getCnnLayer()->type == "Convolution" && getCnnLayer()->precision == Precision::I8) {
auto ois = layer->blobs.find("oi-scale");
if ((getCnnLayer()->outData[0]->getPrecision() == Precision::I8 || getCnnLayer()->outData[0]->getPrecision() == Precision::U8)
&& ois == layer->blobs.end()) {
IE_THROW() << "Internal error of graph quantization - mismatch of intermediate scales and next layer type for convolution "
<< getCnnLayer()->name;
}
if (ois != layer->blobs.end()) {
// If we can find an oi-scale, then the next layer has to be an INT8.
oScale = ois->second;
}
}
isPrimitivesPriorityDefined = op->get_rt_info().count("PrimitivesPriority") != 0;
if (getCnnLayer()->type == "Convolution") {
baseInputsNumber = getCnnLayer().get()->insData.size();
auto convolutionOp = ngraph::as_type_ptr<ngraph::op::v1::Convolution>(op);
auto groupConvolutionOp = ngraph::as_type_ptr<ngraph::op::v1::GroupConvolution>(op);
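// Ordinary and grouped convolutions are distinguished here; weight dims, strides and paddings are taken
// from the ngraph op, and dilations are converted to the oneDNN convention (value - 1).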
if (convolutionOp) {
algorithm = ConvolutionCommon;
groupNum = 1;
isGrouped = false;
weightDims = convolutionOp->input_value(1).get_shape();
IC = weightDims[1];
groupIC = IC;
groupOC = weightDims[0];
biasesDims = { groupOC };
for (int i = 0; i < convolutionOp->get_strides().size(); i++) {
stride.push_back(static_cast<ptrdiff_t>(convolutionOp->get_strides()[i]));
}
for (int i = 0; i < convolutionOp->get_dilations().size(); i++) {
dilation.push_back(static_cast<ptrdiff_t>(convolutionOp->get_dilations()[i]) - 1);
}
paddingL = convolutionOp->get_pads_begin();
paddingR = convolutionOp->get_pads_end();
} else if (groupConvolutionOp) {
algorithm = ConvolutionGrouped;
groupNum = groupConvolutionOp->input_value(1).get_shape()[0];
isGrouped = true;
weightDims = groupConvolutionOp->input_value(1).get_shape();
groupIC = weightDims[2];
IC = groupIC * groupNum;
groupOC = weightDims[1];
biasesDims = {groupOC * groupNum};
for (int i = 0; i < groupConvolutionOp->get_strides().size(); i++) {
stride.push_back(static_cast<ptrdiff_t>(groupConvolutionOp->get_strides()[i]));
}
for (int i = 0; i < groupConvolutionOp->get_dilations().size(); i++) {
dilation.push_back(static_cast<ptrdiff_t>(groupConvolutionOp->get_dilations()[i]) - 1);
}
paddingL = groupConvolutionOp->get_pads_begin();
paddingR = groupConvolutionOp->get_pads_end();
}
}
mkldnn::memory::data_type MKLDNNConvolutionNode::precisionToDataType(InferenceEngine::Precision prec) {
// MKLDNN Plugin doesn't support U16 layout so upcast to FP32 in this case
if (prec == Precision::U16)
prec = Precision::FP32;
bool MKLDNNConvolutionNode::canBeExecutedInInt8() const {
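// INT8 execution requires u8/s8 activations and s8 weights; present zero points force these data types.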
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0));
if (!inputZeroPoints.empty())
inputDataType = memory::data_type::u8;
return MKLDNNExtensionUtils::IEPrecisionToDataType(prec);
auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(1));
if (!weightsZeroPoints.empty())
weightsDataType = memory::data_type::s8;
return one_of(inputDataType, memory::data_type::u8, memory::data_type::s8) && weightsDataType == memory::data_type::s8;
}
bool MKLDNNConvolutionNode::canBeExecutedInInt8() {
auto * convLayer = dynamic_cast<ConvolutionLayer*>(getCnnLayer().get());
if (convLayer == nullptr)
IE_THROW() << "Cannot convert convolution layer.";
if (baseInputsNumber > 1) {
auto inputDataType = precisionToDataType(getCnnLayer()->insData[0].lock()->getPrecision());
if (!inputZeroPoints.empty())
inputDataType = memory::data_type::u8;
auto weightsDataType = precisionToDataType(Precision::FP32);
if (baseInputsNumber > 1) {
weightsDataType = precisionToDataType(getCnnLayer()->insData[1].lock()->getPrecision());
if (!weightsZeroPoints.empty())
weightsDataType = memory::data_type::s8;
}
return (inputDataType == mkldnn_s8 || inputDataType == mkldnn_u8) && weightsDataType == mkldnn_s8;
} else {
return this->getCnnLayer()->precision == Precision::I8;
}
}
InferenceEngine::Precision MKLDNNConvolutionNode::fusedEltwisePrecision(MKLDNNEltwiseNode *eltwiseNode, int findex) {
InferenceEngine::Precision MKLDNNConvolutionNode::fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const {
InferenceEngine::Precision eltwisePrecision;
auto parent0 = getCreatorLayer(eltwiseNode->getCnnLayer()->insData[0].lock()).lock();
auto parent1 = getCreatorLayer(eltwiseNode->getCnnLayer()->insData[1].lock()).lock();
auto fusedParent = findex != 0 ? fusedWith[findex - 1].get()->getCnnLayer() : this->getCnnLayer();
eltwisePrecision = fusedParent == parent0 ? eltwiseNode->getCnnLayer()->insData[1].lock()->getPrecision() :
eltwiseNode->getCnnLayer()->insData[0].lock()->getPrecision();
int fusingPort = fusingNode->getFusingPort();
if (fusingPort == 0) {
eltwisePrecision = fusingNode->getOriginalInputPrecisionAtPort(1);
} else if (fusingPort == 1) {
eltwisePrecision = fusingNode->getOriginalInputPrecisionAtPort(0);
} else {
IE_THROW() << "Cannot determine Eltwise post op precision for Convolution node with name '" << getName() << "'";
}
return eltwisePrecision;
}
@ -104,47 +130,43 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
if (!descs.empty())
return;
auto * convLayer = dynamic_cast<ConvolutionLayer*>(getCnnLayer().get());
if (convLayer == nullptr)
IE_THROW() << "Cannot convert convolution layer.";
withBiases = getOriginalInputsNumber() == 3;
withSum = false;
int expectedInputEdgesNum = baseInputsNumber;
int expectedInputEdgesNum = static_cast<int>(getOriginalInputsNumber());
for (int i = 0; i < fusedWith.size(); i++) {
auto *convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(fusedWith[i].get());
if (convolutionNode) {
expectedInputEdgesNum += convolutionNode->getBaseIntputsNumber() - 1;
if (fusedWith[i]->getType() == Convolution) {
expectedInputEdgesNum += static_cast<int>(fusedWith[i]->getOriginalInputsNumber()) - 1;
}
auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
if (eltwiseNode && eltwiseNode->isSum()) {
withSum = true;
expectedInputEdgesNum++;
if (fusedWith[i]->getAlgorithm() == EltwiseAdd) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) {
withSum = true;
expectedInputEdgesNum++;
}
}
}
auto inputDataType = precisionToDataType(getCnnLayer()->insData[0].lock()->getPrecision());
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0));
if (!inputZeroPoints.empty())
inputDataType = memory::data_type::u8;
auto outputDataType = precisionToDataType(getCnnLayer()->outData[0]->getPrecision());
auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(0));
eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType);
if (baseInputsNumber > 1) {
if (!fusedWith.empty()) {
auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer();
if (lastFusedLayer) {
outputDataType = precisionToDataType(lastFusedLayer->outData[0]->getPrecision());
eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType);
}
}
if (!fusedWith.empty()) {
outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0));
eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType);
}
// We need to make sure that convolution output and second input of fused Eltwise operation
// have equal precision sizes since they use the same physical memory. In case precisions are different we upscale to FP32.
if (outputDataType != memory::data_type::f32 && outputDataType != memory::data_type::bf16 && withSum) {
for (int i = 0; i < fusedWith.size(); i++) {
auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
if (eltwiseNode && eltwiseNode->isSum()) {
eltwisePrecision = fusedEltwisePrecision(eltwiseNode, i);
// We need to make sure that convolution output and second input of fused Eltwise operation
// have equal precision sizes since they use the same physical memory. In case precisions are different we upscale to FP32.
if (outputDataType != memory::data_type::f32 && outputDataType != memory::data_type::bf16 && withSum) {
for (int i = 0; i < fusedWith.size(); i++) {
if (fusedWith[i]->getAlgorithm() == EltwiseAdd) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) {
eltwisePrecision = fusedEltwisePrecision(fusedWith[i]);
if (MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType).size() != eltwisePrecision.size()) {
eltwisePrecision = Precision::FP32;
outputDataType = memory::data_type::f32;
@ -160,81 +182,7 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
if (getChildEdges().empty())
IE_THROW() << "Incorrect number of output edges for layer " << getName();
if ((getParentEdgeAt(0)->getDims().ndims() < 4) || (getParentEdgeAt(0)->getDims().ndims() > 5)) {
IE_THROW() << "Convolution layer. Unsupported mode. Only 4D and 5D blobs are supported as input.";
}
isMerged = (!getMergeWith().empty()); // grouped convolution was constructed from split->concat subgraph
isGrouped = convLayer->_group != 1; // group info available from IR
if (isMerged && isGrouped)
IE_THROW() << "Convolution initialization. Group splitted mode are used together with direct group specification.";
// default values. Can be replaced in next steps
groupNum = convLayer->_group;
size_t IC = convLayer->input()->getDims()[1];
size_t groupIC = IC;
size_t groupOC = convLayer->_out_depth;
isDW = groupNum == groupOC && groupNum == groupIC;
if (isMerged) {
groupNum = getMergeWith().size() + 1;
}
if (isGrouped) {
groupIC /= groupNum;
groupOC /= groupNum;
}
weightDims.clear();
weightDims.push_back(groupOC);
weightDims.push_back(groupIC);
for (int i = 1; i <= convLayer->_kernel.size(); i++) {
weightDims.push_back(convLayer->_kernel[convLayer->_kernel.size() - i]);
}
biasesDims = { groupOC * groupNum };
if (isGrouped || isMerged) weightDims.insert(weightDims.begin(), groupNum);
withBiases = (convLayer->_biases != nullptr && convLayer->_biases->size() != 0) || baseInputsNumber == 3;
if (baseInputsNumber == 1) {
internalBlobs.push_back(createInternalBlob(weightDims, true, isGrouped));
if (withBiases) {
internalBlobs.push_back(createInternalBlob(biasesDims, false));
}
Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second;
if (weights->getTensorDesc().getPrecision() == Precision::I8) {
// The weights blob has incorrect dims, so we have to fix it
TensorDesc wdesc = internalBlobs[0]->getTensorDesc();
wdesc.setPrecision(Precision::I8);
InferenceEngine::TBlob<int8_t>::Ptr reshapedInt8Weights =
InferenceEngine::TBlob<int8_t>::Ptr(
new InferenceEngine::TBlob<int8_t>(wdesc, static_cast<int8_t*>(weights->buffer()), weights->byteSize()));
internalBlobs[0] = reshapedInt8Weights;
if (withBiases) {
Blob::Ptr biases = this->getCnnLayer()->blobs.find("biases")->second;
TensorDesc bdesc = internalBlobs[1]->getTensorDesc();
bdesc.setPrecision(Precision::I32);
InferenceEngine::TBlob<int32_t>::Ptr reshapedInt32Biases =
InferenceEngine::TBlob<int32_t>::Ptr(
new InferenceEngine::TBlob<int32_t>(bdesc, static_cast<int32_t*>(biases->buffer()), biases->byteSize()));
internalBlobs[1] = reshapedInt32Biases;
}
}
}
invertVectorCopyUtoI(convLayer->_stride, stride);
for (int i = 1; i <= convLayer->_dilation.size(); i++) {
dilation.push_back(static_cast<int>(convLayer->_dilation[convLayer->_dilation.size() - i]) - 1);
}
auto allPads = getPaddings(*convLayer);
invertVectorCopyUtoI(allPads.begin, paddingL);
invertVectorCopyUtoI(allPads.end, paddingR);
int ndims = getParentEdgesAtPort(0)[0]->getDims().ndims();
MKLDNNDims weightsDims = MKLDNNDims(weightDims);
withDWConv = isFusedWith(Convolution);
@ -242,29 +190,26 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
for (int i = 0; i < fusedWith.size(); i++) {
auto *convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(fusedWith[i].get());
if (convolutionNode) {
auto *convLayer = reinterpret_cast<ConvolutionLayer *>(convolutionNode->getCnnLayer().get());
dw_conv_ih = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 2];
dw_conv_iw = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 1];
dw_conv_oc = convLayer->_out_depth;
for (int j = 0; j < convLayer->_kernel.size(); j++) {
dw_conv_kernel.push_back(convLayer->_kernel[j]);
}
for (int j = 0; j < convLayer->_stride.size(); j++) {
dw_conv_strides.push_back(convLayer->_stride[j]);
}
dw_conv_oc = convolutionNode->outDims[0][1];
const auto &dwWeightsDims = convolutionNode->inDims[1].ToSizeVector();
dw_conv_kernel.push_back(dwWeightsDims[dwWeightsDims.size() - 1]);
dw_conv_kernel.push_back(dwWeightsDims[dwWeightsDims.size() - 2]);
dw_conv_strides = convolutionNode->getStride();
if (canBeExecutedInInt8()) {
if (i == 0) {
dw_conv_in_dt = precisionToDataType(getCnnLayer()->outData[0]->getPrecision());
dw_conv_in_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(0));
} else {
dw_conv_in_dt = precisionToDataType(fusedWith[i - 1].get()->getCnnLayer()->outData[0]->getPrecision());
dw_conv_in_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[i - 1]->getOriginalOutputPrecisionAtPort(0));
}
} else {
dw_conv_in_dt = memory::data_type::f32;
}
for (int j = 0; j < paddingR.size(); j++) {
int with_group = (isGrouped || isMerged) ? 1 : 0;
int with_group = isGrouped ? 1 : 0;
int krn = weightsDims[with_group + 2 + j];
int src = getParentEdgeAt(0)->getDims()[2 + j];
int dst = getChildEdgeAt(0)->getDims()[2 + j];
@ -283,30 +228,32 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
outputDataType = memory::data_type::f32;
if (eltwisePrecision == Precision::BF16)
eltwisePrecision = Precision::FP32;
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType,
getParentEdgeAt(0)->getDims().ndims() == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType,
getParentEdgeAt(0)->getDims().ndims() == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc);
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ndims == 5 ? memory::format_tag::ndhwc
: memory::format_tag::nhwc);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, ndims == 5 ? memory::format_tag::ndhwc
: memory::format_tag::nhwc);
createDescriptor({in_candidate}, {out_candidate});
} else {
inputDataType = (convLayer->input()->getPrecision() == Precision::BF16
&& !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::data_type::bf16 : memory::data_type::f32;
outputDataType = (convLayer->outData[0]->getPrecision() == Precision::BF16
&& !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::data_type::bf16 : memory::data_type::f32;
inputDataType = (getOriginalInputPrecisionAtPort(0) == Precision::BF16 && !(isGrouped && ndims == 5)) ? memory::data_type::bf16
: memory::data_type::f32;
outputDataType = (getOriginalOutputPrecisionAtPort(0) == Precision::BF16 && !(isGrouped && ndims == 5)) ? memory::data_type::bf16
: memory::data_type::f32;
eltwisePrecision = Precision::FP32;
for (int i = 0; i < fusedWith.size(); i++) {
auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
if (eltwiseNode && eltwiseNode->isSum()) {
eltwisePrecision = fusedEltwisePrecision(eltwiseNode, i);
// TODO(amalyshe): there might be a situation when the convolution can be executed in BF16,
// the output is required in FP32, but the eltwise in-place tensor would be in BF16.
// Currently we forcibly change the output to BF16, which will add a reorder after the node.
// Another situation is when we mark the output as FP32 and the Eltwise precision (which stands
// for the precision of the in-place input tensor) as FP32. This will add a reorder for that
// in-place tensor before the fused convolution. This behaviour might be more correct regarding
// the expected markup of the graph, but the performance of the two approaches might differ. Need to verify.
outputDataType = eltwisePrecision == Precision::BF16 ? memory::data_type::bf16 : memory::data_type::f32;
eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType);
if (fusedWith[i]->getAlgorithm() == EltwiseAdd) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) {
eltwisePrecision = fusedEltwisePrecision(fusedWith[i]);
// TODO(amalyshe): there might be a situation when the convolution can be executed in BF16,
// the output is required in FP32, but the eltwise in-place tensor would be in BF16.
// Currently we forcibly change the output to BF16, which will add a reorder after the node.
// Another situation is when we mark the output as FP32 and the Eltwise precision (which stands
// for the precision of the in-place input tensor) as FP32. This will add a reorder for that
// in-place tensor before the fused convolution. This behaviour might be more correct regarding
// the expected markup of the graph, but the performance of the two approaches might differ. Need to verify.
outputDataType = eltwisePrecision == Precision::BF16 ? memory::data_type::bf16 : memory::data_type::f32;
eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType);
}
}
}
// correction for the FP32 input case - FP32 convolution does not support BF16 output
@ -316,16 +263,13 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
eltwisePrecision = Precision::FP32;
}
Layout layout = convLayer->input()->getLayout();
if (layout == NCHW || layout == NHWC) {
if (ndims == 4) {
if (IC == 1 && groupOC == 1) {
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nchw);
createDescriptor({in_candidate}, {out_candidate});
} else if (IC == 3 || IC == 1) {
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType,
layout == NCHW ? memory::format_tag::nchw : memory::format_tag::nhwc);
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nChw16c);
createDescriptor({in_candidate}, {out_candidate});
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nChw8c);
@ -339,19 +283,16 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
createDescriptor({in_candidate}, {out_candidate});
}
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType,
layout == NCHW ? memory::format_tag::nchw : memory::format_tag::nhwc);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType,
layout == NCHW ? memory::format_tag::nchw : memory::format_tag::nhwc);
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nchw);
createDescriptor({in_candidate}, {out_candidate});
} else if (layout == NCDHW || layout == NDHWC) {
} else if (ndims == 5) {
if (IC == 1 && groupOC == 1) {
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ncdhw);
createDescriptor({in_candidate}, {out_candidate});
} else if (IC == 3 || IC == 1) {
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType,
layout == NCDHW ? memory::format_tag::ncdhw : memory::format_tag::ndhwc);
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nCdhw16c);
createDescriptor({in_candidate}, {out_candidate});
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nCdhw8c);
@ -365,17 +306,14 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
createDescriptor({in_candidate}, {out_candidate});
}
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType,
layout == NCDHW ? memory::format_tag::ncdhw : memory::format_tag::ndhwc);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType,
layout == NCDHW ? memory::format_tag::ncdhw : memory::format_tag::ndhwc);
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ncdhw);
createDescriptor({in_candidate}, {out_candidate});
}
}
}
void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) {
int blob_idx = 0;
void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) const {
mkldnn::post_ops ops;
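// Translate every fused node (Eltwise, FakeQuantize, fused depthwise Convolution) into the corresponding oneDNN post-operation.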
for (auto &node : fusedWith) {
@ -383,66 +321,31 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
continue;
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode && eltwiseNode->isSum()) {
ops.append_sum(1.0, precisionToDataType(eltwisePrecision));
continue;
}
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
if (eltwiseNode->isSpecialConvolutionAddFusing())
ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision));
else
eltwiseNode->appendPostOps(ops);
continue;
}
auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode *>(node.get());
if (quantizeNode) {
quantizeNode->appendPostOps(ops);
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
fakeQuantizeNode->appendPostOps(ops);
continue;
}
auto* convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(node.get());
if (convolutionNode) {
if (initWeights) {
if (convolutionNode->getBaseIntputsNumber() == 1) {
auto* convLayer = reinterpret_cast<ConvolutionLayer*>(convolutionNode->getCnnLayer().get());
auto weightsPrc = precisionToDataType(convLayer->precision);
auto biasPrc = memory::data_type::s32;
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]});
PostOpsIntBlobMemory[blob_idx]->Create(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g);
PostOpsIntBlobMemory[blob_idx]->FillZero();
Blob::Ptr weights = convLayer->blobs.find("weights")->second;
Blob::Ptr biases = convLayer->blobs.find("biases")->second;
PostOpsIntBlobMemory[blob_idx]->SetData(weightsPrc, memory::format_tag::goihw, weights->buffer(),
dwWeightsDims.size() * MKLDNNExtensionUtils::sizeOfDataType(weightsPrc));
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
MKLDNNDims dwBiasesDims({dw_conv_oc});
PostOpsIntBlobMemory[blob_idx + 1]->Create(dwBiasesDims, biasPrc, memory::format_tag::x);
PostOpsIntBlobMemory[blob_idx + 1]->FillZero();
PostOpsIntBlobMemory[blob_idx + 1]->SetData(biasPrc, memory::format_tag::x, biases->buffer(),
dwBiasesDims.size() * MKLDNNExtensionUtils::sizeOfDataType(biasPrc));
// todo: rewrite onto append_dw_k3s2p1
ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
mkldnn::memory::convert_to_c(dw_conv_in_dt),
static_cast<const float *>(PostOpsIntBlobMemory[blob_idx]->GetData()),
static_cast<const float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData()));
blob_idx += 2;
} else {
// todo: rewrite onto append_dw_k3s2p1
ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
mkldnn::memory::convert_to_c(dw_conv_in_dt),
static_cast<const float *>(getParentEdgeAt(
baseInputsNumber + 0)->getMemory().GetData()),
static_cast<const float *>(getParentEdgeAt(
baseInputsNumber + 1)->getMemory().GetData()));
}
// todo: rewrite onto append_dw_k3s2p1
ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
mkldnn::memory::convert_to_c(dw_conv_in_dt),
static_cast<const float *>(getParentEdgeAt(
getOriginalInputsNumber() + 0)->getMemory().GetData()),
static_cast<const float *>(getParentEdgeAt(
getOriginalInputsNumber() + 1)->getMemory().GetData()));
} else {
// todo: rewrite onto append_dw_k3s2p1
ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
@ -451,47 +354,6 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
nullptr,
nullptr);
}
if (convolutionNode->wScale != nullptr) {
float* wScaleData = static_cast<float*>(convolutionNode->wScale->buffer());
std::vector<float> oScaleDataVector;
std::vector<float> oShiftDataVector;
if (convolutionNode->getCnnLayer()->precision == Precision::I8 &&
convolutionNode->getCnnLayer()->outData[0]->getPrecision() != Precision::FP32) {
float *oScaleData = static_cast<float *>(convolutionNode->oScale->buffer());
for (size_t c = 0; c < convolutionNode->wScale->size(); c++) {
oScaleDataVector.push_back(wScaleData[c] / oScaleData[c]);
oShiftDataVector.push_back(0.f);
}
} else {
for (size_t c = 0; c < convolutionNode->wScale->size(); c++) {
oScaleDataVector.push_back(wScaleData[c]);
oShiftDataVector.push_back(0.f);
}
}
MKLDNNDims oScaleDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx]->Create(oScaleDims, memory::data_type::f32, memory::format_tag::x);
PostOpsIntBlobMemory[blob_idx]->FillZero();
PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::format_tag::x, &oScaleDataVector[0],
oScaleDataVector.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx + 1]->Create(oScaleDims, memory::data_type::f32, memory::format_tag::x);
PostOpsIntBlobMemory[blob_idx + 1]->FillZero();
PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::format_tag::x, &oShiftDataVector[0],
oShiftDataVector.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift,
static_cast<const float *>(PostOpsIntBlobMemory[blob_idx]->GetData()),
static_cast<const float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData()));
blob_idx += 2;
}
continue;
}
@ -528,8 +390,8 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
config.inConfs.push_back(dataConfig);
}
if (withDWConv && baseInputsNumber > 1) {
auto weightsPrc = precisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32);
if (withDWConv) {
auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32);
auto biasPrc = memory::data_type::f32;
MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]});
@ -553,7 +415,7 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
dataConfig.constant = false;
dataConfig.desc = getDstMemDesc(itpd, i);
if (!(isGrouped || isMerged))
if (!isGrouped)
dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
config.outConfs.push_back(dataConfig);
@ -582,7 +444,6 @@ void MKLDNNConvolutionNode::createPrimitive() {
mkldnn::primitive_attr attr;
addZeroPoints(attr);
setPostOps(attr, true);
addScaleToPrimitiveAttr(attr);
auto prim_desc = createPrimitiveDescriptor<convolution_forward::primitive_desc,
convolution_forward::desc>(attr);
@ -590,11 +451,14 @@ void MKLDNNConvolutionNode::createPrimitive() {
prim.reset(new convolution_forward(prim_desc));
auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
auto wei = getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPrimitive();
auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
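// Bind the source, weights, optional bias and destination memory to the oneDNN primitive execution arguments.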
if (withBiases)
primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getWeights()}, {DNNL_ARG_BIAS, getBias()}, {DNNL_ARG_DST, dst}};
else
primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getWeights()}, {DNNL_ARG_DST, dst}};
if (withBiases) {
auto bias = getParentEdgesAtPort(2)[0]->getMemoryPtr()->GetPrimitive();
primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, wei}, {DNNL_ARG_BIAS, bias}, {DNNL_ARG_DST, dst}};
} else {
primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, wei}, {DNNL_ARG_DST, dst}};
}
}
bool MKLDNNConvolutionNode::created() const {
@ -605,55 +469,25 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector<InferenceEngine::
const std::vector<InferenceEngine::TensorDesc> &outputDesc) {
TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0];
mkldnn::memory::data_type wdt = precisionToDataType(inDesc.getPrecision());
mkldnn::memory::data_type bdt = precisionToDataType(inDesc.getPrecision());
if (inDesc.getPrecision() == Precision::BF16) {
bdt = mkldnn::memory::data_type::f32;
}
memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision());
memory::data_type bdt = memory::data_type::f32;
if (inDesc.getPrecision() == Precision::U8 || inDesc.getPrecision() == Precision::I8) {
wdt = memory::data_type::s8;
bdt = baseInputsNumber == 3 ? precisionToDataType(getCnnLayer()->insData[2].lock()->getPrecision()) : memory::data_type::s32;
}
if (baseInputsNumber == 1) {
Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second;
if (weights->getTensorDesc().getPrecision() == Precision::I8) {
wdt = memory::data_type::s8;
bdt = memory::data_type::s32;
Precision outPrec;
if (getCnnLayer()->outData[0]->getPrecision() == Precision::FP32) {
outPrec = Precision::FP32;
} else {
// define precision according to the normalizer
// TODO(amalyshe) do we need to have separate flow for last in int8 chain or not?
outPrec = outDesc.getPrecision();
}
inDesc = TensorDesc(inDesc.getPrecision(), inputDesc[0].getDims(), inputDesc[0].getBlockingDesc());
outDesc = TensorDesc(outPrec, outputDesc[0].getDims(), outputDesc[0].getBlockingDesc());
}
}
MKLDNNMemoryDesc in_candidate(inDesc);
MKLDNNMemoryDesc out_candidate(outDesc);
// grouping and autoblocking are not compatible
if (((isGrouped && !isDW) || isMerged) && (in_candidate.blocksExtended() || out_candidate.blocksExtended()))
return;
MKLDNNDims blocked_weightDims(weightDims);
MKLDNNDims blocked_biasesDims(biasesDims);
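// memory::format_tag::any lets oneDNN deduce the optimal weights layout for the selected implementation.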
MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, wdt, memory::format_tag::any};
std::vector<algorithm> algorithms;
// We cannot map wino_format on tensor descriptor for now
if (getBaseIntputsNumber() == 1) {
algorithms.push_back(algorithm::convolution_winograd);
}
algorithms.push_back(algorithm::convolution_direct);
std::vector<mkldnn::algorithm> algorithms;
// TODO [NM]: We cannot map wino_format on tensor descriptor for now
// algorithms.push_back(algorithm::convolution_winograd);
algorithms.push_back(mkldnn::algorithm::convolution_direct);
for (auto alg : algorithms) {
try {
@ -695,27 +529,6 @@ void MKLDNNConvolutionNode::addZeroPoints(mkldnn::primitive_attr& attr) const {
}
}
void MKLDNNConvolutionNode::addScaleToPrimitiveAttr(mkldnn::primitive_attr attr) const {
if (wScale != nullptr) {
float* wScaleData = static_cast<float*>(wScale->buffer());
std::vector<float> oScaleDataVector;
if (getCnnLayer()->precision == Precision::I8 && getCnnLayer()->outData[0]->getPrecision() != Precision::FP32) {
float *oScaleData = static_cast<float *>(oScale->buffer());
for (size_t c = 0; c < wScale->size(); c++) {
oScaleDataVector.push_back(wScaleData[c] / oScaleData[c]);
}
} else {
for (size_t c = 0; c < wScale->size(); c++) {
oScaleDataVector.push_back(wScaleData[c]);
}
}
attr.set_output_scales(1 << 1 /*through C dim*/, oScaleDataVector);
}
}
void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& config) {
auto* selectedPD = getSelectedPrimitiveDescriptor();
if (!selectedPD) {
@ -725,18 +538,15 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c
// Strided blobs feature support.
// Works only for FP32 convolutions for now.
bool isStridedBlobsSupported = true;
for (auto &insData : getCnnLayer()->insData) {
if (insData.lock()->getPrecision() != InferenceEngine::Precision::FP32
&& insData.lock()->getPrecision() != InferenceEngine::Precision::BF16) {
isStridedBlobsSupported = false;
break;
}
}
// TODO: fix strided blobs feature support for dynamic weights
if (baseInputsNumber != 1) {
// TODO [NM]: refactor to use a global executionPrecision.
if (canBeExecutedInInt8()) {
isStridedBlobsSupported = false;
}
// TODO [NM]: fix strided blobs feature support for dynamic weights
// if (getOriginalInputsNumber() != 1) {
// isStridedBlobsSupported = false;
// }
if (isStridedBlobsSupported) {
createDescriptor({config.inConfs[0].desc}, {config.outConfs[0].desc});
@ -745,7 +555,6 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c
mkldnn::primitive_attr attr;
addZeroPoints(attr);
setPostOps(attr);
addScaleToPrimitiveAttr(attr);
InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig();
size_t selected_count = 0;
@ -768,8 +577,8 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c
cfg.inConfs.push_back(dataConfig);
}
if (withDWConv && baseInputsNumber > 1) {
auto weightsPrc = precisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32);
if (withDWConv) {
auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32);
auto biasPrc = memory::data_type::f32;
MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]});
@ -853,14 +662,14 @@ void MKLDNNConvolutionNode::filterSupportedDescriptors() {
}
}
bool MKLDNNConvolutionNode::isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) {
bool MKLDNNConvolutionNode::isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) const {
// WA: In some cases, we can predict in advance the type of primitive that will be called in the future.
// In particular, isPossibleToSkipInitConfig() checks whether we can skip the creation of primitives with
// gemm implementation, which significantly increases the network load time.
if (!inputMemoryFormatsFilter.empty() || !outputMemoryFormatsFilter.empty())
return false;
if (getCnnLayer()->params.find("PrimitivesPriority") != getCnnLayer()->params.end())
if (isPrimitivesPriorityDefined)
return false;
// Here we check that we will not delete jit_planar_conv primitive by mistake.
@ -920,12 +729,8 @@ MKLDNNMemoryDesc MKLDNNConvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_ite
}
}
const mkldnn::memory& MKLDNNConvolutionNode::getWeights() const {
return baseInputsNumber > 1 ? getParentEdgeAt(1)->getMemory().GetPrimitive() : internalBlobMemory[0]->GetPrimitive();
}
const mkldnn::memory& MKLDNNConvolutionNode::getBias() const {
return baseInputsNumber > 2 ? getParentEdgeAt(2)->getMemory().GetPrimitive() : internalBlobMemory[1]->GetPrimitive();
bool MKLDNNConvolutionNode::canFuse(const MKLDNNNodePtr& node) const {
return canFuseSimpleOperation(node);
}
InferenceEngine::Precision MKLDNNConvolutionNode::getRuntimePrecision() const {

View File

@ -16,9 +16,10 @@ class MKLDNNEltwiseNode;
class MKLDNNConvolutionNode : public MKLDNNNode {
public:
MKLDNNConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
~MKLDNNConvolutionNode() override = default;
static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept;
void getSupportedDescriptors() override;
void createDescriptor(const std::vector<InferenceEngine::TensorDesc>& inputDesc,
const std::vector<InferenceEngine::TensorDesc>& outputDesc) override;
@ -26,50 +27,45 @@ public:
void createPrimitive() override;
void initSupportedPrimitiveDescriptors() override;
void filterSupportedPrimitiveDescriptors() override;
void filterSupportedDescriptors();
bool isPossibleToSkipInitConfig(MKLDNNDescriptor &desc);
bool created() const override;
bool canBeInPlace() const override {
return false;
}
void setPostOps(mkldnn::primitive_attr &attr, bool initWeights);
size_t descInputNumbers(MKLDNNDescriptor desc) override {
return static_cast<size_t>(baseInputsNumber);
}
int getBaseIntputsNumber() {
return baseInputsNumber;
}
MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
const mkldnn::memory& getWeights() const;
const mkldnn::memory& getBias() const;
bool canBeExecutedInInt8();
InferenceEngine::Precision getRuntimePrecision() const override;
MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
size_t descInputNumbers(MKLDNNDescriptor desc) override {
return static_cast<size_t>(getOriginalInputsNumber());
}
bool canBeExecutedInInt8() const;
size_t getGroupNum() const { return groupNum; }
std::vector<uint8_t> inputZeroPoints;
std::vector<float> weightsZeroPoints;
std::vector<int32_t> outputCompensation;
const InferenceEngine::SizeVector &getWeightDims() { return weightDims; }
const std::vector<ptrdiff_t> &getStride() { return stride; }
const std::vector<ptrdiff_t> &getDilation() { return dilation; }
const std::vector<ptrdiff_t> &getPaddingL() { return paddingL; }
const std::vector<ptrdiff_t> &getPaddingR() { return paddingR; }
bool canFuse(const MKLDNNNodePtr& node) const override;
protected:
void addScaleToPrimitiveAttr(mkldnn::primitive_attr attr) const;
InferenceEngine::Precision fusedEltwisePrecision(MKLDNNEltwiseNode *eltwiseNode, int findex);
InferenceEngine::Precision fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const;
private:
mkldnn::memory::data_type precisionToDataType(InferenceEngine::Precision prec);
void addZeroPoints(mkldnn::primitive_attr& attr) const;
void setPostOps(mkldnn::primitive_attr &attr, bool initWeights) const;
void filterSupportedDescriptors();
bool isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) const;
bool withBiases;
bool withSum;
bool withDWConv;
bool isDW;
bool isMerged;
bool isGrouped;
bool isPrimitivesPriorityDefined;
std::vector<ptrdiff_t> stride;
std::vector<ptrdiff_t> dilation;
std::vector<ptrdiff_t> paddingL;
@ -83,14 +79,16 @@ private:
std::vector<ptrdiff_t> dw_conv_kernel;
std::vector<ptrdiff_t> dw_conv_strides;
mkldnn::memory::data_type dw_conv_in_dt;
std::vector<MKLDNNMemoryPtr> PostOpsIntBlobMemory;
InferenceEngine::Blob::Ptr wScale, oScale;
size_t groupNum;
int baseInputsNumber;
size_t IC;
size_t groupIC;
size_t groupOC;
InferenceEngine::Precision eltwisePrecision;
const size_t X_AXIS = 0;
const size_t Y_AXIS = 1;
};
} // namespace MKLDNNPlugin

View File

@ -6,15 +6,43 @@
#include "mkldnn_convert_node.h"
#include "common/cpu_convert.h"
#include "common/tensor_desc_creator.h"
#define THROW_ERROR IE_THROW() << getTypeStr() << " layer with name '" << getName() <<"' ERROR: "
#include <ngraph/opsets/opset1.hpp>
using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
MKLDNNNode(layer, eng, cache) {}
bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto convert = std::dynamic_pointer_cast<const ngraph::opset1::Convert>(op);
if (!convert) {
errorMessage = "Only opset1 Convert operation is supported";
return false;
}
} catch (...) {
return false;
}
return true;
}
MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
MKLDNNNode(op, eng, cache) {
std::string errorMessage;
if (isSupportedOperation(op, errorMessage)) {
errorPrefix = "Convert node with name '" + getName() + "'";
} else {
IE_THROW(NotImplemented) << errorMessage;
}
}
MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode("Convert", nodeName, eng, cache) {
inDims.emplace_back(dims);
addOriginalInputPrecision(inPrc);
outDims.emplace_back(dims);
addOriginalOutputPrecision(outPrc);
}
void MKLDNNConvertNode::getSupportedDescriptors() {
// if tensor descriptors are set via setDescs method we need to update the inDims/outDims data
@ -24,20 +52,15 @@ void MKLDNNConvertNode::getSupportedDescriptors() {
if (inDims.empty() && input && input->getLayout() != InferenceEngine::Layout::ANY)
inDims.push_back(MKLDNNDims(input->getDims()));
if (getParentEdges().size() != 1)
THROW_ERROR << "Incorrect number of input edges";
IE_THROW() << errorPrefix << " has incorrect number of input edges";
if (getChildEdges().empty())
THROW_ERROR << "Incorrect number of output edges";
IE_THROW() << errorPrefix << " has incorrect number of output edges";
}
void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
auto layer = getCnnLayer();
if (layer == nullptr) {
THROW_ERROR << "Cannot get CNN layer";
}
LayerConfig config;
DataConfig dataIn;
DataConfig dataConfigOut;
@ -54,16 +77,11 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() {
dataConfigOut.desc = TensorDesc(output->getPrecision(), input->getDims(), blockingDesc);
config.outConfs.push_back(dataConfigOut);
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat());
} else if (layer->insData.size() == 1 && layer->outData.size() == 1) {
auto insData = layer->insData[0].lock();
if (nullptr == insData) {
THROW_ERROR << "Input data is empty";
}
const SizeVector& insDims = insData->getTensorDesc().getDims();
auto insPrecision = insData->getTensorDesc().getPrecision();
const SizeVector& outputDims = layer->outData[0]->getTensorDesc().getDims();
auto outPrecision = layer->outData[0]->getTensorDesc().getPrecision();
} else if (getOriginalInputsNumber() == 1 && getOriginalOutputsNumber() == 1) {
const SizeVector& insDims = getParentEdgeAt(0)->getDims().ToSizeVector();
auto insPrecision = getOriginalInputPrecisionAtPort(0);
const SizeVector& outputDims = getChildEdgeAt(0)->getDims().ToSizeVector();
auto outPrecision = getOriginalOutputPrecisionAtPort(0);
config.inConfs.push_back(dataIn);
config.outConfs.push_back(dataConfigOut);
@ -78,7 +96,7 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() {
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat());
}
} else {
THROW_ERROR << "Incorrect number of input/output edges";
IE_THROW() << errorPrefix << " has incorrect number of input/output edges";
}
}
@ -86,18 +104,18 @@ void MKLDNNConvertNode::createPrimitive() {
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
THROW_ERROR << "Destination memory didn't allocate.";
IE_THROW() << errorPrefix << " has not allocated destination memory";
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
THROW_ERROR << "Input memory didn't allocate.";
IE_THROW() << errorPrefix << " has not allocated input memory";
if (getSelectedPrimitiveDescriptor() == nullptr)
THROW_ERROR << "Preferable primitive descriptor is not set.";
IE_THROW() << errorPrefix << " has nullable preferable primitive descriptor";
}
void MKLDNNConvertNode::execute(mkldnn::stream strm) {
auto& parentMem = getParentEdgeAt(0)->getMemory();
auto& childMem = getChildEdgeAt(0)->getMemory();
if (parentMem.GetElementsCount() != childMem.GetElementsCount())
THROW_ERROR << "Input and output buffers have different elements count";
IE_THROW() << errorPrefix << " has different elements number in input and output buffers";
void* srcPtr = parentMem.GetPtr();
void* dstPtr = childMem.GetPtr();
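// The element-wise precision conversion itself is performed by the shared cpu_convert helper (see common/cpu_convert.h).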
@ -107,4 +125,5 @@ void MKLDNNConvertNode::execute(mkldnn::stream strm) {
bool MKLDNNConvertNode::created() const {
return getType() == Convert;
}
REG_MKLDNN_PRIM_FOR(MKLDNNConvertNode, Convert);

View File

@ -13,7 +13,9 @@ namespace MKLDNNPlugin {
class MKLDNNConvertNode : public MKLDNNNode {
public:
MKLDNNConvertNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
~MKLDNNConvertNode() override = default;
void getSupportedDescriptors() override;
@ -37,9 +39,13 @@ public:
std::shared_ptr<const InferenceEngine::TensorDesc> getInput() const { return input; }
std::shared_ptr<const InferenceEngine::TensorDesc> getOutput() const { return output; }
static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept;
private:
std::shared_ptr<InferenceEngine::TensorDesc> input;
std::shared_ptr<InferenceEngine::TensorDesc> output;
std::string errorPrefix;
};
} // namespace MKLDNNPlugin

Some files were not shown because too many files have changed in this diff.