[CPU] A new transformation that adds a convert layer if there are no reorders that support the data type conversion. (#3498)

This commit is contained in:
Maksim Kutakov
2021-02-08 11:58:48 +03:00
committed by GitHub
parent db065d525e
commit 7387642a98
18 changed files with 681 additions and 198 deletions

View File

@@ -46,11 +46,11 @@ set(LAYERS
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_scatter_update_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_interpolate_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reduce_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_convert_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/list.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/batch_to_space.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/broadcast.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/convert.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_greedy_decoder.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_greedy_decoder_seq_len.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_loss.cpp

View File

@@ -43,6 +43,7 @@ void BF16Transformer::convertToFloat(InferenceEngine::CNNNetwork &network) {
for (size_t o = 0; o < iter->outData.size(); o++) {
if (inputs.find(iter->outData[o]->getName()) == inputs.end()
&& outputs.find(iter->outData[o]->getName()) == outputs.end()
&& !CaselessEq<std::string>()(iter->type, "const")
&& iter->outData[o]->getPrecision() == Precision::BF16) {
iter->outData[o]->setPrecision(Precision::FP32);
}

View File

@@ -23,6 +23,7 @@
#include "mkldnn_infer_request.h"
#include <nodes/mkldnn_input_node.h>
#include <nodes/mkldnn_reorder_node.h>
#include <nodes/mkldnn_convert_node.h>
#include <legacy/graph_tools.hpp>
#include <ie_algorithm.hpp>
@@ -457,6 +458,21 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() {
}
}
// Checks whether oneDNN can create a reorder primitive that converts a tensor
// described by 'parentDesc' into one described by 'childDesc' on engine 'eng'.
// Used by InitEdges() to decide whether an explicit Convert node has to be
// inserted instead of (or in front of) a Reorder.
static bool isReorderAvailable(const TensorDesc& parentDesc, const TensorDesc& childDesc, const mkldnn::engine& eng) {
    memory::desc dstMemDesc = MKLDNNMemoryDesc(childDesc);
    memory::desc srcMemDesc = MKLDNNMemoryDesc(parentDesc);
    mkldnn::primitive_attr attr;

    // Probe via the C API so that an unsupported combination is reported as a
    // status code instead of a thrown exception.
    dnnl_primitive_desc_t result = nullptr;
    auto status = dnnl_reorder_primitive_desc_create(&result, &srcMemDesc.data, eng.get(), &dstMemDesc.data, eng.get(),
                                                     attr.get());
    // The descriptor is only probed, never used to build a primitive; release it.
    if (result) {
        mkldnn_primitive_desc_destroy(result);
    }

    return mkldnn_success == status;
}
void MKLDNNGraph::InitEdges() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges");
@@ -470,18 +486,42 @@ void MKLDNNGraph::InitEdges() {
for (auto i = 0; i < numberOfEdges; i++) {
if (graphEdges[i]->needReorder()) {
#if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
auto &edge = graphEdges[i];
std::string basicLayerName = edge->getParent()->getName() + "_" +
MKLDNNExtensionUtils::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" +
edge->getChild()->getName();
std::string layerName = basicLayerName;
int idx = 0;
while (uniqueLayerNames.find(layerName) != uniqueLayerNames.end()) {
idx++;
layerName = basicLayerName + "_" + std::to_string(idx);
auto edge = graphEdges[i];
bool insertReorder = true;
// Check if there is a reorder that supports the type conversion
if (edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() &&
!isReorderAvailable(edge->getInputDesc(), edge->getOutputDesc(), this->getEngine())) {
//If we are here, then we need to insert Convert, because there are no reorders that support such type conversion
std::string convertName = edge->getParent()->getName() + "_" +
edge->getInputDesc().getPrecision().name() + "_" + edge->getOutputDesc().getPrecision().name();
CNNLayerPtr convert(new CNNLayer(LayerParams{convertName, "Convert", edge->getInputDesc().getPrecision()}));
auto convertNode = std::make_shared<MKLDNNConvertNode>(convert, this->getEngine(), this->weightsCache);
convertNode->setDescs(edge->getInputDesc(), edge->getOutputDesc());
InsertNode(edge, convertNode, true);
//Check if reorder is still needed
if (convertNode->getChildEdgeAt(0)->needReorder()) {
edge = convertNode->getChildEdgeAt(0);
} else {
insertReorder = false;
}
}
if (insertReorder) {
std::string basicLayerName = edge->getParent()->getName() + "_" +
MKLDNNExtensionUtils::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" +
edge->getChild()->getName();
std::string layerName = basicLayerName;
int idx = 0;
while (uniqueLayerNames.find(layerName) != uniqueLayerNames.end()) {
idx++;
layerName = basicLayerName + "_" + std::to_string(idx);
}
uniqueLayerNames.insert(layerName);
InsertReorder(edge, layerName, edge->getInputDesc(), edge->getOutputDesc());
}
uniqueLayerNames.insert(layerName);
InsertReorder(edge, layerName, edge->getInputDesc(), edge->getOutputDesc());
graphEdges.erase(graphEdges.begin() + i);
i--;
numberOfEdges--;
@@ -1095,44 +1135,17 @@ MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerNa
}
reorderPtr->setDescs(inDesc, outDesc);
reorderPtr->_scales = scales;
auto oIndex = edge->getOutputNum();
auto iIndex = edge->getInputNum();
if (iIndex < 0 || oIndex < 0)
THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
<< edge->getParent()->getName() << " and "
<< edge->getChild()->getName() << ".";
edge->drop();
MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));
// Add edge for beforeNode
beforeNode->getChild()->parentEdges.push_back(beforeNode);
edge->getParent()->childEdges.push_back(beforeNode);
// Add edge for afterNode
afterNode->getParent()->childEdges.push_back(afterNode);
edge->getChild()->parentEdges.push_back(afterNode);
reorderPtr->setOptimized(isOptimized);
newReorder->getSupportedDescriptors();
newReorder->initSupportedPrimitiveDescriptors();
newReorder->selectOptimalPrimitiveDescriptor();
graphEdges.push_back(beforeNode);
graphEdges.push_back(afterNode);
InsertNode(edge, newReorder, true);
// Using the method MKLDNNEdge::getDesc() we can check that input and output tensor descriptors are equal.
// Due to the specificity of MKLDNNGraphOptimizer::MergePermuteAndReorder() that isOptimized flag uses, we shouldn't do these checks.
if (!isOptimized) {
beforeNode->getDesc();
afterNode->getDesc();
newReorder->getParentEdgeAt(0)->getDesc();
newReorder->getChildEdgeAt(0)->getDesc();
}
graphNodes.push_back(newReorder);
return newReorder;
}
@@ -1235,3 +1248,42 @@ void MKLDNNGraph::do_after(const std::string &dir, const MKLDNNNodePtr &node) {
InferenceEngine::CNNNetwork MKLDNNGraph::dump() const {
return dump_graph_as_ie_ngraph_net(*this);
}
// Inserts 'node' in place of the given edge: the edge is dropped and the node is
// wired between the edge's former parent and child, keeping the original port
// numbers. Delegates to the port-based overload.
bool MKLDNNGraph::InsertNode(MKLDNNEdgePtr edge, MKLDNNNodePtr node, bool initNode) {
    const auto childPort = edge->getOutputNum();
    const auto parentPort = edge->getInputNum();
    if (parentPort < 0 || childPort < 0) {
        THROW_IE_EXCEPTION << "Cannot insert node '" << node->getName() << "' between nodes: "
                           << edge->getParent()->getName() << " and "
                           << edge->getChild()->getName() << ".";
    }

    // Detach the original edge before re-wiring through the new node.
    edge->drop();

    return InsertNode(edge->getParent(), edge->getChild(), node, parentPort, childPort, initNode);
}
// Inserts 'node' between 'parent' and 'child': creates the two connecting edges,
// registers them in both nodes' edge lists, optionally runs the node
// initialization pipeline, and adds the edges and the node to the graph.
// parentPort/childPort are the port numbers on the parent/child side.
// initNode=true is required when the insertion happens after the graph has
// already been initialized (e.g. from InitEdges()).
bool MKLDNNGraph::InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNodePtr node, int parentPort, int childPort, bool initNode) {
    MKLDNNEdgePtr beforeNode(new MKLDNNEdge(parent, node, parentPort, 0));
    MKLDNNEdgePtr afterNode(new MKLDNNEdge(node, child, 0, childPort));

    // Add edge for beforeNode
    beforeNode->getChild()->parentEdges.push_back(beforeNode);
    parent->childEdges.push_back(beforeNode);

    // Add edge for afterNode
    afterNode->getParent()->childEdges.push_back(afterNode);
    child->parentEdges.push_back(afterNode);

    if (initNode) {
        // Standard node initialization sequence; the call order matters.
        node->getSupportedDescriptors();
        node->initSupportedPrimitiveDescriptors();
        node->filterSupportedPrimitiveDescriptors();
        node->selectOptimalPrimitiveDescriptor();
        node->initOptimalPrimitiveDescriptor();
    }

    graphEdges.push_back(beforeNode);
    graphEdges.push_back(afterNode);
    graphNodes.push_back(node);
    return true;
}

View File

@@ -115,6 +115,41 @@ public:
MKLDNNNodePtr InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const InferenceEngine::TensorDesc& inDesc,
const InferenceEngine::TensorDesc& outDesc, bool isOptimized = false, InferenceEngine::Blob::Ptr scales = nullptr);
/**
* @brief Insert MKLDNNNode at the edge-specified location.
* This method supports two regimes. First, the node is inserted with initialization (i.e. supported descriptors initialization,
* supported primitive descriptors selection, etc.), which can be useful after the InitEdges() completes. The second is just inserting the
* node without initialization.
* @param edge
* pointer to the edge in the graph where the node will be inserted
* @param node
* pointer to the inserted node
* @param initNode
* parameter that determines whether the node needs to be initialized
* @return true in case of success, false otherwise.
*/
bool InsertNode(MKLDNNEdgePtr edge, MKLDNNNodePtr node, bool initNode = false);
/**
* @brief Insert MKLDNNNode between two specified nodes.
* This procedure creates two edges that link the parent and child nodes to the inserted one and adds all created objects to the graph.
* This method supports two regimes. First, the node is inserted with initialization (i.e. supported descriptors initialization,
* supported primitive descriptors selection, etc.), which can be useful after the InitEdges() completes. The second is just inserting the
* node without initialization.
* @param parent
* pointer to the parent node
* @param child
* pointer to the child node
* @param parentPort
* port number of the parent node to which the inserted node should be connected
* @param childPort
* port number of the child node to which the inserted node should be connected
* @param initNode
* parameter that determines whether the node needs to be initialized
* @return true in case of success, false otherwise.
*/
bool InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNodePtr node, int parentPort, int childPort, bool initNode = false);
InferenceEngine::CNNNetwork dump() const;
template<typename NET>

View File

@@ -55,9 +55,6 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
MergeTwoEqualScaleShifts(graph);
graph.RemoveDroppedNodes();
MergeConversions(graph);
graph.RemoveDroppedNodes();
FuseBroadcastAndEltwise(graph);
graph.RemoveDroppedNodes();
@@ -154,51 +151,6 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
graph.RemoveDroppedEdges();
}
// Merges duplicate Convert nodes that share the same Input parent: when several
// Converts consume the same data and produce the same precision and shape, all
// but the first are dropped and their consumers are re-wired to the survivor.
void MKLDNNGraphOptimizer::MergeConversions(MKLDNNGraph& graph) {
    for (auto node : graph.GetNodes()) {
        // Candidate parent: an Input with at least 2 children, the first of which is a Convert
        if (!IsOneOf(node->getType(), { Input }) || node->getChildEdges().size() < 2 ||
            !IsOneOf(node->getChildEdgeAt(0)->getChild()->getType(), { Convert })) {
            continue;
        }

        auto& input = node;

        // Look for Conversions of the same type with matching dims/precision
        for (size_t i = 0; i < input->getChildEdges().size(); i++) {
            auto convInEdge = input->getChildEdgeAt(i);
            auto conv = convInEdge->getChild();
            // Only edge 0 was verified above; skip non-Convert siblings explicitly
            if (!IsOneOf(conv->getType(), { Convert })) {
                continue;
            }

            // A merge candidate must have exactly one output edge (checked below),
            // so the Convert's output is always its child edge 0 — indexing with 'i'
            // would run out of range for i >= 1.
            auto convOutEdge = conv->getChildEdgeAt(0);
            auto convInDims = convInEdge->getDims();
            auto convOutDims = convOutEdge->getDims();
            Precision convOutPrecision = conv->getCnnLayer()->precision;

            for (size_t j = i + 1; j < input->getChildEdges().size();) {
                auto childEdge = input->getChildEdgeAt(j);
                auto child = childEdge->getChild();

                if (!IsOneOf(child->getType(), { Convert }) ||
                    child->getCnnLayer()->precision != convOutPrecision ||
                    child->getChildEdgeAt(0)->getDims() != convOutDims ||
                    childEdge->getDims() != convInDims ||
                    child->getChildEdges().size() != 1) {
                    j++;
                    continue;
                }

                // Re-wire the duplicate Convert's consumer to the surviving Convert
                auto childChildEdge = child->getChildEdgeAt(0);
                auto childChild = childChildEdge->getChild();
                int idxChild = childChildEdge->getOutputNum();

                child->remove();
                graph.DropNode(child);

                MKLDNNEdgePtr newEdge(new MKLDNNEdge(conv, childChild, 0, idxChild));
                graph.GetEdges().push_back(newEdge);
                conv->addEdge(newEdge);
                // 'j' is deliberately not advanced: dropping 'child' shrank the edge list
            }
        }
    }
}
void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) {
auto& graphNodes = graph.GetNodes();
@@ -1844,6 +1796,10 @@ void MKLDNNGraphOptimizer::ChangeConvertToReorder(MKLDNNGraph& graph) {
if (!InferenceEngine::details::CaselessEq<std::string>()(nodeType, "convert")) {
continue;
}
if (convertCandidate->getCnnLayer()->insData.empty() ||
convertCandidate->getCnnLayer()->outData.empty()) {
continue;
}
auto inputPrecision = convertCandidate->getCnnLayer()->insData[0].lock()->getPrecision();
auto outputPrecision = convertCandidate->getCnnLayer()->outData[0]->getPrecision();
if (std::find(continuousPrecisions.begin(), continuousPrecisions.end(), inputPrecision) == continuousPrecisions.end() ||
@@ -2313,4 +2269,4 @@ void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) {
mergePermuteAndReorder(parentNode, childNode);
}
}
}
}

View File

@@ -19,7 +19,6 @@ public:
void ApplyImplSpecificGraphOptimizations(MKLDNNGraph& graph);
private:
void MergeConversions(MKLDNNGraph& graph);
void MergeGroupConvolution(MKLDNNGraph& graph);
void MergeTwoEqualScaleShifts(MKLDNNGraph& graph);
void FuseConvolutionAndActivation(MKLDNNGraph &graph);
@@ -41,6 +40,7 @@ private:
void DropDoubleReorders(MKLDNNGraph& graph);
void DropConvertReorder(MKLDNNGraph& graph);
void ChangeConvertToReorder(MKLDNNGraph &graph);
void AddConvertToReorder(MKLDNNGraph &graph);
void FuseConvolutionAndZeroPoints(MKLDNNGraph &graph);
void FuseBroadcastAndEltwise(MKLDNNGraph &graph);
void FuseEltwiseAndSimple(MKLDNNGraph &graph);

View File

@@ -17,6 +17,7 @@
#include "mkldnn_memory.h"
#include "mkldnn_extension_utils.h"
#include "nodes/common/cpu_memcpy.h"
#include "nodes/common/cpu_convert.h"
#include "ie_mkldnn.h"
using namespace InferenceEngine;
@@ -88,10 +89,54 @@ void MKLDNNMemory::Create(const mkldnn::memory::desc& desc, const void *data, bo
}
}
// Copies/converts the contents of 'input' into 'output'.
// If the memory descriptors match, a plain byte copy is performed; otherwise an
// mkldnn reorder is used. When no reorder exists for the required precision
// conversion, the data is first converted with cpu_convert and then reordered.
// 'size' (bytes) limits the copy in the memcpy path only; 0 means "whole buffer".
void MKLDNNMemory::reorderData(const MKLDNNMemory &input, const MKLDNNMemory &output, size_t size) {
    if (size != 0)
        IE_ASSERT(size <= output.GetDescriptor().get_size());

    if (input.GetDesc() == output.GetDesc()) {
        // Identical layouts and data types: raw byte copy is sufficient.
        auto srcPtr = static_cast<uint8_t*>(input.GetPtr());
        auto dstPtr = static_cast<uint8_t*>(output.GetPtr());

        auto copySize = size == 0 ? output.GetSize() : size;
        cpu_memcpy(dstPtr, srcPtr, copySize);
    } else {
        std::unique_ptr<mkldnn::reorder> pReorder;
        std::shared_ptr<memory> srcMemoryPtr;
        // Buffer for the precision-converted data; declared in this scope so it
        // outlives the reorder execution below.
        std::vector<uint8_t> tmpBuff;

        try {
            pReorder = std::unique_ptr<mkldnn::reorder>(new mkldnn::reorder(input.GetPrimitive(), output.GetPrimitive()));
            srcMemoryPtr = input.prim;
        }
        catch (const mkldnn::error& err) {
            if (mkldnn_unimplemented == err.status && output.GetDataType() != input.GetDataType()) {
                //we probably could not make the reorder because there is no one supporting this precision conversion
                //lets try to convert data first using cpu_convert
                auto data = static_cast<const uint8_t *>(input.GetPtr());
                tmpBuff.resize(input.GetSize());

                cpu_convert(data, tmpBuff.data(), MKLDNNExtensionUtils::DataTypeToIEPrecision(input.GetDataType()),
                            MKLDNNExtensionUtils::DataTypeToIEPrecision(output.GetDataType()), input.GetElementsCount());

                // Wrap the converted buffer in a memory object that keeps the source
                // layout but carries the target data type, then retry the reorder.
                MKLDNNMemory tmpMem(output.eng);
                tmpMem.Create(input.GetDims(), output.GetDataType(), input.GetDesc().getFormat(), tmpBuff.data());

                pReorder = std::unique_ptr<mkldnn::reorder>(new mkldnn::reorder(tmpMem.GetPrimitive(), output.GetPrimitive()));
                srcMemoryPtr = tmpMem.prim;
            } else {
                throw;
            }
        }
        if (pReorder) {
            mkldnn::stream loc_stream(output.eng, stream::flags::default_order);
            pReorder->execute(loc_stream, *srcMemoryPtr, *output.prim);
        } else {
            THROW_IE_EXCEPTION << "Could not make mkldnn reorder.";
        }
    }
}
// TODO: It should be done via wrap into Memory;
void MKLDNNMemory::SetData(memory::data_type dataType, memory::format_tag format, const void* data, size_t size, bool ftz) const {
uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(dataType));
IE_ASSERT(!one_of(format, memory::format_tag::undef, memory::format_tag::any));
auto dst_desc = GetDescriptor();
@@ -99,25 +144,21 @@ void MKLDNNMemory::SetData(memory::data_type dataType, memory::format_tag format
IE_ASSERT(size <= dst_desc.get_size());
if (dst_desc != src_desc) {
auto memData = GetDescriptor().data;
memory::dims dims{memData.dims, memData.dims + memData.ndims};
MKLDNNMemory src(eng);
src.Create(dims, dataType, format, data);
std::shared_ptr<mkldnn::reorder> pReorder =
std::shared_ptr<mkldnn::reorder>(new mkldnn::reorder(src.GetPrimitive(), GetPrimitive()));
mkldnn::stream loc_stream(eng, stream::flags::default_flags);
pReorder->execute(loc_stream, *src.prim, *this->prim);
} else {
if (dst_desc == src_desc) {
uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(dataType));
uint8_t* dataPtr = static_cast<uint8_t*>(GetData());
// We cannot support strides for i/o blobs because it affects performance.
dataPtr += itemSize * prim->get_desc().data.offset0;
cpu_memcpy(dataPtr, data, size);
}
} else {
auto memData = this->GetDescriptor().data;
memory::dims dims(memData.dims, memData.dims + memData.ndims);
MKLDNNMemory src(this->eng);
src.Create(dims, dataType, format, data);
reorderData(src, *this);
}
if (ftz
&& dataType == memory::data_type::f32
&& prim->get_desc().data.format_kind != dnnl_format_kind_wino
@@ -130,21 +171,7 @@ void MKLDNNMemory::SetData(memory::data_type dataType, memory::format_tag format
}
void MKLDNNMemory::SetData(const MKLDNNMemory& src, size_t size, bool ftz) const {
if (size != 0)
IE_ASSERT(size <= GetDescriptor().get_size());
// TODO: Optimization. Reorder perfect is not good enough, so in triviale cases we
// prefer use simple copy.
if (src.GetDesc() == this->GetDesc()) {
auto srcPtr = static_cast<uint8_t*>(src.GetPtr());
auto dstPtr = static_cast<uint8_t*>(this->GetPtr());
auto copySize = size == 0 ? this->GetSize() : size;
cpu_memcpy(dstPtr, srcPtr, copySize);
} else {
mkldnn::reorder reorderPrim(src.GetPrimitive(), GetPrimitive());
mkldnn::stream loc_stream(eng, stream::flags::default_order);
reorderPrim.execute(loc_stream, *src.prim, *this->prim);
}
reorderData(src, *this, size);
if (ftz
&& src.GetDataType() == memory::data_type::f32
@@ -840,5 +867,4 @@ bool MKLDNNMemoryDesc::blocksExtended() const {
}
return false;
}
} // namespace MKLDNNPlugin

View File

@@ -164,6 +164,8 @@ public:
static std::string formatToString(mkldnn::memory::format_tag fmt);
static void reorderData(const MKLDNNMemory& input, const MKLDNNMemory& output, size_t size = 0);
private:
std::shared_ptr<mkldnn::memory> prim;
mkldnn::engine eng;

View File

@@ -168,7 +168,8 @@ MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::
if (!(CaselessEq<std::string>()(layer->type, "memory") ||
CaselessEq<std::string>()(layer->type, "memoryinput") ||
CaselessEq<std::string>()(layer->type, "output") ||
CaselessEq<std::string>()(layer->type, "reorder"))) {
CaselessEq<std::string>()(layer->type, "reorder") ||
CaselessEq<std::string>()(layer->type, "convert"))) {
THROW_IE_EXCEPTION << "Inappropriate layer type: " << layer->type << " name: " << layer->name;
}
}

View File

@@ -0,0 +1,119 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "tensor_desc_creator.h"
#include <numeric>
using namespace InferenceEngine;
using namespace MKLDNNPlugin;
namespace {
constexpr size_t channelsPos = 1lu;
// Produces a plain (row-major) tensor descriptor: the blocked dims equal the
// source dims and the dimension order is the identity permutation.
class PlainFormatCreator : public TensorDescCreator {
public:
    virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const {
        SizeVector dimsOrder;
        dimsOrder.reserve(srcDims.size());
        for (size_t dim = 0lu; dim < srcDims.size(); ++dim) {
            dimsOrder.push_back(dim);
        }
        return TensorDesc(precision, srcDims, {srcDims, dimsOrder});
    }
    // A plain layout is valid for any rank, including scalars.
    virtual size_t getMinimalRank() const { return 0lu; }
};
// Produces a "channels last" descriptor (e.g. NHWC): for ranks above 2 the
// channel dimension is rotated to the end of both the order and the blocked dims.
class PerChannelCreator : public TensorDescCreator {
public:
    virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision &precision, const InferenceEngine::SizeVector &srcDims) const {
        const size_t rank = srcDims.size();

        SizeVector order(rank);
        std::iota(order.begin(), order.end(), 0);
        SizeVector blkDims = srcDims;

        if (rank > 2) {
            // Rotate the channel entry past the trailing spatial dims so it ends up last.
            std::rotate(order.begin() + channelsPos, order.begin() + channelsPos + 1, order.end());
            std::rotate(blkDims.begin() + channelsPos, blkDims.begin() + channelsPos + 1, blkDims.end());
        }

        return TensorDesc(precision, srcDims, {blkDims, order});
    }
    virtual size_t getMinimalRank() const { return 3lu; }
};
// Produces a channel-blocked descriptor (e.g. nChw8c / nChw16c): the channel
// dimension is split into ceil(C / blockSize) outer blocks plus an innermost
// dimension of size blockSize appended at the end.
class ChannelBlockedCreator : public TensorDescCreator {
public:
    // 'explicit' prevents an accidental implicit conversion from an integer
    // block size into a creator object.
    explicit ChannelBlockedCreator(size_t blockSize) : _blockSize(blockSize) {}
    virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const {
        if (srcDims.size() < 2) {
            THROW_IE_EXCEPTION << "Can't create blocked tensor descriptor!";
        }

        SizeVector order(srcDims.size());
        std::iota(order.begin(), order.end(), 0);
        order.push_back(channelsPos);

        SizeVector blkDims = srcDims;
        // Round the channel count up to a whole number of blocks.
        blkDims[channelsPos] = blkDims[channelsPos] / _blockSize + (blkDims[channelsPos] % _blockSize ? 1 : 0);
        blkDims.push_back(_blockSize);

        return TensorDesc(precision, srcDims, {blkDims, order});
    }
    virtual size_t getMinimalRank() const { return 3lu; }

private:
    size_t _blockSize;  // number of channels packed into the innermost dimension
};
} // namespace
// Returns the shared, once-constructed map of all generic tensor descriptor
// creators (planar, per-channel and channel-blocked by 8/16).
const TensorDescCreator::CreatorsMap& TensorDescCreator::getCommonCreators() {
    static const CreatorsMap commonCreators = [] {
        CreatorsMap result;
        result[TensorDescCreatorTypes::nspc] = CreatorConstPtr(new PerChannelCreator);
        result[TensorDescCreatorTypes::nCsp8c] = CreatorConstPtr(new ChannelBlockedCreator(8));
        result[TensorDescCreatorTypes::nCsp16c] = CreatorConstPtr(new ChannelBlockedCreator(16));
        result[TensorDescCreatorTypes::ncsp] = CreatorConstPtr(new PlainFormatCreator);
        return result;
    }();
    return commonCreators;
}
std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
TensorDescCreator::makeFilteredRange(const CreatorsMap &map, unsigned int rank) {
auto rankFilter = [rank](const CreatorsMap::value_type& item) {
if (item.second->getMinimalRank() > rank) {
return false;
}
return true;
};
auto first = CreatorsMapFilterConstIterator(std::move(rankFilter), map.begin(), map.end());
auto last = first.end();
return std::make_pair(first, last);
}
// Builds a [first, last) filtered view over 'map' that keeps only the creators
// which are both listed in 'supportedTypes' and support tensors of the given rank.
std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
TensorDescCreator::makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector<TensorDescCreatorTypes>& supportedTypes) {
    // Collect the requested types into a bit mask for O(1) membership tests.
    // The shifted literal is 'unsigned long' so its width matches the size_t
    // accumulator; a plain 'int' literal is only shifted in int width, which is
    // undefined behavior for enumerator values >= 31.
    size_t bitMask = 0ul;
    for (auto& item : supportedTypes) {
        bitMask |= 1ul << static_cast<unsigned>(item);
    }

    auto rankTypesFilter = [rank, bitMask](const CreatorsMap::value_type& item) {
        if (!(bitMask & (1ul << static_cast<unsigned>(item.first)))) {
            return false;
        }
        if (item.second->getMinimalRank() > rank) {
            return false;
        }
        return true;
    };

    auto first = CreatorsMapFilterConstIterator(std::move(rankTypesFilter), map.begin(), map.end());
    auto last = first.end();
    return std::make_pair(first, last);
}
std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
TensorDescCreator::makeFilteredRange(const CreatorsMap &map, TensorDescCreator::Predicate predicate) {
auto first = CreatorsMapFilterConstIterator(std::move(predicate), map.begin(), map.end());
auto last = first.end();
return std::make_pair(first, last);
}

View File

@@ -0,0 +1,94 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ie_layouts.h>
namespace MKLDNNPlugin {
// Identifiers of the generic tensor layouts that the common creators can produce.
// Values double as bit positions in the filter masks built by makeFilteredRange().
enum class TensorDescCreatorTypes : unsigned {
    nspc, // general per channels format
    ncsp, // general planar
    nCsp8c, // general channels blocked by 8
    nCsp16c // general channels blocked by 16
};
class CreatorsMapFilterConstIterator;
// Factory interface for building InferenceEngine::TensorDesc objects in the
// common generic layouts (see TensorDescCreatorTypes). Concrete creators are
// obtained via getCommonCreators() and can be filtered by rank, by type list,
// or by an arbitrary predicate with the makeFilteredRange() helpers.
class TensorDescCreator {
public:
    typedef std::shared_ptr<TensorDescCreator> CreatorPtr;
    typedef std::shared_ptr<const TensorDescCreator> CreatorConstPtr;
    typedef std::map<TensorDescCreatorTypes, CreatorConstPtr> CreatorsMap;
    typedef std::function<bool(const CreatorsMap::value_type&)> Predicate;

public:
    // Returns the shared map of all generic creators.
    static const CreatorsMap& getCommonCreators();
    // Filters the map by minimal supported rank.
    static std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
    makeFilteredRange(const CreatorsMap &map, unsigned rank);
    // Filters the map by rank and an explicit whitelist of creator types.
    static std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
    makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector<TensorDescCreatorTypes>& supportedTypes);
    // Filters the map with an arbitrary predicate.
    static std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
    makeFilteredRange(const CreatorsMap& map, Predicate predicate);
    // Builds a descriptor for the given precision and dims in this creator's layout.
    virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const = 0;
    // Minimal tensor rank this layout is applicable to.
    virtual size_t getMinimalRank() const = 0;
    virtual ~TensorDescCreator() = default;
};
// Forward iterator adapter over a TensorDescCreator::CreatorsMap that skips
// entries rejected by a caller-supplied predicate. Produced by the
// TensorDescCreator::makeFilteredRange() helpers.
class CreatorsMapFilterConstIterator {
public:
    typedef TensorDescCreator::CreatorsMap::const_iterator Iterator;
    typedef std::iterator_traits<Iterator>::value_type value_type;
    typedef std::iterator_traits<Iterator>::reference reference;
    typedef std::iterator_traits<Iterator>::pointer pointer;
    typedef std::iterator_traits<Iterator>::difference_type difference_type;
    typedef std::forward_iterator_tag iterator_category;
    typedef std::function<bool(const value_type&)> predicate_type;

public:
    // Immediately advances to the first element accepted by 'filter' (or to 'end').
    CreatorsMapFilterConstIterator(predicate_type filter, Iterator begin, Iterator end) : _filter(std::move(filter)), _iter(begin), _end(end) {
        while (_iter != _end && !_filter(*_iter)) {
            ++_iter;
        }
    }

    // Pre-increment: step to the next element accepted by the predicate.
    // NOTE(review): must not be called on a past-the-end iterator — the first
    // ++_iter is performed before any bounds check.
    CreatorsMapFilterConstIterator& operator++() {
        do {
            ++_iter;
        } while (_iter != _end && !_filter(*_iter));
        return *this;
    }

    // Returns the matching past-the-end iterator. The empty predicate is safe
    // here because the constructor's skip loop never runs when _iter == _end.
    CreatorsMapFilterConstIterator end() const {
        return CreatorsMapFilterConstIterator(predicate_type(), _end, _end);
    }

    // Post-increment: returns the value before advancing.
    CreatorsMapFilterConstIterator operator++(int) {
        CreatorsMapFilterConstIterator temp(*this);
        ++*this;
        return temp;
    }

    reference operator*() const {
        return *_iter;
    }

    pointer operator->() const {
        return std::addressof(*_iter);
    }

    // Equality compares only the underlying position; the predicate is ignored.
    friend bool operator==(const CreatorsMapFilterConstIterator& lhs, const CreatorsMapFilterConstIterator& rhs) {
        return lhs._iter == rhs._iter;
    }

    friend bool operator!=(const CreatorsMapFilterConstIterator& lhs, const CreatorsMapFilterConstIterator& rhs) {
        return !(lhs == rhs);
    }

private:
    Iterator _iter;           // current position in the underlying map
    Iterator _end;            // underlying past-the-end position
    predicate_type _filter;   // acceptance predicate; empty only for end iterators
};
} // namespace MKLDNNPlugin

View File

@@ -1,72 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "base.hpp"
#include <string>
#include <vector>
#include "ie_precision.hpp"
#include "common/cpu_convert.h"
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
// Legacy extension-layer implementation of the Convert operation: copies the
// input blob into the output blob while converting the element precision via
// cpu_convert. Input and output must contain the same number of elements.
class ConvertImpl: public ExtLayerBase {
public:
    explicit ConvertImpl(const CNNLayer* layer) {
        try {
            logPrefix = "Convert layer with name '" + layer->name + "' ";
            if (layer->insData.size() != 1 || layer->outData.size() != 1)
                THROW_IE_EXCEPTION << logPrefix << "has incorrect number of input/output edges";

            // Target precision string from the layer parameters. Stored only;
            // the actual conversion below uses the tensor descriptors instead.
            precision = layer->GetParamAsString("precision");

            // Mirror the input/output tensor descriptors into a single layer configuration.
            LayerConfig config;
            DataConfig dataIn;
            const SizeVector& ins_dims = layer->insData[0].lock()->getTensorDesc().getDims();
            dataIn.desc = TensorDesc(layer->insData[0].lock()->getTensorDesc().getPrecision(), ins_dims,
                                     layer->insData[0].lock()->getTensorDesc().getLayout());
            config.inConfs.push_back(dataIn);

            DataConfig dataConfigOut;
            const SizeVector& out_dims = layer->outData[0]->getTensorDesc().getDims();
            dataConfigOut.desc = TensorDesc(layer->outData[0]->getTensorDesc().getPrecision(), out_dims,
                                            layer->outData[0]->getTensorDesc().getLayout());
            config.outConfs.push_back(dataConfigOut);

            config.dynBatchSupport = false;
            confs.push_back(config);
        } catch (InferenceEngine::details::InferenceEngineException &ex) {
            // Constructor must not throw: the error message is inspected later.
            errorMsg = ex.what();
        }
    }

    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
        try {
            void *srcPtr = inputs[0]->cbuffer().as<void *>();
            void *dstPtr = outputs[0]->buffer().as<void *>();
            if (inputs[0]->size() != outputs[0]->size())
                THROW_IE_EXCEPTION << logPrefix << "has input and output buffers with different sizes";
            // Element-wise precision conversion; the sizes are equal, so the
            // output element count is used as the conversion count.
            cpu_convert(srcPtr, dstPtr, inputs[0]->getTensorDesc().getPrecision(), outputs[0]->getTensorDesc().getPrecision(), outputs[0]->size());
        } catch (InferenceEngine::details::InferenceEngineException &ex) {
            errorMsg = ex.what();
            if (resp)
                errorMsg.copy(resp->msg, sizeof(resp->msg)-1);
            return GENERAL_ERROR;
        } catch(...) {
            return GENERAL_ERROR;
        }
        return OK;
    }

private:
    std::string precision;   // "precision" layer parameter (informational only)
    std::string logPrefix;   // common prefix for this layer's error messages
};
REG_FACTORY_FOR(ConvertImpl, Convert);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine

View File

@@ -48,7 +48,6 @@ MKLDNN_EXTENSION_NODE(RegionYoloImpl, RegionYolo);
MKLDNN_EXTENSION_NODE(LogSoftmaxImpl, LogSoftmax);
MKLDNN_EXTENSION_NODE(ReorgYoloImpl, ReorgYolo);
MKLDNN_EXTENSION_NODE(SqueezeImpl, Squeeze);
MKLDNN_EXTENSION_NODE(ConvertImpl, Convert);
MKLDNN_EXTENSION_NODE(FillImpl, Fill);
MKLDNN_EXTENSION_NODE(UniqueImpl, Unique);
MKLDNN_EXTENSION_NODE(PSROIPoolingImpl, PSROIPooling);

View File

@@ -0,0 +1,110 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <mkldnn_extension_utils.h>
#include "mkldnn_convert_node.h"
#include "common/cpu_convert.h"
#include "common/tensor_desc_creator.h"
#define THROW_ERROR THROW_IE_EXCEPTION << getTypeStr() << " layer with name '" << getName() <<"' ERROR: "
using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
// Constructed from a CNNLayer like any other node; alternatively the in/out
// tensor descriptors can be provided afterwards via setDescs() (see
// getSupportedDescriptors / initSupportedPrimitiveDescriptors).
MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
        MKLDNNNode(layer, eng, cache) {}
void MKLDNNConvertNode::getSupportedDescriptors() {
    // if tensor descriptors are set via setDescs method we need to update the inDims/outDims data
    // from correspond tensor descriptors.
    if (outDims.empty() && output && output->getLayout() != InferenceEngine::Layout::ANY)
        outDims.push_back(MKLDNNDims(output->getDims()));
    if (inDims.empty() && input && input->getLayout() != InferenceEngine::Layout::ANY)
        inDims.push_back(MKLDNNDims(input->getDims()));
    // Convert takes exactly one input; its output may feed several consumers.
    if (getParentEdges().size() != 1)
        THROW_ERROR << "Incorrect number of input edges";
    if (getChildEdges().empty())
        THROW_ERROR << "Incorrect number of output edges";
}
// Populates supportedPrimitiveDescriptors. Two modes:
//  1) setDescs() was called (input/output set): emit a single descriptor that
//     reuses the supplied layouts, forcing identical in/out blocking.
//  2) Regular CNNLayer construction: enumerate every generic layout applicable
//     to the input rank, with matching in/out layouts and differing precisions.
void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;

    auto layer = getCnnLayer();
    if (layer == nullptr) {
        THROW_ERROR << "Cannot get CNN layer";
    }

    LayerConfig config;
    DataConfig dataIn;
    DataConfig dataConfigOut;

    config.dynBatchSupport = false;

    // if input and output pointers are not null, then the inp/output tensor descriptors were set using setDescs method, so
    // they should be used as the actual descriptors.
    if (input && input->getLayout() != InferenceEngine::Layout::ANY && output && output->getLayout() != InferenceEngine::Layout::ANY) {
        dataIn.desc = *input;
        config.inConfs.push_back(dataIn);

        const auto& blockingDesc = config.inConfs[0].desc.getBlockingDesc(); // inp/out layouts must be the same
        dataConfigOut.desc = TensorDesc(output->getPrecision(), input->getDims(), blockingDesc);
        config.outConfs.push_back(dataConfigOut);
        supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat());
    } else if (layer->insData.size() == 1 && layer->outData.size() == 1) {
        auto insData = layer->insData[0].lock();
        if (nullptr == insData) {
            THROW_ERROR << "Input data is empty";
        }

        const SizeVector& insDims = insData->getTensorDesc().getDims();
        auto insPrecision = insData->getTensorDesc().getPrecision();
        const SizeVector& outputDims = layer->outData[0]->getTensorDesc().getDims();
        auto outPrecision = layer->outData[0]->getTensorDesc().getPrecision();

        config.inConfs.push_back(dataIn);
        config.outConfs.push_back(dataConfigOut);

        // Bind by reference: getCommonCreators() returns a const reference, and a
        // plain 'auto' here would deep-copy the whole creators map on every call.
        const auto& creators = TensorDescCreator::getCommonCreators();
        auto range = TensorDescCreator::makeFilteredRange(creators, insDims.size());

        for (auto itr = range.first; itr != range.second; ++itr) {
            config.inConfs[0].desc = itr->second->createDesc(insPrecision, insDims);
            config.outConfs[0].desc = itr->second->createDesc(outPrecision, outputDims);

            supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat());
        }
    } else {
        THROW_ERROR << "Incorrect number of input/output edges";
    }
}
void MKLDNNConvertNode::createPrimitive() {
    // Convert has no MKLDNN primitive to compile; the conversion itself is
    // performed in execute(). Here we only validate that the graph has
    // allocated both edge memories and selected a primitive descriptor.
    const auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
    if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
        THROW_ERROR << "Destination memory didn't allocate.";

    const auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
    if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
        THROW_ERROR << "Input memory didn't allocate.";

    if (getSelectedPrimitiveDescriptor() == nullptr)
        THROW_ERROR << "Preferable primitive descriptor is not set.";
}
void MKLDNNConvertNode::execute(mkldnn::stream strm) {
    // Element-wise precision conversion from the parent edge precision to the
    // child edge precision; buffers must hold the same number of elements.
    auto& srcMem = getParentEdgeAt(0)->getMemory();
    auto& dstMem = getChildEdgeAt(0)->getMemory();

    const size_t elemCount = srcMem.GetElementsCount();
    if (elemCount != dstMem.GetElementsCount())
        THROW_ERROR << "Input and output buffers have different elements count";

    cpu_convert(srcMem.GetPtr(),
                dstMem.GetPtr(),
                getParentEdgeAt(0)->getDesc().getPrecision(),
                getChildEdgeAt(0)->getDesc().getPrecision(),
                elemCount);
}
// The node is considered successfully created once its type resolved to Convert.
bool MKLDNNConvertNode::created() const {
    return Convert == getType();
}
REG_MKLDNN_PRIM_FOR(MKLDNNConvertNode, Convert);

View File

@@ -0,0 +1,45 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ie_common.h>
#include <mkldnn_node.h>
#include <string>
#include <vector>
namespace MKLDNNPlugin {
// Performs element-wise precision conversion of its single input blob into the
// output precision (the actual work is done by cpu_convert in execute()).
// Besides serving the Convert layer itself, the node can be inserted by
// MKLDNNGraph as an auxiliary operation when no reorder primitive supports the
// required data type conversion.
class MKLDNNConvertNode : public MKLDNNNode {
public:
MKLDNNConvertNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
~MKLDNNConvertNode() override = default;
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
// The conversion always writes into a separate output buffer, so in-place
// execution is never allowed.
bool canBeInPlace() const override {
return false;
}
// This is the interface extension designed to provide inp and output tensor descriptors without the CNNLayer.
// In that case the Convert node is instantiated with default CNNLayer and inp/out tensor descriptors are set via this method.
// This is useful if the Convert node is added to the graph as an auxiliary operation at the MKLDNNGraph
// initialization stage.
void setDescs(const InferenceEngine::TensorDesc& input, const InferenceEngine::TensorDesc& output) {
this->input.reset(new InferenceEngine::TensorDesc(input));
this->output.reset(new InferenceEngine::TensorDesc(output));
}
std::shared_ptr<const InferenceEngine::TensorDesc> getInput() const { return input; }
std::shared_ptr<const InferenceEngine::TensorDesc> getOutput() const { return output; }
private:
// Optional descriptors set via setDescs(); when non-null they take priority
// over the CNNLayer information when building the supported configurations.
std::shared_ptr<InferenceEngine::TensorDesc> input;
std::shared_ptr<InferenceEngine::TensorDesc> output;
};
} // namespace MKLDNNPlugin

View File

@@ -0,0 +1,93 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils/cpu_test_utils.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
namespace LayerTestsDefinitions {
class AddConvertToReorderTest : virtual public LayerTestsUtils::LayerTestsCommon {
public:
    // Builds a Gather graph whose indices constant is created with the given
    // precision, so the plugin has to insert either a Reorder or a Convert
    // node to bring the indices to a precision the Gather kernel supports.
    void BuildGraph(const ngraph::element::Type& secondInpType) {
        secondConstantType = secondInpType;

        const int gatherAxis = 2;
        const std::vector<int> indicesData = {0, 3, 2, 1};
        const std::vector<size_t> idxShape = {2, 2};
        const std::vector<size_t> dataShape = {10, 20, 30, 40};

        InferenceEngine::Precision netPrecision = inPrc = outPrc = Precision::FP32;
        targetDevice = CommonTestUtils::DEVICE_CPU;

        ASSERT_EQ(ngraph::shape_size(idxShape), indicesData.size())
                      << "Indices vector size and provided indices shape doesn't fit each other";

        const auto ngPrecision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
        auto inputs = ngraph::builder::makeParams(ngPrecision, {dataShape});
        auto inputOuts = ngraph::helpers::convert2OutputVector(
                ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(inputs));

        auto idxConst = ngraph::opset3::Constant::create(secondConstantType, ngraph::Shape(idxShape), indicesData);
        auto axisConst = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape({}), {gatherAxis});
        auto gatherOp = std::make_shared<ngraph::opset3::Gather>(inputOuts[0], idxConst, axisConst);

        ngraph::ResultVector outputs{std::make_shared<ngraph::opset3::Result>(gatherOp)};
        function = std::make_shared<ngraph::Function>(outputs, inputs, "gather");
    }

    // The reference is evaluated with i64 indices, so the second input
    // constant is converted from the tested precision before running it.
    std::vector<std::vector<std::uint8_t>> CalculateRefs() override {
        if (secondConstantType == ngraph::element::Type_t::bf16) {
            ngraph::pass::ConvertPrecision<ngraph::element::Type_t::bf16, ngraph::element::Type_t::i64>().run_on_function(function);
        } else if (secondConstantType == ngraph::element::Type_t::i8) {
            ngraph::pass::ConvertPrecision<ngraph::element::Type_t::i8, ngraph::element::Type_t::i64>().run_on_function(function);
        }
        return LayerTestsUtils::LayerTestsCommon::CalculateRefs();
    }

private:
    ngraph::element::Type secondConstantType;  // precision of the Gather indices constant
};
namespace {
/* Test insertion of the Convert layer if there is no suitable reorder.
Parameter[FP32] Constant[BF16]
\ /
\ /
\ Convert[I32] (Is inserted by the MKLDNNGraph)
\ /
Gather[FP32]
|
|
Output[FP32]
*/
TEST_F(AddConvertToReorderTest, smoke_TestAddConvert_CPU) {
// BF16 indices: there is no reorder supporting this conversion, so the graph
// must insert a Convert node (and no Reorder) in front of the Gather input.
BuildGraph(ngraph::element::bf16);
Run();
CheckNodeOfTypeCount(executableNetwork, "Convert", 1);
CheckNodeOfTypeCount(executableNetwork, "Reorder", 0);
}
/* Test insertion of the Reorder layer if there is one.
Parameter[FP32] Constant[I8]
\ /
\ /
\ Reorder[I32] (Is inserted by the MKLDNNGraph)
\ /
Gather[FP32]
|
|
Output[FP32]
*/
TEST_F(AddConvertToReorderTest, smoke_TestAddReorder_CPU) {
// I8 indices: a suitable reorder exists, so the graph must use a Reorder
// node and no auxiliary Convert is expected.
BuildGraph(ngraph::element::i8);
Run();
CheckNodeOfTypeCount(executableNetwork, "Convert", 0);
CheckNodeOfTypeCount(executableNetwork, "Reorder", 1);
}
} // namespace
} // namespace LayerTestsDefinitions

View File

@@ -228,6 +228,27 @@ auto adjustBlockedFormatByIsa = [](std::vector<cpu_memory_format_t>& formats) {
return paramsVector;
}
// Walks the runtime (execution) graph of the compiled network and asserts that
// exactly `expectedCount` nodes carry the given execution-layer type in their
// runtime info.
void CheckNodeOfTypeCount(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType, size_t expectedCount) {
    auto execGraph = execNet.GetExecGraphInfo();
    auto fn = execGraph.getFunction();
    ASSERT_NE(nullptr, fn);

    size_t matched = 0;
    for (const auto &op : fn->get_ops()) {
        const auto &rtInfo = op->get_rt_info();
        // Every node of the execution graph is expected to expose LAYER_TYPE
        // as a string variant in its runtime info.
        auto typeIt = rtInfo.find(ExecGraphInfoSerialization::LAYER_TYPE);
        IE_ASSERT(rtInfo.end() != typeIt);
        auto typeValue = std::dynamic_pointer_cast<ngraph::VariantImpl<std::string>>(typeIt->second);
        IE_ASSERT(nullptr != typeValue);
        if (typeValue->get() == nodeType)
            ++matched;
    }
    ASSERT_EQ(expectedCount, matched) << "Unexpected count of the node type '" << nodeType << "' ";
}
std::vector<CPUSpecificParams> filterCPUInfoForDevice(std::vector<CPUSpecificParams> CPUParams) {
std::vector<CPUSpecificParams> resCPUParams;
const int selectedTypeIndex = 3;

View File

@@ -114,4 +114,5 @@ const auto conv_avx512_2D_1x1 = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_av
// utility functions
std::vector<CPUSpecificParams> filterCPUSpecificParams(std::vector<CPUSpecificParams>& paramsVector);
std::vector<CPUSpecificParams> filterCPUInfoForDevice(std::vector<CPUSpecificParams> CPUParams);
void CheckNodeOfTypeCount(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType, size_t expectedCount);
} // namespace CPUTestUtils