[CPU] added MergePermuteAndReorder optimization + added test (#2519)

This commit is contained in:
Anton Voronov
2020-11-17 09:04:49 +03:00
committed by GitHub
parent 98e8aa8128
commit 6467a9f5b8
11 changed files with 373 additions and 98 deletions

View File

@@ -124,3 +124,16 @@ bool MKLDNNExtensionUtils::initTensorsAreEqual(const InferenceEngine::TensorDesc
return !(in1Block.getOffsetPadding() != in2Block.getOffsetPadding() && return !(in1Block.getOffsetPadding() != in2Block.getOffsetPadding() &&
in1Block.getOffsetPadding() != uninitNum && in2Block.getOffsetPadding() != uninitNum); in1Block.getOffsetPadding() != uninitNum && in2Block.getOffsetPadding() != uninitNum);
} }
// Builds a human-readable "<inArgs>_<outArgs>" string describing what a Reorder between
// parentDesc and childDesc would change: the precision pair is emitted only when the
// precisions differ, and the memory-format pair only when the formats differ.
// Used to compose unique Reorder layer names.
std::string MKLDNNExtensionUtils::getReorderArgs(const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) {
    std::string inArgs;
    std::string outArgs;
    // Appends a token, separating it from any previously appended token with '_'.
    auto appendToken = [](std::string &args, const std::string &token) {
        if (!args.empty())
            args += "_";
        args += token;
    };
    if (parentDesc.getPrecision() != childDesc.getPrecision()) {
        appendToken(inArgs, std::string(parentDesc.getPrecision().name()));
        appendToken(outArgs, std::string(childDesc.getPrecision().name()));
    }
    const auto parentFormat = MKLDNNMemoryDesc(parentDesc).getFormat();
    const auto childFormat = MKLDNNMemoryDesc(childDesc).getFormat();
    if (parentFormat != childFormat) {
        appendToken(inArgs, MKLDNNMemory::formatToString(parentFormat));
        appendToken(outArgs, MKLDNNMemory::formatToString(childFormat));
    }
    return inArgs + "_" + outArgs;
}

View File

@@ -22,6 +22,7 @@ public:
static InferenceEngine::Precision DataTypeToIEPrecision(mkldnn::memory::data_type dataType); static InferenceEngine::Precision DataTypeToIEPrecision(mkldnn::memory::data_type dataType);
static InferenceEngine::TensorDesc getUninitTensorDesc(const InferenceEngine::TensorDesc& desc); static InferenceEngine::TensorDesc getUninitTensorDesc(const InferenceEngine::TensorDesc& desc);
static bool initTensorsAreEqual(const InferenceEngine::TensorDesc &desc1, const InferenceEngine::TensorDesc &desc2); static bool initTensorsAreEqual(const InferenceEngine::TensorDesc &desc1, const InferenceEngine::TensorDesc &desc2);
static std::string getReorderArgs(const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc);
}; };
} // namespace MKLDNNPlugin } // namespace MKLDNNPlugin

View File

@@ -463,18 +463,6 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() {
void MKLDNNGraph::InitEdges() { void MKLDNNGraph::InitEdges() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges"); OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges");
auto reorderArgs = [](const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) {
std::string inArgs, outArgs;
if (parentDesc.getPrecision() != childDesc.getPrecision()) {
inArgs += (inArgs.empty() ? "" : "_") + std::string(parentDesc.getPrecision().name());
outArgs += (outArgs.empty() ? "" : "_") + std::string(childDesc.getPrecision().name());
}
if (MKLDNNMemoryDesc(parentDesc).getFormat() != MKLDNNMemoryDesc(childDesc).getFormat()) {
inArgs += (inArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(parentDesc).getFormat());
outArgs += (outArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(childDesc).getFormat());
}
return inArgs + "_" + outArgs;
};
size_t numberOfEdges = graphEdges.size(); size_t numberOfEdges = graphEdges.size();
std::unordered_set<std::string> uniqueLayerNames; std::unordered_set<std::string> uniqueLayerNames;
@@ -487,8 +475,8 @@ void MKLDNNGraph::InitEdges() {
#if defined (COMPILED_CPU_MKLDNN_REORDER_NODE) #if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
auto &edge = graphEdges[i]; auto &edge = graphEdges[i];
std::string basicLayerName = edge->getParent()->getName() + "_" + std::string basicLayerName = edge->getParent()->getName() + "_" +
reorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" + MKLDNNExtensionUtils::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" +
edge->getChild()->getName(); edge->getChild()->getName();
std::string layerName = basicLayerName; std::string layerName = basicLayerName;
int idx = 0; int idx = 0;
while (uniqueLayerNames.find(layerName) != uniqueLayerNames.end()) { while (uniqueLayerNames.find(layerName) != uniqueLayerNames.end()) {
@@ -496,43 +484,7 @@ void MKLDNNGraph::InitEdges() {
layerName = basicLayerName + "_" + std::to_string(idx); layerName = basicLayerName + "_" + std::to_string(idx);
} }
uniqueLayerNames.insert(layerName); uniqueLayerNames.insert(layerName);
CNNLayerPtr layer(new CNNLayer({layerName, InsertReorder(edge, layerName, edge->getInputDesc(), edge->getOutputDesc());
"Reorder",
edge->getInputDesc().getPrecision()}));
MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, getEngine(), weightsCache));
auto *reorderPtr = dynamic_cast<MKLDNNReorderNode *>(newReorder.get());
if (reorderPtr) {
reorderPtr->setDescs(edge->getInputDesc(), edge->getOutputDesc());
}
auto oIndex = edge->getOutputNum();
auto iIndex = edge->getInputNum();
if (iIndex < 0 || oIndex < 0)
THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
<< edge->getParent()->getName() << " and "
<< edge->getChild()->getName() << ".";
edge->drop();
MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));
// Add edge for beforeNode
beforeNode->getChild()->parentEdges.push_back(beforeNode);
edge->getParent()->childEdges.push_back(beforeNode);
// Add edge for afterNode
afterNode->getParent()->childEdges.push_back(afterNode);
edge->getChild()->parentEdges.push_back(afterNode);
newReorder->getSupportedDescriptors();
newReorder->initSupportedPrimitiveDescriptors();
newReorder->selectOptimalPrimitiveDescriptor();
graphEdges.push_back(beforeNode);
graphEdges.push_back(afterNode);
graphNodes.push_back(newReorder);
graphEdges.erase(graphEdges.begin() + i); graphEdges.erase(graphEdges.begin() + i);
i--; i--;
numberOfEdges--; numberOfEdges--;
@@ -1131,6 +1083,57 @@ void MKLDNNGraph::RemoveDroppedEdges() {
} }
} }
// Inserts a Reorder node on the given edge: the edge Parent->Child is dropped and replaced
// with Parent->Reorder->Child. The Reorder rearranges memory from inDesc to outDesc
// (optionally applying scales); when isOptimized is true it only reinterprets the memory
// descriptor without physically moving any data.
void MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const TensorDesc& inDesc, const TensorDesc& outDesc,
                                bool isOptimized, InferenceEngine::Blob::Ptr scales) {
    CNNLayerPtr layer(new CNNLayer({layerName,
                                    "Reorder",
                                    inDesc.getPrecision()}));
    MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, getEngine(), weightsCache));
    auto *reorderPtr = dynamic_cast<MKLDNNReorderNode *>(newReorder.get());
    // The node was constructed as MKLDNNReorderNode just above, so a failed cast can only be
    // a programmer error. Fail loudly instead of dereferencing a null pointer below
    // (the original code called setOptimized() on reorderPtr without a null-check).
    if (reorderPtr == nullptr)
        THROW_IE_EXCEPTION << "Cannot create reorder layer " << layerName << ": node is not a reorder.";
    reorderPtr->setDescs(inDesc, outDesc);
    reorderPtr->_scales = scales;
    // Must be set before initSupportedPrimitiveDescriptors(), which configures an optimized
    // reorder as in-place.
    reorderPtr->setOptimized(isOptimized);

    auto oIndex = edge->getOutputNum();
    auto iIndex = edge->getInputNum();
    if (iIndex < 0 || oIndex < 0)
        THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
                           << edge->getParent()->getName() << " and "
                           << edge->getChild()->getName() << ".";

    edge->drop();

    MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
    MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));

    // Add edge for beforeNode
    beforeNode->getChild()->parentEdges.push_back(beforeNode);
    edge->getParent()->childEdges.push_back(beforeNode);

    // Add edge for afterNode
    afterNode->getParent()->childEdges.push_back(afterNode);
    edge->getChild()->parentEdges.push_back(afterNode);

    newReorder->getSupportedDescriptors();
    newReorder->initSupportedPrimitiveDescriptors();
    newReorder->selectOptimalPrimitiveDescriptor();

    graphEdges.push_back(beforeNode);
    graphEdges.push_back(afterNode);

    // Using the method MKLDNNEdge::getDesc() we can check that input and output tensor descriptors are equal.
    // Due to the specificity of MKLDNNGraphOptimizer::MergePermuteAndReorder() that uses the isOptimized flag,
    // we should not do these checks for optimized reorders.
    if (!isOptimized) {
        beforeNode->getDesc();
        afterNode->getDesc();
    }

    graphNodes.push_back(newReorder);
}
void MKLDNNGraph::dumpToDotFile(std::string file) const { void MKLDNNGraph::dumpToDotFile(std::string file) const {
std::ofstream dot; std::ofstream dot;
dot.open(file); dot.open(file);

View File

@@ -92,6 +92,28 @@ public:
void DropNode(const MKLDNNNodePtr& node); void DropNode(const MKLDNNNodePtr& node);
void DropDWConvNode(const MKLDNNNodePtr& node); void DropDWConvNode(const MKLDNNNodePtr& node);
/**
* @brief Insert Reorder node at the edge-specified location.
* The Reorder node must be inserted in case when there are inplace conflicts or the input and output tensor descriptors do not match.
* The Reorder node rearranges the elements in memory according to inDesc and outDesc, or reinterprets memory descriptor without
* rearrangement of elements if isOptimized is true.
* @param edge
* pointer to the edge in the graph where Reorder node will be inserted
* @param layerName
* Reorder layer name
* @param inDesc
* input tensor descriptor
* @param outDesc
* output tensor descriptor
* @param isOptimized
* optimization flag; if isOptimized is true then Reorder node does nothing
* @param scales
* pointer to the blob containing scales
* @return none.
*/
void InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const InferenceEngine::TensorDesc& inDesc, const InferenceEngine::TensorDesc& outDesc,
bool isOptimized = false, InferenceEngine::Blob::Ptr scales = nullptr);
InferenceEngine::CNNNetwork dump() const; InferenceEngine::CNNNetwork dump() const;
template<typename NET> template<typename NET>

View File

@@ -14,6 +14,7 @@
#include "nodes/mkldnn_bin_conv_node.h" #include "nodes/mkldnn_bin_conv_node.h"
#include "nodes/mkldnn_quantize_node.h" #include "nodes/mkldnn_quantize_node.h"
#include "nodes/mkldnn_mvn_node.h" #include "nodes/mkldnn_mvn_node.h"
#include <nodes/mkldnn_permute_node.h>
#include "nodes/mkldnn_resample_node.h" #include "nodes/mkldnn_resample_node.h"
#include "nodes/mkldnn_interpolate_node.h" #include "nodes/mkldnn_interpolate_node.h"
#include "nodes/mkldnn_input_node.h" #include "nodes/mkldnn_input_node.h"
@@ -151,6 +152,9 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
graph.RemoveDroppedNodes(); graph.RemoveDroppedNodes();
#endif #endif
MergePermuteAndReorder(graph);
graph.RemoveDroppedNodes();
graph.RemoveDroppedEdges(); graph.RemoveDroppedEdges();
} }
@@ -1812,8 +1816,9 @@ void MKLDNNGraphOptimizer::RemoveIdentityOperator(MKLDNNGraph &graph) {
#if defined (COMPILED_CPU_MKLDNN_REORDER_NODE) #if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) {
std::set<MKLDNNNodePtr> processed; std::set<MKLDNNNodePtr> processed;
std::vector<MKLDNNNodePtr> newNodes; int graphNodesSize = graph.GetNodes().size();
for (MKLDNNNodePtr& node : graph.GetNodes()) { for (int i = 0; i < graphNodesSize; i++) {
MKLDNNNodePtr& node = graph.GetNodes()[i];
if (processed.find(node) == processed.end() && node->getType() == Reorder if (processed.find(node) == processed.end() && node->getType() == Reorder
&& node->getChildEdges().size() == 1 && node->getChildEdges().size() == 1
&& node->getChildEdgeAt(0)->getChild()->getType() == Reorder ) { && node->getChildEdgeAt(0)->getChild()->getType() == Reorder ) {
@@ -1855,54 +1860,10 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) {
std::string layerName = edge->getParent()->getName() + "_ScaleReorder_" + edge->getChild()->getName(); std::string layerName = edge->getParent()->getName() + "_ScaleReorder_" + edge->getChild()->getName();
CNNLayerPtr layer(new CNNLayer({layerName, graph.InsertReorder(edge, layerName, n->getInput(), nn->getOutput(), false, scales);
"Reorder",
n->getInput().getPrecision()}));
MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, graph.getEngine(), graph.weightsCache));
auto *reorderPtr = dynamic_cast<MKLDNNReorderNode *>(newReorder.get());
if (reorderPtr) {
reorderPtr->setDescs(n->getInput(), nn->getOutput());
reorderPtr->_scales = scales;
}
// new !!!
auto oIndex = edge->getOutputNum();
auto iIndex = edge->getInputNum();
if (iIndex < 0 || oIndex < 0)
THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
<< edge->getParent()->getName() << " and "
<< edge->getChild()->getName() << ".";
edge->drop();
MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));
// Add edge for beforeNode
beforeNode->getChild()->parentEdges.push_back(beforeNode);
edge->getParent()->childEdges.push_back(beforeNode);
// Add edge for afterNode
afterNode->getParent()->childEdges.push_back(afterNode);
edge->getChild()->parentEdges.push_back(afterNode);
newReorder->getSupportedDescriptors();
newReorder->initSupportedPrimitiveDescriptors();
newReorder->selectOptimalPrimitiveDescriptor();
graph.GetEdges().push_back(beforeNode);
graph.GetEdges().push_back(afterNode);
// Just to check accordance
afterNode->getDesc();
beforeNode->getDesc();
newNodes.push_back(newReorder);
graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), edge), graph.GetEdges().end()); graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), edge), graph.GetEdges().end());
} }
} }
for (MKLDNNNodePtr& node : newNodes) {
graph.GetNodes().push_back(node);
}
} }
void MKLDNNGraphOptimizer::DropConvertReorder(MKLDNNGraph& graph) { void MKLDNNGraphOptimizer::DropConvertReorder(MKLDNNGraph& graph) {
@@ -2247,3 +2208,142 @@ void MKLDNNGraphOptimizer::FuseScaleShiftAndQuantize(MKLDNNGraph &graph) {
} }
} }
} }
// Merges a Permute node followed by a Reorder node into a single optimized (zero-copy)
// Reorder when the two cancel each other out, i.e. the pair forms an identity permutation
// of the elements in memory.
void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) {
    auto& graphNodes = graph.GetNodes();

    auto isSuitableParentNode = [](MKLDNNNodePtr node) {
        return node->getType() == Permute && node->getChildEdges().size() == 1;
    };

    auto isSuitableChildNode = [](MKLDNNNodePtr node) {
        return node->getType() == Reorder && node->getChildEdges().size() == 1;
    };

    // Method checkAscendingSummaryOrder() checks that after the sequential execution of Permute and Reorder nodes,
    // the order of the elements in the memory will not change. In other words, that Permute+Reorder is an identity permutation.
    auto checkAscendingSummaryOrder = [](std::shared_ptr<MKLDNNNode> &parentNode, std::shared_ptr<MKLDNNNode> &childNode) -> bool {
        auto* permuteNode = dynamic_cast<MKLDNNPermuteNode*>(parentNode.get());
        auto* reorderNode = dynamic_cast<MKLDNNReorderNode*>(childNode.get());
        if (!permuteNode || !reorderNode) {
            return false;
        }

        auto& permuteOrder = permuteNode->getOrder();
        auto& layoutOrder = permuteNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder();
        auto& inOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getBlockingDesc().getOrder();
        auto& outOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder();

        if (permuteOrder.size() != layoutOrder.size() || layoutOrder.size() != inOrder.size() || inOrder.size() != outOrder.size()) {
            return false;
        }

        // revLayoutOrder - reverse permutation for layoutOrder
        auto revLayoutOrder = SizeVector(layoutOrder.size());
        for (int i = 0; i < revLayoutOrder.size(); i++) {
            revLayoutOrder[layoutOrder[i]] = i;
        }

        // newPermuteOrder - Permute layout-aware permutation
        auto newPermuteOrder = SizeVector(permuteOrder.size());
        for (int i = 0; i < newPermuteOrder.size(); i++) {
            newPermuteOrder[i] = layoutOrder[permuteOrder[revLayoutOrder[i]]];
        }

        // reorderOrder - Reorder layout-aware permutation
        auto reorderOrder = SizeVector(outOrder.size());
        for (int i = 0; i < reorderOrder.size(); i++) {
            for (int j = 0; j < reorderOrder.size(); j++) {
                if (outOrder[i] == inOrder[j]) {
                    reorderOrder[i] = j;
                    // The matching source axis is found; stop scanning
                    // (the original 'continue' kept iterating for no reason).
                    break;
                }
            }
        }

        // summaryOrder - resulting Permute+Reorder permutation
        auto summaryOrder = SizeVector(permuteOrder.size());
        for (int i = 0; i < summaryOrder.size(); i++) {
            summaryOrder[i] = reorderOrder[newPermuteOrder[i]];
        }

        // check that Permute+Reorder is the identity permutation
        for (int i = 0; i < summaryOrder.size(); i++) {
            if (summaryOrder[i] != i) {
                return false;
            }
        }

        return true;
    };

    // Permute and Reorder do opposite permutations to each other.
    // Example:
    //      chain [physical layout: NCHW, logical layout: NCHW] -> Permute(order=0312) -> [physical layout: NWCH, logical layout: NCHW] ->
    //      Reorder(nchw->nhwc) -> [physical layout: NCHW, logical layout: NHWC] can be replaced with Reorder(nchw->nhwc; isOptimized=true)
    //      which will just reinterpret the layout without a physical change of the memory.
    // Two cases are possible:
    //      1) inPrec == outPrec
    //          In this case, we replace the Permute+Reorder pattern with a new Reorder that does nothing.
    //      2) inPrec != outPrec
    //          As in the first case, we also replace the Permute+Reorder pattern with a new Reorder.
    //          Additionally, we insert another Reorder that performs the conversion from the input precision (inPrec)
    //          to the output precision (outPrec).
    auto mergePermuteAndReorder = [&](std::shared_ptr<MKLDNNNode>& parentNode, std::shared_ptr<MKLDNNNode>& childNode) {
        auto parentParentNode = parentNode->getParentEdgeAt(0)->getParent();
        auto childChildNode = childNode->getChildEdgeAt(0)->getChild();

        graph.DropNode(parentNode);
        graph.DropNode(childNode);

        auto inDesc = parentParentNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc;
        auto outDesc = childChildNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc;

        auto inPrec = inDesc.getPrecision();
        auto outPrec = outDesc.getPrecision();

        auto reorderInDesc = TensorDesc(inDesc);
        auto reorderOutDesc = TensorDesc(outDesc);
        // The optimized reorder only reinterprets the layout, so it must keep the input
        // precision; a possible precision conversion is handled by the second reorder (case 2).
        reorderOutDesc.setPrecision(inPrec);

        std::string reorderlayerName = parentParentNode->getName() + "_" +
                MKLDNNExtensionUtils::getReorderArgs(reorderInDesc, reorderOutDesc) + "_" + "fake";

        MKLDNNEdgePtr edge;
        for (auto &childEdge : parentParentNode->getChildEdges()) {
            if (childEdge.lock()->getChild() == childChildNode) {
                edge = childEdge.lock();
                break;
            }
        }
        // Guard against a null edge: after DropNode() the expected Parent->Child edge must
        // exist; passing a null edge into InsertReorder() would dereference a null pointer.
        if (!edge) {
            THROW_IE_EXCEPTION << "MergePermuteAndReorder cannot find the edge between nodes: "
                               << parentParentNode->getName() << " and " << childChildNode->getName() << ".";
        }

        graph.InsertReorder(edge, reorderlayerName, reorderInDesc, reorderOutDesc, true);

        // case 2
        if (inPrec != outPrec) {
            auto reorderNode = parentParentNode->getChildEdgeAt(0)->getChild();
            auto reorderInDesc2 = TensorDesc(reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc);
            auto reorderOutDesc2 = TensorDesc(childChildNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc);

            std::string reorderLayerName2 = reorderNode->getName() + "_" +
                    MKLDNNExtensionUtils::getReorderArgs(reorderInDesc2, reorderOutDesc2) + "_" + childChildNode->getName();

            graph.InsertReorder(reorderNode->getChildEdgeAt(0), reorderLayerName2, reorderInDesc2, reorderOutDesc2, false);
        }
    };

    for (int i = 0; i < graphNodes.size(); i++) {
        auto parentNode = graphNodes[i];
        if (!isSuitableParentNode(parentNode)) {
            continue;
        }
        auto childNode = parentNode->getChildEdgeAt(0)->getChild();
        if (!isSuitableChildNode(childNode)) {
            continue;
        }
        if (checkAscendingSummaryOrder(parentNode, childNode)) {
            mergePermuteAndReorder(parentNode, childNode);
        }
    }
}

View File

@@ -52,6 +52,7 @@ private:
void FuseEltwiseAndSimple(MKLDNNGraph &graph); void FuseEltwiseAndSimple(MKLDNNGraph &graph);
void FuseScaleShiftAndQuantize(MKLDNNGraph &graph); void FuseScaleShiftAndQuantize(MKLDNNGraph &graph);
void FuseClampAndQuantize(MKLDNNGraph &graph); void FuseClampAndQuantize(MKLDNNGraph &graph);
void MergePermuteAndReorder(MKLDNNGraph &graph);
bool IsOneOf(Type type, std::vector<Type> types); bool IsOneOf(Type type, std::vector<Type> types);
bool IsOneOf(EltwiseOpType alg, std::vector<EltwiseOpType> algs); bool IsOneOf(EltwiseOpType alg, std::vector<EltwiseOpType> algs);

View File

@@ -55,6 +55,10 @@ public:
return false; return false;
} }
// Read-only access to this Permute node's permutation order; used by
// MKLDNNGraphOptimizer::MergePermuteAndReorder to check whether Permute+Reorder
// form an identity permutation.
const InferenceEngine::SizeVector& getOrder() const {
return order;
}
private: private:
InferenceEngine::SizeVector order; InferenceEngine::SizeVector order;
InferenceEngine::Precision prec; InferenceEngine::Precision prec;

View File

@@ -46,6 +46,10 @@ void MKLDNNReorderNode::initSupportedPrimitiveDescriptors() {
config.inConfs[0].constant = false; config.inConfs[0].constant = false;
config.outConfs[0].inPlace = -1; config.outConfs[0].inPlace = -1;
config.outConfs[0].constant = false; config.outConfs[0].constant = false;
if (isOptimized) {
config.inConfs[0].inPlace = 0;
config.outConfs[0].inPlace = 0;
}
if (input.getLayout() != InferenceEngine::Layout::ANY && output.getLayout() != InferenceEngine::Layout::ANY) { if (input.getLayout() != InferenceEngine::Layout::ANY && output.getLayout() != InferenceEngine::Layout::ANY) {
config.inConfs[0].desc = input; config.inConfs[0].desc = input;
config.outConfs[0].desc = output; config.outConfs[0].desc = output;
@@ -71,6 +75,7 @@ void MKLDNNReorderNode::createPrimitive() {
if (getSelectedPrimitiveDescriptor() == nullptr) if (getSelectedPrimitiveDescriptor() == nullptr)
THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set."; THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set.";
if (!isOptimized)
createReorderPrimitive(srcMemPtr->GetDescriptor(), srcMemPtr->GetPrimitive().get_data_handle(), createReorderPrimitive(srcMemPtr->GetDescriptor(), srcMemPtr->GetPrimitive().get_data_handle(),
dstMemPtr->GetDescriptor(), dstMemPtr->GetPrimitive().get_data_handle()); dstMemPtr->GetDescriptor(), dstMemPtr->GetPrimitive().get_data_handle());
} }
@@ -169,6 +174,9 @@ bool MKLDNNReorderNode::created() const {
} }
void MKLDNNReorderNode::execute(mkldnn::stream strm) { void MKLDNNReorderNode::execute(mkldnn::stream strm) {
if (isOptimized)
return;
src_blocked->GetPrimitivePtr()->set_data_handle(getParentEdgeAt(0)->getMemory().GetPrimitive().get_data_handle()); src_blocked->GetPrimitivePtr()->set_data_handle(getParentEdgeAt(0)->getMemory().GetPrimitive().get_data_handle());
dst_blocked->GetPrimitivePtr()->set_data_handle(getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle()); dst_blocked->GetPrimitivePtr()->set_data_handle(getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle());

View File

@@ -29,6 +29,10 @@ public:
this->output = output; this->output = output;
} }
void setOptimized(bool isOptimized) {
this->isOptimized = isOptimized;
}
void setDynamicBatchLim(int lim) override; void setDynamicBatchLim(int lim) override;
bool canBeInPlace() const override { bool canBeInPlace() const override {
@@ -50,6 +54,8 @@ private:
MKLDNNMemoryPtr dst_blocked; MKLDNNMemoryPtr dst_blocked;
MKLDNNMemoryPtr src_blocked; MKLDNNMemoryPtr src_blocked;
bool isOptimized = false;
void createReorderPrimitive(const mkldnn::memory::desc &srcDesc, void* srcPtr, const mkldnn::memory::desc &dstDesc, void* dstPtr); void createReorderPrimitive(const mkldnn::memory::desc &srcDesc, void* srcPtr, const mkldnn::memory::desc &dstDesc, void* dstPtr);
}; };

View File

@@ -0,0 +1,35 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <tuple>
#include <vector>
#include <string>
#include "test_utils/cpu_test_utils.hpp"
#include "functional_test_utils/layer_test_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
using namespace CPUTestUtils;
namespace LayerTestsDefinitions {
// (input shape, input precision) parameter tuple for the Permute+Reorder fusing test.
using FusePermuteAndReorderParams = std::tuple<
InferenceEngine::SizeVector, // Input shape
InferenceEngine::Precision // Input precision
>;
// Subgraph test that builds a Transpose (Permute) with a forced CPU output layout and
// checks the executed graph afterwards; see the CompareWithRefs test body for the check.
class FusePermuteAndReorderTest : public testing::WithParamInterface<FusePermuteAndReorderParams>, public CPUTestsBase,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
// Builds a readable test-case name from the (shape, precision) parameters.
static std::string getTestCaseName(testing::TestParamInfo<FusePermuteAndReorderParams> obj);
protected:
void SetUp() override;
// NOTE(review): not referenced in the visible test sources — confirm it is used before relying on it.
std::string pluginTypeNode;
};
} // namespace LayerTestsDefinitions

View File

@@ -0,0 +1,82 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "subgraph_tests/include/fuse_permute_reorder.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
namespace LayerTestsDefinitions {
// Builds a readable test-case name of the form "IS=<shape>_Precision=<name>"
// from the (input shape, input precision) parameter tuple.
std::string FusePermuteAndReorderTest::getTestCaseName(testing::TestParamInfo<FusePermuteAndReorderParams> obj) {
    const auto& inputShape = std::get<0>(obj.param);
    const auto& inPrec = std::get<1>(obj.param);
    std::ostringstream name;
    name << "IS=" << CommonTestUtils::vec2str(inputShape) << "_"
         << "Precision=" << inPrec.name();
    return name.str();
}
// Builds the test graph: Parameter -> Transpose(Permute) -> Result, where the Transpose
// output is pinned to a channels-last CPU format (nhwc/ndhwc) via setCPUInfo, so the
// plugin has to place a Reorder right after the Permute -- the pattern under test.
void FusePermuteAndReorderTest::SetUp() {
targetDevice = CommonTestUtils::DEVICE_CPU;
SizeVector inputShape;
Precision inPrec;
std::tie(inputShape, inPrec) = this->GetParam();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
auto paramOuts = ngraph::helpers::convert2OutputVector(
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
// Channel-moving permutation: 0231 for 4D inputs, 02341 for 5D inputs.
auto order = inputShape.size() == 5 ? std::vector<int64_t>{0, 2, 3, 4, 1} : std::vector<int64_t>{0, 2, 3, 1};
// Matching channels-last memory format for the chosen rank.
auto memFmt = inputShape.size() == 5 ? ndhwc : nhwc;
auto constOrder = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order);
auto permute = std::make_shared<ngraph::opset5::Transpose>(paramOuts[0], constOrder);
// Force both input and output of the Transpose to the channels-last format.
permute->get_rt_info() = setCPUInfo({memFmt}, {memFmt}, {});
ngraph::ResultVector results{std::make_shared<ngraph::opset5::Result>(permute)};
function = std::make_shared<ngraph::Function>(results, params, "PermuteReorder");
}
// Runs inference, then inspects the executed graph: after the MergePermuteAndReorder
// optimization no node with LAYER_TYPE "Permute" should remain in the execution graph.
TEST_P(FusePermuteAndReorderTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
InferenceEngine::CNNNetwork execGraphInfo = executableNetwork.GetExecGraphInfo();
auto function = execGraphInfo.getFunction();
ASSERT_NE(nullptr, function);
bool permuteFound = false;
for (const auto &node : function->get_ops()) {
const auto & rtInfo = node->get_rt_info();
// Fetches a string attribute from the node's runtime info; asserts that it exists
// and that it is stored as a string variant.
auto getExecValue = [&rtInfo](const std::string & paramName) -> std::string {
auto it = rtInfo.find(paramName);
IE_ASSERT(rtInfo.end() != it);
auto value = std::dynamic_pointer_cast<ngraph::VariantImpl<std::string>>(it->second);
IE_ASSERT(nullptr != value);
return value->get();
};
if (getExecValue(ExecGraphInfoSerialization::LAYER_TYPE) == "Permute") {
permuteFound = true;
break;
}
}
// The Permute must have been merged into an optimized Reorder.
ASSERT_TRUE(!permuteFound);
}
// Test matrix: one 4D and one 5D input shape, each with the 8-bit precisions I8 and U8.
const auto fusePermuteAndReorderParams = ::testing::Combine(
::testing::Values(SizeVector{1, 2, 3, 4}, SizeVector{1, 2, 3, 4, 5}),
::testing::Values(Precision::I8, Precision::U8)
);
INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest, fusePermuteAndReorderParams, FusePermuteAndReorderTest::getTestCaseName);
} // namespace LayerTestsDefinitions