[CPU] added MergePermuteAndReorder optimization + added test (#2519)

This commit is contained in:
Anton Voronov
2020-11-17 09:04:49 +03:00
committed by GitHub
parent 98e8aa8128
commit 6467a9f5b8
11 changed files with 373 additions and 98 deletions

View File

@@ -124,3 +124,16 @@ bool MKLDNNExtensionUtils::initTensorsAreEqual(const InferenceEngine::TensorDesc
return !(in1Block.getOffsetPadding() != in2Block.getOffsetPadding() && return !(in1Block.getOffsetPadding() != in2Block.getOffsetPadding() &&
in1Block.getOffsetPadding() != uninitNum && in2Block.getOffsetPadding() != uninitNum); in1Block.getOffsetPadding() != uninitNum && in2Block.getOffsetPadding() != uninitNum);
} }
// Builds a human-readable "<inArgs>_<outArgs>" string describing what a Reorder between
// parentDesc and childDesc would change: the precision pair is emitted only when the
// precisions differ, and the memory-format pair only when the formats differ.
// Used to compose unique Reorder layer names.
std::string MKLDNNExtensionUtils::getReorderArgs(const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) {
    std::string inArgs;
    std::string outArgs;
    // Appends a token, separating it from any previously appended token with '_'.
    auto appendToken = [](std::string &args, const std::string &token) {
        if (!args.empty())
            args += "_";
        args += token;
    };
    if (parentDesc.getPrecision() != childDesc.getPrecision()) {
        appendToken(inArgs, std::string(parentDesc.getPrecision().name()));
        appendToken(outArgs, std::string(childDesc.getPrecision().name()));
    }
    const auto parentFormat = MKLDNNMemoryDesc(parentDesc).getFormat();
    const auto childFormat = MKLDNNMemoryDesc(childDesc).getFormat();
    if (parentFormat != childFormat) {
        appendToken(inArgs, MKLDNNMemory::formatToString(parentFormat));
        appendToken(outArgs, MKLDNNMemory::formatToString(childFormat));
    }
    return inArgs + "_" + outArgs;
}

View File

@@ -22,6 +22,7 @@ public:
static InferenceEngine::Precision DataTypeToIEPrecision(mkldnn::memory::data_type dataType); static InferenceEngine::Precision DataTypeToIEPrecision(mkldnn::memory::data_type dataType);
static InferenceEngine::TensorDesc getUninitTensorDesc(const InferenceEngine::TensorDesc& desc); static InferenceEngine::TensorDesc getUninitTensorDesc(const InferenceEngine::TensorDesc& desc);
static bool initTensorsAreEqual(const InferenceEngine::TensorDesc &desc1, const InferenceEngine::TensorDesc &desc2); static bool initTensorsAreEqual(const InferenceEngine::TensorDesc &desc1, const InferenceEngine::TensorDesc &desc2);
static std::string getReorderArgs(const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc);
}; };
} // namespace MKLDNNPlugin } // namespace MKLDNNPlugin

View File

@@ -463,18 +463,6 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() {
void MKLDNNGraph::InitEdges() { void MKLDNNGraph::InitEdges() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges"); OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges");
auto reorderArgs = [](const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) {
std::string inArgs, outArgs;
if (parentDesc.getPrecision() != childDesc.getPrecision()) {
inArgs += (inArgs.empty() ? "" : "_") + std::string(parentDesc.getPrecision().name());
outArgs += (outArgs.empty() ? "" : "_") + std::string(childDesc.getPrecision().name());
}
if (MKLDNNMemoryDesc(parentDesc).getFormat() != MKLDNNMemoryDesc(childDesc).getFormat()) {
inArgs += (inArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(parentDesc).getFormat());
outArgs += (outArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(childDesc).getFormat());
}
return inArgs + "_" + outArgs;
};
size_t numberOfEdges = graphEdges.size(); size_t numberOfEdges = graphEdges.size();
std::unordered_set<std::string> uniqueLayerNames; std::unordered_set<std::string> uniqueLayerNames;
@@ -487,8 +475,8 @@ void MKLDNNGraph::InitEdges() {
#if defined (COMPILED_CPU_MKLDNN_REORDER_NODE) #if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
auto &edge = graphEdges[i]; auto &edge = graphEdges[i];
std::string basicLayerName = edge->getParent()->getName() + "_" + std::string basicLayerName = edge->getParent()->getName() + "_" +
reorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" + MKLDNNExtensionUtils::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" +
edge->getChild()->getName(); edge->getChild()->getName();
std::string layerName = basicLayerName; std::string layerName = basicLayerName;
int idx = 0; int idx = 0;
while (uniqueLayerNames.find(layerName) != uniqueLayerNames.end()) { while (uniqueLayerNames.find(layerName) != uniqueLayerNames.end()) {
@@ -496,43 +484,7 @@ void MKLDNNGraph::InitEdges() {
layerName = basicLayerName + "_" + std::to_string(idx); layerName = basicLayerName + "_" + std::to_string(idx);
} }
uniqueLayerNames.insert(layerName); uniqueLayerNames.insert(layerName);
CNNLayerPtr layer(new CNNLayer({layerName, InsertReorder(edge, layerName, edge->getInputDesc(), edge->getOutputDesc());
"Reorder",
edge->getInputDesc().getPrecision()}));
MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, getEngine(), weightsCache));
auto *reorderPtr = dynamic_cast<MKLDNNReorderNode *>(newReorder.get());
if (reorderPtr) {
reorderPtr->setDescs(edge->getInputDesc(), edge->getOutputDesc());
}
auto oIndex = edge->getOutputNum();
auto iIndex = edge->getInputNum();
if (iIndex < 0 || oIndex < 0)
THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
<< edge->getParent()->getName() << " and "
<< edge->getChild()->getName() << ".";
edge->drop();
MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));
// Add edge for beforeNode
beforeNode->getChild()->parentEdges.push_back(beforeNode);
edge->getParent()->childEdges.push_back(beforeNode);
// Add edge for afterNode
afterNode->getParent()->childEdges.push_back(afterNode);
edge->getChild()->parentEdges.push_back(afterNode);
newReorder->getSupportedDescriptors();
newReorder->initSupportedPrimitiveDescriptors();
newReorder->selectOptimalPrimitiveDescriptor();
graphEdges.push_back(beforeNode);
graphEdges.push_back(afterNode);
graphNodes.push_back(newReorder);
graphEdges.erase(graphEdges.begin() + i); graphEdges.erase(graphEdges.begin() + i);
i--; i--;
numberOfEdges--; numberOfEdges--;
@@ -1131,6 +1083,57 @@ void MKLDNNGraph::RemoveDroppedEdges() {
} }
} }
// Inserts a Reorder node on the given edge: the edge Parent->Child is dropped and replaced
// with Parent->Reorder->Child. The Reorder rearranges memory from inDesc to outDesc
// (optionally applying scales); when isOptimized is true it only reinterprets the memory
// descriptor without physically moving any data.
void MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const TensorDesc& inDesc, const TensorDesc& outDesc,
                                bool isOptimized, InferenceEngine::Blob::Ptr scales) {
    CNNLayerPtr layer(new CNNLayer({layerName,
                                    "Reorder",
                                    inDesc.getPrecision()}));
    MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, getEngine(), weightsCache));
    auto *reorderPtr = dynamic_cast<MKLDNNReorderNode *>(newReorder.get());
    // The node was constructed as MKLDNNReorderNode just above, so a failed cast can only be
    // a programmer error. Fail loudly instead of dereferencing a null pointer below
    // (the original code called setOptimized() on reorderPtr without a null-check).
    if (reorderPtr == nullptr)
        THROW_IE_EXCEPTION << "Cannot create reorder layer " << layerName << ": node is not a reorder.";
    reorderPtr->setDescs(inDesc, outDesc);
    reorderPtr->_scales = scales;
    // Must be set before initSupportedPrimitiveDescriptors(), which configures an optimized
    // reorder as in-place.
    reorderPtr->setOptimized(isOptimized);

    auto oIndex = edge->getOutputNum();
    auto iIndex = edge->getInputNum();
    if (iIndex < 0 || oIndex < 0)
        THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
                           << edge->getParent()->getName() << " and "
                           << edge->getChild()->getName() << ".";

    edge->drop();

    MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
    MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));

    // Add edge for beforeNode
    beforeNode->getChild()->parentEdges.push_back(beforeNode);
    edge->getParent()->childEdges.push_back(beforeNode);

    // Add edge for afterNode
    afterNode->getParent()->childEdges.push_back(afterNode);
    edge->getChild()->parentEdges.push_back(afterNode);

    newReorder->getSupportedDescriptors();
    newReorder->initSupportedPrimitiveDescriptors();
    newReorder->selectOptimalPrimitiveDescriptor();

    graphEdges.push_back(beforeNode);
    graphEdges.push_back(afterNode);

    // Using the method MKLDNNEdge::getDesc() we can check that input and output tensor descriptors are equal.
    // Due to the specificity of MKLDNNGraphOptimizer::MergePermuteAndReorder() that uses the isOptimized flag,
    // we should not do these checks for optimized reorders.
    if (!isOptimized) {
        beforeNode->getDesc();
        afterNode->getDesc();
    }

    graphNodes.push_back(newReorder);
}
void MKLDNNGraph::dumpToDotFile(std::string file) const { void MKLDNNGraph::dumpToDotFile(std::string file) const {
std::ofstream dot; std::ofstream dot;
dot.open(file); dot.open(file);

View File

@@ -92,6 +92,28 @@ public:
void DropNode(const MKLDNNNodePtr& node); void DropNode(const MKLDNNNodePtr& node);
void DropDWConvNode(const MKLDNNNodePtr& node); void DropDWConvNode(const MKLDNNNodePtr& node);
/**
* @brief Insert Reorder node at the edge-specified location.
* The Reorder node must be inserted in case when there are inplace conflicts or the input and output tensor descriptors do not match.
* The Reorder node rearranges the elements in memory according to inDesc and outDesc, or reinterprets memory descriptor without
* rearrangement of elements if isOptimized is true.
* @param edge
* pointer to the edge in the graph where Reorder node will be inserted
* @param layerName
* Reorder layer name
* @param inDesc
* input tensor descriptor
* @param outDesc
* output tensor descriptor
* @param isOptimized
* optimization flag; if isOptimized is true then Reorder node does nothing
* @param scales
* pointer to the blob containing scales
* @return none.
*/
void InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const InferenceEngine::TensorDesc& inDesc, const InferenceEngine::TensorDesc& outDesc,
bool isOptimized = false, InferenceEngine::Blob::Ptr scales = nullptr);
InferenceEngine::CNNNetwork dump() const; InferenceEngine::CNNNetwork dump() const;
template<typename NET> template<typename NET>

View File

@@ -14,6 +14,7 @@
#include "nodes/mkldnn_bin_conv_node.h" #include "nodes/mkldnn_bin_conv_node.h"
#include "nodes/mkldnn_quantize_node.h" #include "nodes/mkldnn_quantize_node.h"
#include "nodes/mkldnn_mvn_node.h" #include "nodes/mkldnn_mvn_node.h"
#include <nodes/mkldnn_permute_node.h>
#include "nodes/mkldnn_resample_node.h" #include "nodes/mkldnn_resample_node.h"
#include "nodes/mkldnn_interpolate_node.h" #include "nodes/mkldnn_interpolate_node.h"
#include "nodes/mkldnn_input_node.h" #include "nodes/mkldnn_input_node.h"
@@ -151,6 +152,9 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
graph.RemoveDroppedNodes(); graph.RemoveDroppedNodes();
#endif #endif
MergePermuteAndReorder(graph);
graph.RemoveDroppedNodes();
graph.RemoveDroppedEdges(); graph.RemoveDroppedEdges();
} }
@@ -1812,8 +1816,9 @@ void MKLDNNGraphOptimizer::RemoveIdentityOperator(MKLDNNGraph &graph) {
#if defined (COMPILED_CPU_MKLDNN_REORDER_NODE) #if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) {
std::set<MKLDNNNodePtr> processed; std::set<MKLDNNNodePtr> processed;
std::vector<MKLDNNNodePtr> newNodes; int graphNodesSize = graph.GetNodes().size();
for (MKLDNNNodePtr& node : graph.GetNodes()) { for (int i = 0; i < graphNodesSize; i++) {
MKLDNNNodePtr& node = graph.GetNodes()[i];
if (processed.find(node) == processed.end() && node->getType() == Reorder if (processed.find(node) == processed.end() && node->getType() == Reorder
&& node->getChildEdges().size() == 1 && node->getChildEdges().size() == 1
&& node->getChildEdgeAt(0)->getChild()->getType() == Reorder ) { && node->getChildEdgeAt(0)->getChild()->getType() == Reorder ) {
@@ -1855,54 +1860,10 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) {
std::string layerName = edge->getParent()->getName() + "_ScaleReorder_" + edge->getChild()->getName(); std::string layerName = edge->getParent()->getName() + "_ScaleReorder_" + edge->getChild()->getName();
CNNLayerPtr layer(new CNNLayer({layerName, graph.InsertReorder(edge, layerName, n->getInput(), nn->getOutput(), false, scales);
"Reorder",
n->getInput().getPrecision()}));
MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, graph.getEngine(), graph.weightsCache));
auto *reorderPtr = dynamic_cast<MKLDNNReorderNode *>(newReorder.get());
if (reorderPtr) {
reorderPtr->setDescs(n->getInput(), nn->getOutput());
reorderPtr->_scales = scales;
}
// new !!!
auto oIndex = edge->getOutputNum();
auto iIndex = edge->getInputNum();
if (iIndex < 0 || oIndex < 0)
THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
<< edge->getParent()->getName() << " and "
<< edge->getChild()->getName() << ".";
edge->drop();
MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));
// Add edge for beforeNode
beforeNode->getChild()->parentEdges.push_back(beforeNode);
edge->getParent()->childEdges.push_back(beforeNode);
// Add edge for afterNode
afterNode->getParent()->childEdges.push_back(afterNode);
edge->getChild()->parentEdges.push_back(afterNode);
newReorder->getSupportedDescriptors();
newReorder->initSupportedPrimitiveDescriptors();
newReorder->selectOptimalPrimitiveDescriptor();
graph.GetEdges().push_back(beforeNode);
graph.GetEdges().push_back(afterNode);
// Just to check accordance
afterNode->getDesc();
beforeNode->getDesc();
newNodes.push_back(newReorder);
graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), edge), graph.GetEdges().end()); graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), edge), graph.GetEdges().end());
} }
} }
for (MKLDNNNodePtr& node : newNodes) {
graph.GetNodes().push_back(node);
}
} }
void MKLDNNGraphOptimizer::DropConvertReorder(MKLDNNGraph& graph) { void MKLDNNGraphOptimizer::DropConvertReorder(MKLDNNGraph& graph) {
@@ -2247,3 +2208,142 @@ void MKLDNNGraphOptimizer::FuseScaleShiftAndQuantize(MKLDNNGraph &graph) {
} }
} }
} }
// Merges a Permute node followed by a Reorder node into a single optimized (zero-copy)
// Reorder when the two cancel each other out, i.e. the pair forms an identity permutation
// of the elements in memory.
void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) {
    auto& graphNodes = graph.GetNodes();

    auto isSuitableParentNode = [](MKLDNNNodePtr node) {
        return node->getType() == Permute && node->getChildEdges().size() == 1;
    };

    auto isSuitableChildNode = [](MKLDNNNodePtr node) {
        return node->getType() == Reorder && node->getChildEdges().size() == 1;
    };

    // Method checkAscendingSummaryOrder() checks that after the sequential execution of Permute and Reorder nodes,
    // the order of the elements in the memory will not change. In other words, that Permute+Reorder is an identity permutation.
    auto checkAscendingSummaryOrder = [](std::shared_ptr<MKLDNNNode> &parentNode, std::shared_ptr<MKLDNNNode> &childNode) -> bool {
        auto* permuteNode = dynamic_cast<MKLDNNPermuteNode*>(parentNode.get());
        auto* reorderNode = dynamic_cast<MKLDNNReorderNode*>(childNode.get());
        if (!permuteNode || !reorderNode) {
            return false;
        }

        auto& permuteOrder = permuteNode->getOrder();
        auto& layoutOrder = permuteNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder();
        auto& inOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getBlockingDesc().getOrder();
        auto& outOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder();

        if (permuteOrder.size() != layoutOrder.size() || layoutOrder.size() != inOrder.size() || inOrder.size() != outOrder.size()) {
            return false;
        }

        // revLayoutOrder - reverse permutation for layoutOrder
        auto revLayoutOrder = SizeVector(layoutOrder.size());
        for (int i = 0; i < revLayoutOrder.size(); i++) {
            revLayoutOrder[layoutOrder[i]] = i;
        }

        // newPermuteOrder - Permute layout-aware permutation
        auto newPermuteOrder = SizeVector(permuteOrder.size());
        for (int i = 0; i < newPermuteOrder.size(); i++) {
            newPermuteOrder[i] = layoutOrder[permuteOrder[revLayoutOrder[i]]];
        }

        // reorderOrder - Reorder layout-aware permutation
        auto reorderOrder = SizeVector(outOrder.size());
        for (int i = 0; i < reorderOrder.size(); i++) {
            for (int j = 0; j < reorderOrder.size(); j++) {
                if (outOrder[i] == inOrder[j]) {
                    reorderOrder[i] = j;
                    // The matching source axis is found; stop scanning
                    // (the original 'continue' kept iterating for no reason).
                    break;
                }
            }
        }

        // summaryOrder - resulting Permute+Reorder permutation
        auto summaryOrder = SizeVector(permuteOrder.size());
        for (int i = 0; i < summaryOrder.size(); i++) {
            summaryOrder[i] = reorderOrder[newPermuteOrder[i]];
        }

        // check that Permute+Reorder is the identity permutation
        for (int i = 0; i < summaryOrder.size(); i++) {
            if (summaryOrder[i] != i) {
                return false;
            }
        }

        return true;
    };

    // Permute and Reorder do opposite permutations to each other.
    // Example:
    //      chain [physical layout: NCHW, logical layout: NCHW] -> Permute(order=0312) -> [physical layout: NWCH, logical layout: NCHW] ->
    //      Reorder(nchw->nhwc) -> [physical layout: NCHW, logical layout: NHWC] can be replaced with Reorder(nchw->nhwc; isOptimized=true)
    //      which will just reinterpret the layout without a physical change of the memory.
    // Two cases are possible:
    //      1) inPrec == outPrec
    //          In this case, we replace the Permute+Reorder pattern with a new Reorder that does nothing.
    //      2) inPrec != outPrec
    //          As in the first case, we also replace the Permute+Reorder pattern with a new Reorder.
    //          Additionally, we insert another Reorder that performs the conversion from the input precision (inPrec)
    //          to the output precision (outPrec).
    auto mergePermuteAndReorder = [&](std::shared_ptr<MKLDNNNode>& parentNode, std::shared_ptr<MKLDNNNode>& childNode) {
        auto parentParentNode = parentNode->getParentEdgeAt(0)->getParent();
        auto childChildNode = childNode->getChildEdgeAt(0)->getChild();

        graph.DropNode(parentNode);
        graph.DropNode(childNode);

        auto inDesc = parentParentNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc;
        auto outDesc = childChildNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc;

        auto inPrec = inDesc.getPrecision();
        auto outPrec = outDesc.getPrecision();

        auto reorderInDesc = TensorDesc(inDesc);
        auto reorderOutDesc = TensorDesc(outDesc);
        // The optimized reorder only reinterprets the layout, so it must keep the input
        // precision; a possible precision conversion is handled by the second reorder (case 2).
        reorderOutDesc.setPrecision(inPrec);

        std::string reorderlayerName = parentParentNode->getName() + "_" +
                MKLDNNExtensionUtils::getReorderArgs(reorderInDesc, reorderOutDesc) + "_" + "fake";

        MKLDNNEdgePtr edge;
        for (auto &childEdge : parentParentNode->getChildEdges()) {
            if (childEdge.lock()->getChild() == childChildNode) {
                edge = childEdge.lock();
                break;
            }
        }
        // Guard against a null edge: after DropNode() the expected Parent->Child edge must
        // exist; passing a null edge into InsertReorder() would dereference a null pointer.
        if (!edge) {
            THROW_IE_EXCEPTION << "MergePermuteAndReorder cannot find the edge between nodes: "
                               << parentParentNode->getName() << " and " << childChildNode->getName() << ".";
        }

        graph.InsertReorder(edge, reorderlayerName, reorderInDesc, reorderOutDesc, true);

        // case 2
        if (inPrec != outPrec) {
            auto reorderNode = parentParentNode->getChildEdgeAt(0)->getChild();
            auto reorderInDesc2 = TensorDesc(reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc);
            auto reorderOutDesc2 = TensorDesc(childChildNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc);

            std::string reorderLayerName2 = reorderNode->getName() + "_" +
                    MKLDNNExtensionUtils::getReorderArgs(reorderInDesc2, reorderOutDesc2) + "_" + childChildNode->getName();

            graph.InsertReorder(reorderNode->getChildEdgeAt(0), reorderLayerName2, reorderInDesc2, reorderOutDesc2, false);
        }
    };

    for (int i = 0; i < graphNodes.size(); i++) {
        auto parentNode = graphNodes[i];
        if (!isSuitableParentNode(parentNode)) {
            continue;
        }
        auto childNode = parentNode->getChildEdgeAt(0)->getChild();
        if (!isSuitableChildNode(childNode)) {
            continue;
        }
        if (checkAscendingSummaryOrder(parentNode, childNode)) {
            mergePermuteAndReorder(parentNode, childNode);
        }
    }
}

View File

@@ -52,6 +52,7 @@ private:
void FuseEltwiseAndSimple(MKLDNNGraph &graph); void FuseEltwiseAndSimple(MKLDNNGraph &graph);
void FuseScaleShiftAndQuantize(MKLDNNGraph &graph); void FuseScaleShiftAndQuantize(MKLDNNGraph &graph);
void FuseClampAndQuantize(MKLDNNGraph &graph); void FuseClampAndQuantize(MKLDNNGraph &graph);
void MergePermuteAndReorder(MKLDNNGraph &graph);
bool IsOneOf(Type type, std::vector<Type> types); bool IsOneOf(Type type, std::vector<Type> types);
bool IsOneOf(EltwiseOpType alg, std::vector<EltwiseOpType> algs); bool IsOneOf(EltwiseOpType alg, std::vector<EltwiseOpType> algs);

View File

@@ -55,6 +55,10 @@ public:
return false; return false;
} }
// Read-only access to this Permute node's permutation order; used by
// MKLDNNGraphOptimizer::MergePermuteAndReorder to check whether Permute+Reorder
// form an identity permutation.
const InferenceEngine::SizeVector& getOrder() const {
return order;
}
private: private:
InferenceEngine::SizeVector order; InferenceEngine::SizeVector order;
InferenceEngine::Precision prec; InferenceEngine::Precision prec;

View File

@@ -46,6 +46,10 @@ void MKLDNNReorderNode::initSupportedPrimitiveDescriptors() {
config.inConfs[0].constant = false; config.inConfs[0].constant = false;
config.outConfs[0].inPlace = -1; config.outConfs[0].inPlace = -1;
config.outConfs[0].constant = false; config.outConfs[0].constant = false;
if (isOptimized) {
config.inConfs[0].inPlace = 0;
config.outConfs[0].inPlace = 0;
}
if (input.getLayout() != InferenceEngine::Layout::ANY && output.getLayout() != InferenceEngine::Layout::ANY) { if (input.getLayout() != InferenceEngine::Layout::ANY && output.getLayout() != InferenceEngine::Layout::ANY) {
config.inConfs[0].desc = input; config.inConfs[0].desc = input;
config.outConfs[0].desc = output; config.outConfs[0].desc = output;
@@ -71,6 +75,7 @@ void MKLDNNReorderNode::createPrimitive() {
if (getSelectedPrimitiveDescriptor() == nullptr) if (getSelectedPrimitiveDescriptor() == nullptr)
THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set."; THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set.";
if (!isOptimized)
createReorderPrimitive(srcMemPtr->GetDescriptor(), srcMemPtr->GetPrimitive().get_data_handle(), createReorderPrimitive(srcMemPtr->GetDescriptor(), srcMemPtr->GetPrimitive().get_data_handle(),
dstMemPtr->GetDescriptor(), dstMemPtr->GetPrimitive().get_data_handle()); dstMemPtr->GetDescriptor(), dstMemPtr->GetPrimitive().get_data_handle());
} }
@@ -169,6 +174,9 @@ bool MKLDNNReorderNode::created() const {
} }
void MKLDNNReorderNode::execute(mkldnn::stream strm) { void MKLDNNReorderNode::execute(mkldnn::stream strm) {
if (isOptimized)
return;
src_blocked->GetPrimitivePtr()->set_data_handle(getParentEdgeAt(0)->getMemory().GetPrimitive().get_data_handle()); src_blocked->GetPrimitivePtr()->set_data_handle(getParentEdgeAt(0)->getMemory().GetPrimitive().get_data_handle());
dst_blocked->GetPrimitivePtr()->set_data_handle(getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle()); dst_blocked->GetPrimitivePtr()->set_data_handle(getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle());

View File

@@ -29,6 +29,10 @@ public:
this->output = output; this->output = output;
} }
void setOptimized(bool isOptimized) {
this->isOptimized = isOptimized;
}
void setDynamicBatchLim(int lim) override; void setDynamicBatchLim(int lim) override;
bool canBeInPlace() const override { bool canBeInPlace() const override {
@@ -50,6 +54,8 @@ private:
MKLDNNMemoryPtr dst_blocked; MKLDNNMemoryPtr dst_blocked;
MKLDNNMemoryPtr src_blocked; MKLDNNMemoryPtr src_blocked;
bool isOptimized = false;
void createReorderPrimitive(const mkldnn::memory::desc &srcDesc, void* srcPtr, const mkldnn::memory::desc &dstDesc, void* dstPtr); void createReorderPrimitive(const mkldnn::memory::desc &srcDesc, void* srcPtr, const mkldnn::memory::desc &dstDesc, void* dstPtr);
}; };

View File

@@ -0,0 +1,35 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <tuple>
#include <vector>
#include <string>
#include "test_utils/cpu_test_utils.hpp"
#include "functional_test_utils/layer_test_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
using namespace CPUTestUtils;
namespace LayerTestsDefinitions {
// (input shape, input precision) parameter tuple for the Permute+Reorder fusing test.
using FusePermuteAndReorderParams = std::tuple<
InferenceEngine::SizeVector, // Input shape
InferenceEngine::Precision // Input precision
>;
// Subgraph test that builds a Transpose (Permute) with a forced CPU output layout and
// checks the executed graph afterwards; see the CompareWithRefs test body for the check.
class FusePermuteAndReorderTest : public testing::WithParamInterface<FusePermuteAndReorderParams>, public CPUTestsBase,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
// Builds a readable test-case name from the (shape, precision) parameters.
static std::string getTestCaseName(testing::TestParamInfo<FusePermuteAndReorderParams> obj);
protected:
void SetUp() override;
// NOTE(review): not referenced in the visible test sources — confirm it is used before relying on it.
std::string pluginTypeNode;
};
} // namespace LayerTestsDefinitions

View File

@@ -0,0 +1,82 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "subgraph_tests/include/fuse_permute_reorder.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
namespace LayerTestsDefinitions {
// Builds a readable test-case name of the form "IS=<shape>_Precision=<name>"
// from the (input shape, input precision) parameter tuple.
std::string FusePermuteAndReorderTest::getTestCaseName(testing::TestParamInfo<FusePermuteAndReorderParams> obj) {
    const auto& inputShape = std::get<0>(obj.param);
    const auto& inPrec = std::get<1>(obj.param);
    std::ostringstream name;
    name << "IS=" << CommonTestUtils::vec2str(inputShape) << "_"
         << "Precision=" << inPrec.name();
    return name.str();
}
// Builds the test graph: Parameter -> Transpose(Permute) -> Result, where the Transpose
// output is pinned to a channels-last CPU format (nhwc/ndhwc) via setCPUInfo, so the
// plugin has to place a Reorder right after the Permute -- the pattern under test.
void FusePermuteAndReorderTest::SetUp() {
targetDevice = CommonTestUtils::DEVICE_CPU;
SizeVector inputShape;
Precision inPrec;
std::tie(inputShape, inPrec) = this->GetParam();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
auto paramOuts = ngraph::helpers::convert2OutputVector(
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
// Channel-moving permutation: 0231 for 4D inputs, 02341 for 5D inputs.
auto order = inputShape.size() == 5 ? std::vector<int64_t>{0, 2, 3, 4, 1} : std::vector<int64_t>{0, 2, 3, 1};
// Matching channels-last memory format for the chosen rank.
auto memFmt = inputShape.size() == 5 ? ndhwc : nhwc;
auto constOrder = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order);
auto permute = std::make_shared<ngraph::opset5::Transpose>(paramOuts[0], constOrder);
// Force both input and output of the Transpose to the channels-last format.
permute->get_rt_info() = setCPUInfo({memFmt}, {memFmt}, {});
ngraph::ResultVector results{std::make_shared<ngraph::opset5::Result>(permute)};
function = std::make_shared<ngraph::Function>(results, params, "PermuteReorder");
}
// Runs inference, then inspects the executed graph: after the MergePermuteAndReorder
// optimization no node with LAYER_TYPE "Permute" should remain in the execution graph.
TEST_P(FusePermuteAndReorderTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
InferenceEngine::CNNNetwork execGraphInfo = executableNetwork.GetExecGraphInfo();
auto function = execGraphInfo.getFunction();
ASSERT_NE(nullptr, function);
bool permuteFound = false;
for (const auto &node : function->get_ops()) {
const auto & rtInfo = node->get_rt_info();
// Fetches a string attribute from the node's runtime info; asserts that it exists
// and that it is stored as a string variant.
auto getExecValue = [&rtInfo](const std::string & paramName) -> std::string {
auto it = rtInfo.find(paramName);
IE_ASSERT(rtInfo.end() != it);
auto value = std::dynamic_pointer_cast<ngraph::VariantImpl<std::string>>(it->second);
IE_ASSERT(nullptr != value);
return value->get();
};
if (getExecValue(ExecGraphInfoSerialization::LAYER_TYPE) == "Permute") {
permuteFound = true;
break;
}
}
// The Permute must have been merged into an optimized Reorder.
ASSERT_TRUE(!permuteFound);
}
// Test matrix: one 4D and one 5D input shape, each with the 8-bit precisions I8 and U8.
const auto fusePermuteAndReorderParams = ::testing::Combine(
::testing::Values(SizeVector{1, 2, 3, 4}, SizeVector{1, 2, 3, 4, 5}),
::testing::Values(Precision::I8, Precision::U8)
);
INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest, fusePermuteAndReorderParams, FusePermuteAndReorderTest::getTestCaseName);
} // namespace LayerTestsDefinitions