[CPU] added MergePermuteAndReorder optimization + added test (#2519)
This commit is contained in:
@@ -124,3 +124,16 @@ bool MKLDNNExtensionUtils::initTensorsAreEqual(const InferenceEngine::TensorDesc
|
||||
return !(in1Block.getOffsetPadding() != in2Block.getOffsetPadding() &&
|
||||
in1Block.getOffsetPadding() != uninitNum && in2Block.getOffsetPadding() != uninitNum);
|
||||
}
|
||||
|
||||
/**
 * @brief Builds the "<input args>_<output args>" fragment used to name a Reorder layer.
 *
 * The precision names are added only when the two descriptors differ in precision, and the
 * MKLDNN memory format names are added only when the two descriptors differ in memory format.
 * When both are added they are joined with "_". If nothing differs the result is just "_".
 *
 * @param parentDesc tensor descriptor on the producer side of the reorder
 * @param childDesc  tensor descriptor on the consumer side of the reorder
 * @return string of the form inArgs + "_" + outArgs
 */
std::string MKLDNNExtensionUtils::getReorderArgs(const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) {
    std::string inArgs, outArgs;
    if (parentDesc.getPrecision() != childDesc.getPrecision()) {
        // Both strings are still empty here, so no separator is required.
        inArgs += std::string(parentDesc.getPrecision().name());
        outArgs += std::string(childDesc.getPrecision().name());
    }

    // Convert each descriptor once instead of once per use.
    const auto parentFormat = MKLDNNMemoryDesc(parentDesc).getFormat();
    const auto childFormat = MKLDNNMemoryDesc(childDesc).getFormat();
    if (parentFormat != childFormat) {
        inArgs += (inArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(parentFormat);
        outArgs += (outArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(childFormat);
    }
    return inArgs + "_" + outArgs;
}
|
||||
|
||||
@@ -22,6 +22,7 @@ public:
|
||||
static InferenceEngine::Precision DataTypeToIEPrecision(mkldnn::memory::data_type dataType);
|
||||
static InferenceEngine::TensorDesc getUninitTensorDesc(const InferenceEngine::TensorDesc& desc);
|
||||
static bool initTensorsAreEqual(const InferenceEngine::TensorDesc &desc1, const InferenceEngine::TensorDesc &desc2);
|
||||
static std::string getReorderArgs(const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc);
|
||||
};
|
||||
|
||||
} // namespace MKLDNNPlugin
|
||||
|
||||
@@ -463,18 +463,6 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() {
|
||||
void MKLDNNGraph::InitEdges() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges");
|
||||
|
||||
auto reorderArgs = [](const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) {
|
||||
std::string inArgs, outArgs;
|
||||
if (parentDesc.getPrecision() != childDesc.getPrecision()) {
|
||||
inArgs += (inArgs.empty() ? "" : "_") + std::string(parentDesc.getPrecision().name());
|
||||
outArgs += (outArgs.empty() ? "" : "_") + std::string(childDesc.getPrecision().name());
|
||||
}
|
||||
if (MKLDNNMemoryDesc(parentDesc).getFormat() != MKLDNNMemoryDesc(childDesc).getFormat()) {
|
||||
inArgs += (inArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(parentDesc).getFormat());
|
||||
outArgs += (outArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(childDesc).getFormat());
|
||||
}
|
||||
return inArgs + "_" + outArgs;
|
||||
};
|
||||
size_t numberOfEdges = graphEdges.size();
|
||||
|
||||
std::unordered_set<std::string> uniqueLayerNames;
|
||||
@@ -487,8 +475,8 @@ void MKLDNNGraph::InitEdges() {
|
||||
#if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
|
||||
auto &edge = graphEdges[i];
|
||||
std::string basicLayerName = edge->getParent()->getName() + "_" +
|
||||
reorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" +
|
||||
edge->getChild()->getName();
|
||||
MKLDNNExtensionUtils::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" +
|
||||
edge->getChild()->getName();
|
||||
std::string layerName = basicLayerName;
|
||||
int idx = 0;
|
||||
while (uniqueLayerNames.find(layerName) != uniqueLayerNames.end()) {
|
||||
@@ -496,43 +484,7 @@ void MKLDNNGraph::InitEdges() {
|
||||
layerName = basicLayerName + "_" + std::to_string(idx);
|
||||
}
|
||||
uniqueLayerNames.insert(layerName);
|
||||
CNNLayerPtr layer(new CNNLayer({layerName,
|
||||
"Reorder",
|
||||
edge->getInputDesc().getPrecision()}));
|
||||
MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, getEngine(), weightsCache));
|
||||
auto *reorderPtr = dynamic_cast<MKLDNNReorderNode *>(newReorder.get());
|
||||
if (reorderPtr) {
|
||||
reorderPtr->setDescs(edge->getInputDesc(), edge->getOutputDesc());
|
||||
}
|
||||
|
||||
auto oIndex = edge->getOutputNum();
|
||||
auto iIndex = edge->getInputNum();
|
||||
if (iIndex < 0 || oIndex < 0)
|
||||
THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
|
||||
<< edge->getParent()->getName() << " and "
|
||||
<< edge->getChild()->getName() << ".";
|
||||
|
||||
edge->drop();
|
||||
|
||||
MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
|
||||
MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));
|
||||
|
||||
// Add edge for beforeNode
|
||||
beforeNode->getChild()->parentEdges.push_back(beforeNode);
|
||||
edge->getParent()->childEdges.push_back(beforeNode);
|
||||
|
||||
// Add edge for afterNode
|
||||
afterNode->getParent()->childEdges.push_back(afterNode);
|
||||
edge->getChild()->parentEdges.push_back(afterNode);
|
||||
|
||||
newReorder->getSupportedDescriptors();
|
||||
newReorder->initSupportedPrimitiveDescriptors();
|
||||
newReorder->selectOptimalPrimitiveDescriptor();
|
||||
|
||||
graphEdges.push_back(beforeNode);
|
||||
graphEdges.push_back(afterNode);
|
||||
|
||||
graphNodes.push_back(newReorder);
|
||||
InsertReorder(edge, layerName, edge->getInputDesc(), edge->getOutputDesc());
|
||||
graphEdges.erase(graphEdges.begin() + i);
|
||||
i--;
|
||||
numberOfEdges--;
|
||||
@@ -1131,6 +1083,57 @@ void MKLDNNGraph::RemoveDroppedEdges() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Replaces the given edge with the chain "parent -> Reorder -> child".
 *
 * A new Reorder node named layerName is created with inDesc/outDesc as its descriptors.
 * The original edge is dropped, two new edges are registered on the parent, the new node and
 * the child, and the new edges and node are appended to graphEdges / graphNodes.
 * The caller is still responsible for removing the original edge from graphEdges.
 *
 * @param edge        edge that is replaced by the Reorder node; must not be null
 * @param layerName   name of the new Reorder layer
 * @param inDesc      input tensor descriptor of the Reorder
 * @param outDesc     output tensor descriptor of the Reorder
 * @param isOptimized forwarded to MKLDNNReorderNode::setOptimized(); when true the edge
 *                    descriptor consistency checks at the end are skipped
 * @param scales      blob stored into the Reorder node's _scales field
 * @throws an IE exception if the edge is null, the node cannot be created, or the edge
 *         reports a negative input/output index
 */
void MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const TensorDesc& inDesc, const TensorDesc& outDesc,
                                bool isOptimized, InferenceEngine::Blob::Ptr scales) {
    if (!edge)
        THROW_IE_EXCEPTION << "Cannot create reorder " << layerName << ": the edge is not specified.";

    CNNLayerPtr layer(new CNNLayer({layerName,
                                    "Reorder",
                                    inDesc.getPrecision()}));
    MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, getEngine(), weightsCache));
    auto *reorderPtr = dynamic_cast<MKLDNNReorderNode *>(newReorder.get());
    // Fail fast instead of guarding some uses of reorderPtr and dereferencing it unguarded later.
    if (!reorderPtr)
        THROW_IE_EXCEPTION << "Cannot create reorder node " << layerName << ".";

    reorderPtr->setDescs(inDesc, outDesc);
    reorderPtr->_scales = scales;
    // Must be set before the supported primitive descriptors are initialized below.
    reorderPtr->setOptimized(isOptimized);

    auto oIndex = edge->getOutputNum();
    auto iIndex = edge->getInputNum();
    if (iIndex < 0 || oIndex < 0)
        THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
                           << edge->getParent()->getName() << " and "
                           << edge->getChild()->getName() << ".";

    edge->drop();

    MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
    MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));

    // Add edge for beforeNode
    beforeNode->getChild()->parentEdges.push_back(beforeNode);
    edge->getParent()->childEdges.push_back(beforeNode);

    // Add edge for afterNode
    afterNode->getParent()->childEdges.push_back(afterNode);
    edge->getChild()->parentEdges.push_back(afterNode);

    newReorder->getSupportedDescriptors();
    newReorder->initSupportedPrimitiveDescriptors();
    newReorder->selectOptimalPrimitiveDescriptor();

    graphEdges.push_back(beforeNode);
    graphEdges.push_back(afterNode);

    // MKLDNNEdge::getDesc() is called only to check that the input and output tensor descriptors of
    // each new edge are equal. An optimized Reorder, as created by
    // MKLDNNGraphOptimizer::MergePermuteAndReorder(), is exempt from these checks.
    if (!isOptimized) {
        beforeNode->getDesc();
        afterNode->getDesc();
    }

    graphNodes.push_back(newReorder);
}
|
||||
|
||||
void MKLDNNGraph::dumpToDotFile(std::string file) const {
|
||||
std::ofstream dot;
|
||||
dot.open(file);
|
||||
|
||||
@@ -92,6 +92,28 @@ public:
|
||||
void DropNode(const MKLDNNNodePtr& node);
|
||||
void DropDWConvNode(const MKLDNNNodePtr& node);
|
||||
|
||||
/**
|
||||
* @brief Insert Reorder node at the edge-specified location.
|
||||
* The Reorder node must be inserted in case when there are inplace conflicts or the input and output tensor descriptors do not match.
|
||||
* The Reorder node rearranges the elements in memory according to inDesc and outDesc, or reinterprets memory descriptor without
|
||||
* rearrangement of elements if isOptimized is true.
|
||||
* @param edge
|
||||
* pointer to the edge in the graph where Reorder node will be inserted
|
||||
* @param layerName
|
||||
* Reorder layer name
|
||||
* @param inDesc
|
||||
* input tensor descriptor
|
||||
* @param outDesc
|
||||
* output tensor descriptor
|
||||
* @param isOptimized
|
||||
* optimization flag; if isOptimized is true then Reorder node does nothing
|
||||
* @param scales
|
||||
* pointer to the blob containing scales
|
||||
* @return none.
|
||||
*/
|
||||
void InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const InferenceEngine::TensorDesc& inDesc, const InferenceEngine::TensorDesc& outDesc,
|
||||
bool isOptimized = false, InferenceEngine::Blob::Ptr scales = nullptr);
|
||||
|
||||
InferenceEngine::CNNNetwork dump() const;
|
||||
|
||||
template<typename NET>
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "nodes/mkldnn_bin_conv_node.h"
|
||||
#include "nodes/mkldnn_quantize_node.h"
|
||||
#include "nodes/mkldnn_mvn_node.h"
|
||||
#include <nodes/mkldnn_permute_node.h>
|
||||
#include "nodes/mkldnn_resample_node.h"
|
||||
#include "nodes/mkldnn_interpolate_node.h"
|
||||
#include "nodes/mkldnn_input_node.h"
|
||||
@@ -151,6 +152,9 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
|
||||
graph.RemoveDroppedNodes();
|
||||
#endif
|
||||
|
||||
MergePermuteAndReorder(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
graph.RemoveDroppedEdges();
|
||||
}
|
||||
|
||||
@@ -1812,8 +1816,9 @@ void MKLDNNGraphOptimizer::RemoveIdentityOperator(MKLDNNGraph &graph) {
|
||||
#if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
|
||||
void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) {
|
||||
std::set<MKLDNNNodePtr> processed;
|
||||
std::vector<MKLDNNNodePtr> newNodes;
|
||||
for (MKLDNNNodePtr& node : graph.GetNodes()) {
|
||||
int graphNodesSize = graph.GetNodes().size();
|
||||
for (int i = 0; i < graphNodesSize; i++) {
|
||||
MKLDNNNodePtr& node = graph.GetNodes()[i];
|
||||
if (processed.find(node) == processed.end() && node->getType() == Reorder
|
||||
&& node->getChildEdges().size() == 1
|
||||
&& node->getChildEdgeAt(0)->getChild()->getType() == Reorder ) {
|
||||
@@ -1855,54 +1860,10 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) {
|
||||
|
||||
|
||||
std::string layerName = edge->getParent()->getName() + "_ScaleReorder_" + edge->getChild()->getName();
|
||||
CNNLayerPtr layer(new CNNLayer({layerName,
|
||||
"Reorder",
|
||||
n->getInput().getPrecision()}));
|
||||
MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, graph.getEngine(), graph.weightsCache));
|
||||
auto *reorderPtr = dynamic_cast<MKLDNNReorderNode *>(newReorder.get());
|
||||
if (reorderPtr) {
|
||||
reorderPtr->setDescs(n->getInput(), nn->getOutput());
|
||||
reorderPtr->_scales = scales;
|
||||
}
|
||||
|
||||
// new !!!
|
||||
auto oIndex = edge->getOutputNum();
|
||||
auto iIndex = edge->getInputNum();
|
||||
if (iIndex < 0 || oIndex < 0)
|
||||
THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
|
||||
<< edge->getParent()->getName() << " and "
|
||||
<< edge->getChild()->getName() << ".";
|
||||
edge->drop();
|
||||
|
||||
MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
|
||||
MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));
|
||||
|
||||
// Add edge for beforeNode
|
||||
beforeNode->getChild()->parentEdges.push_back(beforeNode);
|
||||
edge->getParent()->childEdges.push_back(beforeNode);
|
||||
|
||||
// Add edge for afterNode
|
||||
afterNode->getParent()->childEdges.push_back(afterNode);
|
||||
edge->getChild()->parentEdges.push_back(afterNode);
|
||||
|
||||
newReorder->getSupportedDescriptors();
|
||||
newReorder->initSupportedPrimitiveDescriptors();
|
||||
newReorder->selectOptimalPrimitiveDescriptor();
|
||||
|
||||
graph.GetEdges().push_back(beforeNode);
|
||||
graph.GetEdges().push_back(afterNode);
|
||||
|
||||
// Just to check accordance
|
||||
afterNode->getDesc();
|
||||
beforeNode->getDesc();
|
||||
|
||||
newNodes.push_back(newReorder);
|
||||
graph.InsertReorder(edge, layerName, n->getInput(), nn->getOutput(), false, scales);
|
||||
graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), edge), graph.GetEdges().end());
|
||||
}
|
||||
}
|
||||
for (MKLDNNNodePtr& node : newNodes) {
|
||||
graph.GetNodes().push_back(node);
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNGraphOptimizer::DropConvertReorder(MKLDNNGraph& graph) {
|
||||
@@ -2247,3 +2208,142 @@ void MKLDNNGraphOptimizer::FuseScaleShiftAndQuantize(MKLDNNGraph &graph) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Replaces a "Permute -> Reorder" pair with an optimized Reorder when the pair is an identity permutation.
 *
 * A pair is considered when the Permute node has exactly one child edge, that child is a Reorder
 * node, and the Reorder also has exactly one child edge. If the combined permutation leaves the
 * element order unchanged, both nodes are dropped and a single Reorder with isOptimized = true
 * is inserted. When the input and output precisions differ, a second, regular Reorder is inserted
 * after it to perform the precision conversion.
 *
 * @param graph graph to optimize; its node and edge lists are modified
 */
void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) {
    auto& graphNodes = graph.GetNodes();

    auto isSuitableParentNode = [](const MKLDNNNodePtr &node) {
        return node->getType() == Permute && node->getChildEdges().size() == 1;
    };

    auto isSuitableChildNode = [](const MKLDNNNodePtr &node) {
        return node->getType() == Reorder && node->getChildEdges().size() == 1;
    };

    // True when `order` contains every index in [0, order.size()) exactly once.
    // The order vectors are used as indices below, so anything else must be rejected up front.
    auto isPermutation = [](const SizeVector &order) -> bool {
        std::vector<bool> seen(order.size(), false);
        for (size_t value : order) {
            if (value >= order.size() || seen[value]) {
                return false;
            }
            seen[value] = true;
        }
        return true;
    };

    // Returns the inverse of a permutation: result[order[i]] == i.
    auto invert = [](const SizeVector &order) -> SizeVector {
        SizeVector inverse(order.size());
        for (size_t i = 0; i < order.size(); i++) {
            inverse[order[i]] = i;
        }
        return inverse;
    };

    // checkAscendingSummaryOrder() checks that after the sequential execution of the Permute and Reorder nodes
    // the order of the elements in memory does not change, i.e. that Permute+Reorder is the identity permutation.
    auto checkAscendingSummaryOrder = [&](const MKLDNNNodePtr &parentNode, const MKLDNNNodePtr &childNode) -> bool {
        auto* permuteNode = dynamic_cast<MKLDNNPermuteNode*>(parentNode.get());
        auto* reorderNode = dynamic_cast<MKLDNNReorderNode*>(childNode.get());
        if (!permuteNode || !reorderNode) {
            return false;
        }
        if (permuteNode->getSelectedPrimitiveDescriptor() == nullptr || reorderNode->getSelectedPrimitiveDescriptor() == nullptr) {
            return false;
        }

        auto& permuteOrder = permuteNode->getOrder();
        auto& layoutOrder = permuteNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder();
        auto& inOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getBlockingDesc().getOrder();
        auto& outOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder();

        if (permuteOrder.size() != layoutOrder.size() || layoutOrder.size() != inOrder.size() || inOrder.size() != outOrder.size()) {
            return false;
        }
        if (!isPermutation(permuteOrder) || !isPermutation(layoutOrder) || !isPermutation(inOrder) || !isPermutation(outOrder)) {
            return false;
        }

        // revLayoutOrder - reverse permutation for layoutOrder
        const SizeVector revLayoutOrder = invert(layoutOrder);

        // newPermuteOrder - Permute layout-aware permutation
        SizeVector newPermuteOrder(permuteOrder.size());
        for (size_t i = 0; i < newPermuteOrder.size(); i++) {
            newPermuteOrder[i] = layoutOrder[permuteOrder[revLayoutOrder[i]]];
        }

        // reorderOrder - Reorder layout-aware permutation: reorderOrder[i] is the position j with
        // outOrder[i] == inOrder[j]. Both orders are permutations, so that position is unique and
        // can be read from the inverse of inOrder.
        const SizeVector revInOrder = invert(inOrder);
        SizeVector reorderOrder(outOrder.size());
        for (size_t i = 0; i < reorderOrder.size(); i++) {
            reorderOrder[i] = revInOrder[outOrder[i]];
        }

        // The resulting Permute+Reorder permutation must be the identity.
        for (size_t i = 0; i < newPermuteOrder.size(); i++) {
            if (reorderOrder[newPermuteOrder[i]] != i) {
                return false;
            }
        }

        return true;
    };

    // Permute and Reorder do opposite permutations to each other.
    // Example:
    //      chain [physical layout: NCHW, logical layout: NCHW] -> Permute(order=0312) -> [physical layout: NWCH, logical layout: NCHW] ->
    //      Reorder(nchw->nhwc) -> [physical layout: NCHW, logical layout: NHWC] can be replaced with Reorder(nchw->nhwc; isOptimized=true)
    //      which just reinterprets the layout without any physical change of the memory.
    // Two cases are possible:
    //      1) inPrec = outPrec
    //          In this case, we replace the Permute+Reorder pattern with a new Reorder that does nothing.
    //      2) inPrec != outPrec
    //          As in the first case, we also replace the Permute+Reorder pattern with a new Reorder.
    //          Additionally, we insert another Reorder that performs the conversion from the input precision (inPrec)
    //          to the output precision (outPrec)
    auto mergePermuteAndReorder = [&](const MKLDNNNodePtr &parentNode, const MKLDNNNodePtr &childNode) {
        auto parentParentNode = parentNode->getParentEdgeAt(0)->getParent();
        auto childChildNode = childNode->getChildEdgeAt(0)->getChild();

        graph.DropNode(parentNode);
        graph.DropNode(childNode);

        auto inDesc = parentParentNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc;
        auto outDesc = childChildNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc;

        auto inPrec = inDesc.getPrecision();
        auto outPrec = outDesc.getPrecision();

        auto reorderInDesc = TensorDesc(inDesc);
        auto reorderOutDesc = TensorDesc(outDesc);
        // The optimized Reorder only reinterprets the layout, so it must keep the input precision.
        reorderOutDesc.setPrecision(inPrec);

        std::string reorderlayerName = parentParentNode->getName() + "_" +
                MKLDNNExtensionUtils::getReorderArgs(reorderInDesc, reorderOutDesc) + "_" + "fake";

        // Find the edge that now links the outer nodes directly.
        MKLDNNEdgePtr edge;
        for (const auto &childEdge : parentParentNode->getChildEdges()) {
            auto lockedEdge = childEdge.lock();
            if (lockedEdge && lockedEdge->getChild() == childChildNode) {
                edge = lockedEdge;
                break;
            }
        }
        if (!edge) {
            THROW_IE_EXCEPTION << "Permute node '" << parentNode->getName() << "' has invalid edges.";
        }

        graph.InsertReorder(edge, reorderlayerName, reorderInDesc, reorderOutDesc, true);

        // case 2
        if (inPrec != outPrec) {
            // InsertReorder() appends the new edge to the parent's child edges, so the new Reorder is not
            // necessarily the first child. Locate it as the Reorder child that feeds childChildNode.
            MKLDNNNodePtr reorderNode;
            for (const auto &childEdge : parentParentNode->getChildEdges()) {
                auto lockedEdge = childEdge.lock();
                if (!lockedEdge) {
                    continue;
                }
                auto candidate = lockedEdge->getChild();
                if (candidate->getType() == Reorder && candidate->getChildEdges().size() == 1 &&
                    candidate->getChildEdgeAt(0)->getChild() == childChildNode) {
                    reorderNode = candidate;
                    break;
                }
            }
            if (!reorderNode) {
                THROW_IE_EXCEPTION << "Cannot find reorder node '" << reorderlayerName << "' inserted after '"
                                   << parentParentNode->getName() << "'.";
            }

            auto reorderInDesc2 = TensorDesc(reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc);
            auto reorderOutDesc2 = TensorDesc(childChildNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc);

            std::string reorderLayerName2 = reorderNode->getName() + "_" +
                    MKLDNNExtensionUtils::getReorderArgs(reorderInDesc2, reorderOutDesc2) + "_" + childChildNode->getName();

            graph.InsertReorder(reorderNode->getChildEdgeAt(0), reorderLayerName2, reorderInDesc2, reorderOutDesc2, false);
        }
    };

    // Index-based loop on purpose: InsertReorder() appends to graphNodes while we iterate.
    for (size_t i = 0; i < graphNodes.size(); i++) {
        // Copy the pointer: a reference into graphNodes could be invalidated by the insertion.
        auto parentNode = graphNodes[i];
        if (!isSuitableParentNode(parentNode)) {
            continue;
        }
        auto childNode = parentNode->getChildEdgeAt(0)->getChild();
        if (!isSuitableChildNode(childNode)) {
            continue;
        }

        if (checkAscendingSummaryOrder(parentNode, childNode)) {
            mergePermuteAndReorder(parentNode, childNode);
        }
    }
}
|
||||
|
||||
@@ -52,6 +52,7 @@ private:
|
||||
void FuseEltwiseAndSimple(MKLDNNGraph &graph);
|
||||
void FuseScaleShiftAndQuantize(MKLDNNGraph &graph);
|
||||
void FuseClampAndQuantize(MKLDNNGraph &graph);
|
||||
void MergePermuteAndReorder(MKLDNNGraph &graph);
|
||||
|
||||
bool IsOneOf(Type type, std::vector<Type> types);
|
||||
bool IsOneOf(EltwiseOpType alg, std::vector<EltwiseOpType> algs);
|
||||
|
||||
@@ -55,6 +55,10 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * @brief Read-only access to the `order` member of this node.
 * @return const reference to the stored order vector
 */
const InferenceEngine::SizeVector& getOrder() const {
    return this->order;
}
|
||||
|
||||
private:
|
||||
InferenceEngine::SizeVector order;
|
||||
InferenceEngine::Precision prec;
|
||||
|
||||
@@ -46,6 +46,10 @@ void MKLDNNReorderNode::initSupportedPrimitiveDescriptors() {
|
||||
config.inConfs[0].constant = false;
|
||||
config.outConfs[0].inPlace = -1;
|
||||
config.outConfs[0].constant = false;
|
||||
if (isOptimized) {
|
||||
config.inConfs[0].inPlace = 0;
|
||||
config.outConfs[0].inPlace = 0;
|
||||
}
|
||||
if (input.getLayout() != InferenceEngine::Layout::ANY && output.getLayout() != InferenceEngine::Layout::ANY) {
|
||||
config.inConfs[0].desc = input;
|
||||
config.outConfs[0].desc = output;
|
||||
@@ -71,6 +75,7 @@ void MKLDNNReorderNode::createPrimitive() {
|
||||
if (getSelectedPrimitiveDescriptor() == nullptr)
|
||||
THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set.";
|
||||
|
||||
if (!isOptimized)
|
||||
createReorderPrimitive(srcMemPtr->GetDescriptor(), srcMemPtr->GetPrimitive().get_data_handle(),
|
||||
dstMemPtr->GetDescriptor(), dstMemPtr->GetPrimitive().get_data_handle());
|
||||
}
|
||||
@@ -169,6 +174,9 @@ bool MKLDNNReorderNode::created() const {
|
||||
}
|
||||
|
||||
void MKLDNNReorderNode::execute(mkldnn::stream strm) {
|
||||
if (isOptimized)
|
||||
return;
|
||||
|
||||
src_blocked->GetPrimitivePtr()->set_data_handle(getParentEdgeAt(0)->getMemory().GetPrimitive().get_data_handle());
|
||||
dst_blocked->GetPrimitivePtr()->set_data_handle(getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle());
|
||||
|
||||
|
||||
@@ -29,6 +29,10 @@ public:
|
||||
this->output = output;
|
||||
}
|
||||
|
||||
/**
 * @brief Stores the optimization flag of this Reorder node.
 * @param isOptimized new value of the isOptimized member
 */
void setOptimized(bool isOptimized) {
    // The parameter shadows the member, hence the explicit this->.
    this->isOptimized = isOptimized;
}
|
||||
|
||||
void setDynamicBatchLim(int lim) override;
|
||||
|
||||
bool canBeInPlace() const override {
|
||||
@@ -50,6 +54,8 @@ private:
|
||||
MKLDNNMemoryPtr dst_blocked;
|
||||
MKLDNNMemoryPtr src_blocked;
|
||||
|
||||
bool isOptimized = false;
|
||||
|
||||
void createReorderPrimitive(const mkldnn::memory::desc &srcDesc, void* srcPtr, const mkldnn::memory::desc &dstDesc, void* dstPtr);
|
||||
};
|
||||
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "test_utils/cpu_test_utils.hpp"
|
||||
#include "functional_test_utils/layer_test_utils.hpp"
|
||||
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
using namespace CPUTestUtils;
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
using FusePermuteAndReorderParams = std::tuple<
|
||||
InferenceEngine::SizeVector, // Input shape
|
||||
InferenceEngine::Precision // Input precision
|
||||
>;
|
||||
|
||||
class FusePermuteAndReorderTest : public testing::WithParamInterface<FusePermuteAndReorderParams>, public CPUTestsBase,
|
||||
virtual public LayerTestsUtils::LayerTestsCommon {
|
||||
public:
|
||||
static std::string getTestCaseName(testing::TestParamInfo<FusePermuteAndReorderParams> obj);
|
||||
|
||||
protected:
|
||||
void SetUp() override;
|
||||
std::string pluginTypeNode;
|
||||
};
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
||||
@@ -0,0 +1,82 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "subgraph_tests/include/fuse_permute_reorder.hpp"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace CPUTestUtils;
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
/**
 * @brief Produces the test case name "IS=<input shape>_Precision=<precision name>".
 * @param obj test parameter info carrying the (input shape, input precision) tuple
 */
std::string FusePermuteAndReorderTest::getTestCaseName(testing::TestParamInfo<FusePermuteAndReorderParams> obj) {
    const SizeVector &shape = std::get<0>(obj.param);
    const Precision &precision = std::get<1>(obj.param);

    std::ostringstream name;
    name << "IS=" << CommonTestUtils::vec2str(shape) << "_"
         << "Precision=" << precision.name();
    return name.str();
}
|
||||
|
||||
void FusePermuteAndReorderTest::SetUp() {
|
||||
targetDevice = CommonTestUtils::DEVICE_CPU;
|
||||
SizeVector inputShape;
|
||||
Precision inPrec;
|
||||
|
||||
std::tie(inputShape, inPrec) = this->GetParam();
|
||||
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec);
|
||||
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
|
||||
auto paramOuts = ngraph::helpers::convert2OutputVector(
|
||||
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
|
||||
|
||||
auto order = inputShape.size() == 5 ? std::vector<int64_t>{0, 2, 3, 4, 1} : std::vector<int64_t>{0, 2, 3, 1};
|
||||
auto memFmt = inputShape.size() == 5 ? ndhwc : nhwc;
|
||||
|
||||
auto constOrder = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order);
|
||||
|
||||
auto permute = std::make_shared<ngraph::opset5::Transpose>(paramOuts[0], constOrder);
|
||||
|
||||
permute->get_rt_info() = setCPUInfo({memFmt}, {memFmt}, {});
|
||||
|
||||
ngraph::ResultVector results{std::make_shared<ngraph::opset5::Result>(permute)};
|
||||
function = std::make_shared<ngraph::Function>(results, params, "PermuteReorder");
|
||||
}
|
||||
|
||||
// Runs the network and then checks that the executable graph contains no node whose
// layer type is "Permute".
TEST_P(FusePermuteAndReorderTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    Run();

    InferenceEngine::CNNNetwork execGraphInfo = executableNetwork.GetExecGraphInfo();
    auto function = execGraphInfo.getFunction();
    ASSERT_NE(nullptr, function);

    bool permuteFound = false;
    for (const auto &op : function->get_ops()) {
        // Every executable-graph node is required to carry a string-valued layer type attribute.
        const auto &rtInfo = op->get_rt_info();
        auto it = rtInfo.find(ExecGraphInfoSerialization::LAYER_TYPE);
        IE_ASSERT(rtInfo.end() != it);
        auto value = std::dynamic_pointer_cast<ngraph::VariantImpl<std::string>>(it->second);
        IE_ASSERT(nullptr != value);

        if (value->get() == "Permute") {
            permuteFound = true;
            break;
        }
    }
    ASSERT_TRUE(!permuteFound);
}
|
||||
|
||||
const auto fusePermuteAndReorderParams = ::testing::Combine(
|
||||
::testing::Values(SizeVector{1, 2, 3, 4}, SizeVector{1, 2, 3, 4, 5}),
|
||||
::testing::Values(Precision::I8, Precision::U8)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest, fusePermuteAndReorderParams, FusePermuteAndReorderTest::getTestCaseName);
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
||||
Reference in New Issue
Block a user