diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index d56c81ce79c..b8961c2e13b 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -722,8 +722,13 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine:: auto input = inputNodesMap.find(name); if (input != inputNodesMap.end()) { + auto& inTensorDesc = in->getTensorDesc(); + auto node = input->second; + auto childEdge = node->getChildEdgeAt(0); + const auto& outDims = node->getOutputShapeAtPort(0); + const void *ext_data_ptr = in->cbuffer(); - void *inter_data_ptr = input->second->getChildEdgeAt(0)->getMemory().GetData(); + void *inter_data_ptr = childEdge->getMemory().GetData(); if (ext_data_ptr != inter_data_ptr) { auto ext_tdesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(in->getTensorDesc()); @@ -731,17 +736,16 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine:: auto ext_mem = MKLDNNMemory(eng); ext_mem.Create(ext_tdesc, ext_data_ptr, false); - input->second->getChildEdgeAt(0)->getMemory().SetData(ext_mem, 0, false); + childEdge->getMemory().SetData(ext_mem, 0, false); } // todo: make sure 'name' exists in this map... 
if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) { - if (in->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) { - _normalizePreprocMap[name].NormalizeImage(input->second->getOutputShapeAtPort(0), - reinterpret_cast<float *>(inter_data_ptr), - in->getTensorDesc().getLayout()); + if (inTensorDesc.getPrecision() == InferenceEngine::Precision::FP32) { + _normalizePreprocMap[name].NormalizeImage(outDims, reinterpret_cast<float *>(inter_data_ptr), + inTensorDesc.getLayout()); } else { - IE_THROW() << "Mean image of type " << in->getTensorDesc().getPrecision().name() << " is unsupported"; + IE_THROW() << "Mean image of type " << inTensorDesc.getPrecision().name() << " is unsupported"; } } } else { @@ -756,15 +760,17 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) { for (auto &outputMap : outputNodesMap) { auto name = outputMap.first; auto node = outputMap.second; - const MKLDNNMemory& intr_blob = node->getParentEdgeAt(0)->getMemory(); + auto parentEdge = node->getParentEdgeAt(0); + const MKLDNNMemory& intr_blob = parentEdge->getMemory(); - auto ext_blob = out.find(name); - if (ext_blob == out.end()) { + const auto ext_blob_map = out.find(name); + if (ext_blob_map == out.end()) { IE_THROW(Unexpected) << "The network outputs do not contain mkldnn graph output node name: \"" << name << "\""; } + const auto ext_blob = ext_blob_map->second; const auto actualDesc = MemoryDescUtils::convertToTensorDesc(intr_blob.getDesc()); - auto &expectedDesc = ext_blob->second->getTensorDesc(); + auto &expectedDesc = ext_blob->getTensorDesc(); // TODO [NM]: need to create universal reorder which will be detect cases when we really need to use it // WA: for cases when output shape after transformation will be 1x1x1x1 but model output is scalar @@ -797,27 +803,16 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) { auto srcPrec = actualDesc.getPrecision(); auto dstPrec = expectedDesc.getPrecision(); - if (srcPrec == dstPrec && ext_blob->second->byteSize() !=
intr_blob.GetSize()) + if (srcPrec == dstPrec && ext_blob->byteSize() != intr_blob.GetSize()) IE_THROW() << "Output blob byte size is not equal network output byte size (" - << ext_blob->second->byteSize() << "!=" << intr_blob.GetSize() << ")."; + << ext_blob->byteSize() << "!=" << intr_blob.GetSize() << ")."; - void *ext_blob_ptr = ext_blob->second->buffer(); + void *ext_blob_ptr = ext_blob->buffer(); void *intr_blob_ptr = intr_blob.GetData(); // That is the same memory. No need to copy if (ext_blob_ptr == intr_blob_ptr) continue; - size_t size_to_copy = intr_blob.GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount(); - // TODO: Should we support InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT??? - // TODO [DS]: phase 2: should we support this behaviour? Looks obsolete in the dynamic shapes paradigm - if (config.batchLimit) { - if (node->isDynamicNode()) { - IE_THROW(NotImplemented) << "[DS] not implemented dynamic batch for node with dynamic shape"; - } - int MB_to_process = node->batchToProcess(); - size_to_copy = std::accumulate(outDims.begin() + 1, outDims.end(), (size_t)1, std::multiplies<size_t>()) * MB_to_process; - } - if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) { auto outBlobDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(expectedDesc); auto outBloMem = MKLDNNMemory(eng); @@ -825,6 +820,17 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) { outBloMem.SetData(intr_blob, 0, false); } else { + size_t size_to_copy = intr_blob.GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount(); + // TODO: Should we support InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT??? + // TODO [DS]: phase 2: should we support this behaviour?
Looks obsolete in the dynamic shapes paradigm + if (config.batchLimit) { + if (node->isDynamicNode()) { + IE_THROW(NotImplemented) << "[DS] not implemented dynamic batch for node with dynamic shape"; + } + int MB_to_process = node->batchToProcess(); + size_to_copy = std::accumulate(outDims.begin() + 1, outDims.end(), (size_t)1, std::multiplies<size_t>()) * MB_to_process; + } + cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy); } } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp index 3b3e5b099ba..72efcfcfe37 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp @@ -84,27 +84,27 @@ MKLDNNPlugin::MKLDNNInferRequest::~MKLDNNInferRequest() { } void MKLDNNPlugin::MKLDNNInferRequest::pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob, InferenceEngine::Precision inPrec) { - bool needConvert = inPrec != inputBlob->getTensorDesc().getPrecision(); + auto& tensorDesc = inputBlob->getTensorDesc(); + bool needConvert = inPrec != tensorDesc.getPrecision(); - if (inputBlob->cbuffer().as<const void *>() == nullptr) { + const void* srcData = inputBlob->cbuffer().as<const void *>(); + if (srcData == nullptr) { IE_THROW() << "Input blob has no allocated memory"; } InferenceEngine::Blob::Ptr iconv; if (needConvert) { - iconv = make_blob_with_precision(inPrec, InferenceEngine::TensorDesc(inPrec, inputBlob->getTensorDesc().getDims(), - inputBlob->getTensorDesc().getLayout())); + iconv = make_blob_with_precision(inPrec, InferenceEngine::TensorDesc(inPrec, tensorDesc.getDims(), tensorDesc.getLayout())); iconv->allocate(); if (inputBlob->size() != iconv->size()) IE_THROW() << "Can't copy tensor: input and converted tensors have different number of elements: " << inputBlob->size() << " and " << iconv->size(); - void *srcData = inputBlob->cbuffer().as<void *>(); void *dstData = iconv->buffer().as<void *>(); if (dstData ==
nullptr) { IE_THROW() << "Converted input blob has no allocated memory"; } - cpu_convert(srcData, dstData, inputBlob->getTensorDesc().getPrecision(), iconv->getTensorDesc().getPrecision(), iconv->size()); + cpu_convert(srcData, dstData, tensorDesc.getPrecision(), iconv->getTensorDesc().getPrecision(), iconv->size()); } graph->PushInputData(inputName, needConvert ? iconv : inputBlob); @@ -112,27 +112,30 @@ void MKLDNNPlugin::MKLDNNInferRequest::pushInput(const std::string& inputName, I void MKLDNNPlugin::MKLDNNInferRequest::PushInputData() { for (auto input : _inputs) { - if (!_networkInputs[input.first]) { - IE_THROW() << "Input blobs map contains not registered during IInferencePlugin::LoadNetwork blob with name " << input.first; + auto inputName = input.first; + if (!_networkInputs[inputName]) { + IE_THROW() << "Input blobs map contains not registered during IInferencePlugin::LoadNetwork blob with name " << inputName; } - auto inPrec = input.second->getTensorDesc().getPrecision(); - if (graph->hasMeanImageFor(input.first) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) { + auto inputBlob = input.second; + auto& inputTensorDesc = inputBlob->getTensorDesc(); + auto inPrec = inputTensorDesc.getPrecision(); + if (graph->hasMeanImageFor(inputName) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) { inPrec = InferenceEngine::Precision::FP32; } else { inPrec = normalizeToSupportedPrecision(inPrec); } if (inPrec == InferenceEngine::Precision::UNSPECIFIED) { - IE_THROW() << "Unsupported input precision " << input.second->getTensorDesc().getPrecision(); + IE_THROW() << "Unsupported input precision " << inputTensorDesc.getPrecision(); } // User can initialize input via setBlob API using tensorDesc with default (ANY) layout. // Currently IE doesn't specify behavior in such scenario, so we assume real layout is equal to the network input. 
- if (input.second->getTensorDesc().getLayout() == InferenceEngine::ANY) { - input.second->getTensorDesc().setLayout(_networkInputs[input.first]->getLayout()); + if (inputTensorDesc.getLayout() == InferenceEngine::ANY) { + inputTensorDesc.setLayout(_networkInputs[inputName]->getLayout()); } - pushInput(input.first, input.second, inPrec); + pushInput(inputName, inputBlob, inPrec); } } @@ -502,71 +505,104 @@ static inline void changeEdgePtr(const MKLDNNPlugin::MKLDNNEdgePtr &edge, void * void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() { for (auto& it : externalPtr) { - auto input = graph->GetInputNodesMap().find(it.first); - if (input != graph->GetInputNodesMap().end()) { - if (input->second->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second) + const auto& inputNodesMap = graph->GetInputNodesMap(); + auto input = inputNodesMap.find(it.first); + if (input != inputNodesMap.end()) { + MKLDNNNodePtr inputNodePtr = input->second; + if (inputNodePtr->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second) continue; + auto& childEdges = inputNodePtr->getChildEdges(); // Input cannot be in-place with other primitives bool canBeInPlace = true; - for (size_t i = 0; canBeInPlace && i < input->second->getChildEdges().size(); i++) { - auto& child = input->second->getChildEdgeAt(i)->getChild(); - if (child->isConstant()) - canBeInPlace = false; + for (auto& childEdge : childEdges) { + auto ce = childEdge.lock(); + if (!ce) + IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge"; - auto* concat = dynamic_cast<MKLDNNConcatNode *>(child.get()); - if (canBeInPlace && concat && concat->isOptimized()) - canBeInPlace = false; + auto& child = ce->getChild(); - // Cannot be in-place before split because split is using different ptrs without offsets - auto* split = dynamic_cast<MKLDNNSplitNode *>(child.get()); - if (canBeInPlace && split) - canBeInPlace = false; - - if (child->isInplace()) - canBeInPlace = false; - for (size_t j = 0;
canBeInPlace && j < child->getChildEdges().size(); j++) { - if (child->getChildEdgeAt(j)->getMemory().GetPrimitive().get_data_handle() == - input->second->getChildEdgeAt(i)->getMemory().GetPrimitive().get_data_handle()) - canBeInPlace = false; - } - } - for (size_t i = 0; canBeInPlace && i < input->second->getChildEdges().size(); i++) { - changeEdgePtr(input->second->getChildEdgeAt(i), it.second); - } - continue; - } - - MKLDNNNodePtr output; - for (auto& out : graph->GetOutputNodesMap()) { - if (out.first == it.first) { - output = out.second; - break; - } - } - if (output) { - if (output->getParentEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second) - continue; - bool canBeInPlace = true; - void * defaultPtr = output->getParentEdgeAt(0)->getMemory().GetPrimitivePtr()->get_data_handle(); - // Cannot be in-place after concat because concat is using different ptrs without offsets - auto parent = output->getParentEdgeAt(0)->getParent(); - MKLDNNNodePtr previousParent; - do { - previousParent = parent; - if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInplace()) { + if (child->isConstant()) { canBeInPlace = false; break; } - for (size_t i = 0; i < parent->getParentEdges().size(); i++) { - if (parent->getParentEdgeAt(i)->getMemory().GetPrimitivePtr()->get_data_handle() == defaultPtr) { - parent = parent->getParentEdgeAt(i)->getParent(); + if (child->getType() == Concatenation && dynamic_cast<MKLDNNConcatNode *>(child.get())->isOptimized()) { + canBeInPlace = false; + break; + } + + // Cannot be in-place before split because split is using different ptrs without offsets + if (child->getType() == Split) { + canBeInPlace = false; + break; + } + + if (child->isInPlace()) { + canBeInPlace = false; + break; + } + + auto& edges = child->getChildEdges(); + for (auto& edge : edges) { + auto e = edge.lock(); + if (!e) + IE_THROW() << "Node " << child->getName() << " contains empty child edge"; + + if (e->getMemory().GetPrimitive().get_data_handle() ==
ce->getMemory().GetPrimitive().get_data_handle()) { + canBeInPlace = false; + break; + } + } + + if (!canBeInPlace) + break; + } + if (canBeInPlace) { + for (auto& edge : childEdges) { + auto e = edge.lock(); + if (!e) + IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge"; + + changeEdgePtr(e, it.second); + } + } + + continue; + } + + const auto& outputNodesMap = graph->GetOutputNodesMap(); + auto output = outputNodesMap.find(it.first); + if (output != outputNodesMap.end()) { + auto parentEdge = output->second->getParentEdgeAt(0); + if (parentEdge->getMemory().GetPrimitive().get_data_handle() == it.second) + continue; + + bool canBeInPlace = true; + void* defaultPtr = parentEdge->getMemory().GetPrimitivePtr()->get_data_handle(); + // Cannot be in-place after concat because concat is using different ptrs without offsets + auto parent = parentEdge->getParent(); + MKLDNNNodePtr previousParent; + do { + previousParent = parent; + if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInPlace()) { + canBeInPlace = false; + break; + } + + auto& parentEdges = parent->getParentEdges(); + for (auto& edge : parentEdges) { + auto e = edge.lock(); + if (!e) + IE_THROW() << "Node " << parent->getName() << " contains empty parent edge"; + + if (e->getMemory().GetPrimitivePtr()->get_data_handle() == defaultPtr) { + parent = e->getParent(); break; } } } while (previousParent != parent); if (canBeInPlace) - changeEdgePtr(output->getParentEdgeAt(0), it.second); + changeEdgePtr(parentEdge, it.second); continue; } IE_THROW() << "Cannot find input/output blob: " << it.first; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index cda4c052816..11fe2d4006b 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -770,15 +770,29 @@ void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, 
mkldnn::primitive_de } } -bool MKLDNNNode::isInplace() const { - auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) - IE_THROW() << "Preferable primitive descriptor is not set."; - auto config = selected_pd->getConfig(); +bool MKLDNNNode::isInPlace() { + if (inplace == InPlaceType::Unknown) { + auto selected_pd = getSelectedPrimitiveDescriptor(); + if (selected_pd == nullptr) + IE_THROW() << "Preferable primitive descriptor is not set."; - for (auto &in : config.inConfs) if (in.inPlace >= 0) return true; - for (auto &out : config.outConfs) if (out.inPlace >= 0) return true; - return false; + inplace = InPlaceType::NoInPlace; + auto config = selected_pd->getConfig(); + for (auto &in : config.inConfs) { + if (in.inPlace >= 0) { + inplace = InPlaceType::InPlace; + break; + } + } + for (auto &out : config.outConfs) { + if (out.inPlace >= 0) { + inplace = InPlaceType::InPlace; + break; + } + } + } + + return inplace == InPlaceType::InPlace; } bool MKLDNNNode::isConstant() { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index 6406101a878..3e0448f0db6 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -195,6 +195,8 @@ public: return engine; } + bool isInPlace(); + // must be called only after MKLDNNGraph::InitEdges() virtual bool isExecutable() const { return true; @@ -202,8 +204,6 @@ public: bool isConstant(); - bool isInplace() const; - bool isFusedWith(Type type) const; void addFusedNode(const MKLDNNNodePtr &fusingNode) { @@ -336,6 +336,10 @@ public: selectedPrimitiveDescriptorIndex = -1; else selectedPrimitiveDescriptorIndex = index; + + // Each primitive descriptor has its own InPlace status. 
So after new primitive descriptor selection + // we should reset InPlace type to definite new status for node using MKLDNNNode::isInPlace() + inplace = InPlaceType::Unknown; } std::string getPrimitiveDescriptorType(); @@ -616,11 +620,17 @@ protected: bool permanent = false; bool temporary = false; int dynBatchLim = 0; + enum class InPlaceType { + Unknown, + InPlace, + NoInPlace + }; enum class ConstantType { Unknown, Const, NoConst }; + InPlaceType inplace = InPlaceType::Unknown; ConstantType constant = ConstantType::Unknown; std::vector<InferenceEngine::Blob::Ptr> internalBlobs; std::vector<MKLDNNMemoryPtr> internalBlobMemory;