[CPU] Reduced overheads in inference stage (#6794)
This commit is contained in:
parent 3ab533a89c
commit 802b5bcfbb
@@ -722,8 +722,13 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::
     auto input = inputNodesMap.find(name);
     if (input != inputNodesMap.end()) {
+        auto& inTensorDesc = in->getTensorDesc();
+        auto node = input->second;
+        auto childEdge = node->getChildEdgeAt(0);
+        const auto& outDims = node->getOutputShapeAtPort(0);
+
         const void *ext_data_ptr = in->cbuffer();
-        void *inter_data_ptr = input->second->getChildEdgeAt(0)->getMemory().GetData();
+        void *inter_data_ptr = childEdge->getMemory().GetData();
 
         if (ext_data_ptr != inter_data_ptr) {
             auto ext_tdesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(in->getTensorDesc());
@@ -731,17 +736,16 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::
             auto ext_mem = MKLDNNMemory(eng);
             ext_mem.Create(ext_tdesc, ext_data_ptr, false);
 
-            input->second->getChildEdgeAt(0)->getMemory().SetData(ext_mem, 0, false);
+            childEdge->getMemory().SetData(ext_mem, 0, false);
         }
 
         // todo: make sure 'name' exists in this map...
         if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) {
-            if (in->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) {
-                _normalizePreprocMap[name].NormalizeImage(input->second->getOutputShapeAtPort(0),
-                                                          reinterpret_cast<float *>(inter_data_ptr),
-                                                          in->getTensorDesc().getLayout());
+            if (inTensorDesc.getPrecision() == InferenceEngine::Precision::FP32) {
+                _normalizePreprocMap[name].NormalizeImage(outDims, reinterpret_cast<float *>(inter_data_ptr),
+                                                          inTensorDesc.getLayout());
             } else {
-                IE_THROW() << "Mean image of type " << in->getTensorDesc().getPrecision().name() << " is unsupported";
+                IE_THROW() << "Mean image of type " << inTensorDesc.getPrecision().name() << " is unsupported";
             }
         }
     } else {
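Both hunks above follow one pattern: results of repeated getter chains (`in->getTensorDesc()`, `input->second`, `getChildEdgeAt(0)`, `getOutputShapeAtPort(0)`) are hoisted into locals once per call instead of being re-evaluated at every use. A minimal standalone sketch of the idea, with hypothetical `Node`/`Edge` types standing in for the MKLDNN classes:

```cpp
#include <cstddef>
#include <memory>
#include <vector>

struct Edge {
    char* data() const { return buf; }
    char* buf = nullptr;
};

struct Node {
    // In the plugin, getChildEdgeAt() validates the index and resolves a
    // weak_ptr, so repeating the call multiplies that cost.
    std::shared_ptr<Edge> getChildEdgeAt(size_t i) const { return edges.at(i); }
    std::vector<std::shared_ptr<Edge>> edges;
};

// Before: the same edge is looked up twice.
char* interDataBefore(const Node& node) {
    if (node.getChildEdgeAt(0) == nullptr)
        return nullptr;
    return node.getChildEdgeAt(0)->data();
}

// After: the lookup result is hoisted into a local, as the commit does.
char* interDataAfter(const Node& node) {
    auto childEdge = node.getChildEdgeAt(0);
    if (childEdge == nullptr)
        return nullptr;
    return childEdge->data();
}
```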
@@ -756,15 +760,17 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
     for (auto &outputMap : outputNodesMap) {
         auto name = outputMap.first;
         auto node = outputMap.second;
-        const MKLDNNMemory& intr_blob = node->getParentEdgeAt(0)->getMemory();
+        auto parentEdge = node->getParentEdgeAt(0);
+        const MKLDNNMemory& intr_blob = parentEdge->getMemory();
 
-        auto ext_blob = out.find(name);
-        if (ext_blob == out.end()) {
+        const auto ext_blob_map = out.find(name);
+        const auto ext_blob = ext_blob_map->second;
+        if (ext_blob_map == out.end()) {
             IE_THROW(Unexpected) << "The network outputs do not contain mkldnn graph output node name: \"" << name << "\"";
         }
 
         const auto actualDesc = MemoryDescUtils::convertToTensorDesc(intr_blob.getDesc());
-        auto &expectedDesc = ext_blob->second->getTensorDesc();
+        auto &expectedDesc = ext_blob->getTensorDesc();
 
         // TODO [NM]: need to create universal reorder which will be detect cases when we really need to use it
         // WA: for cases when output shape after transformation will be 1x1x1x1 but model output is scalar
@@ -797,27 +803,16 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
         auto srcPrec = actualDesc.getPrecision();
         auto dstPrec = expectedDesc.getPrecision();
 
-        if (srcPrec == dstPrec && ext_blob->second->byteSize() != intr_blob.GetSize())
+        if (srcPrec == dstPrec && ext_blob->byteSize() != intr_blob.GetSize())
             IE_THROW() << "Output blob byte size is not equal network output byte size ("
-                       << ext_blob->second->byteSize() << "!=" << intr_blob.GetSize() << ").";
+                       << ext_blob->byteSize() << "!=" << intr_blob.GetSize() << ").";
 
-        void *ext_blob_ptr = ext_blob->second->buffer();
+        void *ext_blob_ptr = ext_blob->buffer();
         void *intr_blob_ptr = intr_blob.GetData();
 
         // That is the same memory. No need to copy
         if (ext_blob_ptr == intr_blob_ptr) continue;
 
-        size_t size_to_copy = intr_blob.GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
-        // TODO: Should we support InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT???
-        // TODO [DS]: phase 2: should we support this behaviour? Looks obsolete in the dynamic shapes paradigm
-        if (config.batchLimit) {
-            if (node->isDynamicNode()) {
-                IE_THROW(NotImplemented) << "[DS] not implemented dynamic batch for node with dynamic shape";
-            }
-            int MB_to_process = node->batchToProcess();
-            size_to_copy = std::accumulate(outDims.begin() + 1, outDims.end(), (size_t)1, std::multiplies<size_t>()) * MB_to_process;
-        }
-
         if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) {
             auto outBlobDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(expectedDesc);
             auto outBloMem = MKLDNNMemory(eng);
@@ -825,6 +820,17 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
 
             outBloMem.SetData(intr_blob, 0, false);
         } else {
+            size_t size_to_copy = intr_blob.GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
+            // TODO: Should we support InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT???
+            // TODO [DS]: phase 2: should we support this behaviour? Looks obsolete in the dynamic shapes paradigm
+            if (config.batchLimit) {
+                if (node->isDynamicNode()) {
+                    IE_THROW(NotImplemented) << "[DS] not implemented dynamic batch for node with dynamic shape";
+                }
+                int MB_to_process = node->batchToProcess();
+                size_to_copy = std::accumulate(outDims.begin() + 1, outDims.end(), (size_t)1, std::multiplies<size_t>()) * MB_to_process;
+            }
+
             cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy);
         }
     }
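Note what moved where in the two hunks above: the `size_to_copy` computation (including the `std::accumulate` over `outDims` for the dynamic-batch case) now lives inside the `else` branch, so it is evaluated only on the `cpu_convert` path and skipped entirely when the reorder path runs. A hedged sketch of that "compute only on the path that needs it" move, with made-up names:

```cpp
#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Stand-in for the padded-element-count query on the internal blob.
size_t paddedElementsCount(const std::vector<size_t>& dims) {
    return std::accumulate(dims.begin(), dims.end(), static_cast<size_t>(1),
                           std::multiplies<size_t>());
}

void pullOutput(bool needReorder, const std::vector<size_t>& outDims) {
    if (needReorder) {
        // Reorder path: copies via the memory object, never needs the count.
    } else {
        // Convert path: compute the count only here, as the commit does.
        size_t sizeToCopy = paddedElementsCount(outDims);
        (void)sizeToCopy;  // would be passed on to cpu_convert(...)
    }
}
```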
@@ -84,27 +84,27 @@ MKLDNNPlugin::MKLDNNInferRequest::~MKLDNNInferRequest() {
 }
 
 void MKLDNNPlugin::MKLDNNInferRequest::pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob, InferenceEngine::Precision inPrec) {
-    bool needConvert = inPrec != inputBlob->getTensorDesc().getPrecision();
+    auto& tensorDesc = inputBlob->getTensorDesc();
+    bool needConvert = inPrec != tensorDesc.getPrecision();
 
-    if (inputBlob->cbuffer().as<const void *>() == nullptr) {
+    const void* srcData = inputBlob->cbuffer().as<const void *>();
+    if (srcData == nullptr) {
         IE_THROW() << "Input blob has no allocated memory";
     }
 
     InferenceEngine::Blob::Ptr iconv;
     if (needConvert) {
-        iconv = make_blob_with_precision(inPrec, InferenceEngine::TensorDesc(inPrec, inputBlob->getTensorDesc().getDims(),
-                                                                             inputBlob->getTensorDesc().getLayout()));
+        iconv = make_blob_with_precision(inPrec, InferenceEngine::TensorDesc(inPrec, tensorDesc.getDims(), tensorDesc.getLayout()));
         iconv->allocate();
         if (inputBlob->size() != iconv->size())
             IE_THROW() << "Can't copy tensor: input and converted tensors have different number of elements: " << inputBlob->size() << " and "
                        << iconv->size();
 
-        void *srcData = inputBlob->cbuffer().as<void *>();
         void *dstData = iconv->buffer().as<void *>();
         if (dstData == nullptr) {
             IE_THROW() << "Converted input blob has no allocated memory";
         }
-        cpu_convert(srcData, dstData, inputBlob->getTensorDesc().getPrecision(), iconv->getTensorDesc().getPrecision(), iconv->size());
+        cpu_convert(srcData, dstData, tensorDesc.getPrecision(), iconv->getTensorDesc().getPrecision(), iconv->size());
     }
 
     graph->PushInputData(inputName, needConvert ? iconv : inputBlob);
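A detail worth noting in this hunk: `inputBlob->cbuffer()` is now read exactly once into a `const void* srcData` that serves both the null check and the conversion, removing the second, non-const `cbuffer()` fetch. A sketch with hypothetical names:

```cpp
#include <cstddef>
#include <cstring>

struct Blob {
    const void* cbuffer() const { return data; }  // stand-in for Blob::cbuffer()
    const void* data = nullptr;
};

// Sketch: fetch the source pointer once, keep it const, and reuse it for
// both the validity check and the conversion, as the hunk above does.
bool pushInput(const Blob& inputBlob, void* dst, size_t bytes) {
    const void* srcData = inputBlob.cbuffer();
    if (srcData == nullptr)
        return false;                  // "Input blob has no allocated memory"
    std::memcpy(dst, srcData, bytes);  // stands in for cpu_convert(...)
    return true;
}
```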
@@ -112,27 +112,30 @@ void MKLDNNPlugin::MKLDNNInferRequest::pushInput(const std::string& inputName, I
 
 void MKLDNNPlugin::MKLDNNInferRequest::PushInputData() {
     for (auto input : _inputs) {
-        if (!_networkInputs[input.first]) {
-            IE_THROW() << "Input blobs map contains not registered during IInferencePlugin::LoadNetwork blob with name " << input.first;
+        auto inputName = input.first;
+        if (!_networkInputs[inputName]) {
+            IE_THROW() << "Input blobs map contains not registered during IInferencePlugin::LoadNetwork blob with name " << inputName;
         }
-        auto inPrec = input.second->getTensorDesc().getPrecision();
-        if (graph->hasMeanImageFor(input.first) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) {
+
+        auto inputBlob = input.second;
+        auto& inputTensorDesc = inputBlob->getTensorDesc();
+        auto inPrec = inputTensorDesc.getPrecision();
+        if (graph->hasMeanImageFor(inputName) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) {
             inPrec = InferenceEngine::Precision::FP32;
         } else {
             inPrec = normalizeToSupportedPrecision(inPrec);
         }
 
         if (inPrec == InferenceEngine::Precision::UNSPECIFIED) {
-            IE_THROW() << "Unsupported input precision " << input.second->getTensorDesc().getPrecision();
+            IE_THROW() << "Unsupported input precision " << inputTensorDesc.getPrecision();
         }
 
         // User can initialize input via setBlob API using tensorDesc with default (ANY) layout.
         // Currently IE doesn't specify behavior in such scenario, so we assume real layout is equal to the network input.
-        if (input.second->getTensorDesc().getLayout() == InferenceEngine::ANY) {
-            input.second->getTensorDesc().setLayout(_networkInputs[input.first]->getLayout());
+        if (inputTensorDesc.getLayout() == InferenceEngine::ANY) {
+            inputTensorDesc.setLayout(_networkInputs[inputName]->getLayout());
         }
 
-        pushInput(input.first, input.second, inPrec);
+        pushInput(inputName, inputBlob, inPrec);
     }
 }
@@ -502,71 +505,104 @@ static inline void changeEdgePtr(const MKLDNNPlugin::MKLDNNEdgePtr &edge, void *
 
 void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() {
     for (auto& it : externalPtr) {
-        auto input = graph->GetInputNodesMap().find(it.first);
-        if (input != graph->GetInputNodesMap().end()) {
-            if (input->second->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
+        const auto& inputNodesMap = graph->GetInputNodesMap();
+        auto input = inputNodesMap.find(it.first);
+        if (input != inputNodesMap.end()) {
+            MKLDNNNodePtr inputNodePtr = input->second;
+            if (inputNodePtr->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
                 continue;
+            auto& childEdges = inputNodePtr->getChildEdges();
             // Input cannot be in-place with other primitives
             bool canBeInPlace = true;
-            for (size_t i = 0; canBeInPlace && i < input->second->getChildEdges().size(); i++) {
-                auto& child = input->second->getChildEdgeAt(i)->getChild();
-                if (child->isConstant())
-                    canBeInPlace = false;
+            for (auto& childEdge : childEdges) {
+                auto ce = childEdge.lock();
+                if (!ce)
+                    IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge";
 
-                auto* concat = dynamic_cast<MKLDNNConcatNode *>(child.get());
-                if (canBeInPlace && concat && concat->isOptimized())
-                    canBeInPlace = false;
+                auto& child = ce->getChild();
+
+                if (child->isConstant()) {
+                    canBeInPlace = false;
+                    break;
+                }
+
+                if (child->getType() == Concatenation && dynamic_cast<MKLDNNConcatNode*>(child.get())->isOptimized()) {
+                    canBeInPlace = false;
+                    break;
+                }
 
                 // Cannot be in-place before split because split is using different ptrs without offsets
-                auto* split = dynamic_cast<MKLDNNSplitNode *>(child.get());
-                if (canBeInPlace && split)
-                    canBeInPlace = false;
+                if (child->getType() == Split) {
+                    canBeInPlace = false;
+                    break;
+                }
 
-                if (child->isInplace())
-                    canBeInPlace = false;
-                for (size_t j = 0; canBeInPlace && j < child->getChildEdges().size(); j++) {
-                    if (child->getChildEdgeAt(j)->getMemory().GetPrimitive().get_data_handle() ==
-                        input->second->getChildEdgeAt(i)->getMemory().GetPrimitive().get_data_handle())
-                        canBeInPlace = false;
+                if (child->isInPlace()) {
+                    canBeInPlace = false;
+                    break;
+                }
+
+                auto& edges = child->getChildEdges();
+                for (auto& edge : edges) {
+                    auto e = edge.lock();
+                    if (!e)
+                        IE_THROW() << "Node " << child->getName() << " contains empty child edge";
+
+                    if (e->getMemory().GetPrimitive().get_data_handle() == ce->getMemory().GetPrimitive().get_data_handle()) {
+                        canBeInPlace = false;
+                        break;
+                    }
                 }
+
+                if (!canBeInPlace)
+                    break;
             }
-            for (size_t i = 0; canBeInPlace && i < input->second->getChildEdges().size(); i++) {
-                changeEdgePtr(input->second->getChildEdgeAt(i), it.second);
+            if (canBeInPlace) {
+                for (auto& edge : childEdges) {
+                    auto e = edge.lock();
+                    if (!e)
+                        IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge";
+
+                    changeEdgePtr(e, it.second);
+                }
             }
+
             continue;
         }
 
-        MKLDNNNodePtr output;
-        for (auto& out : graph->GetOutputNodesMap()) {
-            if (out.first == it.first) {
-                output = out.second;
-                break;
-            }
-        }
-        if (output) {
-            if (output->getParentEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
+        const auto& outputNodesMap = graph->GetOutputNodesMap();
+        auto output = outputNodesMap.find(it.first);
+        if (output != outputNodesMap.end()) {
+            auto parentEdge = output->second->getParentEdgeAt(0);
+            if (parentEdge->getMemory().GetPrimitive().get_data_handle() == it.second)
                 continue;
+
             bool canBeInPlace = true;
-            void * defaultPtr = output->getParentEdgeAt(0)->getMemory().GetPrimitivePtr()->get_data_handle();
+            void* defaultPtr = parentEdge->getMemory().GetPrimitivePtr()->get_data_handle();
             // Cannot be in-place after concat because concat is using different ptrs without offsets
-            auto parent = output->getParentEdgeAt(0)->getParent();
+            auto parent = parentEdge->getParent();
             MKLDNNNodePtr previousParent;
             do {
                 previousParent = parent;
-                if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInplace()) {
+                if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInPlace()) {
                     canBeInPlace = false;
                     break;
                 }
 
-                for (size_t i = 0; i < parent->getParentEdges().size(); i++) {
-                    if (parent->getParentEdgeAt(i)->getMemory().GetPrimitivePtr()->get_data_handle() == defaultPtr) {
-                        parent = parent->getParentEdgeAt(i)->getParent();
+                auto& parentEdges = parent->getParentEdges();
+                for (auto& edge : parentEdges) {
+                    auto e = edge.lock();
+                    if (!e)
+                        IE_THROW() << "Node " << parent->getName() << " contains empty parent edge";
+
+                    if (e->getMemory().GetPrimitivePtr()->get_data_handle() == defaultPtr) {
+                        parent = e->getParent();
                         break;
                    }
                 }
             } while (previousParent != parent);
             if (canBeInPlace)
-                changeEdgePtr(output->getParentEdgeAt(0), it.second);
+                changeEdgePtr(parentEdge, it.second);
             continue;
         }
         IE_THROW() << "Cannot find input/output blob: " << it.first;
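Two cost reductions stand out in this hunk: child edges are now walked once through `getChildEdges()` with a single `weak_ptr::lock()` per edge instead of repeated `getChildEdgeAt(i)` lookups, and the `dynamic_cast`-based Concat/Split detection is replaced with a comparison against the node's stored type tag. A sketch of the second point, with hypothetical types (the real code compares against the plugin's `Concatenation`/`Split` enumerators):

```cpp
#include <memory>

enum class Type { Generic, Split, Concatenation };

struct Node {
    virtual ~Node() = default;
    Type getType() const { return type; }
    Type type = Type::Generic;
};

struct SplitNode : Node {
    SplitNode() { type = Type::Split; }
};

// Before (sketch): RTTI has to walk the class hierarchy on every check.
bool isSplitViaRtti(const std::shared_ptr<Node>& n) {
    return dynamic_cast<SplitNode*>(n.get()) != nullptr;
}

// After (sketch): a stored type tag turns the check into a plain integer
// compare, mirroring `child->getType() == Split` in the commit.
bool isSplitViaTag(const std::shared_ptr<Node>& n) {
    return n->getType() == Type::Split;
}
```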
@@ -770,15 +770,29 @@ void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_de
     }
 }
 
-bool MKLDNNNode::isInplace() const {
-    auto selected_pd = getSelectedPrimitiveDescriptor();
-    if (selected_pd == nullptr)
-        IE_THROW() << "Preferable primitive descriptor is not set.";
-    auto config = selected_pd->getConfig();
+bool MKLDNNNode::isInPlace() {
+    if (inplace == InPlaceType::Unknown) {
+        auto selected_pd = getSelectedPrimitiveDescriptor();
+        if (selected_pd == nullptr)
+            IE_THROW() << "Preferable primitive descriptor is not set.";
 
-    for (auto &in : config.inConfs) if (in.inPlace >= 0) return true;
-    for (auto &out : config.outConfs) if (out.inPlace >= 0) return true;
-    return false;
+        inplace = InPlaceType::NoInPlace;
+        auto config = selected_pd->getConfig();
+        for (auto &in : config.inConfs) {
+            if (in.inPlace >= 0) {
+                inplace = InPlaceType::InPlace;
+                break;
+            }
+        }
+        for (auto &out : config.outConfs) {
+            if (out.inPlace >= 0) {
+                inplace = InPlaceType::InPlace;
+                break;
+            }
+        }
+    }
+
+    return inplace == InPlaceType::InPlace;
 }
 
 bool MKLDNNNode::isConstant() {
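`isInplace()` used to rescan the selected primitive descriptor's port configs on every call; the renamed `isInPlace()` computes the answer once and memoizes it in a tri-state `InPlaceType` (declared in the header hunks below), which `setSelectedPrimitiveDescriptor()` resets to `Unknown` whenever a new descriptor is chosen. A self-contained sketch of the caching scheme, with simplified stand-in types:

```cpp
#include <vector>

// Tri-state cache: Unknown until the first query, then a sticky verdict.
enum class InPlaceType { Unknown, InPlace, NoInPlace };

struct PortConfig { int inPlace = -1; };  // >= 0 means in-place with that port

struct Node {
    std::vector<PortConfig> inConfs, outConfs;
    InPlaceType inplace = InPlaceType::Unknown;

    bool isInPlace() {
        if (inplace == InPlaceType::Unknown) {  // scan the configs only once
            inplace = InPlaceType::NoInPlace;
            for (auto& c : inConfs)
                if (c.inPlace >= 0) { inplace = InPlaceType::InPlace; break; }
            for (auto& c : outConfs)
                if (c.inPlace >= 0) { inplace = InPlaceType::InPlace; break; }
        }
        return inplace == InPlaceType::InPlace;
    }

    // Selecting a new primitive descriptor invalidates the cached verdict,
    // as setSelectedPrimitiveDescriptor() does in the commit.
    void selectPrimitiveDescriptor() { inplace = InPlaceType::Unknown; }
};
```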
@@ -195,6 +195,8 @@ public:
         return engine;
     }
 
+    bool isInPlace();
+
     // must be called only after MKLDNNGraph::InitEdges()
     virtual bool isExecutable() const {
         return true;
@@ -202,8 +204,6 @@ public:
 
     bool isConstant();
 
-    bool isInplace() const;
-
     bool isFusedWith(Type type) const;
 
     void addFusedNode(const MKLDNNNodePtr &fusingNode) {
@@ -336,6 +336,10 @@ public:
             selectedPrimitiveDescriptorIndex = -1;
         else
             selectedPrimitiveDescriptorIndex = index;
+
+        // Each primitive descriptor has its own InPlace status. So after new primitive descriptor selection
+        // we should reset InPlace type to definite new status for node using MKLDNNNode::isInPlace()
+        inplace = InPlaceType::Unknown;
     }
 
     std::string getPrimitiveDescriptorType();
@@ -616,11 +620,17 @@ protected:
     bool permanent = false;
     bool temporary = false;
     int dynBatchLim = 0;
+    enum class InPlaceType {
+        Unknown,
+        InPlace,
+        NoInPlace
+    };
     enum class ConstantType {
         Unknown,
         Const,
         NoConst
     };
+    InPlaceType inplace = InPlaceType::Unknown;
     ConstantType constant = ConstantType::Unknown;
     std::vector<InferenceEngine::Blob::Ptr> internalBlobs;
     std::vector<MKLDNNMemoryPtr> internalBlobMemory;