[CPU] Reduced overheads in inference stage (#6794)
commit 802b5bcfbb (parent 3ab533a89c)
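This change trims per-infer-request overhead in the CPU (MKLDNN) plugin. The hunks below fall into three themes: repeated map lookups and getter chains such as `input->second->getChildEdgeAt(0)` and `inputBlob->getTensorDesc()` are hoisted into local variables; `changeDefaultPtr()` replaces its linear scan over `GetOutputNodesMap()` with a map `find()` and walks child edges through the `getChildEdges()` collection; and `MKLDNNNode::isInplace()` becomes `isInPlace()`, memoized in a new tri-state `InPlaceType` member that is reset whenever a primitive descriptor is selected. `PullOutputData()` additionally moves the dynamic-batch `size_to_copy` computation into the branch that actually copies data.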
@@ -722,8 +722,13 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::

     auto input = inputNodesMap.find(name);
     if (input != inputNodesMap.end()) {
+        auto& inTensorDesc = in->getTensorDesc();
+        auto node = input->second;
+        auto childEdge = node->getChildEdgeAt(0);
+        const auto& outDims = node->getOutputShapeAtPort(0);
+
         const void *ext_data_ptr = in->cbuffer();
-        void *inter_data_ptr = input->second->getChildEdgeAt(0)->getMemory().GetData();
+        void *inter_data_ptr = childEdge->getMemory().GetData();

         if (ext_data_ptr != inter_data_ptr) {
             auto ext_tdesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(in->getTensorDesc());
@@ -731,17 +736,16 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::
             auto ext_mem = MKLDNNMemory(eng);
             ext_mem.Create(ext_tdesc, ext_data_ptr, false);

-            input->second->getChildEdgeAt(0)->getMemory().SetData(ext_mem, 0, false);
+            childEdge->getMemory().SetData(ext_mem, 0, false);
         }

         // todo: make sure 'name' exists in this map...
         if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) {
-            if (in->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) {
-                _normalizePreprocMap[name].NormalizeImage(input->second->getOutputShapeAtPort(0),
-                                                          reinterpret_cast<float *>(inter_data_ptr),
-                                                          in->getTensorDesc().getLayout());
+            if (inTensorDesc.getPrecision() == InferenceEngine::Precision::FP32) {
+                _normalizePreprocMap[name].NormalizeImage(outDims, reinterpret_cast<float *>(inter_data_ptr),
+                                                          inTensorDesc.getLayout());
             } else {
-                IE_THROW() << "Mean image of type " << in->getTensorDesc().getPrecision().name() << " is unsupported";
+                IE_THROW() << "Mean image of type " << inTensorDesc.getPrecision().name() << " is unsupported";
             }
         }
     } else {
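The pattern in the hunk above — look the input node up once, then cache `node`, `childEdge`, `inTensorDesc`, and `outDims` instead of re-evaluating `input->second->getChildEdgeAt(0)` and `in->getTensorDesc()` on every use — is the main micro-optimization of this commit. A minimal standalone sketch of the same idea (all names here are hypothetical, not plugin API):

```cpp
// Sketch of the hoisting pattern: one map lookup, then cached sub-objects
// reused by every later statement. Node/Edge/demo() are illustrative only.
#include <cassert>
#include <map>
#include <memory>
#include <string>

struct Edge { void* data = nullptr; };
struct Node {
    std::shared_ptr<Edge> childEdge = std::make_shared<Edge>();
    std::shared_ptr<Edge> getChildEdgeAt(size_t) const { return childEdge; }
};

void demo(const std::map<std::string, std::shared_ptr<Node>>& inputNodesMap,
          const std::string& name) {
    auto input = inputNodesMap.find(name);   // one lookup...
    if (input == inputNodesMap.end()) return;
    auto node = input->second;               // ...and cached results reused
    auto childEdge = node->getChildEdgeAt(0);
    // later accesses go through childEdge, not a repeated
    // input->second->getChildEdgeAt(0) chain per statement
    assert(childEdge == node->getChildEdgeAt(0));
}

int main() {
    std::map<std::string, std::shared_ptr<Node>> m{{"in0", std::make_shared<Node>()}};
    demo(m, "in0");
}
```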
@@ -756,15 +760,17 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
     for (auto &outputMap : outputNodesMap) {
         auto name = outputMap.first;
         auto node = outputMap.second;
-        const MKLDNNMemory& intr_blob = node->getParentEdgeAt(0)->getMemory();
+        auto parentEdge = node->getParentEdgeAt(0);
+        const MKLDNNMemory& intr_blob = parentEdge->getMemory();

-        auto ext_blob = out.find(name);
-        if (ext_blob == out.end()) {
+        const auto ext_blob_map = out.find(name);
+        const auto ext_blob = ext_blob_map->second;
+        if (ext_blob_map == out.end()) {
             IE_THROW(Unexpected) << "The network outputs do not contain mkldnn graph output node name: \"" << name << "\"";
         }

         const auto actualDesc = MemoryDescUtils::convertToTensorDesc(intr_blob.getDesc());
-        auto &expectedDesc = ext_blob->second->getTensorDesc();
+        auto &expectedDesc = ext_blob->getTensorDesc();

         // TODO [NM]: need to create universal reorder which will be detect cases when we really need to use it
         // WA: for cases when output shape after transformation will be 1x1x1x1 but model output is scalar
@@ -797,27 +803,16 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
         auto srcPrec = actualDesc.getPrecision();
         auto dstPrec = expectedDesc.getPrecision();

-        if (srcPrec == dstPrec && ext_blob->second->byteSize() != intr_blob.GetSize())
+        if (srcPrec == dstPrec && ext_blob->byteSize() != intr_blob.GetSize())
             IE_THROW() << "Output blob byte size is not equal network output byte size ("
-                       << ext_blob->second->byteSize() << "!=" << intr_blob.GetSize() << ").";
+                       << ext_blob->byteSize() << "!=" << intr_blob.GetSize() << ").";

-        void *ext_blob_ptr = ext_blob->second->buffer();
+        void *ext_blob_ptr = ext_blob->buffer();
         void *intr_blob_ptr = intr_blob.GetData();

         // That is the same memory. No need to copy
         if (ext_blob_ptr == intr_blob_ptr) continue;

-        size_t size_to_copy = intr_blob.GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
-        // TODO: Should we support InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT???
-        // TODO [DS]: phase 2: should we support this behaviour? Looks obsolete in the dynamic shapes paradigm
-        if (config.batchLimit) {
-            if (node->isDynamicNode()) {
-                IE_THROW(NotImplemented) << "[DS] not implemented dynamic batch for node with dynamic shape";
-            }
-            int MB_to_process = node->batchToProcess();
-            size_to_copy = std::accumulate(outDims.begin() + 1, outDims.end(), (size_t)1, std::multiplies<size_t>()) * MB_to_process;
-        }
-
         if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) {
             auto outBlobDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(expectedDesc);
             auto outBloMem = MKLDNNMemory(eng);
@@ -825,6 +820,17 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {

             outBloMem.SetData(intr_blob, 0, false);
         } else {
+            size_t size_to_copy = intr_blob.GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
+            // TODO: Should we support InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT???
+            // TODO [DS]: phase 2: should we support this behaviour? Looks obsolete in the dynamic shapes paradigm
+            if (config.batchLimit) {
+                if (node->isDynamicNode()) {
+                    IE_THROW(NotImplemented) << "[DS] not implemented dynamic batch for node with dynamic shape";
+                }
+                int MB_to_process = node->batchToProcess();
+                size_to_copy = std::accumulate(outDims.begin() + 1, outDims.end(), (size_t)1, std::multiplies<size_t>()) * MB_to_process;
+            }
+
             cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy);
         }
     }

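Note on the `PullOutputData()` hunks above: `size_to_copy` (including the `config.batchLimit` recalculation) is now computed only in the `else` branch, where `cpu_convert` consumes it; the reorder branch (`outBloMem.SetData`) never used the value, so the old placement paid for `getPaddedElementsCount()` and the `std::accumulate` product on every output. A generic sketch of that move, with hypothetical names (`expensiveCount`, `pull`) rather than plugin API:

```cpp
// Compute a value only on the branch that consumes it.
// expensiveCount() models getPaddedElementsCount() + the accumulate product.
#include <cstdio>
#include <functional>
#include <numeric>
#include <vector>

static size_t expensiveCount(const std::vector<size_t>& dims) {
    return std::accumulate(dims.begin(), dims.end(), size_t{1}, std::multiplies<size_t>());
}

static void pull(bool needsReorder, const std::vector<size_t>& dims) {
    if (needsReorder) {
        std::puts("reorder path");                   // size never needed here
    } else {
        size_t size_to_copy = expensiveCount(dims);  // computed only when the copy happens
        std::printf("convert %zu elements\n", size_to_copy);
    }
}

int main() {
    pull(true, {1, 3, 224, 224});
    pull(false, {1, 3, 224, 224});
}
```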
@@ -84,27 +84,27 @@ MKLDNNPlugin::MKLDNNInferRequest::~MKLDNNInferRequest() {
 }

 void MKLDNNPlugin::MKLDNNInferRequest::pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob, InferenceEngine::Precision inPrec) {
-    bool needConvert = inPrec != inputBlob->getTensorDesc().getPrecision();
+    auto& tensorDesc = inputBlob->getTensorDesc();
+    bool needConvert = inPrec != tensorDesc.getPrecision();

-    if (inputBlob->cbuffer().as<const void *>() == nullptr) {
+    const void* srcData = inputBlob->cbuffer().as<const void *>();
+    if (srcData == nullptr) {
         IE_THROW() << "Input blob has no allocated memory";
     }

     InferenceEngine::Blob::Ptr iconv;
     if (needConvert) {
-        iconv = make_blob_with_precision(inPrec, InferenceEngine::TensorDesc(inPrec, inputBlob->getTensorDesc().getDims(),
-                                                                             inputBlob->getTensorDesc().getLayout()));
+        iconv = make_blob_with_precision(inPrec, InferenceEngine::TensorDesc(inPrec, tensorDesc.getDims(), tensorDesc.getLayout()));
         iconv->allocate();
         if (inputBlob->size() != iconv->size())
             IE_THROW() << "Can't copy tensor: input and converted tensors have different number of elements: " << inputBlob->size() << " and "
                        << iconv->size();

-        void *srcData = inputBlob->cbuffer().as<void *>();
         void *dstData = iconv->buffer().as<void *>();
         if (dstData == nullptr) {
             IE_THROW() << "Converted input blob has no allocated memory";
         }
-        cpu_convert(srcData, dstData, inputBlob->getTensorDesc().getPrecision(), iconv->getTensorDesc().getPrecision(), iconv->size());
+        cpu_convert(srcData, dstData, tensorDesc.getPrecision(), iconv->getTensorDesc().getPrecision(), iconv->size());
     }

     graph->PushInputData(inputName, needConvert ? iconv : inputBlob);
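`pushInput()` above keeps its staging-conversion flow: when the user blob's precision differs from what the graph expects, it converts once into a blob of the target precision and pushes that instead. A compilable sketch of that flow under simplified assumptions; the types and the `convert_buffer()` helper are hypothetical stand-ins for IE blobs and `cpu_convert`:

```cpp
// Staging conversion: convert once into a target-precision buffer, then feed
// the graph either the converted buffer or the original, untouched one.
#include <cstdint>
#include <iostream>
#include <vector>

// hypothetical stand-in for cpu_convert(): element-wise widen u8 -> f32
static void convert_buffer(const uint8_t* src, float* dst, size_t n) {
    for (size_t i = 0; i < n; ++i) dst[i] = static_cast<float>(src[i]);
}

int main() {
    std::vector<uint8_t> userBlob{0, 128, 255};   // user-provided precision (U8)
    bool needConvert = true;                      // target precision differs (FP32)

    std::vector<float> staging;
    if (needConvert) {
        staging.resize(userBlob.size());          // same element count, new precision
        convert_buffer(userBlob.data(), staging.data(), userBlob.size());
    }
    // push the staging buffer when conversion happened, else the original
    const void* pushed = needConvert ? static_cast<const void*>(staging.data())
                                     : static_cast<const void*>(userBlob.data());
    std::cout << (pushed == staging.data() ? "pushed converted blob\n"
                                           : "pushed original blob\n");
}
```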
@@ -112,27 +112,30 @@ void MKLDNNPlugin::MKLDNNInferRequest::pushInput(const std::string& inputName, I

 void MKLDNNPlugin::MKLDNNInferRequest::PushInputData() {
     for (auto input : _inputs) {
-        if (!_networkInputs[input.first]) {
-            IE_THROW() << "Input blobs map contains not registered during IInferencePlugin::LoadNetwork blob with name " << input.first;
+        auto inputName = input.first;
+        if (!_networkInputs[inputName]) {
+            IE_THROW() << "Input blobs map contains not registered during IInferencePlugin::LoadNetwork blob with name " << inputName;
         }
-        auto inPrec = input.second->getTensorDesc().getPrecision();
-        if (graph->hasMeanImageFor(input.first) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) {
+        auto inputBlob = input.second;
+        auto& inputTensorDesc = inputBlob->getTensorDesc();
+        auto inPrec = inputTensorDesc.getPrecision();
+        if (graph->hasMeanImageFor(inputName) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) {
             inPrec = InferenceEngine::Precision::FP32;
         } else {
             inPrec = normalizeToSupportedPrecision(inPrec);
         }

         if (inPrec == InferenceEngine::Precision::UNSPECIFIED) {
-            IE_THROW() << "Unsupported input precision " << input.second->getTensorDesc().getPrecision();
+            IE_THROW() << "Unsupported input precision " << inputTensorDesc.getPrecision();
         }

         // User can initialize input via setBlob API using tensorDesc with default (ANY) layout.
         // Currently IE doesn't specify behavior in such scenario, so we assume real layout is equal to the network input.
-        if (input.second->getTensorDesc().getLayout() == InferenceEngine::ANY) {
-            input.second->getTensorDesc().setLayout(_networkInputs[input.first]->getLayout());
+        if (inputTensorDesc.getLayout() == InferenceEngine::ANY) {
+            inputTensorDesc.setLayout(_networkInputs[inputName]->getLayout());
         }

-        pushInput(input.first, input.second, inPrec);
+        pushInput(inputName, inputBlob, inPrec);
     }
 }

@@ -502,71 +505,104 @@ static inline void changeEdgePtr(const MKLDNNPlugin::MKLDNNEdgePtr &edge, void *

 void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() {
     for (auto& it : externalPtr) {
-        auto input = graph->GetInputNodesMap().find(it.first);
-        if (input != graph->GetInputNodesMap().end()) {
-            if (input->second->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
+        const auto& inputNodesMap = graph->GetInputNodesMap();
+        auto input = inputNodesMap.find(it.first);
+        if (input != inputNodesMap.end()) {
+            MKLDNNNodePtr inputNodePtr = input->second;
+            if (inputNodePtr->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
                 continue;
+            auto& childEdges = inputNodePtr->getChildEdges();
             // Input cannot be in-place with other primitives
             bool canBeInPlace = true;
-            for (size_t i = 0; canBeInPlace && i < input->second->getChildEdges().size(); i++) {
-                auto& child = input->second->getChildEdgeAt(i)->getChild();
-                if (child->isConstant())
-                    canBeInPlace = false;
+            for (auto& childEdge : childEdges) {
+                auto ce = childEdge.lock();
+                if (!ce)
+                    IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge";

-                auto* concat = dynamic_cast<MKLDNNConcatNode *>(child.get());
-                if (canBeInPlace && concat && concat->isOptimized())
-                    canBeInPlace = false;
+                auto& child = ce->getChild();

-                // Cannot be in-place before split because split is using different ptrs without offsets
-                auto* split = dynamic_cast<MKLDNNSplitNode *>(child.get());
-                if (canBeInPlace && split)
-                    canBeInPlace = false;
-
-                if (child->isInplace())
-                    canBeInPlace = false;
-                for (size_t j = 0; canBeInPlace && j < child->getChildEdges().size(); j++) {
-                    if (child->getChildEdgeAt(j)->getMemory().GetPrimitive().get_data_handle() ==
-                        input->second->getChildEdgeAt(i)->getMemory().GetPrimitive().get_data_handle())
-                        canBeInPlace = false;
-                }
-            }
-            for (size_t i = 0; canBeInPlace && i < input->second->getChildEdges().size(); i++) {
-                changeEdgePtr(input->second->getChildEdgeAt(i), it.second);
-            }
-            continue;
-        }
-
-        MKLDNNNodePtr output;
-        for (auto& out : graph->GetOutputNodesMap()) {
-            if (out.first == it.first) {
-                output = out.second;
-                break;
-            }
-        }
-        if (output) {
-            if (output->getParentEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
-                continue;
-            bool canBeInPlace = true;
-            void * defaultPtr = output->getParentEdgeAt(0)->getMemory().GetPrimitivePtr()->get_data_handle();
-            // Cannot be in-place after concat because concat is using different ptrs without offsets
-            auto parent = output->getParentEdgeAt(0)->getParent();
-            MKLDNNNodePtr previousParent;
-            do {
-                previousParent = parent;
-                if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInplace()) {
+                if (child->isConstant()) {
                     canBeInPlace = false;
                     break;
                 }

-                for (size_t i = 0; i < parent->getParentEdges().size(); i++) {
-                    if (parent->getParentEdgeAt(i)->getMemory().GetPrimitivePtr()->get_data_handle() == defaultPtr) {
-                        parent = parent->getParentEdgeAt(i)->getParent();
+                if (child->getType() == Concatenation && dynamic_cast<MKLDNNConcatNode*>(child.get())->isOptimized()) {
+                    canBeInPlace = false;
+                    break;
+                }
+
+                // Cannot be in-place before split because split is using different ptrs without offsets
+                if (child->getType() == Split) {
+                    canBeInPlace = false;
+                    break;
+                }
+
+                if (child->isInPlace()) {
+                    canBeInPlace = false;
+                    break;
+                }
+
+                auto& edges = child->getChildEdges();
+                for (auto& edge : edges) {
+                    auto e = edge.lock();
+                    if (!e)
+                        IE_THROW() << "Node " << child->getName() << " contains empty child edge";
+
+                    if (e->getMemory().GetPrimitive().get_data_handle() == ce->getMemory().GetPrimitive().get_data_handle()) {
+                        canBeInPlace = false;
+                        break;
+                    }
+                }
+
+                if (!canBeInPlace)
+                    break;
+            }
+            if (canBeInPlace) {
+                for (auto& edge : childEdges) {
+                    auto e = edge.lock();
+                    if (!e)
+                        IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge";
+
+                    changeEdgePtr(e, it.second);
+                }
+            }
+
+            continue;
+        }
+
+        const auto& outputNodesMap = graph->GetOutputNodesMap();
+        auto output = outputNodesMap.find(it.first);
+        if (output != outputNodesMap.end()) {
+            auto parentEdge = output->second->getParentEdgeAt(0);
+            if (parentEdge->getMemory().GetPrimitive().get_data_handle() == it.second)
+                continue;
+
+            bool canBeInPlace = true;
+            void* defaultPtr = parentEdge->getMemory().GetPrimitivePtr()->get_data_handle();
+            // Cannot be in-place after concat because concat is using different ptrs without offsets
+            auto parent = parentEdge->getParent();
+            MKLDNNNodePtr previousParent;
+            do {
+                previousParent = parent;
+                if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInPlace()) {
+                    canBeInPlace = false;
+                    break;
+                }
+
+                auto& parentEdges = parent->getParentEdges();
+                for (auto& edge : parentEdges) {
+                    auto e = edge.lock();
+                    if (!e)
+                        IE_THROW() << "Node " << parent->getName() << " contains empty parent edge";
+
+                    if (e->getMemory().GetPrimitivePtr()->get_data_handle() == defaultPtr) {
+                        parent = e->getParent();
                         break;
                     }
                 }
             } while (previousParent != parent);
             if (canBeInPlace)
-                changeEdgePtr(output->getParentEdgeAt(0), it.second);
+                changeEdgePtr(parentEdge, it.second);
             continue;
         }
         IE_THROW() << "Cannot find input/output blob: " << it.first;

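Two things change inside `changeDefaultPtr()` beyond the map `find()`: child edges are walked through the `getChildEdges()` weak_ptr collection, with each entry `lock()`ed and validated, instead of repeated `getChildEdgeAt(i)` calls; and the Concat/Split detection uses cheap `getType() == Concatenation` / `getType() == Split` tag tests, with the `dynamic_cast` performed only after the tag matches. A compilable sketch of the weak_ptr walk, where `Edge` and `Node` are hypothetical stand-ins for MKLDNNEdge/MKLDNNNode:

```cpp
// Walk a weak_ptr edge collection once, promoting each entry and failing
// loudly on an expired edge -- the shape of the loops added above.
#include <iostream>
#include <memory>
#include <stdexcept>
#include <vector>

struct Edge { int id; explicit Edge(int i) : id(i) {} };

struct Node {
    // nodes hold weak references to their child edges, as MKLDNNNode does
    std::vector<std::weak_ptr<Edge>> childEdges;
};

int main() {
    auto e0 = std::make_shared<Edge>(0);
    auto e1 = std::make_shared<Edge>(1);
    Node node;
    node.childEdges = {e0, e1};

    for (auto& childEdge : node.childEdges) {
        auto ce = childEdge.lock();      // promote the weak reference once
        if (!ce)
            throw std::runtime_error("node contains empty child edge");
        std::cout << "edge " << ce->id << '\n';
    }
}
```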
@@ -770,15 +770,29 @@ void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_de
     }
 }

-bool MKLDNNNode::isInplace() const {
-    auto selected_pd = getSelectedPrimitiveDescriptor();
-    if (selected_pd == nullptr)
-        IE_THROW() << "Preferable primitive descriptor is not set.";
-    auto config = selected_pd->getConfig();
+bool MKLDNNNode::isInPlace() {
+    if (inplace == InPlaceType::Unknown) {
+        auto selected_pd = getSelectedPrimitiveDescriptor();
+        if (selected_pd == nullptr)
+            IE_THROW() << "Preferable primitive descriptor is not set.";

-    for (auto &in : config.inConfs) if (in.inPlace >= 0) return true;
-    for (auto &out : config.outConfs) if (out.inPlace >= 0) return true;
-    return false;
+        inplace = InPlaceType::NoInPlace;
+        auto config = selected_pd->getConfig();
+        for (auto &in : config.inConfs) {
+            if (in.inPlace >= 0) {
+                inplace = InPlaceType::InPlace;
+                break;
+            }
+        }
+        for (auto &out : config.outConfs) {
+            if (out.inPlace >= 0) {
+                inplace = InPlaceType::InPlace;
+                break;
+            }
+        }
+    }
+
+    return inplace == InPlaceType::InPlace;
 }

 bool MKLDNNNode::isConstant() {

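The new `isInPlace()` computes the answer once per selected primitive descriptor and caches it in the tri-state `InPlaceType` member declared in the header hunks below; the descriptor setter (the `@@ -336` hunk) resets the cache to `Unknown`. The same memoization shape, reduced to a standalone sketch where `expensiveScan()` is a hypothetical stand-in for walking the descriptor configs:

```cpp
// Tri-state memoization: Unknown until first queried, then cached until the
// cache is explicitly invalidated.
#include <iostream>

class Node {
public:
    bool isInPlace() {
        if (inplace == InPlaceType::Unknown) {       // compute once...
            inplace = expensiveScan() ? InPlaceType::InPlace
                                      : InPlaceType::NoInPlace;
        }
        return inplace == InPlaceType::InPlace;      // ...then serve the cache
    }
    void selectPrimitiveDescriptor() {
        inplace = InPlaceType::Unknown;              // new descriptor invalidates the cache
    }
private:
    enum class InPlaceType { Unknown, InPlace, NoInPlace };
    InPlaceType inplace = InPlaceType::Unknown;

    bool expensiveScan() const {
        std::cout << "scanning configs...\n";        // visible side effect: runs once per reset
        return true;
    }
};

int main() {
    Node n;
    n.isInPlace();                    // triggers the scan
    n.isInPlace();                    // served from cache, no scan
    n.selectPrimitiveDescriptor();    // invalidate
    n.isInPlace();                    // scans again
}
```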
@@ -195,6 +195,8 @@ public:
         return engine;
     }

+    bool isInPlace();
+
     // must be called only after MKLDNNGraph::InitEdges()
     virtual bool isExecutable() const {
         return true;
@@ -202,8 +204,6 @@ public:

     bool isConstant();

-    bool isInplace() const;
-
     bool isFusedWith(Type type) const;

     void addFusedNode(const MKLDNNNodePtr &fusingNode) {
@@ -336,6 +336,10 @@ public:
             selectedPrimitiveDescriptorIndex = -1;
         else
             selectedPrimitiveDescriptorIndex = index;
+
+        // Each primitive descriptor has its own InPlace status. So after new primitive descriptor selection
+        // we should reset InPlace type to definite new status for node using MKLDNNNode::isInPlace()
+        inplace = InPlaceType::Unknown;
     }

     std::string getPrimitiveDescriptorType();
@@ -616,11 +620,17 @@ protected:
     bool permanent = false;
     bool temporary = false;
     int dynBatchLim = 0;
+    enum class InPlaceType {
+        Unknown,
+        InPlace,
+        NoInPlace
+    };
     enum class ConstantType {
         Unknown,
         Const,
         NoConst
     };
+    InPlaceType inplace = InPlaceType::Unknown;
     ConstantType constant = ConstantType::Unknown;
     std::vector<InferenceEngine::Blob::Ptr> internalBlobs;
     std::vector<MKLDNNMemoryPtr> internalBlobMemory;