[CPU] Fix memory allocation for non default shape infer path (#9475)

Author: Maxim Andronov, 2021-12-30 17:04:33 +03:00 (committed by GitHub)
parent bea10d6e3c
commit 8ba94cfb8f
5 changed files with 10 additions and 26 deletions


@@ -287,12 +287,7 @@ bool MKLDNNMatrixNmsNode::isExecutable() const {
 
 void MKLDNNMatrixNmsNode::executeDynamicImpl(mkldnn::stream strm) {
     if (hasEmptyInputTensors()) {
-        getChildEdgesAtPort(NMS_SELECTED_OUTPUTS)[0]->getMemoryPtr()->redefineDesc(
-            getBaseMemDescAtOutputPort(NMS_SELECTED_OUTPUTS)->cloneWithNewDims({0, 6}));
-        getChildEdgesAtPort(NMS_SELECTED_INDICES)[0]->getMemoryPtr()->redefineDesc(
-            getBaseMemDescAtOutputPort(NMS_SELECTED_INDICES)->cloneWithNewDims({0, 1}));
-        getChildEdgesAtPort(NMS_VALID_OUTPUTS)[0]->getMemoryPtr()->redefineDesc(
-            getBaseMemDescAtOutputPort(NMS_VALID_OUTPUTS)->cloneWithNewDims({0}));
+        redefineOutputMemory({{0, 6}, {0, 1}, {0}});
         return;
     }
     execute(strm);
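
The redefineOutputMemory helper itself is not touched by this commit (all five changed files are node implementations), so its body does not appear in these hunks. A minimal sketch of what it presumably does, simply generalizing the per-port calls removed above; the exact signature and its placement on the base MKLDNNNode class are assumptions:

    // Hypothetical sketch, not the verbatim implementation: fold the repeated
    // redefineDesc/cloneWithNewDims boilerplate into one loop over output ports.
    void MKLDNNNode::redefineOutputMemory(const std::vector<VectorDims>& newOutputShapes) {
        for (size_t i = 0; i < newOutputShapes.size(); i++) {
            getChildEdgesAtPort(i)[0]->getMemoryPtr()->redefineDesc(
                    getBaseMemDescAtOutputPort(i)->cloneWithNewDims(newOutputShapes[i]));
        }
    }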
@@ -375,9 +370,7 @@ void MKLDNNMatrixNmsNode::execute(mkldnn::stream strm) {
     // TODO [DS NMS]: remove when nodes from models where nms is not last node in model supports DS
     if (isDynamicNode()) {
         size_t totalBox = std::accumulate(m_numPerBatch.begin(), m_numPerBatch.end(), 0);
-        selectedOutputsMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTED_OUTPUTS)->cloneWithNewDims({totalBox, 6}));
-        selectedIndicesMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTED_INDICES)->cloneWithNewDims({totalBox, 1}));
-        validOutputsMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_VALID_OUTPUTS)->cloneWithNewDims({m_numBatches}));
+        redefineOutputMemory({{totalBox, 6}, {totalBox, 1}, {m_numBatches}});
     }
     float* selectedOutputs = reinterpret_cast<float*>(selectedOutputsMemPtr->GetPtr());
     int* selectedIndices = reinterpret_cast<int*>(selectedIndicesMemPtr->GetPtr());
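
One property this pattern relies on: redefining a descriptor to a larger shape may reallocate the underlying buffer, so raw pointers must be fetched only after the redefinition, exactly as the context lines above do. A condensed sketch of the required order, using names from the hunk:

    // Redefine first: this may (re)allocate the output buffers ...
    redefineOutputMemory({{totalBox, 6}, {totalBox, 1}, {m_numBatches}});
    // ... and only then take raw pointers into the (possibly new) memory.
    float* selectedOutputs = reinterpret_cast<float*>(selectedOutputsMemPtr->GetPtr());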


@@ -144,12 +144,7 @@ bool MKLDNNMultiClassNmsNode::isExecutable() const {
 
 void MKLDNNMultiClassNmsNode::executeDynamicImpl(mkldnn::stream strm) {
     if (hasEmptyInputTensors()) {
-        getChildEdgesAtPort(NMS_SELECTEDOUTPUTS)[0]->getMemoryPtr()->redefineDesc(
-            getBaseMemDescAtOutputPort(NMS_SELECTEDOUTPUTS)->cloneWithNewDims({0, 6}));
-        getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr()->redefineDesc(
-            getBaseMemDescAtOutputPort(NMS_SELECTEDINDICES)->cloneWithNewDims({0, 1}));
-        getChildEdgesAtPort(NMS_SELECTEDNUM)[0]->getMemoryPtr()->redefineDesc(
-            getBaseMemDescAtOutputPort(NMS_SELECTEDNUM)->cloneWithNewDims({0}));
+        redefineOutputMemory({{0, 6}, {0, 1}, {0}});
         return;
     }
     execute(strm);
@@ -268,9 +263,7 @@ void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) {
     // TODO [DS NMS]: remove when nodes from models where nms is not last node in model supports DS
     if (isDynamicNode()) {
         size_t totalBox = std::accumulate(m_selected_num.begin(), m_selected_num.end(), 0);
-        selectedOutputsMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTEDOUTPUTS)->cloneWithNewDims({totalBox, 6}));
-        selectedIndicesMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTEDINDICES)->cloneWithNewDims({totalBox, 1}));
-        validOutputsMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTEDNUM)->cloneWithNewDims({m_numBatches}));
+        redefineOutputMemory({{totalBox, 6}, {totalBox, 1}, {m_numBatches}});
     }
     int* selected_indices = reinterpret_cast<int*>(selectedIndicesMemPtr->GetPtr());
     float* selected_outputs = reinterpret_cast<float*>(selectedOutputsMemPtr->GetPtr());


@@ -713,10 +713,7 @@ void MKLDNNNonMaxSuppressionNode::createJitKernel() {
 void MKLDNNNonMaxSuppressionNode::executeDynamicImpl(mkldnn::stream strm) {
     if (hasEmptyInputTensors() || (inputShapes.size() > NMS_MAXOUTPUTBOXESPERCLASS &&
             reinterpret_cast<int *>(getParentEdgeAt(NMS_MAXOUTPUTBOXESPERCLASS)->getMemoryPtr()->GetPtr())[0] == 0)) {
-        getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr()->redefineDesc(
-            getBaseMemDescAtOutputPort(NMS_SELECTEDINDICES)->cloneWithNewDims({0, 3}));
-        getChildEdgesAtPort(NMS_SELECTEDSCORES)[0]->getMemoryPtr()->redefineDesc(
-            getBaseMemDescAtOutputPort(NMS_SELECTEDSCORES)->cloneWithNewDims({0, 3}));
+        redefineOutputMemory({{0, 3}, {0, 3}, {1}});
         *reinterpret_cast<int *>(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->GetPtr()) = 0;
         return;
     }
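
Note the third entry in the new call: the old code on this path redefined only the indices and scores outputs, yet immediately wrote through the NMS_VALIDOUTPUTS pointer on the next line. Redefining that output to {1} first, so its one-element buffer is actually allocated, is presumably the memory-allocation fix the commit title refers to:

    // With {1} included, the valid-outputs buffer is allocated before this store:
    *reinterpret_cast<int *>(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->GetPtr()) = 0;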
@@ -794,8 +791,7 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) {
     // TODO [DS NMS]: remove when nodes from models where nms is not last node in model supports DS
     if (isDynamicNode()) {
         VectorDims newDims{validOutputs, 3};
-        indicesMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTEDINDICES)->cloneWithNewDims(newDims));
-        scoresMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTEDSCORES)->cloneWithNewDims(newDims));
+        redefineOutputMemory({newDims, newDims, {1}});
     }
     int selectedIndicesStride = indicesMemPtr->GetDescWithType<BlockedMemoryDesc>()->getStrides()[0];


@@ -112,7 +112,7 @@ void MKLDNNNonZeroNode::executeSpecified() {
     if (isDynamicNode()) {
         VectorDims newDims{inRank, nonZeroCount};
-        dstMemPtr->redefineDesc(getBaseMemDescAtOutputPort(0)->cloneWithNewDims(newDims));
+        redefineOutputMemory({newDims});
     }
     int *dst = reinterpret_cast<int *>(dstMemPtr->GetPtr());
     size_t inSize = inShape.getElementsCount();
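
For intuition about the {inRank, nonZeroCount} output shape above, here is a self-contained toy computation with assumed sample data (illustrative only, not from the source):

    #include <cstdio>
    #include <vector>

    int main() {
        // Illustrative input: rank 2, shape 2x3, three non-zero elements.
        const int input[2][3] = {{0, 5, 0}, {7, 0, 9}};
        std::vector<int> rows, cols;  // coordinates of the non-zero hits
        for (int r = 0; r < 2; r++)
            for (int c = 0; c < 3; c++)
                if (input[r][c] != 0) { rows.push_back(r); cols.push_back(c); }
        // Hits at (0,1), (1,0), (1,2), so newDims = {inRank, nonZeroCount} = {2, 3}.
        std::printf("newDims = {2, %zu}\n", rows.size());
        return 0;
    }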


@@ -104,6 +104,7 @@ void MKLDNNRangeNode::execute(mkldnn::stream strm) {
         IE_THROW() << errorMsg;
     }
 }
+
 template <typename data_t>
 size_t MKLDNNRangeNode::getWorkAmount(data_t *startPtr, data_t *stopPtr, data_t *stepPtr) const {
     data_t start = 0, limit = 0, delta = 0;
@@ -126,13 +127,14 @@ size_t MKLDNNRangeNode::getWorkAmount(data_t *startPtr, data_t *stopPtr, data_t
         return static_cast<size_t>(std::ceil(std::fabs(span) / std::fabs(step)));
     }
 }
+
 template <typename data_t>
 InferenceEngine::StatusCode MKLDNNRangeNode::rangeKernel() {
     data_t start = 0, delta = 0;
     size_t work_amount_dst = getWorkAmount<data_t>(&start, nullptr, &delta);
     if (isDynamicNode()) {
         VectorDims newOutputShape {work_amount_dst};
-        getChildEdgeAt(0)->getMemoryPtr()->redefineDesc(getBaseMemDescAtOutputPort(0)->cloneWithNewDims(newOutputShape));
+        redefineOutputMemory({newOutputShape});
     }
     data_t* dst_data = reinterpret_cast<data_t *>(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr());
     parallel_nt(0, [&](const int ithr, const int nthr) {
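
As a sanity check on the work-amount formula visible in the context above, work = ceil(|limit - start| / |delta|), here is a standalone computation with assumed sample values (the inputs are illustrative, not from the source):

    #include <cmath>
    #include <cstddef>
    #include <cstdio>

    int main() {
        // Assumed sample Range inputs: start=2, limit=10, delta=3.
        float start = 2.f, limit = 10.f, delta = 3.f;
        float span = limit - start;  // 8
        std::size_t work = static_cast<std::size_t>(std::ceil(std::fabs(span) / std::fabs(delta)));
        std::printf("work_amount_dst = %zu\n", work);  // 3, i.e. output {2, 5, 8}
        return 0;
    }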