[CPU] Infer_request crashes for SpaceToBatch operation. (#16974)

* [CPU] Infer_request crashes for SpaceToBatch operation.

* Fixes as per comments.

* Fixes as per comments 2.
This commit is contained in:
Nikolay Shchegolev
2023-04-26 17:39:54 +04:00
committed by GitHub
parent 6ed85178d5
commit c8ac7c9b82
4 changed files with 56 additions and 44 deletions

View File

@@ -2,16 +2,10 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <cmath>
#include <vector>
#include <string>
#include <dnnl_types.h>
#include "ie_parallel.hpp"
#include "utils/bfloat16.hpp"
#include <selective_build.h>
#include "space_to_batch.h"
#include <nodes/common/blocked_desc_creator.h>
#include <ngraph/opsets/opset2.hpp>
#include "ie_parallel.hpp"
#include <openvino/op/space_to_batch.hpp>
using namespace InferenceEngine;
@@ -19,9 +13,9 @@ namespace ov {
namespace intel_cpu {
namespace node {
bool SpaceToBatch::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
bool SpaceToBatch::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
try {
const auto spaceToBatch = std::dynamic_pointer_cast<const ngraph::opset2::SpaceToBatch>(op);
const auto spaceToBatch = std::dynamic_pointer_cast<const ov::op::v1::SpaceToBatch>(op);
if (!spaceToBatch) {
errorMessage = "Only opset2 SpaceToBatch operation is supported";
return false;
@@ -32,7 +26,7 @@ bool SpaceToBatch::isSupportedOperation(const std::shared_ptr<const ngraph::Node
return true;
}
SpaceToBatch::SpaceToBatch(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
SpaceToBatch::SpaceToBatch(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
: Node(op, context, NgraphShapeInferFactory(op, PortMask(1, 2, 3))) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
@@ -104,8 +98,11 @@ static std::vector<size_t> getShape5D(const SizeVector &shape) {
template<typename T>
void SpaceToBatch::SpaceToBatchKernel() {
const auto& srcMem = getParentEdgesAtPort(0)[0]->getMemoryPtr();
const auto& dstMem = getChildEdgesAtPort(0)[0]->getMemoryPtr();
const auto *blockShapesPtr = reinterpret_cast<int *>(getParentEdgeAt(1)->getMemoryPtr()->GetPtr());
size_t dataRank = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetShape().getRank();
size_t dataRank = srcMem->GetShape().getRank();
blockShapeIn.clear();
for (size_t i = 0; i < dataRank; i++) {
blockShapeIn.push_back(*(blockShapesPtr + i));
@@ -117,21 +114,24 @@ void SpaceToBatch::SpaceToBatchKernel() {
padsBeginIn.push_back(*(padsBeginPtr + i));
}
const auto *srcData = reinterpret_cast<const T *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
auto *dstData = reinterpret_cast<T *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
const auto *srcData = reinterpret_cast<const T *>(srcMem->GetPtr());
auto *dstData = reinterpret_cast<T *>(dstMem->GetPtr());
const auto &inDims = getParentEdgesAtPort(0)[0]->getMemoryPtr()->getStaticDims();
const auto &outDims = getChildEdgesAtPort(0)[0]->getMemoryPtr()->getStaticDims();
const auto srcLen = srcMem->GetSize() / sizeof(T);
const auto dstLen = dstMem->GetSize() / sizeof(T);
const bool blocked = getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c) ||
getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c);
const auto &inDims = srcMem->getStaticDims();
const auto &outDims = dstMem->getStaticDims();
const bool blocked = srcMem->getDesc().hasLayoutType(LayoutType::nCsp16c) ||
srcMem->getDesc().hasLayoutType(LayoutType::nCsp8c);
const auto dimsSize = inDims.size();
auto inShape5D = getShape5D(outDims);
auto outShape5D = getShape5D(inDims);
auto blockShape = getShape5D(blockShapeIn);
if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) {
if (srcMem->getDesc().hasLayoutType(LayoutType::nspc)) {
inShape5D.push_back(inShape5D[1]);
inShape5D.erase(inShape5D.begin() + 1);
outShape5D.push_back(outShape5D[1]);
@@ -140,10 +140,10 @@ void SpaceToBatch::SpaceToBatchKernel() {
blockShape.erase(blockShape.begin() + 1);
}
const auto outBlkDims = getChildEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>()->getBlockDims();
const auto outBlkDims = dstMem->GetDescWithType<BlockedMemoryDesc>()->getBlockDims();
const size_t blockSize = blocked ? outBlkDims.back() : 1lu;
const size_t blockCountInput = outBlkDims[1];
const size_t blockCountOutput = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>()->getBlockDims()[1];
const size_t blockCountOutput = srcMem->GetDescWithType<BlockedMemoryDesc>()->getBlockDims()[1];
const auto blockRemainder = inShape5D[1] % blockSize;
const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder;
@@ -153,11 +153,7 @@ void SpaceToBatch::SpaceToBatchKernel() {
const size_t outSpatialStep = outShape5D[2] * outShape5D[3] * outShape5D[4];
const size_t outBatchStep = (blocked ? blockSize * blockCountOutput : outShape5D[1]) * outSpatialStep;
parallel_nt(0, [&](const int ithr, const int nthr) {
size_t start(0lu), end(0lu);
splitter(inShape5D[0] * inBatchStep, nthr, ithr, start, end);
std::fill(dstData + start, dstData + end, T(0));
});
memset(dstData, 0, dstMem->GetSize());
size_t channels = (inShape5D[1] / blockSize);
channels = channels == 0 ? 1 : channels;
@@ -169,6 +165,9 @@ void SpaceToBatch::SpaceToBatchKernel() {
std::vector<size_t> indxStart(2, 0);
std::vector<size_t> indxEnd(2, 0);
parallel_it_init(start, indxStart[0], inShape5D[0], indxStart[1], channels);
if (start >= end) {
return;
}
parallel_it_init((end - 1), indxEnd[0], inShape5D[0], indxEnd[1], channels);
std::vector<int64_t> oAdd(5, 1);
std::vector<size_t> begin(5, 0);
@@ -184,7 +183,7 @@ void SpaceToBatch::SpaceToBatchKernel() {
oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - padsBeginIn[2] : 0lu;
bIdx = dimsSize == 5 ? bIdx / blockShapeIn[2] : bIdx;
oAdd[1] = bIdx % blockShapeIn[1] - padsBeginIn[1];
if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) {
if (srcMem->getDesc().hasLayoutType(LayoutType::nspc)) {
oAdd.push_back(oAdd[1]);
oAdd.erase(oAdd.begin() + 1);
}
@@ -225,6 +224,9 @@ void SpaceToBatch::SpaceToBatchKernel() {
const int64_t tmpOc = i5 * blockShape[1] + addTmpOc;
const size_t srcIdx5 = srcIdx4 + it * outSpatialStep * blockSize + (tmpOc - it * blockSize);
const size_t dstIdx5 = dstIdx4 + i5;
if (srcIdx5 >= srcLen || dstIdx5 >= dstLen) {
continue;
}
dstData[dstIdx5] = srcData[srcIdx5];
}
}

View File

@@ -4,10 +4,10 @@
#pragma once
#include <ie_common.h>
#include <node.h>
#include <string>
#include <memory>
#include <string>
#include <vector>
namespace ov {
@@ -16,7 +16,7 @@ namespace node {
class SpaceToBatch : public Node {
public:
SpaceToBatch(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
SpaceToBatch(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context);
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
@@ -27,7 +27,7 @@ public:
bool needShapeInfer() const override {return true;};
void executeDynamicImpl(dnnl::stream strm) override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
private:
std::vector<size_t> blockShapeIn;

View File

@@ -9,14 +9,13 @@
using namespace InferenceEngine;
using namespace CPUTestUtils;
using namespace ngraph::opset3;
using namespace ov::test;
namespace CPULayerTestsDefinitions {
namespace {
std::vector<int64_t> blockShape, padsBegin, padsEnd;
ngraph::Shape paramShape;
ov::Shape paramShape;
} // namespace
using SpaceToBatchLayerTestCPUParams = std::tuple<
@@ -65,7 +64,7 @@ public:
const auto& funcInput = funcInputs[i];
ov::Tensor tensor;
if (i == 0) {
tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256);
tensor = utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256);
} else if (i == 1) {
tensor = ov::Tensor(funcInput.get_element_type(), paramShape);
auto *dataPtr = tensor.data<int64_t>();
@@ -108,21 +107,21 @@ protected:
selectedType = std::string("ref_any_") + netPrecision.name();
auto params = ngraph::builder::makeDynamicParams(ngPrec, {inputDynamicShapes.front()});
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ov::op::v0::Parameter>(params));
paramShape = {paramOuts[0].get_partial_shape().size()};
std::shared_ptr<ov::Node> in2, in3, in4;
auto blockShapeParam = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::i64, paramShape);
auto blockShapeParam = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, paramShape);
in2 = blockShapeParam;
params.push_back(blockShapeParam);
auto padsBeginParam = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::i64, paramShape);
auto padsBeginParam = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, paramShape);
in3 = padsBeginParam;
params.push_back(padsBeginParam);
auto padsEndParam = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::i64, paramShape);
auto padsEndParam = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, paramShape);
in4 = padsEndParam;
params.push_back(padsEndParam);
auto s2b = std::make_shared<ngraph::opset2::SpaceToBatch>(paramOuts[0], in2, in3, in4);
auto s2b = std::make_shared<ov::op::v1::SpaceToBatch>(paramOuts[0], in2, in3, in4);
function = makeNgraphFunction(inType, params, s2b, "SpaceToBatchCPU");
}
};
@@ -287,6 +286,19 @@ INSTANTIATE_TEST_SUITE_P(smoke_DynamicSpaceToBatchCPULayerTestCase2_4D, SpaceToB
INSTANTIATE_TEST_SUITE_P(smoke_DynamicSpaceToBatchCPULayerTestCaseWithBlocked2_4D, SpaceToBatchCPULayerTest,
dynamicSpaceToBatchParamsWithBlockedSet4D2, SpaceToBatchCPULayerTest::getTestCaseName);
// Static-shape 4D case added together with the SpaceToBatch inference-crash fix.
// Its values match the TF layer test configuration in_shape=[1, 2, 9, 1],
// block_shape_value=[4, 3], pads_value=[[1, 1], [2, 4]], with block/pads padded to rank 4.
// The first shape is the data tensor; the three {4} shapes presumably describe the 1D
// block-shape / pads-begin / pads-end inputs (one value per data axis) — TODO confirm
// against the test's input ordering.
std::vector<std::vector<ov::Shape>> staticInputShapes4DPE = {
{{1, 2, 9, 1}, {4}, {4}, {4}}
};
// NOTE(review): the Combine() arguments look like (input shapes, block shape, pads begin,
// pads end, precision, CPU-specific params); the parameter tuple definition is not visible
// here — verify the order against SpaceToBatchLayerTestCPUParams.
INSTANTIATE_TEST_SUITE_P(smoke_StaticSpaceToBatch_4D_parallel_block_edge, SpaceToBatchCPULayerTest,
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(staticInputShapes4DPE)),
::testing::Values(std::vector<int64_t>{1, 4, 3, 1}),
::testing::Values(std::vector<int64_t>{0, 1, 2, 0}),
::testing::Values(std::vector<int64_t>{0, 1, 4, 0}),
::testing::Values(Precision::FP32),
::testing::Values(CPUSpecificParams{})),
SpaceToBatchCPULayerTest::getTestCaseName);
// Candidate block-shape / pads-begin / pads-end vectors with five entries each
// (presumably one value per axis of a rank-5 input; their use sites are outside this view — confirm).
const std::vector<std::vector<int64_t>> blockShape5D = {{1, 1, 2, 2, 1}, {1, 2, 4, 1, 3}};
const std::vector<std::vector<int64_t>> padsBegin5D = {{0, 0, 0, 0, 0}, {0, 0, 4, 0, 0}, {0, 0, 0, 2, 3}};
const std::vector<std::vector<int64_t>> padsEnd5D = {{0, 0, 0, 0, 0}, {0, 0, 0, 4, 3}, {0, 0, 4, 2, 3}};

View File

@@ -40,10 +40,8 @@ class TestSpaceToBatch(CommonTFLayerTest):
test_data_4D = [
dict(in_shape=[1, 2, 2, 3], block_shape_value=[2, 2], pads_value=[[0, 0], [0, 0]]),
pytest.param(dict(in_shape=[1, 2, 9, 1], block_shape_value=[4, 3], pads_value=[[1, 1], [2, 4]]),
marks=pytest.mark.skip(reason="107967")),
pytest.param(dict(in_shape=[1, 2, 1, 4], block_shape_value=[3, 2, 2], pads_value=[[1, 0], [0, 1], [1, 1]]),
marks=pytest.mark.skip(reason="107967")),
dict(in_shape=[1, 2, 9, 1], block_shape_value=[4, 3], pads_value=[[1, 1], [2, 4]]),
dict(in_shape=[1, 2, 1, 4], block_shape_value=[3, 2, 2], pads_value=[[1, 0], [0, 1], [1, 1]])
]
@pytest.mark.parametrize("params", test_data_4D)