[CPU] Infer_request crashes for SpaceToBatch operation. (#16974)

* [CPU] Infer_request crashes for SpaceToBatch operation.
* Fixes as per comments.
* Fixes as per comments 2.
2023-04-26 17:39:54 +04:00
parent 6ed85178d5
commit c8ac7c9b82
4 changed files with 56 additions and 44 deletions
--- a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp
+++ b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp
@@ -2,16 +2,10 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-#include <cmath>
-#include <vector>
-#include <string>
-#include <dnnl_types.h>
-#include "ie_parallel.hpp"
-#include "utils/bfloat16.hpp"
-#include <selective_build.h>
 #include "space_to_batch.h"
-#include <nodes/common/blocked_desc_creator.h>
-#include <ngraph/opsets/opset2.hpp>
+
+#include "ie_parallel.hpp"
+#include <openvino/op/space_to_batch.hpp>

 using namespace InferenceEngine;

@@ -19,9 +13,9 @@ namespace ov {
 namespace intel_cpu {
 namespace node {

-bool SpaceToBatch::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool SpaceToBatch::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
    try {
-        const auto spaceToBatch = std::dynamic_pointer_cast<const ngraph::opset2::SpaceToBatch>(op);
+        const auto spaceToBatch = std::dynamic_pointer_cast<const ov::op::v1::SpaceToBatch>(op);
        if (!spaceToBatch) {
            errorMessage = "Only opset2 SpaceToBatch operation is supported";
            return false;
@@ -32,7 +26,7 @@ bool SpaceToBatch::isSupportedOperation(const std::shared_ptr<const ngraph::Node
    return true;
 }

-SpaceToBatch::SpaceToBatch(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
+SpaceToBatch::SpaceToBatch(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
    : Node(op, context, NgraphShapeInferFactory(op, PortMask(1, 2, 3))) {
    std::string errorMessage;
    if (!isSupportedOperation(op, errorMessage)) {
@@ -104,8 +98,11 @@ static std::vector<size_t> getShape5D(const SizeVector &shape) {

 template<typename T>
 void SpaceToBatch::SpaceToBatchKernel() {
+    const auto& srcMem = getParentEdgesAtPort(0)[0]->getMemoryPtr();
+    const auto& dstMem = getChildEdgesAtPort(0)[0]->getMemoryPtr();
+
    const auto *blockShapesPtr = reinterpret_cast<int *>(getParentEdgeAt(1)->getMemoryPtr()->GetPtr());
-    size_t dataRank = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetShape().getRank();
+    size_t dataRank = srcMem->GetShape().getRank();
    blockShapeIn.clear();
    for (size_t i = 0; i < dataRank; i++) {
        blockShapeIn.push_back(*(blockShapesPtr + i));
@@ -117,21 +114,24 @@ void SpaceToBatch::SpaceToBatchKernel() {
        padsBeginIn.push_back(*(padsBeginPtr + i));
    }

-    const auto *srcData = reinterpret_cast<const T *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
-    auto *dstData = reinterpret_cast<T *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
+    const auto *srcData = reinterpret_cast<const T *>(srcMem->GetPtr());
+    auto *dstData = reinterpret_cast<T *>(dstMem->GetPtr());

-    const auto &inDims = getParentEdgesAtPort(0)[0]->getMemoryPtr()->getStaticDims();
-    const auto &outDims = getChildEdgesAtPort(0)[0]->getMemoryPtr()->getStaticDims();
+    const auto srcLen = srcMem->GetSize() / sizeof(T);
+    const auto dstLen = dstMem->GetSize() / sizeof(T);

-    const bool blocked = getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c) ||
-                         getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c);
+    const auto &inDims = srcMem->getStaticDims();
+    const auto &outDims = dstMem->getStaticDims();
+
+    const bool blocked = srcMem->getDesc().hasLayoutType(LayoutType::nCsp16c) ||
+                         srcMem->getDesc().hasLayoutType(LayoutType::nCsp8c);
    const auto dimsSize = inDims.size();

    auto inShape5D  = getShape5D(outDims);
    auto outShape5D = getShape5D(inDims);
    auto blockShape = getShape5D(blockShapeIn);

-    if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) {
+    if (srcMem->getDesc().hasLayoutType(LayoutType::nspc)) {
        inShape5D.push_back(inShape5D[1]);
        inShape5D.erase(inShape5D.begin() + 1);
        outShape5D.push_back(outShape5D[1]);
@@ -140,10 +140,10 @@ void SpaceToBatch::SpaceToBatchKernel() {
        blockShape.erase(blockShape.begin() + 1);
    }

-    const auto outBlkDims = getChildEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>()->getBlockDims();
+    const auto outBlkDims = dstMem->GetDescWithType<BlockedMemoryDesc>()->getBlockDims();
    const size_t blockSize = blocked ? outBlkDims.back() : 1lu;
    const size_t blockCountInput = outBlkDims[1];
-    const size_t blockCountOutput = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>()->getBlockDims()[1];
+    const size_t blockCountOutput = srcMem->GetDescWithType<BlockedMemoryDesc>()->getBlockDims()[1];
    const auto blockRemainder = inShape5D[1] % blockSize;
    const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder;

@@ -153,11 +153,7 @@ void SpaceToBatch::SpaceToBatchKernel() {
    const size_t outSpatialStep = outShape5D[2] * outShape5D[3] * outShape5D[4];
    const size_t outBatchStep = (blocked ? blockSize * blockCountOutput : outShape5D[1]) * outSpatialStep;

-    parallel_nt(0, [&](const int ithr, const int nthr) {
-        size_t start(0lu), end(0lu);
-        splitter(inShape5D[0] * inBatchStep, nthr, ithr, start, end);
-        std::fill(dstData + start, dstData + end, T(0));
-    });
+    memset(dstData, 0, dstMem->GetSize());

    size_t channels = (inShape5D[1] / blockSize);
    channels = channels == 0 ? 1 : channels;
@@ -169,6 +165,9 @@ void SpaceToBatch::SpaceToBatchKernel() {
        std::vector<size_t> indxStart(2, 0);
        std::vector<size_t> indxEnd(2, 0);
        parallel_it_init(start, indxStart[0], inShape5D[0], indxStart[1], channels);
+        if (start >= end) {
+            return;
+        }
        parallel_it_init((end - 1), indxEnd[0], inShape5D[0], indxEnd[1], channels);
        std::vector<int64_t> oAdd(5, 1);
        std::vector<size_t> begin(5, 0);
@@ -184,7 +183,7 @@ void SpaceToBatch::SpaceToBatchKernel() {
            oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - padsBeginIn[2] : 0lu;
            bIdx = dimsSize == 5 ? bIdx / blockShapeIn[2] : bIdx;
            oAdd[1] = bIdx % blockShapeIn[1] - padsBeginIn[1];
-            if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) {
+            if (srcMem->getDesc().hasLayoutType(LayoutType::nspc)) {
                oAdd.push_back(oAdd[1]);
                oAdd.erase(oAdd.begin() + 1);
            }
@@ -225,6 +224,9 @@ void SpaceToBatch::SpaceToBatchKernel() {
                                    const int64_t tmpOc = i5 * blockShape[1] + addTmpOc;
                                    const size_t srcIdx5 = srcIdx4 + it * outSpatialStep * blockSize + (tmpOc - it * blockSize);
                                    const size_t dstIdx5 = dstIdx4 + i5;
+                                    if (srcIdx5 >= srcLen || dstIdx5 >= dstLen) {
+                                        continue;
+                                    }
                                    dstData[dstIdx5] = srcData[srcIdx5];
                                }
                            }
--- a/src/plugins/intel_cpu/src/nodes/space_to_batch.h
+++ b/src/plugins/intel_cpu/src/nodes/space_to_batch.h
@@ -4,10 +4,10 @@

 #pragma once

-#include <ie_common.h>
 #include <node.h>
-#include <string>
+
 #include <memory>
+#include <string>
 #include <vector>

 namespace ov {
@@ -16,7 +16,7 @@ namespace node {

 class SpaceToBatch : public Node {
 public:
-    SpaceToBatch(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    SpaceToBatch(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context);

    void getSupportedDescriptors() override {};
    void initSupportedPrimitiveDescriptors() override;
@@ -27,7 +27,7 @@ public:
    bool needShapeInfer() const override {return true;};
    void executeDynamicImpl(dnnl::stream strm) override;

-    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
+    static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;

 private:
    std::vector<size_t> blockShapeIn;
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp
@@ -9,14 +9,13 @@

 using namespace InferenceEngine;
 using namespace CPUTestUtils;
-using namespace ngraph::opset3;
 using namespace ov::test;

 namespace CPULayerTestsDefinitions  {

 namespace {
    std::vector<int64_t> blockShape, padsBegin, padsEnd;
-    ngraph::Shape paramShape;
+    ov::Shape paramShape;
 }  // namespace

 using SpaceToBatchLayerTestCPUParams = std::tuple<
@@ -65,7 +64,7 @@ public:
            const auto& funcInput = funcInputs[i];
            ov::Tensor tensor;
            if (i == 0) {
-                tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256);
+                tensor = utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256);
            } else if (i == 1) {
                tensor = ov::Tensor(funcInput.get_element_type(), paramShape);
                auto *dataPtr = tensor.data<int64_t>();
@@ -108,21 +107,21 @@ protected:
            selectedType = std::string("ref_any_") + netPrecision.name();

        auto params = ngraph::builder::makeDynamicParams(ngPrec, {inputDynamicShapes.front()});
-        auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
+        auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ov::op::v0::Parameter>(params));
        paramShape = {paramOuts[0].get_partial_shape().size()};

        std::shared_ptr<ov::Node> in2, in3, in4;
-        auto blockShapeParam = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::i64, paramShape);
+        auto blockShapeParam = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, paramShape);
        in2 = blockShapeParam;
        params.push_back(blockShapeParam);
-        auto padsBeginParam = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::i64, paramShape);
+        auto padsBeginParam = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, paramShape);
        in3 = padsBeginParam;
        params.push_back(padsBeginParam);
-        auto padsEndParam = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::i64, paramShape);
+        auto padsEndParam = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, paramShape);
        in4 = padsEndParam;
        params.push_back(padsEndParam);

-        auto s2b = std::make_shared<ngraph::opset2::SpaceToBatch>(paramOuts[0], in2, in3, in4);
+        auto s2b = std::make_shared<ov::op::v1::SpaceToBatch>(paramOuts[0], in2, in3, in4);
        function = makeNgraphFunction(inType, params, s2b, "SpaceToBatchCPU");
    }
 };
@@ -287,6 +286,19 @@ INSTANTIATE_TEST_SUITE_P(smoke_DynamicSpaceToBatchCPULayerTestCase2_4D, SpaceToB
 INSTANTIATE_TEST_SUITE_P(smoke_DynamicSpaceToBatchCPULayerTestCaseWithBlocked2_4D, SpaceToBatchCPULayerTest,
                         dynamicSpaceToBatchParamsWithBlockedSet4D2, SpaceToBatchCPULayerTest::getTestCaseName);

+std::vector<std::vector<ov::Shape>> staticInputShapes4DPE = {
+    {{1, 2, 9, 1}, {4}, {4}, {4}}
+};
+INSTANTIATE_TEST_SUITE_P(smoke_StaticSpaceToBatch_4D_parallel_block_edge, SpaceToBatchCPULayerTest,
+        ::testing::Combine(
+                ::testing::ValuesIn(static_shapes_to_test_representation(staticInputShapes4DPE)),
+                ::testing::Values(std::vector<int64_t>{1, 4, 3, 1}),
+                ::testing::Values(std::vector<int64_t>{0, 1, 2, 0}),
+                ::testing::Values(std::vector<int64_t>{0, 1, 4, 0}),
+                ::testing::Values(Precision::FP32),
+                ::testing::Values(CPUSpecificParams{})),
+        SpaceToBatchCPULayerTest::getTestCaseName);
+
 const std::vector<std::vector<int64_t>> blockShape5D = {{1, 1, 2, 2, 1}, {1, 2, 4, 1, 3}};
 const std::vector<std::vector<int64_t>> padsBegin5D = {{0, 0, 0, 0, 0}, {0, 0, 4, 0, 0}, {0, 0, 0, 2, 3}};
 const std::vector<std::vector<int64_t>> padsEnd5D   = {{0, 0, 0, 0, 0}, {0, 0, 0, 4, 3}, {0, 0, 4, 2, 3}};
--- a/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py
+++ b/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py
@@ -40,10 +40,8 @@ class TestSpaceToBatch(CommonTFLayerTest):

    test_data_4D = [
        dict(in_shape=[1, 2, 2, 3], block_shape_value=[2, 2], pads_value=[[0, 0], [0, 0]]),
-        pytest.param(dict(in_shape=[1, 2, 9, 1], block_shape_value=[4, 3], pads_value=[[1, 1], [2, 4]]),
-                     marks=pytest.mark.skip(reason="107967")),
-        pytest.param(dict(in_shape=[1, 2, 1, 4], block_shape_value=[3, 2, 2], pads_value=[[1, 0], [0, 1], [1, 1]]),
-                     marks=pytest.mark.skip(reason="107967")),
+        dict(in_shape=[1, 2, 9, 1], block_shape_value=[4, 3], pads_value=[[1, 1], [2, 4]]),
+        dict(in_shape=[1, 2, 1, 4], block_shape_value=[3, 2, 2], pads_value=[[1, 0], [0, 1], [1, 1]])
    ]

    @pytest.mark.parametrize("params", test_data_4D)