diff --git a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp index 4136724303f..0a99497f36a 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp +++ b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp @@ -2,16 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include -#include -#include "ie_parallel.hpp" -#include "utils/bfloat16.hpp" -#include #include "space_to_batch.h" -#include -#include + +#include "ie_parallel.hpp" +#include using namespace InferenceEngine; @@ -19,9 +13,9 @@ namespace ov { namespace intel_cpu { namespace node { -bool SpaceToBatch::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool SpaceToBatch::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - const auto spaceToBatch = std::dynamic_pointer_cast(op); + const auto spaceToBatch = std::dynamic_pointer_cast(op); if (!spaceToBatch) { errorMessage = "Only opset2 SpaceToBatch operation is supported"; return false; @@ -32,7 +26,7 @@ bool SpaceToBatch::isSupportedOperation(const std::shared_ptr& op, const GraphContext::CPtr context) +SpaceToBatch::SpaceToBatch(const std::shared_ptr& op, const GraphContext::CPtr context) : Node(op, context, NgraphShapeInferFactory(op, PortMask(1, 2, 3))) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { @@ -104,8 +98,11 @@ static std::vector getShape5D(const SizeVector &shape) { template void SpaceToBatch::SpaceToBatchKernel() { + const auto& srcMem = getParentEdgesAtPort(0)[0]->getMemoryPtr(); + const auto& dstMem = getChildEdgesAtPort(0)[0]->getMemoryPtr(); + const auto *blockShapesPtr = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); - size_t dataRank = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetShape().getRank(); + size_t dataRank = srcMem->GetShape().getRank(); blockShapeIn.clear(); for (size_t i = 0; i < dataRank; i++) { blockShapeIn.push_back(*(blockShapesPtr + i)); @@ -117,21 +114,24 @@ void SpaceToBatch::SpaceToBatchKernel() { padsBeginIn.push_back(*(padsBeginPtr + i)); } - const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcData = reinterpret_cast(srcMem->GetPtr()); + auto *dstData = reinterpret_cast(dstMem->GetPtr()); - const auto &inDims = getParentEdgesAtPort(0)[0]->getMemoryPtr()->getStaticDims(); - const auto &outDims = getChildEdgesAtPort(0)[0]->getMemoryPtr()->getStaticDims(); + const auto srcLen = srcMem->GetSize() / sizeof(T); + const auto dstLen = dstMem->GetSize() / sizeof(T); - const bool blocked = getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c) || - getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c); + const auto &inDims = srcMem->getStaticDims(); + const auto &outDims = dstMem->getStaticDims(); + + const bool blocked = srcMem->getDesc().hasLayoutType(LayoutType::nCsp16c) || + srcMem->getDesc().hasLayoutType(LayoutType::nCsp8c); const auto dimsSize = inDims.size(); auto inShape5D = getShape5D(outDims); auto outShape5D = getShape5D(inDims); auto blockShape = getShape5D(blockShapeIn); - if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) { + if (srcMem->getDesc().hasLayoutType(LayoutType::nspc)) { inShape5D.push_back(inShape5D[1]); inShape5D.erase(inShape5D.begin() + 1); outShape5D.push_back(outShape5D[1]); @@ -140,10 +140,10 @@ void SpaceToBatch::SpaceToBatchKernel() { blockShape.erase(blockShape.begin() + 1); } - const auto outBlkDims = getChildEdgeAt(0)->getMemory().GetDescWithType()->getBlockDims(); + const auto outBlkDims = dstMem->GetDescWithType()->getBlockDims(); const size_t blockSize = blocked ? outBlkDims.back() : 1lu; const size_t blockCountInput = outBlkDims[1]; - const size_t blockCountOutput = getParentEdgeAt(0)->getMemory().GetDescWithType()->getBlockDims()[1]; + const size_t blockCountOutput = srcMem->GetDescWithType()->getBlockDims()[1]; const auto blockRemainder = inShape5D[1] % blockSize; const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder; @@ -153,11 +153,7 @@ void SpaceToBatch::SpaceToBatchKernel() { const size_t outSpatialStep = outShape5D[2] * outShape5D[3] * outShape5D[4]; const size_t outBatchStep = (blocked ? blockSize * blockCountOutput : outShape5D[1]) * outSpatialStep; - parallel_nt(0, [&](const int ithr, const int nthr) { - size_t start(0lu), end(0lu); - splitter(inShape5D[0] * inBatchStep, nthr, ithr, start, end); - std::fill(dstData + start, dstData + end, T(0)); - }); + memset(dstData, 0, dstMem->GetSize()); size_t channels = (inShape5D[1] / blockSize); channels = channels == 0 ? 1 : channels; @@ -169,6 +165,9 @@ void SpaceToBatch::SpaceToBatchKernel() { std::vector indxStart(2, 0); std::vector indxEnd(2, 0); parallel_it_init(start, indxStart[0], inShape5D[0], indxStart[1], channels); + if (start >= end) { + return; + } parallel_it_init((end - 1), indxEnd[0], inShape5D[0], indxEnd[1], channels); std::vector oAdd(5, 1); std::vector begin(5, 0); @@ -184,7 +183,7 @@ void SpaceToBatch::SpaceToBatchKernel() { oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - padsBeginIn[2] : 0lu; bIdx = dimsSize == 5 ? bIdx / blockShapeIn[2] : bIdx; oAdd[1] = bIdx % blockShapeIn[1] - padsBeginIn[1]; - if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) { + if (srcMem->getDesc().hasLayoutType(LayoutType::nspc)) { oAdd.push_back(oAdd[1]); oAdd.erase(oAdd.begin() + 1); } @@ -225,6 +224,9 @@ void SpaceToBatch::SpaceToBatchKernel() { const int64_t tmpOc = i5 * blockShape[1] + addTmpOc; const size_t srcIdx5 = srcIdx4 + it * outSpatialStep * blockSize + (tmpOc - it * blockSize); const size_t dstIdx5 = dstIdx4 + i5; + if (srcIdx5 >= srcLen || dstIdx5 >= dstLen) { + continue; + } dstData[dstIdx5] = srcData[srcIdx5]; } } diff --git a/src/plugins/intel_cpu/src/nodes/space_to_batch.h b/src/plugins/intel_cpu/src/nodes/space_to_batch.h index ccfa0d853d4..42b1bba2491 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_batch.h +++ b/src/plugins/intel_cpu/src/nodes/space_to_batch.h @@ -4,10 +4,10 @@ #pragma once -#include #include -#include + #include +#include #include namespace ov { @@ -16,7 +16,7 @@ namespace node { class SpaceToBatch : public Node { public: - SpaceToBatch(const std::shared_ptr& op, const GraphContext::CPtr context); + SpaceToBatch(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -27,7 +27,7 @@ public: bool needShapeInfer() const override {return true;}; void executeDynamicImpl(dnnl::stream strm) override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: std::vector blockShapeIn; diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp index 4cbf4379033..82b3b154036 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp @@ -9,14 +9,13 @@ using namespace InferenceEngine; using namespace CPUTestUtils; -using namespace ngraph::opset3; using namespace ov::test; namespace CPULayerTestsDefinitions { namespace { std::vector blockShape, padsBegin, padsEnd; - ngraph::Shape paramShape; + ov::Shape paramShape; } // namespace using SpaceToBatchLayerTestCPUParams = std::tuple< @@ -65,7 +64,7 @@ public: const auto& funcInput = funcInputs[i]; ov::Tensor tensor; if (i == 0) { - tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); + tensor = utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); } else if (i == 1) { tensor = ov::Tensor(funcInput.get_element_type(), paramShape); auto *dataPtr = tensor.data(); @@ -108,21 +107,21 @@ protected: selectedType = std::string("ref_any_") + netPrecision.name(); auto params = ngraph::builder::makeDynamicParams(ngPrec, {inputDynamicShapes.front()}); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); paramShape = {paramOuts[0].get_partial_shape().size()}; std::shared_ptr in2, in3, in4; - auto blockShapeParam = std::make_shared(ngraph::element::i64, paramShape); + auto blockShapeParam = std::make_shared(ov::element::i64, paramShape); in2 = blockShapeParam; params.push_back(blockShapeParam); - auto padsBeginParam = std::make_shared(ngraph::element::i64, paramShape); + auto padsBeginParam = std::make_shared(ov::element::i64, paramShape); in3 = padsBeginParam; params.push_back(padsBeginParam); - auto padsEndParam = std::make_shared(ngraph::element::i64, paramShape); + auto padsEndParam = std::make_shared(ov::element::i64, paramShape); in4 = padsEndParam; params.push_back(padsEndParam); - auto s2b = std::make_shared(paramOuts[0], in2, in3, in4); + auto s2b = std::make_shared(paramOuts[0], in2, in3, in4); function = makeNgraphFunction(inType, params, s2b, "SpaceToBatchCPU"); } }; @@ -287,6 +286,19 @@ INSTANTIATE_TEST_SUITE_P(smoke_DynamicSpaceToBatchCPULayerTestCase2_4D, SpaceToB INSTANTIATE_TEST_SUITE_P(smoke_DynamicSpaceToBatchCPULayerTestCaseWithBlocked2_4D, SpaceToBatchCPULayerTest, dynamicSpaceToBatchParamsWithBlockedSet4D2, SpaceToBatchCPULayerTest::getTestCaseName); +std::vector> staticInputShapes4DPE = { + {{1, 2, 9, 1}, {4}, {4}, {4}} +}; +INSTANTIATE_TEST_SUITE_P(smoke_StaticSpaceToBatch_4D_parallel_block_edge, SpaceToBatchCPULayerTest, + ::testing::Combine( + ::testing::ValuesIn(static_shapes_to_test_representation(staticInputShapes4DPE)), + ::testing::Values(std::vector{1, 4, 3, 1}), + ::testing::Values(std::vector{0, 1, 2, 0}), + ::testing::Values(std::vector{0, 1, 4, 0}), + ::testing::Values(Precision::FP32), + ::testing::Values(CPUSpecificParams{})), + SpaceToBatchCPULayerTest::getTestCaseName); + const std::vector> blockShape5D = {{1, 1, 2, 2, 1}, {1, 2, 4, 1, 3}}; const std::vector> padsBegin5D = {{0, 0, 0, 0, 0}, {0, 0, 4, 0, 0}, {0, 0, 0, 2, 3}}; const std::vector> padsEnd5D = {{0, 0, 0, 0, 0}, {0, 0, 0, 4, 3}, {0, 0, 4, 2, 3}}; diff --git a/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py b/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py index 37edd236794..7bb5998f182 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py @@ -40,10 +40,8 @@ class TestSpaceToBatch(CommonTFLayerTest): test_data_4D = [ dict(in_shape=[1, 2, 2, 3], block_shape_value=[2, 2], pads_value=[[0, 0], [0, 0]]), - pytest.param(dict(in_shape=[1, 2, 9, 1], block_shape_value=[4, 3], pads_value=[[1, 1], [2, 4]]), - marks=pytest.mark.skip(reason="107967")), - pytest.param(dict(in_shape=[1, 2, 1, 4], block_shape_value=[3, 2, 2], pads_value=[[1, 0], [0, 1], [1, 1]]), - marks=pytest.mark.skip(reason="107967")), + dict(in_shape=[1, 2, 9, 1], block_shape_value=[4, 3], pads_value=[[1, 1], [2, 4]]), + dict(in_shape=[1, 2, 1, 4], block_shape_value=[3, 2, 2], pads_value=[[1, 0], [0, 1], [1, 1]]) ] @pytest.mark.parametrize("params", test_data_4D)