From b47ca664c170bbbabd08cd9826dbddf9934aa455 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Fri, 26 Mar 2021 16:28:32 +0300 Subject: [PATCH] [CPU] Fixed Pad 1D case (#4828) --- .../mkldnn_plugin/nodes/mkldnn_pad_node.cpp | 60 ++++++++++--------- .../src/mkldnn_plugin/nodes/mkldnn_pad_node.h | 1 + .../single_layer_tests/pad.cpp | 48 ++++++++++++++- ngraph/test/runtime/ie/unit_test.manifest | 9 --- 4 files changed, 80 insertions(+), 38 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp index baeb2c1bc66..3c9d9e141e5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp @@ -174,9 +174,10 @@ void MKLDNNPadNode::createPrimitive() { } } - size_t nGluingLastDims = params.dstStrides[std::max(endIdx - 1, 0)]; - params.nDimsForWork = std::max(endIdx - std::max(beginIdx, 0), 1); - params.workAmount = params.dstDims[0]; + params.lastDstDim = params.dstStrides[std::max(endIdx - 1, 0)]; + params.nDimsForWork = endIdx - std::max(beginIdx, 0); + params.nThreads = params.nDimsForWork > 0 ? 0 : 1; + params.workAmount = params.nDimsForWork > 0 ? params.dstDims[0] : 1lu; for (int i = 1; i <= beginIdx; ++i) { params.workAmount *= params.dstDims[i]; params.dstDims[0] *= params.dstDims[i]; @@ -194,9 +195,8 @@ void MKLDNNPadNode::createPrimitive() { padsBegin.erase(padsBegin.begin() + 1, padsBegin.begin() + beginIdx); padsEnd.erase(padsEnd.begin() + 1, padsEnd.begin() + beginIdx); } - params.workAmount = params.workAmount * params.dstStrides[0] / nGluingLastDims; - params.lastDstDim = nGluingLastDims; + params.workAmount = params.workAmount * params.dstStrides[0] / params.lastDstDim; params.shift = params.dstStrides[params.nDimsForWork]; if (padMode != CONSTANT || (padMode == CONSTANT && padValue == 0)) { params.lastDstDim *= params.sizeData; @@ -266,11 +266,15 @@ void MKLDNNPadNode::padConstant() { template void MKLDNNPadNode::padConstantCommon() { - T* srcData = reinterpret_cast(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const T* srcData = reinterpret_cast(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); T* dstData = reinterpret_cast(this->getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - T value = static_cast(padValue); + const T value = static_cast(padValue); - parallel_nt(0, [&](const int ithr, const int nthr) { + const size_t beginShift = padsBegin[params.nDimsForWork] * params.shift; + const size_t copySize = params.srcDims[params.nDimsForWork] * params.shift; + const size_t endShift = padsEnd[params.nDimsForWork] * params.shift; + + parallel_nt(params.nThreads, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; SizeVector indexes(params.nDimsForWork, 0); splitter(params.workAmount, nthr, ithr, start, end); @@ -296,11 +300,9 @@ void MKLDNNPadNode::padConstantCommon() { for (size_t idx = 0; idx < params.nDimsForWork; ++idx) srcIdx += (indexes[idx] - padsBegin[idx]) * params.srcStrides[idx]; - std::fill_n(&dstData[dstIdx], padsBegin[params.nDimsForWork] * params.shift, value); - cpu_memcpy(&dstData[dstIdx + padsBegin[params.nDimsForWork] * params.shift], &srcData[srcIdx], - params.srcDims[params.nDimsForWork] * params.shift * params.sizeData); - std::fill_n(&dstData[dstIdx + params.srcODims[params.nDimsForWork] * params.shift], - padsEnd[params.nDimsForWork] * params.shift, value); + std::fill_n(&dstData[dstIdx], beginShift, value); + cpu_memcpy(&dstData[dstIdx + beginShift], &srcData[srcIdx], copySize * params.sizeData); + std::fill_n(&dstData[dstIdx + beginShift + copySize], endShift, value); parallel_step(params.nDimsForWork, params.dstDims, indexes); } @@ -308,10 +310,14 @@ void MKLDNNPadNode::padConstantCommon() { } void MKLDNNPadNode::padConstantZero() { - uint8_t* srcData = reinterpret_cast(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const uint8_t* srcData = reinterpret_cast(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); uint8_t* dstData = reinterpret_cast(this->getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - parallel_nt(0, [&](const int ithr, const int nthr) { + const size_t beginShift = padsBegin[params.nDimsForWork] * params.shift; + const size_t copySize = params.srcDims[params.nDimsForWork] * params.shift; + const size_t endShift = padsEnd[params.nDimsForWork] * params.shift; + + parallel_nt(params.nThreads, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; SizeVector indexes(params.nDimsForWork, 0); splitter(params.workAmount, nthr, ithr, start, end); @@ -338,10 +344,9 @@ void MKLDNNPadNode::padConstantZero() { srcIdx += (indexes[idx] - padsBegin[idx]) * params.srcStrides[idx]; srcIdx *= params.sizeData; - memset(&dstData[dstIdx], 0, padsBegin[params.nDimsForWork] * params.shift); - cpu_memcpy(&dstData[dstIdx + padsBegin[params.nDimsForWork] * params.shift], &srcData[srcIdx], - params.srcDims[params.nDimsForWork] * params.shift); - memset(&dstData[dstIdx + params.srcODims[params.nDimsForWork] * params.shift], 0, padsEnd[params.nDimsForWork] * params.shift); + memset(&dstData[dstIdx], 0, beginShift); + cpu_memcpy(&dstData[dstIdx + beginShift], &srcData[srcIdx], copySize); + memset(&dstData[dstIdx + beginShift + copySize], 0, endShift); parallel_step(params.nDimsForWork, params.dstDims, indexes); } @@ -349,10 +354,13 @@ void MKLDNNPadNode::padConstantZero() { } void MKLDNNPadNode::padEdge() { - uint8_t* srcData = reinterpret_cast(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const uint8_t* srcData = reinterpret_cast(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); uint8_t* dstData = reinterpret_cast(this->getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - parallel_nt(0, [&](const int ithr, const int nthr) { + const size_t beginShift = padsBegin[params.nDimsForWork] * params.shift; + const size_t copySize = params.srcDims[params.nDimsForWork] * params.shift; + + parallel_nt(params.nThreads, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; SizeVector indexes(params.nDimsForWork, 0); splitter(params.workAmount, nthr, ithr, start, end); @@ -373,11 +381,10 @@ void MKLDNNPadNode::padEdge() { for (size_t i = 0; i < padsBegin[params.nDimsForWork]; ++i) cpu_memcpy(&dstData[dstIdx + i * params.shift], &srcData[srcIdx], params.shift); - cpu_memcpy(&dstData[dstIdx + padsBegin[params.nDimsForWork] * params.shift], &srcData[srcIdx], - params.srcDims[params.nDimsForWork] * params.shift); + cpu_memcpy(&dstData[dstIdx + beginShift], &srcData[srcIdx], copySize); for (size_t i = 0; i < padsEnd[params.nDimsForWork]; ++i) - cpu_memcpy(&dstData[dstIdx + params.srcODims[params.nDimsForWork] * params.shift + i * params.shift], + cpu_memcpy(&dstData[dstIdx + beginShift + copySize + i * params.shift], &srcData[srcIdx + (params.srcDims[params.nDimsForWork] - 1) * params.shift], params.shift); parallel_step(params.nDimsForWork, params.dstDims, indexes); @@ -386,12 +393,11 @@ void MKLDNNPadNode::padEdge() { } void MKLDNNPadNode::padReflectOrSymmetric(const bool isSymmetric) { - uint8_t* srcData = reinterpret_cast(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const uint8_t* srcData = reinterpret_cast(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); uint8_t* dstData = reinterpret_cast(this->getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - size_t shift = isSymmetric ? 1 : 0; - parallel_nt(0, [&](const int ithr, const int nthr) { + parallel_nt(params.nThreads, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; SizeVector indexes(params.nDimsForWork, 0); splitter(params.workAmount, nthr, ithr, start, end); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h index c3a34f629fd..1c598e497d0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h @@ -49,6 +49,7 @@ private: InferenceEngine::SizeVector srcStrides; InferenceEngine::SizeVector dstStrides; InferenceEngine::SizeVector srcDimsForReflectOrSymmetric; + int nThreads = 0; size_t nDimsForWork = 0lu; size_t workAmount = 0lu; size_t lastDstDim = 1lu; diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/pad.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/pad.cpp index 93d7f430c05..0a760ed69e5 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/pad.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/pad.cpp @@ -19,8 +19,6 @@ const std::vector netPrecisions = { InferenceEngine::Precision::U8, }; -const std::vector> padsBegin2D = {{0, 0}, {1, 1}, {2, 0}, {0, 3}}; -const std::vector> padsEnd2D = {{0, 0}, {1, 1}, {0, 1}, {3, 2}}; const std::vector argPadValue = {0.f, 1.f, -1.f, 2.5f}; const std::vector padMode = { @@ -29,6 +27,52 @@ const std::vector padMode = { ngraph::helpers::PadMode::SYMMETRIC }; +const std::vector> padsBegin1D = {{0}, {1}, {2}}; +const std::vector> padsEnd1D = {{0}, {1}, {2}}; + +const auto pad1DConstparams = testing::Combine( + testing::ValuesIn(padsBegin1D), + testing::ValuesIn(padsEnd1D), + testing::ValuesIn(argPadValue), + testing::Values(ngraph::helpers::PadMode::CONSTANT), + testing::ValuesIn(netPrecisions), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Layout::ANY), + testing::Values(std::vector{5}), + testing::Values(CommonTestUtils::DEVICE_CPU) +); + +INSTANTIATE_TEST_CASE_P( + smoke_Pad1DConst, + PadLayerTest, + pad1DConstparams, + PadLayerTest::getTestCaseName +); + +const auto pad1Dparams = testing::Combine( + testing::ValuesIn(padsBegin1D), + testing::ValuesIn(padsEnd1D), + testing::Values(0), + testing::ValuesIn(padMode), + testing::ValuesIn(netPrecisions), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Layout::ANY), + testing::Values(std::vector{5}), + testing::Values(CommonTestUtils::DEVICE_CPU) +); + +INSTANTIATE_TEST_CASE_P( + smoke_Pad1D, + PadLayerTest, + pad1Dparams, + PadLayerTest::getTestCaseName +); + +const std::vector> padsBegin2D = {{0, 0}, {1, 1}, {2, 0}, {0, 3}}; +const std::vector> padsEnd2D = {{0, 0}, {1, 1}, {0, 1}, {3, 2}}; + const auto pad2DConstparams = testing::Combine( testing::ValuesIn(padsBegin2D), testing::ValuesIn(padsEnd2D), diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest index f9592326928..98223013a35 100644 --- a/ngraph/test/runtime/ie/unit_test.manifest +++ b/ngraph/test/runtime/ie/unit_test.manifest @@ -747,33 +747,24 @@ dyn_convolution_backprop_data dyn_convolution_backprop_filter # Pad Pad_524448 with not constant pad_value is not allowed -pad_exterior_1d pad_negative_exterior_1d pad_negative_exterior_1d_check_limits -pad_edge_1d pad_edge_1d_top_neg pad_edge_1d_top_neg_bigger_than_tensor pad_edge_1d_bottom_neg pad_edge_1d_bottom_neg_bigger_than_tensor -pad_edge_2d pad_edge_2d_with_neg -pad_reflect_1d pad_reflect_1d_top_neg pad_reflect_1d_top_neg_bigger_than_tensor pad_reflect_1d_bottom_neg pad_reflect_1d_bottom_neg_bigger_than_tensor pad_reflect_1d_multi_reflect -pad_reflect_2d pad_reflect_2d_with_neg pad_negative_exterior_2d pad_negative_exterior_2d_all_negative pad_exterior_4d_1x2x2x2 pad_negative_exterior_4d pad_2channel_2image_asym -pad_symmetric - - -IE_CPU/PadBackendTest.PadBackendTestForSpec # LRN operation should be converted to LRN_IE lrn_across_h