From b47ca664c170bbbabd08cd9826dbddf9934aa455 Mon Sep 17 00:00:00 2001
From: Alexandra Sidorova <alexandra.sidorova@intel.com>
Date: Fri, 26 Mar 2021 16:28:32 +0300
Subject: [PATCH] [CPU] Fixed Pad 1D case (#4828)

---
 .../mkldnn_plugin/nodes/mkldnn_pad_node.cpp   | 60 ++++++++++---------
 .../src/mkldnn_plugin/nodes/mkldnn_pad_node.h |  1 +
 .../single_layer_tests/pad.cpp                | 48 ++++++++++++++-
 ngraph/test/runtime/ie/unit_test.manifest     |  9 ---
 4 files changed, 80 insertions(+), 38 deletions(-)
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp
index baeb2c1bc66..3c9d9e141e5 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp
@@ -174,9 +174,10 @@ void MKLDNNPadNode::createPrimitive() {
         }
     }
 
-    size_t nGluingLastDims = params.dstStrides[std::max(endIdx - 1, 0)];
-    params.nDimsForWork = std::max(endIdx - std::max(beginIdx, 0), 1);
-    params.workAmount = params.dstDims[0];
+    params.lastDstDim = params.dstStrides[std::max(endIdx - 1, 0)];
+    params.nDimsForWork = endIdx - std::max(beginIdx, 0);
+    params.nThreads = params.nDimsForWork > 0 ? 0 : 1;
+    params.workAmount = params.nDimsForWork > 0 ? params.dstDims[0] : 1lu;
     for (int i = 1; i <= beginIdx; ++i) {
         params.workAmount *= params.dstDims[i];
         params.dstDims[0] *= params.dstDims[i];
@@ -194,9 +195,8 @@ void MKLDNNPadNode::createPrimitive() {
         padsBegin.erase(padsBegin.begin() + 1, padsBegin.begin() + beginIdx);
         padsEnd.erase(padsEnd.begin() + 1, padsEnd.begin() + beginIdx);
     }
-    params.workAmount = params.workAmount * params.dstStrides[0] / nGluingLastDims;
 
-    params.lastDstDim = nGluingLastDims;
+    params.workAmount = params.workAmount * params.dstStrides[0] / params.lastDstDim;
     params.shift = params.dstStrides[params.nDimsForWork];
     if (padMode != CONSTANT || (padMode == CONSTANT && padValue == 0)) {
         params.lastDstDim *= params.sizeData;
@@ -266,11 +266,15 @@ void MKLDNNPadNode::padConstant() {
 
 template<typename T>
 void MKLDNNPadNode::padConstantCommon() {
-    T* srcData = reinterpret_cast<T*>(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
+    const T* srcData = reinterpret_cast<const T*>(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
     T* dstData = reinterpret_cast<T*>(this->getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
-    T value = static_cast<T>(padValue);
+    const T value = static_cast<T>(padValue);
 
-    parallel_nt(0, [&](const int ithr, const int nthr) {
+    const size_t beginShift = padsBegin[params.nDimsForWork] * params.shift;
+    const size_t copySize = params.srcDims[params.nDimsForWork] * params.shift;
+    const size_t endShift = padsEnd[params.nDimsForWork] * params.shift;
+
+    parallel_nt(params.nThreads, [&](const int ithr, const int nthr) {
         size_t start = 0, end = 0;
         SizeVector indexes(params.nDimsForWork, 0);
         splitter(params.workAmount, nthr, ithr, start, end);
@@ -296,11 +300,9 @@ void MKLDNNPadNode::padConstantCommon() {
             for (size_t idx = 0; idx < params.nDimsForWork; ++idx)
                 srcIdx += (indexes[idx] - padsBegin[idx]) * params.srcStrides[idx];
 
-            std::fill_n(&dstData[dstIdx], padsBegin[params.nDimsForWork] * params.shift, value);
-            cpu_memcpy(&dstData[dstIdx + padsBegin[params.nDimsForWork] * params.shift], &srcData[srcIdx],
-                       params.srcDims[params.nDimsForWork] * params.shift * params.sizeData);
-            std::fill_n(&dstData[dstIdx + params.srcODims[params.nDimsForWork] * params.shift],
-                        padsEnd[params.nDimsForWork] * params.shift, value);
+            std::fill_n(&dstData[dstIdx], beginShift, value);
+            cpu_memcpy(&dstData[dstIdx + beginShift], &srcData[srcIdx], copySize * params.sizeData);
+            std::fill_n(&dstData[dstIdx + beginShift + copySize], endShift, value);
 
             parallel_step(params.nDimsForWork, params.dstDims, indexes);
         }
@@ -308,10 +310,14 @@ void MKLDNNPadNode::padConstantCommon() {
 }
 
 void MKLDNNPadNode::padConstantZero() {
-    uint8_t* srcData = reinterpret_cast<uint8_t*>(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
+    const uint8_t* srcData = reinterpret_cast<const uint8_t*>(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
     uint8_t* dstData = reinterpret_cast<uint8_t*>(this->getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
 
-    parallel_nt(0, [&](const int ithr, const int nthr) {
+    const size_t beginShift = padsBegin[params.nDimsForWork] * params.shift;
+    const size_t copySize = params.srcDims[params.nDimsForWork] * params.shift;
+    const size_t endShift = padsEnd[params.nDimsForWork] * params.shift;
+
+    parallel_nt(params.nThreads, [&](const int ithr, const int nthr) {
         size_t start = 0, end = 0;
         SizeVector indexes(params.nDimsForWork, 0);
         splitter(params.workAmount, nthr, ithr, start, end);
@@ -338,10 +344,9 @@ void MKLDNNPadNode::padConstantZero() {
                 srcIdx += (indexes[idx] - padsBegin[idx]) * params.srcStrides[idx];
             srcIdx *= params.sizeData;
 
-            memset(&dstData[dstIdx], 0, padsBegin[params.nDimsForWork] * params.shift);
-            cpu_memcpy(&dstData[dstIdx + padsBegin[params.nDimsForWork] * params.shift], &srcData[srcIdx],
-                       params.srcDims[params.nDimsForWork] * params.shift);
-            memset(&dstData[dstIdx + params.srcODims[params.nDimsForWork] * params.shift], 0, padsEnd[params.nDimsForWork] * params.shift);
+            memset(&dstData[dstIdx], 0, beginShift);
+            cpu_memcpy(&dstData[dstIdx + beginShift], &srcData[srcIdx], copySize);
+            memset(&dstData[dstIdx + beginShift + copySize], 0, endShift);
 
             parallel_step(params.nDimsForWork, params.dstDims, indexes);
         }
@@ -349,10 +354,13 @@ void MKLDNNPadNode::padConstantZero() {
 }
 
 void MKLDNNPadNode::padEdge() {
-    uint8_t* srcData = reinterpret_cast<uint8_t*>(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
+    const uint8_t* srcData = reinterpret_cast<const uint8_t*>(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
     uint8_t* dstData = reinterpret_cast<uint8_t*>(this->getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
 
-    parallel_nt(0, [&](const int ithr, const int nthr) {
+    const size_t beginShift = padsBegin[params.nDimsForWork] * params.shift;
+    const size_t copySize = params.srcDims[params.nDimsForWork] * params.shift;
+
+    parallel_nt(params.nThreads, [&](const int ithr, const int nthr) {
         size_t start = 0, end = 0;
         SizeVector indexes(params.nDimsForWork, 0);
         splitter(params.workAmount, nthr, ithr, start, end);
@@ -373,11 +381,10 @@ void MKLDNNPadNode::padEdge() {
             for (size_t i = 0; i < padsBegin[params.nDimsForWork]; ++i)
                 cpu_memcpy(&dstData[dstIdx + i * params.shift], &srcData[srcIdx], params.shift);
 
-            cpu_memcpy(&dstData[dstIdx + padsBegin[params.nDimsForWork] * params.shift], &srcData[srcIdx],
-                       params.srcDims[params.nDimsForWork] * params.shift);
+            cpu_memcpy(&dstData[dstIdx + beginShift], &srcData[srcIdx], copySize);
 
             for (size_t i = 0; i < padsEnd[params.nDimsForWork]; ++i)
-                cpu_memcpy(&dstData[dstIdx + params.srcODims[params.nDimsForWork] * params.shift + i * params.shift],
+                cpu_memcpy(&dstData[dstIdx + beginShift + copySize + i * params.shift],
                            &srcData[srcIdx + (params.srcDims[params.nDimsForWork] - 1) * params.shift], params.shift);
 
             parallel_step(params.nDimsForWork, params.dstDims, indexes);
@@ -386,12 +393,11 @@ void MKLDNNPadNode::padEdge() {
 }
 
 void MKLDNNPadNode::padReflectOrSymmetric(const bool isSymmetric) {
-    uint8_t* srcData = reinterpret_cast<uint8_t*>(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
+    const uint8_t* srcData = reinterpret_cast<const uint8_t*>(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
     uint8_t* dstData = reinterpret_cast<uint8_t*>(this->getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
-
     size_t shift = isSymmetric ? 1 : 0;
 
-    parallel_nt(0, [&](const int ithr, const int nthr) {
+    parallel_nt(params.nThreads, [&](const int ithr, const int nthr) {
         size_t start = 0, end = 0;
         SizeVector indexes(params.nDimsForWork, 0);
         splitter(params.workAmount, nthr, ithr, start, end);
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h
index c3a34f629fd..1c598e497d0 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h
@@ -49,6 +49,7 @@ private:
         InferenceEngine::SizeVector srcStrides;
         InferenceEngine::SizeVector dstStrides;
         InferenceEngine::SizeVector srcDimsForReflectOrSymmetric;
+        int nThreads = 0;  // passed to parallel_nt(); createPrimitive() stores 1 when nDimsForWork == 0, otherwise 0 (presumably "auto" for parallel_nt - confirm)
         size_t nDimsForWork = 0lu;
         size_t workAmount = 0lu;
         size_t lastDstDim = 1lu;
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/pad.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/pad.cpp
index 93d7f430c05..0a760ed69e5 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/pad.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/pad.cpp
@@ -19,8 +19,6 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
         InferenceEngine::Precision::U8,
 };
 
-const std::vector<std::vector<int64_t>> padsBegin2D = {{0, 0}, {1, 1}, {2, 0}, {0, 3}};
-const std::vector<std::vector<int64_t>> padsEnd2D   = {{0, 0}, {1, 1}, {0, 1}, {3, 2}};
 const std::vector<float> argPadValue = {0.f, 1.f, -1.f, 2.5f};
 
 const std::vector<ngraph::helpers::PadMode> padMode = {
@@ -29,6 +27,52 @@ const std::vector<ngraph::helpers::PadMode> padMode = {
         ngraph::helpers::PadMode::SYMMETRIC
 };
 
+const std::vector<std::vector<int64_t>> padsBegin1D = {{0}, {1}, {2}};  // three cases, each inner vector has one entry (single axis): 0, 1 or 2
+const std::vector<std::vector<int64_t>> padsEnd1D   = {{0}, {1}, {2}};  // same three single-entry cases, fed to the second generator slot
+
+const auto pad1DConstparams = testing::Combine(  // cross-product of every generator listed below
+        testing::ValuesIn(padsBegin1D),
+        testing::ValuesIn(padsEnd1D),
+        testing::ValuesIn(argPadValue),  // 0.f, 1.f, -1.f, 2.5f
+        testing::Values(ngraph::helpers::PadMode::CONSTANT),  // the only pad mode in this parameter set
+        testing::ValuesIn(netPrecisions),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(std::vector<size_t>{5}),  // one-element vector; presumably the 1D input shape - confirm against PadLayerTest's parameter tuple
+        testing::Values(CommonTestUtils::DEVICE_CPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_Pad1DConst,  // instantiation name prefix
+        PadLayerTest,
+        pad1DConstparams,
+        PadLayerTest::getTestCaseName  // per-case test name generator
+);
+
+const auto pad1Dparams = testing::Combine(  // cross-product of every generator listed below
+        testing::ValuesIn(padsBegin1D),
+        testing::ValuesIn(padsEnd1D),
+        testing::Values(0),  // same slot that receives argPadValue in pad1DConstparams; fixed at 0 here
+        testing::ValuesIn(padMode),  // iterates over every entry of the padMode vector
+        testing::ValuesIn(netPrecisions),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(std::vector<size_t>{5}),  // one-element vector; presumably the 1D input shape - confirm against PadLayerTest's parameter tuple
+        testing::Values(CommonTestUtils::DEVICE_CPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_Pad1D,  // instantiation name prefix
+        PadLayerTest,
+        pad1Dparams,
+        PadLayerTest::getTestCaseName  // per-case test name generator
+);
+
+const std::vector<std::vector<int64_t>> padsBegin2D = {{0, 0}, {1, 1}, {2, 0}, {0, 3}};  // four cases, two entries each (one per axis); moved below the 1D tests by this patch
+const std::vector<std::vector<int64_t>> padsEnd2D   = {{0, 0}, {1, 1}, {0, 1}, {3, 2}};  // four two-entry cases, paired with padsBegin2D in pad2DConstparams
+
 const auto pad2DConstparams = testing::Combine(
         testing::ValuesIn(padsBegin2D),
         testing::ValuesIn(padsEnd2D),
diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest
index f9592326928..98223013a35 100644
--- a/ngraph/test/runtime/ie/unit_test.manifest
+++ b/ngraph/test/runtime/ie/unit_test.manifest
@@ -747,33 +747,24 @@ dyn_convolution_backprop_data
 dyn_convolution_backprop_filter
 
 # Pad Pad_524448 with not constant pad_value is not allowed
-pad_exterior_1d
 pad_negative_exterior_1d
 pad_negative_exterior_1d_check_limits
-pad_edge_1d
 pad_edge_1d_top_neg
 pad_edge_1d_top_neg_bigger_than_tensor
 pad_edge_1d_bottom_neg
 pad_edge_1d_bottom_neg_bigger_than_tensor
-pad_edge_2d
 pad_edge_2d_with_neg
-pad_reflect_1d
 pad_reflect_1d_top_neg
 pad_reflect_1d_top_neg_bigger_than_tensor
 pad_reflect_1d_bottom_neg
 pad_reflect_1d_bottom_neg_bigger_than_tensor
 pad_reflect_1d_multi_reflect
-pad_reflect_2d
 pad_reflect_2d_with_neg
 pad_negative_exterior_2d
 pad_negative_exterior_2d_all_negative
 pad_exterior_4d_1x2x2x2
 pad_negative_exterior_4d
 pad_2channel_2image_asym
-pad_symmetric
-
-
-IE_CPU/PadBackendTest.PadBackendTestForSpec
 
 # LRN operation should be converted to LRN_IE
 lrn_across_h