[CPU] Converted Pad Layer to MKLDNNNode and added more optimized implementations (#2586)

Pad-1: fixed ngraph reference for symmetric mode and added cpu tests
This commit is contained in:
Alexandra Sidorova 2020-11-12 14:25:42 +03:00 committed by GitHub
parent be3df1837c
commit a8e581ff8b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 319 additions and 261 deletions

View File

@ -23,6 +23,7 @@ set(LAYERS
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_input_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_lrn_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_memory_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_pad_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_permute_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_pooling_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_quantize_node.cpp
@ -64,7 +65,6 @@ set(LAYERS
${CMAKE_CURRENT_SOURCE_DIR}/nodes/log_softmax.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/math.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/one_hot.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/pad.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/powerfile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/priorbox.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/priorbox_clustered.cpp

View File

@ -31,6 +31,7 @@
#include <nodes/mkldnn_softmax_node.h>
#include <nodes/mkldnn_tile_node.h>
#include <nodes/mkldnn_split_node.h>
#include <nodes/mkldnn_pad_node.h>
#include <nodes/mkldnn_permute_node.h>
#include <nodes/mkldnn_memory_node.hpp>
#include <nodes/mkldnn_rnn.h>
@ -103,6 +104,7 @@ static const InferenceEngine::details::caseless_unordered_map<std::string, Type>
{ "ROIPooling", ROIPooling },
{ "BatchNormalization", BatchNormalization },
{ "Flatten", Flatten },
{ "Pad", Pad },
{ "Permute", Permute },
{ "Copy", Copy },
{ "LSTMCell", RNNCell },

View File

@ -53,6 +53,7 @@ enum Type {
ROIPooling,
BatchNormalization,
Flatten,
Pad,
Permute,
Copy,
MemoryOutput,
@ -133,6 +134,8 @@ static std::string NameFromType(Type type) {
return "BatchNormalization";
case Flatten:
return "Flatten";
case Pad:
return "Pad";
case Permute:
return "Permute";
case Copy:

View File

@ -68,7 +68,6 @@ MKLDNN_EXTENSION_NODE(InterpImpl, Interp);
MKLDNN_EXTENSION_NODE(BatchToSpaceImpl, BatchToSpace);
MKLDNN_EXTENSION_NODE(ExperimentalDetectronPriorGridGeneratorImpl, ExperimentalDetectronPriorGridGenerator);
MKLDNN_EXTENSION_NODE(SimplerNMSImpl, SimplerNMS);
MKLDNN_EXTENSION_NODE(PadImpl, Pad);
MKLDNN_EXTENSION_NODE(GRNImpl, GRN);
MKLDNN_EXTENSION_NODE(SparseFillEmptyRowsImpl, SparseFillEmptyRows);
MKLDNN_EXTENSION_NODE(BucketizeImpl, Bucketize);

View File

@ -0,0 +1,255 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "mkldnn_pad_node.h"
#include <legacy/ie_layers.h>
#include <string>
#include <cmath>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include <limits>
#include "ie_parallel.hpp"
#include "common/cpu_memcpy.h"
using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
// Pad node: thin MKLDNNNode wrapper; all attribute parsing and validation
// happens later in getSupportedDescriptors().
MKLDNNPadNode::MKLDNNPadNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
        : MKLDNNNode(layer, eng, cache) {}
// Parses the Pad layer attributes (pads_begin / pads_end / pad_mode /
// pad_value) and validates them against the input/output ranks.
// Throws InferenceEngineException on any inconsistency.
void MKLDNNPadNode::getSupportedDescriptors() {
    auto* padLayer = dynamic_cast<PadLayer*>(getCnnLayer().get());
    if (padLayer == nullptr)
        THROW_IE_EXCEPTION << "Cannot convert Pad layer.";

    padsBegin = padLayer->GetParamAsUInts("pads_begin");
    padsEnd = padLayer->GetParamAsUInts("pads_end");

    SizeVector srcDims = padLayer->insData[0].lock()->getTensorDesc().getDims();
    SizeVector dstDims = padLayer->outData[0]->getTensorDesc().getDims();
    if (srcDims.size() != dstDims.size() || padsBegin.size() != srcDims.size() || padsEnd.size() != srcDims.size())
        THROW_IE_EXCEPTION << padLayer->name << " Incorrect number of input/output dimensions!";

    std::string pad_mode = padLayer->GetParamAsString("pad_mode");
    if (pad_mode == "constant") {
        padMode = CONSTANT;
        padValue = padLayer->GetParamAsFloat("pad_value", 0.f);
    } else if (pad_mode == "edge") {
        padMode = EDGE;
    } else if (pad_mode == "reflect") {
        padMode = REFLECT;
        // 'reflect' never repeats the border element, so each pad must fit
        // strictly inside the mirrored source (pad <= dim - 1).
        for (size_t i = 0; i < srcDims.size(); i++) {
            if ((srcDims[i] - 1) < padsBegin[i] || (srcDims[i] - 1) < padsEnd[i])
                THROW_IE_EXCEPTION << padLayer->name << " Incorrect padsBegin or padsEnd for 'reflect' pad mode";
        }
    } else if (pad_mode == "symmetric") {
        padMode = SYMMETRIC;
        // 'symmetric' repeats the border element, so pads up to the full
        // dimension size are allowed (pad <= dim).
        for (size_t i = 0; i < srcDims.size(); i++) {
            if (srcDims[i] < padsBegin[i] || srcDims[i] < padsEnd[i])
                THROW_IE_EXCEPTION << padLayer->name << " Incorrect padsBegin or padsEnd for 'symmetric' pad mode";
        }
    } else {
        // Fixed message typo: the supported mode is spelled "constant", not "constants".
        THROW_IE_EXCEPTION << padLayer->name
                           << " Incorrect pad_mode. Only constant|edge|reflect|symmetric modes are supported!";
    }

    if (getParentEdges().size() != 1)
        THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
    if (getChildEdges().empty())
        THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
}
// Registers the single supported configuration: FP32 data in a plain
// (non-blocked) memory layout for both input and output, no in-place,
// no dynamic batch.
void MKLDNNPadNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;

    InferenceEngine::Precision precision = InferenceEngine::Precision::FP32;
    auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
    // Removed unused local `srcDims` — the dims are queried directly below.

    InferenceEngine::LayerConfig config;
    config.dynBatchSupport = false;
    config.inConfs.resize(1);
    config.outConfs.resize(1);
    config.inConfs[0].inPlace = -1;
    config.inConfs[0].constant = false;
    config.outConfs[0].inPlace = -1;
    config.outConfs[0].constant = false;

    auto memoryFormat = MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims());
    config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), dataType, memoryFormat);
    config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), dataType, memoryFormat);
    supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memoryFormat});
}
// Validates that memory was allocated and precomputes the data-independent
// parameters (strides, per-axis region boundaries) used by execute().
void MKLDNNPadNode::createPrimitive() {
    auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
    auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
    if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
        THROW_IE_EXCEPTION << "Destination memory didn't allocate.";
    if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
        THROW_IE_EXCEPTION << "Input memory didn't allocate.";
    if (getSelectedPrimitiveDescriptor() == nullptr)
        THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set.";

    SizeVector srcDims = getParentEdgeAt(0)->getBlob()->getTensorDesc().getDims();
    SizeVector dstDims = getChildEdgeAt(0)->getBlob()->getTensorDesc().getDims();
    params.srcStrides = getParentEdgeAt(0)->getBlob()->getTensorDesc().getBlockingDesc().getStrides();
    params.dstStrides = getChildEdgeAt(0)->getBlob()->getTensorDesc().getBlockingDesc().getStrides();

    params.srcODims.resize(srcDims.size());
    params.padDims.resize(padsBegin.size());
    for (size_t i = 0; i < srcDims.size(); i++) {
        // srcODims[i] is the first destination coordinate past the copied
        // source region along axis i: dst coords in [padsBegin[i], srcODims[i])
        // map 1:1 onto the source.
        params.srcODims[i] = srcDims[i] + padsBegin[i];
        params.padDims[i] = padsBegin[i] + padsEnd[i];
        // NOTE(review): padPointsNum and padDims are filled here but not read
        // anywhere in this file — confirm they are still needed.
        params.padPointsNum += padsBegin[i] + padsEnd[i];
    }
}
// Dispatches to the mode-specific padding kernel. Data is treated as FP32:
// initSupportedPrimitiveDescriptors() only registers FP32 plain layouts.
void MKLDNNPadNode::execute(mkldnn::stream strm) {
    const float *srcData = getParentEdgeAt(0)->getBlob()->cbuffer().as<const float*>() +
        getParentEdgeAt(0)->getBlob()->getTensorDesc().getBlockingDesc().getOffsetPadding();
    float* dstData = getChildEdgeAt(0)->getBlob()->buffer().as<float*>() +
        getChildEdgeAt(0)->getBlob()->getTensorDesc().getBlockingDesc().getOffsetPadding();

    auto srcDims = getParentEdgeAt(0)->getDims().ToSizeVector();
    auto dstDims = getChildEdgeAt(0)->getDims().ToSizeVector();

    // CONSTANT/EDGE share one kernel (flag selects the fill source),
    // REFLECT/SYMMETRIC share the other (flag selects the mirror offset).
    switch (padMode) {
        case CONSTANT:
            padConstantOrEdge(srcData, dstData, srcDims, dstDims);
            break;
        case EDGE:
            padConstantOrEdge(srcData, dstData, srcDims, dstDims, true);
            break;
        case REFLECT:
            padReflectOrSymmetric(srcData, dstData, srcDims, dstDims);
            break;
        case SYMMETRIC:
            padReflectOrSymmetric(srcData, dstData, srcDims, dstDims, true);
            break;
    }
}
// Decomposes the flat work-item index `start` into per-dimension counters
// over the first `size` entries of `dims` (last axis varies fastest).
// Returns whatever quotient remains after all dimensions are peeled off.
inline size_t parallel_init(size_t start, size_t size, std::vector<size_t> &counters, std::vector<size_t> &dims) {
    size_t remainder = start;
    for (size_t k = size; k > 0; --k) {
        const size_t axis = k - 1;
        counters[axis] = remainder % dims[axis];
        remainder /= dims[axis];
    }
    return remainder;
}
// Advances the multi-dimensional counter to the next position (odometer
// style): increments the innermost axis and propagates carries outwards,
// stopping at the first axis that does not wrap around.
inline void parallel_step(size_t size, std::vector<size_t> &counters, std::vector<size_t> &dims) {
    for (size_t k = size; k-- > 0;) {
        counters[k] = (counters[k] + 1) % dims[k];
        if (counters[k] != 0)
            break;
    }
}
// CONSTANT / EDGE kernel, parallelized over innermost destination rows.
// For each dst row the inner dimension is written as three spans:
//   [fill padsBegin] [copy of the source row] [fill padsEnd]
// where the fill value is padValue (CONSTANT) or the clamped border element
// (EDGE, isEdge == true). Assumes dense plain-layout strides (enforced by
// initSupportedPrimitiveDescriptors()).
void MKLDNNPadNode::padConstantOrEdge(const float* srcData, float* dstData, SizeVector srcDims, SizeVector dstDims,
                                      const bool isEdge) {
    size_t dimsSize_1 = dstDims.size() - 1;   // number of outer dimensions
    size_t inputSV = dstDims[dimsSize_1];     // length of one innermost dst row
    size_t workAmountDst = getWorkAmountDst();
    parallel_nt(0, [&](const int ithr, const int nthr) {
        // Each thread processes a contiguous range [start, end) of dst rows.
        size_t start = 0, end = 0;
        SizeVector counters(dimsSize_1, 0);
        splitter(workAmountDst, nthr, ithr, start, end);
        parallel_init(start, dimsSize_1, counters, dstDims);

        // dstIdx is computed once and then advanced by one row (inputSV) per
        // iteration — valid because rows in [start, end) are contiguous.
        int dstIdx = 0;
        for (size_t i = 0; i < dimsSize_1; ++i)
            dstIdx += counters[i] * params.dstStrides[i];

        for (size_t iwork = start; iwork < end; ++iwork, dstIdx += inputSV) {
            if (!isEdge) {
                // CONSTANT mode: if any outer coordinate lies in the padded
                // region, the whole row is pad value — no source data at all.
                size_t j;
                for (j = 0; j < dimsSize_1; ++j) {
                    if ((counters[j] < padsBegin[j]) || (counters[j] >= params.srcODims[j]))
                        break;
                }
                if (j != dimsSize_1) {
                    std::fill_n(&dstData[dstIdx], dstDims[dimsSize_1], padValue);
                    parallel_step(dimsSize_1, counters, dstDims);
                    continue;  // dstIdx still advances via the loop increment
                }
            }

            // Map outer dst coordinates to the source row; in EDGE mode
            // out-of-range coordinates clamp to the nearest border.
            int srcIdx = 0;
            for (size_t i = 0; i < dimsSize_1; ++i) {
                int idx = (counters[i] < padsBegin[i]) ? 0 :
                    ((counters[i] >= params.srcODims[i]) ? (srcDims[i] - 1) : (counters[i] - padsBegin[i]));
                srcIdx += idx * params.srcStrides[i];
            }
            // Head fill, source copy, tail fill along the innermost dimension.
            std::fill_n(&dstData[dstIdx], padsBegin[dimsSize_1],
                        isEdge ? srcData[srcIdx] : padValue);
            cpu_memcpy(&dstData[dstIdx + padsBegin[dimsSize_1]], &srcData[srcIdx],
                       srcDims[dimsSize_1] * sizeof(float));
            std::fill_n(&dstData[dstIdx + params.srcODims[dimsSize_1]], padsEnd[dimsSize_1],
                        isEdge ? srcData[srcIdx + srcDims[dimsSize_1] - 1] : padValue);
            parallel_step(dimsSize_1, counters, dstDims);
        }
    });
}
// REFLECT / SYMMETRIC kernel, parallelized over innermost destination rows.
// shift == 0 (REFLECT): the border element is not repeated in the mirror.
// shift == 1 (SYMMETRIC): the border element is repeated.
void MKLDNNPadNode::padReflectOrSymmetric(const float *srcData, float* dstData, SizeVector srcDims, SizeVector dstDims,
                                          const bool isSymmetric) {
    int shift = isSymmetric ? 1 : 0;
    // Mirror constant per axis: for a dst coordinate c past the source region,
    // the mirrored source index is src_2[i] - c.
    SizeVector src_2;
    for (size_t i = 0; i < srcDims.size(); i++)
        src_2.push_back(srcDims[i] + params.srcODims[i] - 2 + shift);

    size_t dimsSize_1 = dstDims.size() - 1;   // number of outer dimensions
    size_t inputSV = dstDims[dimsSize_1];     // length of one innermost dst row
    size_t workAmountDst = getWorkAmountDst();
    parallel_nt(0, [&](const int ithr, const int nthr) {
        // Each thread processes a contiguous range [start, end) of dst rows.
        size_t start = 0, end = 0;
        SizeVector counters(dimsSize_1, 0);
        splitter(workAmountDst, nthr, ithr, start, end);
        parallel_init(start, dimsSize_1, counters, dstDims);

        // dstIdx advances by one row per iteration (rows are contiguous).
        int dstIdx = 0;
        for (size_t i = 0; i < dimsSize_1; ++i)
            dstIdx += counters[i] * params.dstStrides[i];

        for (size_t iwork = start; iwork < end; ++iwork, dstIdx += inputSV) {
            // Map each outer dst coordinate into the mirrored source range.
            int srcIdx = 0;
            for (size_t i = 0; i < dimsSize_1; ++i) {
                int idx = (counters[i] < padsBegin[i]) ? (padsBegin[i] - counters[i] - shift) :
                    ((counters[i] >= params.srcODims[i]) ? (src_2[i] - counters[i]) : (counters[i] - padsBegin[i]));
                srcIdx += idx * params.srcStrides[i];
            }
            // Innermost dimension: mirrored head, verbatim middle, mirrored tail.
            for (size_t i = 0; i < padsBegin[dimsSize_1]; ++i)
                dstData[dstIdx + i] = srcData[srcIdx + padsBegin[dimsSize_1] - i - shift];
            cpu_memcpy(&dstData[dstIdx + padsBegin[dimsSize_1]], &srcData[srcIdx],
                       sizeof(float) * srcDims[dimsSize_1]);
            for (size_t i = params.srcODims[dimsSize_1]; i < dstDims[dimsSize_1]; ++i)
                dstData[dstIdx + i] = srcData[srcIdx + src_2[dimsSize_1] - i];
            parallel_step(dimsSize_1, counters, dstDims);
        }
    });
}
// Number of innermost destination rows: the total element count
// (dstStrides[0] * dstDims[0]) divided by the innermost dimension size.
// This is the unit of work distributed across threads by the kernels.
size_t MKLDNNPadNode::getWorkAmountDst() const {
    auto dstDims = getChildEdgeAt(0)->getDims().ToSizeVector();
    return params.dstStrides[0] * dstDims[0] / dstDims[dstDims.size() - 1];
}
// Node is valid iff the layer type was resolved to Pad during construction.
bool MKLDNNPadNode::created() const {
    return getType() == Pad;
}
REG_MKLDNN_PRIM_FOR(MKLDNNPadNode, Pad);

View File

@ -0,0 +1,55 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ie_common.h>
#include <mkldnn_node.h>
#include <string>
namespace MKLDNNPlugin {
// CPU Pad node: pads an FP32 tensor in a plain (non-blocked) layout using one
// of four modes (constant / edge / reflect / symmetric), parallelized over the
// outer dimensions with the innermost dimension handled as whole rows.
class MKLDNNPadNode : public MKLDNNNode {
public:
    MKLDNNPadNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
    ~MKLDNNPadNode() override = default;

    void getSupportedDescriptors() override;
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override;
    void execute(mkldnn::stream strm) override;
    bool created() const override;

private:
    // Mirrors the string "pad_mode" attribute parsed in getSupportedDescriptors().
    enum PadMode {
        CONSTANT = 0,
        EDGE = 1,
        REFLECT = 2,
        SYMMETRIC = 3
    };

    // Shared kernel for CONSTANT (isEdge == false) and EDGE (isEdge == true).
    void padConstantOrEdge(const float *srcData, float* dstData,
                           InferenceEngine::SizeVector srcDims, InferenceEngine::SizeVector dstDims,
                           const bool isEdge = false);
    // Shared kernel for REFLECT (isSymmetric == false) and SYMMETRIC (isSymmetric == true).
    void padReflectOrSymmetric(const float *srcData, float* dstData,
                               InferenceEngine::SizeVector srcDims, InferenceEngine::SizeVector dstDims,
                               const bool isSymmetric = false);
    // Number of innermost destination rows (parallel work amount).
    size_t getWorkAmountDst() const;

    PadMode padMode = CONSTANT;
    float padValue = 0.f;                  // fill value; only meaningful for CONSTANT mode
    std::vector<unsigned int> padsBegin;   // per-axis padding before the data
    std::vector<unsigned int> padsEnd;     // per-axis padding after the data

    // Data-independent parameters precomputed in createPrimitive().
    struct {
        InferenceEngine::SizeVector srcODims;    // per axis: padsBegin + srcDim (end of copied region)
        InferenceEngine::SizeVector srcStrides;
        InferenceEngine::SizeVector dstStrides;
        size_t padPointsNum = 0;                 // total padded element count across axes
        InferenceEngine::SizeVector padDims;     // per axis: padsBegin + padsEnd
    } params;
};
} // namespace MKLDNNPlugin

View File

@ -1,257 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "base.hpp"
#include <cmath>
#include <string>
#include <vector>
#include <cassert>
#include "ie_parallel.hpp"
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
// Legacy extension-layer implementation of Pad (replaced by MKLDNNPadNode).
// Supports constant / edge / reflect / symmetric modes on FP32 blobs in a
// plain (PLN) layout; each mode processes the destination element-by-element.
class PadImpl: public ExtLayerBase {
public:
    explicit PadImpl(const CNNLayer* layer) {
        try {
            if (layer->insData.empty() || layer->outData.empty())
                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
            pads_begin = layer->GetParamAsUInts("pads_begin");
            std::vector<unsigned int> pads_end = layer->GetParamAsUInts("pads_end");

            src_dims = layer->insData[0].lock()->getTensorDesc().getDims();
            dst_dims = layer->outData[0]->getTensorDesc().getDims();
            // NOTE(review): pads_end.size() is not validated against the input
            // rank here, unlike pads_begin — confirm this was intentional.
            if (src_dims.size() != dst_dims.size() || pads_begin.size() != src_dims.size())
                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output dimensions!";

            std::string pad_mode = layer->GetParamAsString("pad_mode");
            if (pad_mode == "constant") {
                padMode = CONSTANT;
            } else if (pad_mode == "edge") {
                padMode = EDGE;
            } else if (pad_mode == "reflect") {
                padMode = REFLECT;
                // 'reflect' does not repeat the border element: pad <= dim - 1.
                for (size_t i = 0; i < src_dims.size(); i++) {
                    if ((src_dims[i] - 1) < pads_begin[i] || (src_dims[i] - 1) < pads_end[i])
                        THROW_IE_EXCEPTION << layer->name << " Incorrect pads_begin or pads_end for 'reflect' pad mode";
                }
            } else if (pad_mode == "symmetric") {
                padMode = SYMMETRIC;
                // 'symmetric' repeats the border element: pad <= dim allowed.
                for (size_t i = 0; i < src_dims.size(); i++) {
                    if (src_dims[i] < pads_begin[i] || src_dims[i] < pads_end[i])
                        THROW_IE_EXCEPTION << layer->name << " Incorrect pads_begin or pads_end for 'symmetric' pad mode";
                }
            } else {
                THROW_IE_EXCEPTION << layer->name
                                   << " Incorrect pad_mode. Only constants|edge|reflect|symmetric modes are supported!";
            }

            if (padMode == CONSTANT)
                pad_value = layer->GetParamAsFloat("pad_value", 0.f);

            srcStrides = layer->insData[0].lock()->getTensorDesc().getBlockingDesc().getStrides();
            dstStrides = layer->outData[0]->getTensorDesc().getBlockingDesc().getStrides();
            work_amount = dst_dims[0] * dstStrides[0];   // total dst element count
            for (size_t i = 0; i < src_dims.size(); i++)
                src_o_dms.push_back(src_dims[i] + pads_begin[i]);   // end of copied region per axis

            addConfig(layer, { DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
        } catch (InferenceEngine::details::InferenceEngineException &ex) {
            errorMsg = ex.what();
        }
    }

    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
        const float *src_data = inputs[0]->cbuffer().as<const float *>() +
            inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
        // NOTE(review): cbuffer() is used for the mutable output blob and its
        // constness is dropped by the .as<float *>() cast — buffer() would
        // express the intent more cleanly.
        float* dst_data = outputs[0]->cbuffer().as<float *>() +
            outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();

        // Dispatch on the mode resolved in the constructor.
        switch (padMode) {
            case CONSTANT:
                pad_constant(src_data, dst_data);
                break;
            case EDGE:
                pad_edge(src_data, dst_data);
                break;
            case REFLECT:
                pad_reflect(src_data, dst_data);
                break;
            case SYMMETRIC:
                pad_symmetric(src_data, dst_data);
                break;
            default:
                return GENERAL_ERROR;
        }
        return OK;
    }

private:
    enum PadMode {
        CONSTANT = 0,
        EDGE = 1,
        REFLECT = 2,
        SYMMETRIC = 3
    };

    void pad_constant(const float *src_data, float* dst_data);   // fill pad_value outside the source region
    void pad_edge(const float *src_data, float* dst_data);       // clamp to the nearest border element
    void pad_reflect(const float *src_data, float* dst_data);    // mirror without repeating the border
    void pad_symmetric(const float *src_data, float* dst_data);  // mirror including the border

    PadMode padMode = CONSTANT;
    float pad_value = 0.f;                  // only meaningful for CONSTANT mode
    SizeVector src_dims;
    SizeVector dst_dims;
    std::vector<unsigned int> pads_begin;
    SizeVector src_o_dms;                   // per axis: pads_begin + src_dim
    SizeVector srcStrides;
    SizeVector dstStrides;
    size_t work_amount;                     // total dst element count
};
// Converts the linear index `start` into a multi-dimensional counter over the
// first `size` axes of `dims` (last axis varies fastest); returns the leftover
// quotient after all axes have been consumed.
inline size_t parallel_init(size_t start, size_t size, std::vector<size_t> &counters, std::vector<size_t> &dims) {
    for (size_t i = size; i-- > 0;) {
        counters[i] = start % dims[i];
        start /= dims[i];
    }
    return start;
}
// Moves the multi-dimensional counter one position forward, wrapping each
// axis modulo its extent and carrying into the next outer axis on wrap.
inline void parallel_step(size_t size, std::vector<size_t> &counters, std::vector<size_t> &dims) {
    size_t axis = size;
    while (axis > 0) {
        --axis;
        counters[axis] = (counters[axis] + 1) % dims[axis];
        if (counters[axis] != 0)
            return;   // no carry: done
    }
}
// CONSTANT mode: destination elements outside the source region get
// pad_value; elements inside get the corresponding source element.
// Fix: the original reused `int srcIdx = 1` as an in-source flag and then
// shadowed it with a second `int srcIdx = 0` inside the if — replaced by an
// explicit bool so the offset variable is declared exactly once.
void PadImpl::pad_constant(const float *src_data, float* dst_data) {
    // Offset compensating for pads_begin when mapping dst coords to src coords:
    // srcIdx is accumulated from raw dst counters, so subtract the pad shift.
    int offset = 0;
    for (size_t i = 0; i < srcStrides.size(); ++i)
        offset += pads_begin[i] * srcStrides[i];

    parallel_nt(0, [&](const int ithr, const int nthr) {
        size_t start = 0, end = 0;
        SizeVector counters(dst_dims.size(), 0);
        splitter(work_amount, nthr, ithr, start, end);
        parallel_init(start, dst_dims.size(), counters, dst_dims);
        for (size_t iwork = start; iwork < end; ++iwork) {
            bool insideSrc = true;
            int dstIdx = 0;
            for (size_t i = 0; i < dstStrides.size(); ++i)
                dstIdx += counters[i] * dstStrides[i];
            // Any coordinate in the padded region means the whole element is pad.
            for (size_t i = 0; i < counters.size(); ++i) {
                if (counters[i] < pads_begin[i] || counters[i] >= src_o_dms[i]) {
                    dst_data[dstIdx] = pad_value;
                    insideSrc = false;
                    break;
                }
            }
            if (insideSrc) {
                int srcIdx = 0;
                for (size_t i = 0; i < srcStrides.size(); ++i)
                    srcIdx += counters[i] * srcStrides[i];
                dst_data[dstIdx] = src_data[srcIdx - offset];
            }
            parallel_step(dst_dims.size(), counters, dst_dims);
        }
    });
}
void PadImpl::pad_edge(const float *src_data, float* dst_data) {
parallel_nt(0, [&](const int ithr, const int nthr) {
size_t start = 0, end = 0;
SizeVector counters(dst_dims.size(), 0);
splitter(work_amount, nthr, ithr, start, end);
parallel_init(start, dst_dims.size(), counters, dst_dims);
for (size_t iwork = start; iwork < end; ++iwork) {
int srcIdx = 0;
int dstIdx = 0;
for (size_t i = 0; i < dstStrides.size(); ++i)
dstIdx += counters[i] * dstStrides[i];
for (size_t i = 0; i < srcStrides.size(); ++i) {
int idx = (counters[i] < pads_begin[i]) ? 0 :
((counters[i] >= src_o_dms[i]) ? (src_dims[i] - 1) : (counters[i] - pads_begin[i]));
srcIdx += idx * srcStrides[i];
}
dst_data[dstIdx] = src_data[srcIdx];
parallel_step(dst_dims.size(), counters, dst_dims);
}
});
}
// REFLECT mode: mirrors the source around its borders WITHOUT repeating the
// border element (e.g. [1 2 3] with a front pad of 2 becomes 3 2 | 1 2 3).
// Precondition (checked in the constructor): pads <= dim - 1 per axis.
void PadImpl::pad_reflect(const float *src_data, float* dst_data) {
    // Mirror constant per axis: for a dst coordinate c past the source region,
    // the reflected source index is src_2[i] - c.
    SizeVector src_2;
    for (size_t i = 0; i < src_dims.size(); i++)
        src_2.push_back(src_dims[i] + src_o_dms[i] - 2);
    parallel_nt(0, [&](const int ithr, const int nthr) {
        size_t start = 0, end = 0;
        SizeVector counters(dst_dims.size(), 0);
        // Split the flat dst index space evenly across threads.
        splitter(work_amount, nthr, ithr, start, end);
        parallel_init(start, dst_dims.size(), counters, dst_dims);
        for (size_t iwork = start; iwork < end; ++iwork) {
            int srcIdx = 0;
            int dstIdx = 0;
            for (size_t i = 0; i < dstStrides.size(); ++i)
                dstIdx += counters[i] * dstStrides[i];
            // Reflect each coordinate into the valid source range.
            for (size_t i = 0; i < srcStrides.size(); ++i) {
                int idx = (counters[i] < pads_begin[i]) ? (pads_begin[i] - counters[i]) :
                    ((counters[i] >= src_o_dms[i]) ? (src_2[i] - counters[i]) : (counters[i] - pads_begin[i]));
                srcIdx += idx * srcStrides[i];
            }
            dst_data[dstIdx] = src_data[srcIdx];
            parallel_step(dst_dims.size(), counters, dst_dims);
        }
    });
}
// SYMMETRIC mode: mirrors the source around its borders INCLUDING the border
// element (e.g. [1 2 3] with a front pad of 2 becomes 2 1 | 1 2 3).
// Precondition (checked in the constructor): pads <= dim per axis.
void PadImpl::pad_symmetric(const float *src_data, float* dst_data) {
    // Mirror constant per axis: differs from 'reflect' by +1 because the
    // border element itself is repeated.
    SizeVector src_2;
    for (size_t i = 0; i < src_dims.size(); i++)
        src_2.push_back(src_dims[i] + src_o_dms[i] - 1);
    parallel_nt(0, [&](const int ithr, const int nthr) {
        size_t start = 0, end = 0;
        SizeVector counters(dst_dims.size(), 0);
        // Split the flat dst index space evenly across threads.
        splitter(work_amount, nthr, ithr, start, end);
        parallel_init(start, dst_dims.size(), counters, dst_dims);
        for (size_t iwork = start; iwork < end; ++iwork) {
            int srcIdx = 0;
            int dstIdx = 0;
            for (size_t i = 0; i < dstStrides.size(); ++i)
                dstIdx += counters[i] * dstStrides[i];
            // Mirror each coordinate into the valid source range (border repeated).
            for (size_t i = 0; i < srcStrides.size(); ++i) {
                int idx = (counters[i] < pads_begin[i]) ? (pads_begin[i] - 1 - counters[i]) :
                    ((counters[i] >= src_o_dms[i]) ? (src_2[i] - counters[i]) : (counters[i] - pads_begin[i]));
                srcIdx += idx * srcStrides[i];
            }
            dst_data[dstIdx] = src_data[srcIdx];
            parallel_step(dst_dims.size(), counters, dst_dims);
        }
    });
}
REG_FACTORY_FOR(PadImpl, Pad);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine

View File

@ -21,6 +21,7 @@ const std::vector<float> argPadValue = {0.f, 1.f, 2.f, -1.f};
const std::vector<ngraph::helpers::PadMode> padMode = {
ngraph::helpers::PadMode::EDGE,
ngraph::helpers::PadMode::REFLECT,
ngraph::helpers::PadMode::SYMMETRIC
};
const auto pad2DConstparams = testing::Combine(

View File

@ -191,8 +191,8 @@ namespace ngraph
}
else
{
c[i] = static_cast<size_t>(padding_below[i] + src_dim +
padding_above[i] - pos);
c[i] = static_cast<size_t>(2 * (padding_below[i] + src_dim) -
c[i] - 1);
}
}
}