[CPU] Converted Pad Layer to MKLDNNNode and added more optimized implementations (#2586)
Pad-1: fixed ngraph reference for symmetric mode and added cpu tests
This commit is contained in:
parent
be3df1837c
commit
a8e581ff8b
@ -23,6 +23,7 @@ set(LAYERS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_input_node.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_lrn_node.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_memory_node.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_pad_node.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_permute_node.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_pooling_node.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_quantize_node.cpp
|
||||
@ -64,7 +65,6 @@ set(LAYERS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/log_softmax.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/math.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/one_hot.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/pad.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/powerfile.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/priorbox.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/priorbox_clustered.cpp
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include <nodes/mkldnn_softmax_node.h>
|
||||
#include <nodes/mkldnn_tile_node.h>
|
||||
#include <nodes/mkldnn_split_node.h>
|
||||
#include <nodes/mkldnn_pad_node.h>
|
||||
#include <nodes/mkldnn_permute_node.h>
|
||||
#include <nodes/mkldnn_memory_node.hpp>
|
||||
#include <nodes/mkldnn_rnn.h>
|
||||
@ -103,6 +104,7 @@ static const InferenceEngine::details::caseless_unordered_map<std::string, Type>
|
||||
{ "ROIPooling", ROIPooling },
|
||||
{ "BatchNormalization", BatchNormalization },
|
||||
{ "Flatten", Flatten },
|
||||
{ "Pad", Pad },
|
||||
{ "Permute", Permute },
|
||||
{ "Copy", Copy },
|
||||
{ "LSTMCell", RNNCell },
|
||||
|
@ -53,6 +53,7 @@ enum Type {
|
||||
ROIPooling,
|
||||
BatchNormalization,
|
||||
Flatten,
|
||||
Pad,
|
||||
Permute,
|
||||
Copy,
|
||||
MemoryOutput,
|
||||
@ -133,6 +134,8 @@ static std::string NameFromType(Type type) {
|
||||
return "BatchNormalization";
|
||||
case Flatten:
|
||||
return "Flatten";
|
||||
case Pad:
|
||||
return "Pad";
|
||||
case Permute:
|
||||
return "Permute";
|
||||
case Copy:
|
||||
|
@ -68,7 +68,6 @@ MKLDNN_EXTENSION_NODE(InterpImpl, Interp);
|
||||
MKLDNN_EXTENSION_NODE(BatchToSpaceImpl, BatchToSpace);
|
||||
MKLDNN_EXTENSION_NODE(ExperimentalDetectronPriorGridGeneratorImpl, ExperimentalDetectronPriorGridGenerator);
|
||||
MKLDNN_EXTENSION_NODE(SimplerNMSImpl, SimplerNMS);
|
||||
MKLDNN_EXTENSION_NODE(PadImpl, Pad);
|
||||
MKLDNN_EXTENSION_NODE(GRNImpl, GRN);
|
||||
MKLDNN_EXTENSION_NODE(SparseFillEmptyRowsImpl, SparseFillEmptyRows);
|
||||
MKLDNN_EXTENSION_NODE(BucketizeImpl, Bucketize);
|
||||
|
255
inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp
Normal file
255
inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp
Normal file
@ -0,0 +1,255 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "mkldnn_pad_node.h"
|
||||
#include <legacy/ie_layers.h>
|
||||
#include <string>
|
||||
#include <cmath>
|
||||
#include <mkldnn_types.h>
|
||||
#include <mkldnn_extension_utils.h>
|
||||
#include <limits>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
using namespace mkldnn;
|
||||
using namespace MKLDNNPlugin;
|
||||
using namespace InferenceEngine;
|
||||
|
||||
// Pad node constructor: all state is set up by the base MKLDNNNode; the
// layer's pad parameters are parsed later in getSupportedDescriptors().
MKLDNNPadNode::MKLDNNPadNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
        : MKLDNNNode(layer, eng, cache) {}
|
||||
|
||||
void MKLDNNPadNode::getSupportedDescriptors() {
|
||||
auto* padLayer = dynamic_cast<PadLayer*>(getCnnLayer().get());
|
||||
if (padLayer == nullptr)
|
||||
THROW_IE_EXCEPTION << "Cannot convert Pad layer.";
|
||||
|
||||
padsBegin = padLayer->GetParamAsUInts("pads_begin");
|
||||
padsEnd = padLayer->GetParamAsUInts("pads_end");
|
||||
|
||||
SizeVector srcDims = padLayer->insData[0].lock()->getTensorDesc().getDims();
|
||||
SizeVector dstDims = padLayer->outData[0]->getTensorDesc().getDims();
|
||||
if (srcDims.size() != dstDims.size() || padsBegin.size() != srcDims.size() || padsEnd.size() != srcDims.size())
|
||||
THROW_IE_EXCEPTION << padLayer->name << " Incorrect number of input/output dimensions!";
|
||||
|
||||
std::string pad_mode = padLayer->GetParamAsString("pad_mode");
|
||||
if (pad_mode == "constant") {
|
||||
padMode = CONSTANT;
|
||||
padValue = padLayer->GetParamAsFloat("pad_value", 0.f);
|
||||
} else if (pad_mode == "edge") {
|
||||
padMode = EDGE;
|
||||
} else if (pad_mode == "reflect") {
|
||||
padMode = REFLECT;
|
||||
for (size_t i = 0; i < srcDims.size(); i++) {
|
||||
if ((srcDims[i] - 1) < padsBegin[i] || (srcDims[i] - 1) < padsEnd[i])
|
||||
THROW_IE_EXCEPTION << padLayer->name << " Incorrect padsBegin or padsEnd for 'reflect' pad mode";
|
||||
}
|
||||
} else if (pad_mode == "symmetric") {
|
||||
padMode = SYMMETRIC;
|
||||
for (size_t i = 0; i < srcDims.size(); i++) {
|
||||
if (srcDims[i] < padsBegin[i] || srcDims[i] < padsEnd[i])
|
||||
THROW_IE_EXCEPTION << padLayer->name << " Incorrect padsBegin or padsEnd for 'symmetric' pad mode";
|
||||
}
|
||||
} else {
|
||||
THROW_IE_EXCEPTION << padLayer->name
|
||||
<< " Incorrect pad_mode. Only constants|edge|reflect|symmetric modes are supported!";
|
||||
}
|
||||
|
||||
if (getParentEdges().size() != 1)
|
||||
THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
|
||||
if (getChildEdges().empty())
|
||||
THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
|
||||
}
|
||||
|
||||
void MKLDNNPadNode::initSupportedPrimitiveDescriptors() {
|
||||
if (!supportedPrimitiveDescriptors.empty())
|
||||
return;
|
||||
|
||||
InferenceEngine::Precision precision = InferenceEngine::Precision::FP32;
|
||||
auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
|
||||
|
||||
auto srcDims = getParentEdgeAt(0)->getDims();
|
||||
|
||||
InferenceEngine::LayerConfig config;
|
||||
config.dynBatchSupport = false;
|
||||
config.inConfs.resize(1);
|
||||
config.outConfs.resize(1);
|
||||
config.inConfs[0].inPlace = -1;
|
||||
config.inConfs[0].constant = false;
|
||||
config.outConfs[0].inPlace = -1;
|
||||
config.outConfs[0].constant = false;
|
||||
|
||||
auto memoryFormat = MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims());
|
||||
config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), dataType, memoryFormat);
|
||||
config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), dataType, memoryFormat);
|
||||
supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memoryFormat});
|
||||
}
|
||||
|
||||
void MKLDNNPadNode::createPrimitive() {
|
||||
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
|
||||
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
|
||||
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
|
||||
THROW_IE_EXCEPTION << "Destination memory didn't allocate.";
|
||||
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
|
||||
THROW_IE_EXCEPTION << "Input memory didn't allocate.";
|
||||
if (getSelectedPrimitiveDescriptor() == nullptr)
|
||||
THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set.";
|
||||
|
||||
SizeVector srcDims = getParentEdgeAt(0)->getBlob()->getTensorDesc().getDims();
|
||||
SizeVector dstDims = getChildEdgeAt(0)->getBlob()->getTensorDesc().getDims();
|
||||
|
||||
params.srcStrides = getParentEdgeAt(0)->getBlob()->getTensorDesc().getBlockingDesc().getStrides();
|
||||
params.dstStrides = getChildEdgeAt(0)->getBlob()->getTensorDesc().getBlockingDesc().getStrides();
|
||||
|
||||
params.srcODims.resize(srcDims.size());
|
||||
params.padDims.resize(padsBegin.size());
|
||||
for (size_t i = 0; i < srcDims.size(); i++) {
|
||||
params.srcODims[i] = srcDims[i] + padsBegin[i];
|
||||
params.padDims[i] = padsBegin[i] + padsEnd[i];
|
||||
params.padPointsNum += padsBegin[i] + padsEnd[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Dispatches to the mode-specific padding kernel. Input/output are FP32 in
// the plain layout (guaranteed by initSupportedPrimitiveDescriptors()).
// CONSTANT/EDGE share one kernel and REFLECT/SYMMETRIC the other; the bool
// flag selects the second mode of each pair.
void MKLDNNPadNode::execute(mkldnn::stream strm) {
    const float *srcData = getParentEdgeAt(0)->getBlob()->cbuffer().as<const float*>() +
            getParentEdgeAt(0)->getBlob()->getTensorDesc().getBlockingDesc().getOffsetPadding();
    float* dstData = getChildEdgeAt(0)->getBlob()->buffer().as<float*>() +
            getChildEdgeAt(0)->getBlob()->getTensorDesc().getBlockingDesc().getOffsetPadding();

    auto srcDims = getParentEdgeAt(0)->getDims().ToSizeVector();
    auto dstDims = getChildEdgeAt(0)->getDims().ToSizeVector();

    if (padMode == CONSTANT) {
        padConstantOrEdge(srcData, dstData, srcDims, dstDims);
    } else if (padMode == EDGE) {
        padConstantOrEdge(srcData, dstData, srcDims, dstDims, true);
    } else if (padMode == REFLECT) {
        padReflectOrSymmetric(srcData, dstData, srcDims, dstDims);
    } else if (padMode == SYMMETRIC) {
        padReflectOrSymmetric(srcData, dstData, srcDims, dstDims, true);
    }
}
|
||||
|
||||
// Decomposes the linear work-item index `start` into per-dimension counters
// over the first `size` entries of `dims` (row-major: last dimension varies
// fastest). Returns whatever remains of `start` after all dimensions are
// consumed — 0 for an in-range index.
// Changes: `dims` is now const (it was taken by mutable reference but never
// modified) and the size_t→int loop-bound conversion is made explicit.
inline size_t parallel_init(size_t start, size_t size, std::vector<size_t> &counters, const std::vector<size_t> &dims) {
    for (int j = static_cast<int>(size) - 1; j >= 0; j--) {
        counters[j] = start % dims[j];
        start /= dims[j];
    }
    return start;
}
|
||||
|
||||
// Odometer-style increment of `counters` over the first `size` dimensions of
// `dims`: the innermost dimension advances fastest and carries propagate
// outward; wraps to all-zero after the last position.
// Changes: `dims` is now const (never modified) and the size_t→int loop-bound
// conversion is made explicit.
inline void parallel_step(size_t size, std::vector<size_t> &counters, const std::vector<size_t> &dims) {
    for (int j = static_cast<int>(size) - 1; j >= 0; j--) {
        counters[j] = (counters[j] + 1) % dims[j];
        if (counters[j] != 0)
            return;
    }
}
|
||||
|
||||
// Kernel for the CONSTANT and EDGE pad modes.
// Each parallel work item produces one innermost output row laid out as
//   [ begin-pad | copied source row | end-pad ].
// CONSTANT (isEdge == false): pads are filled with padValue, and a row whose
// outer coordinates fall in a pad region is filled entirely with padValue.
// EDGE (isEdge == true): out-of-range coordinates are clamped to the nearest
// source element (outer dims clamp the source row, inner pads replicate the
// row's first/last element).
void MKLDNNPadNode::padConstantOrEdge(const float* srcData, float* dstData, SizeVector srcDims, SizeVector dstDims,
                                      const bool isEdge) {
    // Number of outer dimensions; the innermost dim is handled per row below.
    size_t dimsSize_1 = dstDims.size() - 1;
    // Length of one innermost output row.
    size_t inputSV = dstDims[dimsSize_1];
    size_t workAmountDst = getWorkAmountDst();

    parallel_nt(0, [&](const int ithr, const int nthr) {
        size_t start = 0, end = 0;
        SizeVector counters(dimsSize_1, 0);
        splitter(workAmountDst, nthr, ithr, start, end);

        // Translate this thread's starting row index into per-dimension
        // counters and the matching flat destination offset.
        parallel_init(start, dimsSize_1, counters, dstDims);
        int dstIdx = 0;
        for (size_t i = 0; i < dimsSize_1; ++i)
            dstIdx += counters[i] * params.dstStrides[i];

        for (size_t iwork = start; iwork < end; ++iwork, dstIdx += inputSV) {
            if (!isEdge) {
                // CONSTANT mode: if any outer counter lies in a pad region,
                // the entire row is padding.
                size_t j;
                for (j = 0; j < dimsSize_1; ++j) {
                    if ((counters[j] < padsBegin[j]) || (counters[j] >= params.srcODims[j]))
                        break;
                }

                if (j != dimsSize_1) {
                    std::fill_n(&dstData[dstIdx], dstDims[dimsSize_1], padValue);
                    parallel_step(dimsSize_1, counters, dstDims);
                    continue;
                }
            }

            // Map the output row to a source row; EDGE mode clamps
            // out-of-range outer coordinates to the first/last source index.
            int srcIdx = 0;
            for (size_t i = 0; i < dimsSize_1; ++i) {
                int idx = (counters[i] < padsBegin[i]) ? 0 :
                          ((counters[i] >= params.srcODims[i]) ? (srcDims[i] - 1) : (counters[i] - padsBegin[i]));
                srcIdx += idx * params.srcStrides[i];
            }

            // Emit the row: leading pad, source copy, trailing pad.
            std::fill_n(&dstData[dstIdx], padsBegin[dimsSize_1],
                        isEdge ? srcData[srcIdx] : padValue);
            cpu_memcpy(&dstData[dstIdx + padsBegin[dimsSize_1]], &srcData[srcIdx],
                       srcDims[dimsSize_1] * sizeof(float));
            std::fill_n(&dstData[dstIdx + params.srcODims[dimsSize_1]], padsEnd[dimsSize_1],
                        isEdge ? srcData[srcIdx + srcDims[dimsSize_1] - 1] : padValue);

            parallel_step(dimsSize_1, counters, dstDims);
        }
    });
}
|
||||
|
||||
// Kernel for the REFLECT and SYMMETRIC pad modes. Both mirror the source
// around its borders; SYMMETRIC (shift == 1) includes the border element in
// the mirror image while REFLECT (shift == 0) excludes it.
// Each parallel work item produces one innermost output row; outer-dimension
// pads are realized by selecting a mirrored source row.
void MKLDNNPadNode::padReflectOrSymmetric(const float *srcData, float* dstData, SizeVector srcDims, SizeVector dstDims,
                                          const bool isSymmetric) {
    int shift = isSymmetric ? 1 : 0;
    // src_2[i] is the mirror pivot for the trailing side: a trailing-pad
    // output coordinate c maps to source offset src_2[i] - c.
    SizeVector src_2;
    for (size_t i = 0; i < srcDims.size(); i++)
        src_2.push_back(srcDims[i] + params.srcODims[i] - 2 + shift);

    size_t dimsSize_1 = dstDims.size() - 1;   // number of outer dimensions
    size_t inputSV = dstDims[dimsSize_1];     // innermost output row length
    size_t workAmountDst = getWorkAmountDst();

    parallel_nt(0, [&](const int ithr, const int nthr) {
        size_t start = 0, end = 0;
        SizeVector counters(dimsSize_1, 0);
        splitter(workAmountDst, nthr, ithr, start, end);

        // Seed per-dimension counters and the flat destination offset for
        // this thread's first row.
        parallel_init(start, dimsSize_1, counters, dstDims);
        int dstIdx = 0;
        for (size_t i = 0; i < dimsSize_1; ++i)
            dstIdx += counters[i] * params.dstStrides[i];

        for (size_t iwork = start; iwork < end; ++iwork, dstIdx += inputSV) {
            // Map the output row to a (possibly mirrored) source row.
            int srcIdx = 0;
            for (size_t i = 0; i < dimsSize_1; ++i) {
                int idx = (counters[i] < padsBegin[i]) ? (padsBegin[i] - counters[i] - shift) :
                          ((counters[i] >= params.srcODims[i]) ? (src_2[i] - counters[i]) : (counters[i] - padsBegin[i]));
                srcIdx += idx * params.srcStrides[i];
            }

            // Leading pad: mirrored copy of the row head.
            for (size_t i = 0; i < padsBegin[dimsSize_1]; ++i)
                dstData[dstIdx + i] = srcData[srcIdx + padsBegin[dimsSize_1] - i - shift];

            // Middle: straight copy of the source row.
            cpu_memcpy(&dstData[dstIdx + padsBegin[dimsSize_1]], &srcData[srcIdx],
                       sizeof(float) * srcDims[dimsSize_1]);

            // Trailing pad: mirrored copy of the row tail.
            for (size_t i = params.srcODims[dimsSize_1]; i < dstDims[dimsSize_1]; ++i)
                dstData[dstIdx + i] = srcData[srcIdx + src_2[dimsSize_1] - i];

            parallel_step(dimsSize_1, counters, dstDims);
        }
    });
}
|
||||
|
||||
// One parallel work item covers one innermost output row, so the work amount
// is the total number of output elements divided by the row length.
size_t MKLDNNPadNode::getWorkAmountDst() const {
    const auto outDims = getChildEdgeAt(0)->getDims().ToSizeVector();
    const size_t totalElements = params.dstStrides[0] * outDims[0];
    return totalElements / outDims.back();
}
|
||||
|
||||
// Reports whether the layer was actually converted into this node type.
bool MKLDNNPadNode::created() const {
    return getType() == Pad;
}

// Register the node so the plugin instantiates it for "Pad" layers.
REG_MKLDNN_PRIM_FOR(MKLDNNPadNode, Pad);
|
55
inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h
Normal file
55
inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h
Normal file
@ -0,0 +1,55 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ie_common.h>
|
||||
#include <mkldnn_node.h>
|
||||
#include <string>
|
||||
|
||||
namespace MKLDNNPlugin {
|
||||
|
||||
// MKLDNN plugin node implementing the Pad operation for FP32 plain-layout
// tensors in four modes: constant, edge, reflect and symmetric.
class MKLDNNPadNode : public MKLDNNNode {
public:
    MKLDNNPadNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
    ~MKLDNNPadNode() override = default;

    void getSupportedDescriptors() override;
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override;
    void execute(mkldnn::stream strm) override;
    bool created() const override;

private:
    // Parsed from the layer's "pad_mode" parameter in getSupportedDescriptors().
    enum PadMode {
        CONSTANT = 0,
        EDGE = 1,
        REFLECT = 2,
        SYMMETRIC = 3
    };

    // Kernel for CONSTANT (isEdge == false) and EDGE (isEdge == true) modes.
    void padConstantOrEdge(const float *srcData, float* dstData,
                           InferenceEngine::SizeVector srcDims, InferenceEngine::SizeVector dstDims,
                           const bool isEdge = false);
    // Kernel for REFLECT (isSymmetric == false) and SYMMETRIC (isSymmetric == true) modes.
    void padReflectOrSymmetric(const float *srcData, float* dstData,
                               InferenceEngine::SizeVector srcDims, InferenceEngine::SizeVector dstDims,
                               const bool isSymmetric = false);

    // Number of innermost output rows (one parallel work item per row).
    size_t getWorkAmountDst() const;

    PadMode padMode = CONSTANT;
    float padValue = 0.f;                   // fill value, CONSTANT mode only
    std::vector<unsigned int> padsBegin;    // leading pad per axis
    std::vector<unsigned int> padsEnd;      // trailing pad per axis

    // Geometry precomputed in createPrimitive() for the execute() kernels.
    struct {
        InferenceEngine::SizeVector srcODims;   // padsBegin[i] + srcDims[i] per axis
        InferenceEngine::SizeVector srcStrides;
        InferenceEngine::SizeVector dstStrides;
        size_t padPointsNum = 0;                // accumulated pad count over all axes
        InferenceEngine::SizeVector padDims;    // padsBegin[i] + padsEnd[i] per axis
    } params;
};
|
||||
|
||||
} // namespace MKLDNNPlugin
|
@ -1,257 +0,0 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "base.hpp"
|
||||
|
||||
#include <cmath>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include "ie_parallel.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Extensions {
|
||||
namespace Cpu {
|
||||
|
||||
// Legacy CPU-extension implementation of the Pad layer (element-wise kernels,
// superseded by the MKLDNNPadNode in this plugin).
class PadImpl: public ExtLayerBase {
public:
    // Parses and validates the layer parameters; on failure records the
    // exception text in errorMsg (ExtLayerBase convention) instead of throwing.
    explicit PadImpl(const CNNLayer* layer) {
        try {
            if (layer->insData.empty() || layer->outData.empty())
                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";

            // NOTE(review): pads_end is only a constructor local — it is used
            // for the reflect/symmetric range checks and then discarded.
            pads_begin = layer->GetParamAsUInts("pads_begin");
            std::vector<unsigned int> pads_end = layer->GetParamAsUInts("pads_end");

            src_dims = layer->insData[0].lock()->getTensorDesc().getDims();
            dst_dims = layer->outData[0]->getTensorDesc().getDims();
            if (src_dims.size() != dst_dims.size() || pads_begin.size() != src_dims.size())
                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output dimensions!";

            std::string pad_mode = layer->GetParamAsString("pad_mode");
            if (pad_mode == "constant") {
                padMode = CONSTANT;
            } else if (pad_mode == "edge") {
                padMode = EDGE;
            } else if (pad_mode == "reflect") {
                padMode = REFLECT;
                // reflect excludes the border element, so pads are limited
                // to src_dims[i] - 1.
                for (size_t i = 0; i < src_dims.size(); i++) {
                    if ((src_dims[i] - 1) < pads_begin[i] || (src_dims[i] - 1) < pads_end[i])
                        THROW_IE_EXCEPTION << layer->name << " Incorrect pads_begin or pads_end for 'reflect' pad mode";
                }
            } else if (pad_mode == "symmetric") {
                padMode = SYMMETRIC;
                // symmetric includes the border element, so pads may reach
                // src_dims[i].
                for (size_t i = 0; i < src_dims.size(); i++) {
                    if (src_dims[i] < pads_begin[i] || src_dims[i] < pads_end[i])
                        THROW_IE_EXCEPTION << layer->name << " Incorrect pads_begin or pads_end for 'symmetric' pad mode";
                }
            } else {
                THROW_IE_EXCEPTION << layer->name
                                   << " Incorrect pad_mode. Only constants|edge|reflect|symmetric modes are supported!";
            }

            if (padMode == CONSTANT)
                pad_value = layer->GetParamAsFloat("pad_value", 0.f);

            srcStrides = layer->insData[0].lock()->getTensorDesc().getBlockingDesc().getStrides();
            dstStrides = layer->outData[0]->getTensorDesc().getBlockingDesc().getStrides();
            // Total number of output elements (plain layout).
            work_amount = dst_dims[0] * dstStrides[0];
            // src_o_dms[i] is the first trailing-pad output coordinate on axis i.
            for (size_t i = 0; i < src_dims.size(); i++)
                src_o_dms.push_back(src_dims[i] + pads_begin[i]);

            addConfig(layer, { DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
        } catch (InferenceEngine::details::InferenceEngineException &ex) {
            errorMsg = ex.what();
        }
    }

    // Dispatches to the mode-specific kernel; FP32 plain layout only.
    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
        const float *src_data = inputs[0]->cbuffer().as<const float *>() +
            inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
        float* dst_data = outputs[0]->cbuffer().as<float *>() +
            outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();

        switch (padMode) {
            case CONSTANT:
                pad_constant(src_data, dst_data);
                break;
            case EDGE:
                pad_edge(src_data, dst_data);
                break;
            case REFLECT:
                pad_reflect(src_data, dst_data);
                break;
            case SYMMETRIC:
                pad_symmetric(src_data, dst_data);
                break;
            default:
                return GENERAL_ERROR;
        }
        return OK;
    }

private:
    enum PadMode {
        CONSTANT = 0,
        EDGE = 1,
        REFLECT = 2,
        SYMMETRIC = 3
    };

    // Element-wise kernels, one per pad mode (defined below the class).
    void pad_constant(const float *src_data, float* dst_data);
    void pad_edge(const float *src_data, float* dst_data);
    void pad_reflect(const float *src_data, float* dst_data);
    void pad_symmetric(const float *src_data, float* dst_data);

    PadMode padMode = CONSTANT;
    float pad_value = 0.f;                  // CONSTANT-mode fill value
    SizeVector src_dims;
    SizeVector dst_dims;
    std::vector<unsigned int> pads_begin;   // leading pad per axis
    SizeVector src_o_dms;                   // pads_begin[i] + src_dims[i] per axis
    SizeVector srcStrides;
    SizeVector dstStrides;
    size_t work_amount;                     // total output elements
};
|
||||
|
||||
|
||||
// Converts a linear work-item index `start` into per-dimension counters over
// `dims` (row-major: last dimension fastest); returns the remainder above the
// full range (0 for an in-range index).
inline size_t parallel_init(size_t start, size_t size, std::vector<size_t> &counters, std::vector<size_t> &dims) {
    for (int j = size - 1; j >= 0; j--) {
        counters[j] = start % dims[j];
        start = start / dims[j];
    }
    return start;
}

// Advances the counters odometer-style by one position: innermost dimension
// increments first, carries propagate outward.
inline void parallel_step(size_t size, std::vector<size_t> &counters, std::vector<size_t> &dims) {
    for (int j = size - 1; j >= 0; j--) {
        counters[j] = (counters[j] + 1) % dims[j];
        if (counters[j] != 0)
            return;
    }
}
|
||||
|
||||
void PadImpl::pad_constant(const float *src_data, float* dst_data) {
|
||||
int offset = 0;
|
||||
for (size_t i = 0; i < srcStrides.size(); ++i)
|
||||
offset += pads_begin[i] * srcStrides[i];
|
||||
|
||||
parallel_nt(0, [&](const int ithr, const int nthr) {
|
||||
size_t start = 0, end = 0;
|
||||
SizeVector counters(dst_dims.size(), 0);
|
||||
splitter(work_amount, nthr, ithr, start, end);
|
||||
|
||||
parallel_init(start, dst_dims.size(), counters, dst_dims);
|
||||
for (size_t iwork = start; iwork < end; ++iwork) {
|
||||
int srcIdx = 1;
|
||||
int dstIdx = 0;
|
||||
for (size_t i = 0; i < dstStrides.size(); ++i)
|
||||
dstIdx += counters[i] * dstStrides[i];
|
||||
|
||||
for (size_t i = 0; i < counters.size(); ++i) {
|
||||
if (counters[i] < pads_begin[i] || counters[i] >= src_o_dms[i]) {
|
||||
dst_data[dstIdx] = pad_value;
|
||||
srcIdx = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (srcIdx) {
|
||||
int srcIdx = 0;
|
||||
for (size_t i = 0; i < srcStrides.size(); ++i)
|
||||
srcIdx += counters[i] * srcStrides[i];
|
||||
dst_data[dstIdx] = src_data[srcIdx - offset];
|
||||
}
|
||||
parallel_step(dst_dims.size(), counters, dst_dims);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// EDGE mode: every output point maps to the nearest source point — each
// coordinate is clamped into the source window per dimension.
void PadImpl::pad_edge(const float *src_data, float* dst_data) {
    parallel_nt(0, [&](const int ithr, const int nthr) {
        size_t start = 0, end = 0;
        SizeVector counters(dst_dims.size(), 0);
        splitter(work_amount, nthr, ithr, start, end);

        parallel_init(start, dst_dims.size(), counters, dst_dims);
        for (size_t iwork = start; iwork < end; ++iwork) {
            int srcIdx = 0;
            int dstIdx = 0;
            for (size_t i = 0; i < dstStrides.size(); ++i)
                dstIdx += counters[i] * dstStrides[i];

            // Clamp each output coordinate into the source range:
            //   below pads_begin[i]   -> 0 (first source element)
            //   at/after src_o_dms[i] -> src_dims[i] - 1 (last source element)
            //   otherwise             -> counters[i] - pads_begin[i]
            for (size_t i = 0; i < srcStrides.size(); ++i) {
                int idx = (counters[i] < pads_begin[i]) ? 0 :
                          ((counters[i] >= src_o_dms[i]) ? (src_dims[i] - 1) : (counters[i] - pads_begin[i]));
                srcIdx += idx * srcStrides[i];
            }

            dst_data[dstIdx] = src_data[srcIdx];
            parallel_step(dst_dims.size(), counters, dst_dims);
        }
    });
}
|
||||
|
||||
// REFLECT mode: mirror the source around its borders without repeating the
// border element. A leading-pad coordinate c maps to pads_begin[i] - c; a
// trailing coordinate maps to src_2[i] - c, where
// src_2[i] = src_dims[i] + src_o_dms[i] - 2 is the mirror pivot.
void PadImpl::pad_reflect(const float *src_data, float* dst_data) {
    SizeVector src_2;
    for (size_t i = 0; i < src_dims.size(); i++)
        src_2.push_back(src_dims[i] + src_o_dms[i] - 2);

    parallel_nt(0, [&](const int ithr, const int nthr) {
        size_t start = 0, end = 0;
        SizeVector counters(dst_dims.size(), 0);
        splitter(work_amount, nthr, ithr, start, end);

        parallel_init(start, dst_dims.size(), counters, dst_dims);
        for (size_t iwork = start; iwork < end; ++iwork) {
            int srcIdx = 0;
            int dstIdx = 0;
            for (size_t i = 0; i < dstStrides.size(); ++i)
                dstIdx += counters[i] * dstStrides[i];

            // Mirror out-of-range coordinates around the source borders.
            for (size_t i = 0; i < srcStrides.size(); ++i) {
                int idx = (counters[i] < pads_begin[i]) ? (pads_begin[i] - counters[i]) :
                          ((counters[i] >= src_o_dms[i]) ? (src_2[i] - counters[i]) : (counters[i] - pads_begin[i]));
                srcIdx += idx * srcStrides[i];
            }

            dst_data[dstIdx] = src_data[srcIdx];
            parallel_step(dst_dims.size(), counters, dst_dims);
        }
    });
}
|
||||
|
||||
// SYMMETRIC mode: mirror the source around its borders INCLUDING the border
// element. A leading-pad coordinate c maps to pads_begin[i] - 1 - c; a
// trailing coordinate maps to src_2[i] - c, where
// src_2[i] = src_dims[i] + src_o_dms[i] - 1 is the mirror pivot (one larger
// than in reflect mode because the border is repeated).
void PadImpl::pad_symmetric(const float *src_data, float* dst_data) {
    SizeVector src_2;
    for (size_t i = 0; i < src_dims.size(); i++)
        src_2.push_back(src_dims[i] + src_o_dms[i] - 1);

    parallel_nt(0, [&](const int ithr, const int nthr) {
        size_t start = 0, end = 0;
        SizeVector counters(dst_dims.size(), 0);
        splitter(work_amount, nthr, ithr, start, end);

        parallel_init(start, dst_dims.size(), counters, dst_dims);
        for (size_t iwork = start; iwork < end; ++iwork) {
            int srcIdx = 0;
            int dstIdx = 0;
            for (size_t i = 0; i < dstStrides.size(); ++i)
                dstIdx += counters[i] * dstStrides[i];

            // Mirror out-of-range coordinates, border element included.
            for (size_t i = 0; i < srcStrides.size(); ++i) {
                int idx = (counters[i] < pads_begin[i]) ? (pads_begin[i] - 1 - counters[i]) :
                          ((counters[i] >= src_o_dms[i]) ? (src_2[i] - counters[i]) : (counters[i] - pads_begin[i]));
                srcIdx += idx * srcStrides[i];
            }

            dst_data[dstIdx] = src_data[srcIdx];
            parallel_step(dst_dims.size(), counters, dst_dims);
        }
    });
}
|
||||
|
||||
REG_FACTORY_FOR(PadImpl, Pad);
|
||||
|
||||
} // namespace Cpu
|
||||
} // namespace Extensions
|
||||
} // namespace InferenceEngine
|
@ -21,6 +21,7 @@ const std::vector<float> argPadValue = {0.f, 1.f, 2.f, -1.f};
|
||||
const std::vector<ngraph::helpers::PadMode> padMode = {
|
||||
ngraph::helpers::PadMode::EDGE,
|
||||
ngraph::helpers::PadMode::REFLECT,
|
||||
ngraph::helpers::PadMode::SYMMETRIC
|
||||
};
|
||||
|
||||
const auto pad2DConstparams = testing::Combine(
|
||||
|
@ -191,8 +191,8 @@ namespace ngraph
|
||||
}
|
||||
else
|
||||
{
|
||||
c[i] = static_cast<size_t>(padding_below[i] + src_dim +
|
||||
padding_above[i] - pos);
|
||||
c[i] = static_cast<size_t>(2 * (padding_below[i] + src_dim) -
|
||||
c[i] - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user