[CPU] PSROIPooling node enhancements (#3851)

- bf support for PSROIPooling
- nhwc, blocking formats support
- code refactor & performance improvements
- cpu specific tests
This commit is contained in:
Yury Gaydaychuk 2021-01-28 11:55:54 +03:00 committed by GitHub
parent 46f0775c09
commit a1422a49d7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 738 additions and 340 deletions

View File

@ -14,7 +14,7 @@ namespace MKLDNNPlugin {
class BF16Transformer {
const InferenceEngine::details::caseless_set<std::string> _initbf16 =
{ "convolution", "fullyconnected", "innerproduct", "gemm", "RegionYolo", "Interpolate" };
{ "convolution", "fullyconnected", "innerproduct", "gemm", "RegionYolo", "Interpolate", "PSROIPooling" };
const InferenceEngine::details::caseless_set<std::string> _complementbf16 =
{ "relu", "tanh", "elu", "square", "abs", "sqrt", "linear", "bounded_relu", "soft_relu", "normalize",
"sigmoid", "ReLU6", "not", "activation", "HSwish", "mish", "logistic", "mod", "resample",

View File

@ -6,8 +6,12 @@
#include <cmath>
#include <vector>
#include <string>
#include <algorithm>
#include <mkldnn_types.h>
#include "ie_parallel.hpp"
#include "utils/bfloat16.hpp"
#include <mkldnn_selective_build.h>
using namespace MKLDNNPlugin;
namespace InferenceEngine {
namespace Extensions {
@ -17,18 +21,18 @@ class PSROIPoolingImpl: public ExtLayerBase {
public:
explicit PSROIPoolingImpl(const CNNLayer* layer) {
try {
mode_ = layer->GetParamAsString("mode", "average");
if (mode_ != "bilinear_deformable")
mode = layer->GetParamAsString("mode", "average");
if (mode != "bilinear_deformable")
if (layer->insData.size() != 2 || layer->outData.size() != 1)
THROW_IE_EXCEPTION << "Incorrect number of input/output edges!";
// LayerSetUp
output_dim_ = static_cast<size_t>(layer->GetParamAsInt("output_dim"));
group_size_ = static_cast<size_t>(layer->GetParamAsInt("group_size"));
spatial_scale_ = layer->GetParamAsFloat("spatial_scale");
pooled_height_ = static_cast<size_t>(layer->GetParamAsInt("pooled_height", static_cast<int>(group_size_)));
pooled_width_ = static_cast<size_t>(layer->GetParamAsInt("pooled_width", static_cast<int>(group_size_)));
spatial_bins_x_ = static_cast<size_t>(layer->GetParamAsInt("spatial_bins_x", 1));
spatial_bins_y_ = static_cast<size_t>(layer->GetParamAsInt("spatial_bins_y", 1));
outputDim = static_cast<size_t>(layer->GetParamAsInt("output_dim"));
groupSize = static_cast<size_t>(layer->GetParamAsInt("group_size"));
spatialScale = layer->GetParamAsFloat("spatial_scale");
pooledHeight = static_cast<size_t>(layer->GetParamAsInt("pooled_height", static_cast<int>(groupSize)));
pooledWidth = static_cast<size_t>(layer->GetParamAsInt("pooled_width", static_cast<int>(groupSize)));
spatialBinsX = static_cast<size_t>(layer->GetParamAsInt("spatial_bins_x", 1));
spatialBinsY = static_cast<size_t>(layer->GetParamAsInt("spatial_bins_y", 1));
SizeVector inDims = layer->insData[0].lock()->getTensorDesc().getDims();
channels = static_cast<int>(inDims[1]);
@ -42,250 +46,460 @@ public:
nw = static_cast<int>(outDims[3]);
// for Deformable PSROIPolling
no_trans_ = layer->GetParamAsBool("no_trans", true);
part_size_ = layer->GetParamAsInt("part_size", 1);
trans_std_ = layer->GetParamAsFloat("trans_std", 1);
noTrans = layer->GetParamAsBool("no_trans", true);
partSize = layer->GetParamAsInt("part_size", 1);
transStd = layer->GetParamAsFloat("trans_std", 1);
if (no_trans_) {
addConfig(layer, {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN)});
auto supportedPrecision = (layer->insData[0].lock()->getTensorDesc().getPrecision() == Precision::BF16 ? Precision::BF16 : Precision::FP32);
std::vector<std::pair<Layout, Layout> > plainConfs{
{NCHW, NCHW},
{NHWC, NHWC}
};
std::vector<std::pair<ConfLayout, ConfLayout> > blockConfs {
{ConfLayout::BLK16, ConfLayout::BLK16},
{ConfLayout::BLK8, ConfLayout::BLK8}
};
if (mode != "bilinear_deformable") {
for (auto conf : plainConfs) {
LayerConfig config;
DataConfig inConfig0, inConfig1, inConfig2;
SizeVector propDims = layer->insData[1].lock()->getTensorDesc().getDims();
inConfig0.desc = TensorDesc(supportedPrecision, inDims, conf.first);
inConfig1.desc = TensorDesc(Precision::FP32, propDims, NC);
config.inConfs.push_back(inConfig0);
config.inConfs.push_back(inConfig1);
DataConfig outConfig;
outConfig.desc = TensorDesc(supportedPrecision, outDims, conf.second);
config.outConfs.push_back(outConfig);
confs.push_back(config);
}
for (auto conf : blockConfs) {
addConfig(layer, {DataConfigurator(conf.first, supportedPrecision),
DataConfigurator(ConfLayout::PLN, Precision::FP32)},
{DataConfigurator(conf.second, supportedPrecision)});
}
} else if (noTrans) {
addConfig(layer, {DataConfigurator(ConfLayout::PLN, supportedPrecision), DataConfigurator(ConfLayout::PLN, Precision::FP32)},
{DataConfigurator(ConfLayout::PLN, supportedPrecision)});
} else {
addConfig(layer, {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN),
DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN)});
addConfig(layer, {DataConfigurator(ConfLayout::PLN, supportedPrecision),
DataConfigurator(ConfLayout::PLN, Precision::FP32),
DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN, supportedPrecision)});
}
} catch (InferenceEngine::details::InferenceEngineException &ex) {
errorMsg = ex.what();
}
}
StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
ResponseDesc *resp) noexcept override {
float* dst_data = outputs[0]->buffer();
const float *bottom_data_beginning = inputs[0]->buffer();
const float *bottom_rois_beginning = inputs[1]->buffer();
struct PSROIPoolingContext {
PSROIPoolingImpl &node;
std::vector<Blob::Ptr>& inputs;
std::vector<Blob::Ptr>& outputs;
};
int real_rois = 0;
for (; real_rois < nn; real_rois++) {
const float *bottom_rois = bottom_rois_beginning + real_rois * 5;
int roi_batch_ind = static_cast<int>(bottom_rois[0]);
if (roi_batch_ind == -1) {
template<typename T>
struct PSROIPoolingExecute {
using srcT = typename std::tuple_element<0, T>::type;
using dstT = typename std::tuple_element<1, T>::type;
void operator()(PSROIPoolingContext & ctx) {
ctx.node.executeSpecified<srcT, dstT>(ctx.inputs, ctx.outputs);
}
};
static void unpackParams(const TensorDesc& srcDesc, const TensorDesc& dstDesc,
int& hInputStride, int& wInputStride,
int& hOutputStride, int& wOutputStride,
Layout& inFmt, Layout& outFmt,
int& inBlockSize, int& outBlockSize,
int& outBlockCount,
unsigned long& inputChannelsPadding, unsigned long& outputChannelsPadding) {
inFmt = srcDesc.getLayout();
outFmt = dstDesc.getLayout();
int expectedInBlockDimsSize = (inFmt == Layout::BLOCKED ? 5 : 4);
int expectedOutBlockDimsSize = (outFmt == Layout::BLOCKED ? 5 : 4);
auto inBlkDims = srcDesc.getBlockingDesc().getBlockDims();
auto outBlkDims = dstDesc.getBlockingDesc().getBlockDims();
if (inBlkDims.size() != expectedInBlockDimsSize)
THROW_IE_EXCEPTION << "Unexpected size of blocking dims in input (given " << inBlkDims.size() << ", expected " << expectedInBlockDimsSize << ")";
if (outBlkDims.size() != expectedOutBlockDimsSize)
THROW_IE_EXCEPTION << "Unexpected size of blocking dims in output (given " << outBlkDims.size() << ", expected " << expectedOutBlockDimsSize << ")";
inBlockSize = (inFmt == Layout::BLOCKED ? srcDesc.getBlockingDesc().getBlockDims()[4] : 1);
outBlockSize = (outFmt == Layout::BLOCKED ? dstDesc.getBlockingDesc().getBlockDims()[4] : 1);
inputChannelsPadding = srcDesc.getBlockingDesc().getBlockDims()[1] * inBlockSize;
outputChannelsPadding = dstDesc.getBlockingDesc().getBlockDims()[1] * outBlockSize;
outBlockCount = outputChannelsPadding / outBlockSize;
int hOutStrIndex = 0, wOutStrIndex = 0, hInStrIndex = 0, wInStrIndex = 0;
const auto& outOrder = dstDesc.getBlockingDesc().getOrder();
const auto& inOrder = srcDesc.getBlockingDesc().getOrder();
for (int i = 0; i < outOrder.size(); i++) {
if (outOrder[i] == 2) hOutStrIndex = i;
if (outOrder[i] == 3) wOutStrIndex = i;
}
for (int i = 0; i < inOrder.size(); i++) {
if (inOrder[i] == 2) hInStrIndex = i;
if (inOrder[i] == 3) wInStrIndex = i;
}
hInputStride = srcDesc.getBlockingDesc().getStrides()[hInStrIndex];
wInputStride = srcDesc.getBlockingDesc().getStrides()[wInStrIndex];
hOutputStride = dstDesc.getBlockingDesc().getStrides()[hOutStrIndex];
wOutputStride = dstDesc.getBlockingDesc().getStrides()[wOutStrIndex];
}
template <typename inputType, typename outputType>
void executeAverage(const inputType *srcData, outputType *dstData, const float *bottomRois,
const int n, const int roiBatchInd,
const TensorDesc& srcDesc, const TensorDesc& dstDesc) {
Layout inFmt, outFmt;
int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride;
unsigned long inputChannelsPadding, outputChannelsPadding;
unpackParams(srcDesc, dstDesc, hInputStride, wInputStride, hOutputStride, wOutputStride,
inFmt, outFmt, inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding);
const float roiStartW = static_cast<float>(round(bottomRois[1])) * spatialScale;
const float roiStartH = static_cast<float>(round(bottomRois[2])) * spatialScale;
const float roiEndW = static_cast<float>(round(bottomRois[3] + 1.0f)) * spatialScale;
const float roiEndH = static_cast<float>(round(bottomRois[4] + 1.0f)) * spatialScale;
// Force too small ROIs to be 1x1
const float roiWidth = std::max<float>(roiEndW - roiStartW, 0.1f); // avoid 0
const float roiHeight = std::max<float>(roiEndH - roiStartH, 0.1f);
auto avgPsroi = [&] (int c, int h, int w, int binOffIn, int binOffOut, int inBlkRes, int outBlkRes) {
float binSizeH = roiHeight / static_cast<float>(pooledHeight);
float binSizeW = roiWidth / static_cast<float>(pooledWidth);
int hStart = static_cast<int>(floor(static_cast<float>(h + 0) * binSizeH + roiStartH));
int hEnd = static_cast<int>(ceil(static_cast<float>(h + 1) * binSizeH + roiStartH));
hStart = std::min<int>(std::max<int>(hStart, 0), height);
hEnd = std::min<int>(std::max<int>(hEnd, 0), height);
int wStart = static_cast<int>(floor(static_cast<float>(w + 0) * binSizeW + roiStartW));
int wEnd = static_cast<int>(ceil(static_cast<float>(w + 1) * binSizeW + roiStartW));
wStart = std::min<int>(std::max<int>(wStart, 0), width);
wEnd = std::min<int>(std::max<int>(wEnd, 0), width);
const float binArea = static_cast<float>((hEnd - hStart) * (wEnd - wStart));
size_t dstIndex = binOffOut + h * hOutputStride + w * wOutputStride + outBlkRes;
dstData[dstIndex] = 0;
if (binArea) {
float outSum = 0.0f;
const int heightIndexBound = hEnd * hInputStride;
const int widthIndexBound = wEnd * wInputStride;
for (int hh = hStart * hInputStride; hh < heightIndexBound; hh += hInputStride) {
for (int ww = wStart * wInputStride; ww < widthIndexBound; ww += wInputStride) {
outSum += srcData[binOffIn + hh + ww + inBlkRes];
}
}
dstData[dstIndex] = outSum / binArea;
}
};
if (inFmt == Layout::NHWC) {
parallel_for2d(nh, nw, [&](int h, int w) {
const int binOffsetOutput = n * nc * nh * nw;
const int binOffsetInput = roiBatchInd * channels * height * width;
for (int c = 0; c < nc; c++) {
const int gc = (c * groupSize + h) * groupSize + w;
avgPsroi(c, h, w, 0, 0, binOffsetInput + gc, binOffsetOutput + c);
}
});
} else if (inFmt == Layout::NCHW) {
parallel_for3d(nc, nh, nw, [&](int c, int h, int w) {
const int gc = (c * groupSize + h) * groupSize + w;
const int outputBlockResidual = (outFmt == Layout::NCHW ? 0 : c % inBlockSize);
const int outputBlockIdx = (c / outBlockSize) * outBlockSize;
const int binOffsetInput = (roiBatchInd * inputChannelsPadding + gc) * height * width;
const int binOffsetOutput = (n * outputChannelsPadding + outputBlockIdx) * nh * nw;
avgPsroi(c, h, w, 0, outputBlockResidual, binOffsetInput, binOffsetOutput);
});
} else { // nChw16c, nChw8c
parallel_for3d(outBlockCount, nh, nw, [&](int blkIdx, int h, int w) {
int cStart = blkIdx * outBlockSize;
int cEnd = (blkIdx == outBlockCount - 1 ? nc : cStart + outBlockSize);
for (int c = cStart; c < cEnd; c++) {
const int gc = (c * groupSize + h) * groupSize + w;
const int inputBlockResidual = (inFmt == Layout::NCHW ? 0 : gc % inBlockSize);
const int outputBlockResidual = (outFmt == Layout::NCHW ? 0 : c % inBlockSize);
const int inputBlockIdx = (gc / inBlockSize) * inBlockSize;
const int outputBlockIdx = (c / outBlockSize) * outBlockSize;
const int binOffsetInput = (roiBatchInd * inputChannelsPadding + inputBlockIdx) * height * width;
const int binOffsetOutput = (n * outputChannelsPadding + outputBlockIdx) * nh * nw;
avgPsroi(c, h, w, inputBlockResidual, outputBlockResidual, binOffsetInput, binOffsetOutput);
}
});
}
}
template <typename inputType, typename outputType>
void executeBilinear(const inputType *srcData, outputType *dstData, const float *bottomRois,
const int currentRoi, const int roiBatchInd,
const TensorDesc& srcDesc, const TensorDesc& dstDesc) {
Layout inFmt, outFmt;
int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride;
unsigned long inputChannelsPadding, outputChannelsPadding;
unpackParams(srcDesc, dstDesc, hInputStride, wInputStride, hOutputStride, wOutputStride,
inFmt, outFmt, inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding);
const float roiStartW = bottomRois[1] * spatialScale;
const float roiStartH = bottomRois[2] * spatialScale;
const float roiEndW = bottomRois[3] * spatialScale;
const float roiEndH = bottomRois[4] * spatialScale;
const float roiWidth = roiEndW - roiStartW;
const float roiHeight = roiEndH - roiStartH;
size_t numBins = spatialBinsX * spatialBinsY;
const int binCount = nh * nw;
auto bilinearPsroi = [&] (int c, int h, int w, int binOffOut, int outBlkRes) {
float accum = 0.0f;
int binOffIn, inBlkRes;
size_t dstIndex = binOffOut + h * hOutputStride + w * wOutputStride + outBlkRes;
dstData[dstIndex] = 0;
for (size_t binY = 0; binY < spatialBinsY; binY++) {
const float boxYmin = roiStartH + (binY + 0) * (roiHeight / spatialBinsY);
const float boxYmax = roiStartH + (binY + 1) * (roiHeight / spatialBinsY);
const float heightScale = nh > 1 ? (boxYmax - boxYmin) * (height - 1) / (pooledHeight - 1) : 0.0f;
const float inY = nh > 1 ? (h * heightScale + boxYmin * (height - 1)) : 0.5f * (boxYmin + boxYmax) * (height - 1);
for (size_t binX = 0; binX < spatialBinsX; binX++) {
size_t gc = c + (binY * spatialBinsX + binX) * nc;
if (inFmt == Layout::NHWC) {
binOffIn = roiBatchInd * channels * height * width + gc;
inBlkRes = 0;
} else { // nchw, nChw16c, nChw8c
const int inputBlockIdx = (gc / inBlockSize) * inBlockSize;
binOffIn = (roiBatchInd * inputChannelsPadding + inputBlockIdx) * height * width;
inBlkRes = (inFmt == Layout::BLOCKED ? gc % inBlockSize : 0);
}
const auto *bottomData = srcData + binOffIn;
const float boxXmin = roiStartW + (binX + 0) * (roiWidth / spatialBinsX);
const float boxXmax = roiStartW + (binX + 1) * (roiWidth / spatialBinsX);
const float widthScale = nw > 1 ? (boxXmax - boxXmin) * (width - 1) / (pooledWidth - 1) : 0.0f;
const float inX = nw > 1 ? (w * widthScale + boxXmin * (width - 1)) : 0.5f * (boxXmin + boxXmax) * (width - 1);
if (!(inY < 0 || inY > height - 1 || inX < 0 || inX > width - 1)) {
const int topYIndex = static_cast<int>(floorf(inY));
int bottomYIndex = static_cast<int>(ceilf(inY));
const int leftXIndex = static_cast<int>(floorf(inX));
int rightXIndex = static_cast<int>(ceilf(inX));
if (rightXIndex > width - 1) rightXIndex = width - 1;
if (bottomYIndex > height - 1) bottomYIndex = height - 1;
auto topLeftIndex = topYIndex * hInputStride + leftXIndex * wInputStride + inBlkRes;
auto topRightIndex = topYIndex * hInputStride + rightXIndex * wInputStride + inBlkRes;
auto bottomLeftIndex = bottomYIndex * hInputStride + leftXIndex * wInputStride + inBlkRes;
auto bottomRightIndex = bottomYIndex * hInputStride + rightXIndex * wInputStride + inBlkRes;
const float topLeft = bottomData[topLeftIndex];
const float topRight = bottomData[topRightIndex];
const float bottomLeft = bottomData[bottomLeftIndex];
const float bottomRight = bottomData[bottomRightIndex];
const float top = topLeft + (topRight - topLeft) * (inX - leftXIndex);
const float bottom = bottomLeft + (bottomRight - bottomLeft) * (inX - leftXIndex);
accum += top + (bottom - top) * (inY - topYIndex);
}
}
}
accum /= numBins;
dstData[dstIndex] = accum;
};
if (inFmt == Layout::NHWC) {
const int binOffsetOutput = currentRoi * nc * nh * nw;
parallel_for2d(nh, nw, [&](int h, int w) {
for (int c = 0; c < nc; c++) {
bilinearPsroi(c, h, w, 0, binOffsetOutput + c);
}
});
} else if (inFmt == Layout::NCHW) {
parallel_for3d(nc, nh, nw, [&](int c, int h, int w) {
bilinearPsroi(c, h, w, 0, (currentRoi * outputChannelsPadding + c) * binCount);
});
} else { // nChw16c, nChw8c
parallel_for3d(outBlockCount, nh, nw, [&](int blkIdx, int h, int w) {
int cStart = blkIdx * outBlockSize;
int cEnd = (blkIdx == outBlockCount - 1 ? nc : cStart + outBlockSize);
for (int c = cStart; c < cEnd; c++) {
const int outputBlockIdx = (c / inBlockSize) * inBlockSize;
const int binOffsetOutput = (currentRoi * outputChannelsPadding + outputBlockIdx) * binCount;
const int outputBlockResidual = (inFmt == Layout::BLOCKED ? c % inBlockSize : 0);
bilinearPsroi(c, h, w, outputBlockResidual, binOffsetOutput);
}
});
}
}
template <typename inputType, typename outputType>
void executeBilinearDeformable(const inputType *srcData, outputType *dstData, const float *bottomRois,
const float *bottomTrans, const int numClasses, const int channelsEachClass,
const int currentRoi, const int roiBatchInd) {
const float roiStartW = static_cast<float>(round(bottomRois[1])) * spatialScale - 0.5f;
const float roiStartH = static_cast<float>(round(bottomRois[2])) * spatialScale - 0.5f;
const float roiEndW = static_cast<float>(round(bottomRois[3]) + 1.0f) * spatialScale - 0.5f;
const float roiEndH = static_cast<float>(round(bottomRois[4]) + 1.0f) * spatialScale - 0.5f;
// Force too small ROIs to be 1x1
const float roiWidth = std::max<float>(roiEndW - roiStartW, 0.1f); // avoid 0
const float roiHeight = std::max<float>(roiEndH - roiStartH, 0.1f);
parallel_for3d(nc, nh, nw, [&](int c, int h, int w) {
size_t dstIndex = ((currentRoi * nc + c) * nh + h) * nw + w;
dstData[dstIndex] = 0;
// Compute w and h at bottom
float binSizeH = roiHeight / static_cast<float>(pooledHeight);
float binSizeW = roiWidth / static_cast<float>(pooledWidth);
float subBinSizeH = binSizeH / static_cast<float>(spatialBinsX);
float subBinSizeW = binSizeW / static_cast<float>(spatialBinsY);
int partH = h * partSize / pooledHeight;
int partW = w * partSize / pooledWidth;
int classId = c / channelsEachClass;
float transX = noTrans ? 0 :
bottomTrans[(((currentRoi * numClasses + classId) * 2) * partSize + partH)
* partSize + partW] * transStd;
float transY = noTrans ? 0 :
bottomTrans[(((currentRoi * numClasses + classId) * 2 + 1) * partSize + partH)
* partSize + partW] * transStd;
float wStart = w * binSizeW + roiStartW + transX * roiWidth;
float hStart = h * binSizeH + roiStartH + transY * roiHeight;
float sum = 0;
int count = 0;
int gw = w * groupSize / pooledWidth;
int gh = h * groupSize / pooledHeight;
gw = (std::min)((std::max)(gw, 0), static_cast<int>(groupSize - 1));
gh = (std::min)((std::max)(gh, 0), static_cast<int>(groupSize - 1));
const inputType* offsetBottomData = srcData + (roiBatchInd * channels) * height * width;
for (size_t ih = 0; ih < spatialBinsY; ih++) {
for (size_t iw = 0; iw < spatialBinsX; iw++) {
float w1 = wStart + iw * subBinSizeW;
float h1 = hStart + ih * subBinSizeH;
// bilinear interpolation
if (w1 < -0.5 || w1 > width - 0.5 || h1 < -0.5 || h1 > height - 0.5)
continue;
w1 = static_cast<float>((std::min)((std::max)(static_cast<double>(w1), 0.0), width - 1.0));
h1 = static_cast<float>((std::min)((std::max)(static_cast<double>(h1), 0.0), height - 1.0));
int c1 = static_cast<int>((c * groupSize + gh) * groupSize + gw);
float val = bilinearInterp<inputType>(offsetBottomData +
c1 * height * width, w1, h1, width);
sum += val;
count++;
}
}
dstData[dstIndex] = count == 0 ? 0 : sum / count;
});
}
template <typename inputType, typename outputType>
void executeSpecified(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs) {
const auto *srcData = inputs[0]->cbuffer().as<const inputType*>() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
const float *bottomRoisBeginning = inputs[1]->cbuffer().as<const float*>() + inputs[1]->getTensorDesc().getBlockingDesc().getOffsetPadding();
auto *dstData = outputs[0]->buffer().as<outputType*>() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
auto srcDesc = inputs[0]->getTensorDesc();
auto dstDesc = outputs[0]->getTensorDesc();
int realRois = 0;
for (; realRois < nn; realRois++) {
int roiBatchInd = static_cast<int>(bottomRoisBeginning[realRois * 5]);
if (roiBatchInd == -1) {
break;
}
}
// for Deformable PSROIPooling
float *bottom_trans = nullptr;
int num_classes = 1;
int channels_each_class = output_dim_;
if (!no_trans_) {
bottom_trans = inputs[2]->buffer();
num_classes = static_cast<int>(inputs[2]->getTensorDesc().getDims()[1]) / 2;
channels_each_class /= num_classes;
float *bottomTrans = nullptr;
int numClasses = 1;
int channelsEachClass = outputDim;
if (!noTrans) {
bottomTrans = inputs[2]->cbuffer().as<float*>() + inputs[2]->getTensorDesc().getBlockingDesc().getOffsetPadding();
numClasses = static_cast<int>(inputs[2]->getTensorDesc().getDims()[1]) / 2;
channelsEachClass /= numClasses;
}
size_t num_bins = spatial_bins_x_*spatial_bins_y_;
parallel_for(real_rois, [&](int n) {
const float* bottom_rois = bottom_rois_beginning + n * 5;
int roi_batch_ind = static_cast<int>(bottom_rois[0]);
float roi_start_w = 0.0f;
float roi_start_h = 0.0f;
float roi_end_w = 0.0f;
float roi_end_h = 0.0f;
float roi_width = 0.0f;
float roi_height = 0.0f;
if (mode_ == "bilinear") {
roi_start_w = bottom_rois[1] * spatial_scale_;
roi_start_h = bottom_rois[2] * spatial_scale_;
roi_end_w = bottom_rois[3] * spatial_scale_;
roi_end_h = bottom_rois[4] * spatial_scale_;
roi_width = roi_end_w - roi_start_w;
roi_height = roi_end_h - roi_start_h;
} else if (mode_ == "average") {
roi_start_w = static_cast<float>(round(bottom_rois[1])) * spatial_scale_;
roi_start_h = static_cast<float>(round(bottom_rois[2])) * spatial_scale_;
roi_end_w = static_cast<float>(round(bottom_rois[3]) + 1.0f) * spatial_scale_;
roi_end_h = static_cast<float>(round(bottom_rois[4]) + 1.0f) * spatial_scale_;
// Force too small ROIs to be 1x1
roi_width = std::max<float>(roi_end_w - roi_start_w, 0.1f); // avoid 0
roi_height = std::max<float>(roi_end_h - roi_start_h, 0.1f);
} else if (mode_ == "bilinear_deformable") {
roi_start_w = static_cast<float>(round(bottom_rois[1])) * spatial_scale_ - 0.5f;
roi_start_h = static_cast<float>(round(bottom_rois[2])) * spatial_scale_ - 0.5f;
roi_end_w = static_cast<float>(round(bottom_rois[3]) + 1.0f) * spatial_scale_ - 0.5f;
roi_end_h = static_cast<float>(round(bottom_rois[4]) + 1.0f) * spatial_scale_ - 0.5f;
// Force too small ROIs to be 1x1
roi_width = std::max<float>(roi_end_w - roi_start_w, 0.1f); // avoid 0
roi_height = std::max<float>(roi_end_h - roi_start_h, 0.1f);
}
for (int c = 0; c < nc; c++) {
for (int h = 0; h < nh; h++) {
for (int w = 0; w < nw; w++) {
size_t index = n*nc*nh*nw + c*nh*nw + h*nw + w;
dst_data[index] = 0.0f;
if (mode_ == "average") {
float bin_size_h = roi_height / static_cast<float>(pooled_height_);
float bin_size_w = roi_width / static_cast<float>(pooled_width_);
int hstart = static_cast<int>(floor(static_cast<float>(h + 0) * bin_size_h + roi_start_h));
int hend = static_cast<int>(ceil(static_cast<float>(h + 1) * bin_size_h + roi_start_h));
hstart = std::min<int>(std::max<int>(hstart, 0), height);
hend = std::min<int>(std::max<int>(hend, 0), height);
int wstart = static_cast<int>(floor(static_cast<float>(w + 0) * bin_size_w + roi_start_w));
int wend = static_cast<int>(ceil(static_cast<float>(w + 1) * bin_size_w + roi_start_w));
wstart = std::min<int>(std::max<int>(wstart, 0), width);
wend = std::min<int>(std::max<int>(wend, 0), width);
float bin_area = static_cast<float>((hend - hstart) * (wend - wstart));
if (bin_area) {
int gc = (c * group_size_ + h) * group_size_ + w;
const float *bottom_data =
bottom_data_beginning + ((roi_batch_ind * channels + gc) * height * width);
float out_sum = 0.0f;
for (int hh = hstart; hh < hend; ++hh)
for (int ww = wstart; ww < wend; ++ww)
out_sum += bottom_data[hh * width + ww];
dst_data[index] = out_sum / bin_area;
}
} else if (mode_ == "bilinear") {
for (size_t bin_y = 0; bin_y < spatial_bins_y_; bin_y++) {
for (size_t bin_x = 0; bin_x < spatial_bins_x_; bin_x++) {
float box_xmin = roi_start_w + (bin_x + 0) * (roi_width / spatial_bins_x_);
float box_xmax = roi_start_w + (bin_x + 1) * (roi_width / spatial_bins_x_);
float box_ymin = roi_start_h + (bin_y + 0) * (roi_height / spatial_bins_y_);
float box_ymax = roi_start_h + (bin_y + 1) * (roi_height / spatial_bins_y_);
size_t gc = c + (bin_y*spatial_bins_x_ + bin_x)*nc;
size_t src_idx = (roi_batch_ind * channels + gc) * height * width;
const float *bottom_data = bottom_data_beginning + src_idx;
float height_scale = nh > 1 ? (box_ymax - box_ymin) * (height - 1) / (pooled_height_ - 1)
: 0.0f;
float width_scale = nw > 1 ? (box_xmax - box_xmin) * (width - 1) / (pooled_width_ - 1)
: 0.0f;
float in_y = nh > 1 ? (h * height_scale + box_ymin * (height - 1))
: 0.5f * (box_ymin + box_ymax) * (height - 1);
float in_x = nw > 1 ? (w * width_scale + box_xmin * (width - 1))
: 0.5f * (box_xmin + box_xmax) * (width - 1);
if (!(in_y < 0 || in_y > height - 1 || in_x < 0 || in_x > width - 1)) {
int top_y_index = static_cast<int>(floorf(in_y));
int bottom_y_index = static_cast<int>(ceilf(in_y));
int left_x_index = static_cast<int>(floorf(in_x));
int right_x_index = static_cast<int>(ceilf(in_x));
if (right_x_index > width - 1)
right_x_index = width - 1;
if (bottom_y_index > height - 1)
bottom_y_index = height - 1;
const float top_left = bottom_data[top_y_index * width + left_x_index];
const float top_right = bottom_data[top_y_index * width + right_x_index];
const float bottom_left = bottom_data[bottom_y_index * width + left_x_index];
const float bottom_right = bottom_data[bottom_y_index * width + right_x_index];
const float top = top_left + (top_right - top_left) * (in_x - left_x_index);
const float bottom = bottom_left + (bottom_right - bottom_left) * (in_x - left_x_index);
dst_data[index] += top + (bottom - top) * (in_y - top_y_index);
}
}
}
dst_data[index] /= num_bins;
} else if (mode_ == "bilinear_deformable") {
// Compute w and h at bottom
float bin_size_h = roi_height / static_cast<float>(pooled_height_);
float bin_size_w = roi_width / static_cast<float>(pooled_width_);
float sub_bin_size_h = bin_size_h / static_cast<float>(spatial_bins_x_);
float sub_bin_size_w = bin_size_w / static_cast<float>(spatial_bins_y_);
int part_h = h * part_size_ / pooled_height_;
int part_w = w * part_size_ / pooled_width_;
int class_id = c / channels_each_class;
float trans_x = no_trans_ ? 0 :
bottom_trans[(((n * num_classes + class_id) * 2) * part_size_ + part_h)
* part_size_ + part_w] * trans_std_;
float trans_y = no_trans_ ? 0 :
bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size_ + part_h)
* part_size_ + part_w] * trans_std_;
float wstart = w * bin_size_w + roi_start_w + trans_x * roi_width;
float hstart = h * bin_size_h + roi_start_h + trans_y * roi_height;
float sum = 0;
int count = 0;
int gw = w * group_size_ / pooled_width_;
int gh = h * group_size_ / pooled_height_;
gw = (std::min)((std::max)(gw, 0), static_cast<int>(group_size_ - 1));
gh = (std::min)((std::max)(gh, 0), static_cast<int>(group_size_ - 1));
const float* offset_bottom_data = bottom_data_beginning + (roi_batch_ind * channels) * height * width;
for (size_t ih = 0; ih < spatial_bins_y_; ih++) {
for (size_t iw = 0; iw < spatial_bins_x_; iw++) {
float w1 = wstart + iw * sub_bin_size_w;
float h1 = hstart + ih * sub_bin_size_h;
// bilinear interpolation
if (w1 < -0.5 || w1 > width - 0.5 || h1 < -0.5 || h1 > height - 0.5)
continue;
w1 = static_cast<float>((std::min)((std::max)(static_cast<double>(w1), 0.0), width - 1.0));
h1 = static_cast<float>((std::min)((std::max)(static_cast<double>(h1), 0.0), height - 1.0));
int c1 = static_cast<int>((c * group_size_ + gh) * group_size_ + gw);
float val = bilinear_interp(offset_bottom_data + c1 * height * width, w1, h1, width);
sum += val;
count++;
}
}
dst_data[index] = count == 0 ? 0 : sum / count;
}
}
}
parallel_for(realRois, [&](int currentRoi) {
const float *bottomRois = bottomRoisBeginning + currentRoi * 5;
int roiBatchInd = static_cast<int>(bottomRois[0]);
if (mode == "average") {
executeAverage(srcData, dstData, bottomRois, currentRoi, roiBatchInd, srcDesc, dstDesc);
} else if (mode == "bilinear") {
executeBilinear(srcData, dstData, bottomRois, currentRoi, roiBatchInd, srcDesc, dstDesc);
} else if (mode == "bilinear_deformable") {
executeBilinearDeformable(srcData, dstData, bottomRois, bottomTrans,
numClasses, channelsEachClass, currentRoi, roiBatchInd);
}
});
for (int n = real_rois; n < nn; n++) {
parallel_for3d(nc, nh, nw, [&](int c, int h, int w) {
int index = n * nc * nh * nw + c * nh * nw + h * nw + w;
dst_data[index] = 0.0f;
});
}
return OK;
memset(dstData + realRois * nc * nh * nw, 0, (nn - realRois) * nc * nh * nw * sizeof(outputType));
}
inline float bilinear_interp(const float* data, const float x, const float y, const int width) {
StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
try {
auto inputPrec = inputs[0]->getTensorDesc().getPrecision();
auto outputPrec = outputs[0]->getTensorDesc().getPrecision();
if (!((inputPrec == Precision::BF16 && outputPrec == Precision::BF16) ||
(inputPrec == Precision::FP32 && outputPrec == Precision::FP32)))
return NOT_IMPLEMENTED;
PSROIPoolingContext ctx = {
*this,
inputs,
outputs
};
OV_SWITCH(MKLDNNPlugin, PSROIPoolingExecute, ctx, std::tie(inputPrec, outputPrec),
OV_CASE2(Precision::FP32, Precision::FP32, float, float),
OV_CASE2(Precision::BF16, Precision::BF16, bfloat16_t, bfloat16_t))
return OK;
}
catch (const std::exception& excp) {
snprintf(resp->msg, sizeof(resp->msg), "%s", excp.what());
return GENERAL_ERROR;
}
catch(...) {
return GENERAL_ERROR;
}
}
template <typename inputType>
inline float bilinearInterp(const inputType* data, const float x, const float y, const int width_) {
int x1 = static_cast<int>(std::floor(x));
int x2 = static_cast<int>(std::ceil(x));
int y1 = static_cast<int>(std::floor(y));
int y2 = static_cast<int>(std::ceil(y));
float dist_x = x - x1;
float dist_y = y - y1;
float value11 = data[y1 * width + x1];
float value12 = data[y2 * width + x1];
float value21 = data[y1 * width + x2];
float value22 = data[y2 * width + x2];
float value = (1 - dist_x) * (1 - dist_y) * value11 + (1 - dist_x) * dist_y * value12
+ dist_x * (1 - dist_y) * value21 + dist_x * dist_y * value22;
float distX = x - x1;
float distY = y - y1;
float value11 = data[y1 * width_ + x1];
float value12 = data[y2 * width_ + x1];
float value21 = data[y1 * width_ + x2];
float value22 = data[y2 * width_ + x2];
float value = (1 - distX) * (1 - distY) * value11 + (1 - distX) * distY * value12
+ distX * (1 - distY) * value21 + distX * distY * value22;
return value;
}
private:
size_t output_dim_ = 0;
size_t group_size_ = 0;
float spatial_scale_ = 0;
size_t pooled_height_ = 0;
size_t pooled_width_ = 0;
size_t spatial_bins_x_ = 0;
size_t spatial_bins_y_ = 0;
std::string mode_ = "";
size_t outputDim = 0;
size_t groupSize = 0;
float spatialScale = 0;
size_t pooledHeight = 0;
size_t pooledWidth = 0;
size_t spatialBinsX = 0;
size_t spatialBinsY = 0;
std::string mode = "";
int channels = 0;
int height = 0;
@ -297,9 +511,9 @@ private:
int nw = 0;
// for Deformable PSROIPolling
bool no_trans_;
int part_size_;
float trans_std_;
bool noTrans;
int partSize;
float transStd;
};
REG_FACTORY_FOR(PSROIPoolingImpl, PSROIPooling);

View File

@ -0,0 +1,184 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils/cpu_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
namespace {
std::vector<float> proposal;
std::vector<size_t> featureMapShape;
size_t spatialBinsX;
size_t spatialBinsY;
float spatialScale;
size_t groupSize;
size_t outputDim;
std::string mode;
} // namespace
typedef std::tuple<
std::vector<size_t>, // feature map shape
std::vector<float>, // coords shape
size_t, // output_dim
size_t, // group_size
float, // Spatial scale
size_t, // spatial_bins_x
size_t, // spatial_bins_y
std::string // mode
> PSROIPoolingSpecificParams;
typedef std::tuple<
PSROIPoolingSpecificParams,
InferenceEngine::Precision, // Net precision
LayerTestsUtils::TargetDevice // Device name
> PSROIPoolingLayerTestParams;
typedef std::tuple<
CPULayerTestsDefinitions::PSROIPoolingLayerTestParams,
CPUSpecificParams> PSROIPoolingLayerCPUTestParamsSet;
class PSROIPoolingLayerCPUTest : public testing::WithParamInterface<PSROIPoolingLayerCPUTestParamsSet>,
virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
public:
static std::string getTestCaseName(testing::TestParamInfo<PSROIPoolingLayerCPUTestParamsSet> obj) {
CPULayerTestsDefinitions::PSROIPoolingLayerTestParams basicParamsSet;
CPUSpecificParams cpuParams;
std::tie(basicParamsSet, cpuParams) = obj.param;
std::string td;
Precision netPr;
PSROIPoolingSpecificParams psroiPar;
std::tie(psroiPar, netPr, td) = basicParamsSet;
std::tie(featureMapShape, proposal, outputDim, groupSize,
spatialScale, spatialBinsX, spatialBinsY, mode) = psroiPar;
std::ostringstream result;
result << "PSROIPoolingTest_";
result << std::to_string(obj.index) << "_";
result << "binsX=" << spatialBinsX << "_";
result << "binsY=" << spatialBinsY << "_";
result << "spatialScale=" << spatialScale << "_";
result << "outputD=" << outputDim << "_";
result << "groupS=" << groupSize << "_";
result << netPr.name() << "_";
result << mode << "_";
result << CPUTestsBase::getTestCaseName(cpuParams);
return result.str();
}
protected:
void SetUp() override {
CPULayerTestsDefinitions::PSROIPoolingLayerTestParams basicParamsSet;
CPUSpecificParams cpuParams;
std::tie(basicParamsSet, cpuParams) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
CPULayerTestsDefinitions::PSROIPoolingSpecificParams psroiPoolingParams;
auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
std::tie(psroiPoolingParams, netPrecision, targetDevice) = basicParamsSet;
inPrc = outPrc = netPrecision;
std::tie(featureMapShape, proposal, outputDim, groupSize,
spatialScale, spatialBinsX, spatialBinsY, mode) = psroiPoolingParams;
ngraph::Shape proposalShape = { proposal.size() / 5, 5 };
auto coords = ngraph::builder::makeConstant<float>(ngraph::element::f32, proposalShape, proposal);
auto params = ngraph::builder::makeParams(ngraph::element::f32, {featureMapShape});
auto psroi = std::make_shared<ngraph::op::v0::PSROIPooling>(params[0], coords, outputDim, groupSize,
spatialScale, spatialBinsX, spatialBinsY, mode);
psroi->get_rt_info() = getCPUInfo();
selectedType = std::string("unknown_") + inPrc.name();
threshold = 0.001f;
const ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(psroi)};
function = std::make_shared<ngraph::Function>(results, params, "PSROIPooling");
}
};
TEST_P(PSROIPoolingLayerCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
CheckPluginRelatedResults(executableNetwork, "PSROIPooling");
}
namespace {
/* CPU PARAMS */
std::vector<CPUSpecificParams> resCPUParams {
CPUSpecificParams{{nchw, nc}, {nchw}, {}, {}},
CPUSpecificParams{{nhwc, nc}, {nhwc}, {}, {}},
CPUSpecificParams{{nChw16c, nc}, {nChw16c}, {}, {}}
};
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::BF16
};
const std::vector<float> spatialScaleVector = { 1.0f };
const std::vector<std::vector<size_t>> inputShapeVector = {
SizeVector({ 2, 200, 20, 20 }),
SizeVector({ 2, 200, 20, 16 }),
SizeVector({ 2, 200, 16, 20 }),
SizeVector({ 3, 200, 16, 16 })
};
const std::vector<std::vector<float>> averagePropVector = {
{ 0, 0.9, 0.9, 18.9, 18.9,
1, 0.9, 0.9, 18.9, 18.9 },
{ 1, 1, 1, 15, 15 }
};
const std::vector<std::vector<float>> bilinearPropVector = {
{ 0, 0.1, 0.1, 0.9, 0.9,
1, 0.1, 0.1, 0.9, 0.9 },
{ 1, 0.1, 0.1, 0.9, 0.9 }
};
const auto psroiPoolingAverageParams = ::testing::Combine(
::testing::ValuesIn(inputShapeVector),
::testing::ValuesIn(averagePropVector),
::testing::Values(50),
::testing::Values(2),
::testing::ValuesIn(spatialScaleVector),
::testing::Values(1),
::testing::Values(1),
::testing::Values("average")
);
const auto psroiPoolingBilinearParams = ::testing::Combine(
::testing::Values(std::vector<size_t>{3, 32, 20, 20}),
::testing::ValuesIn(bilinearPropVector),
::testing::Values(4),
::testing::Values(3),
::testing::ValuesIn(spatialScaleVector),
::testing::Values(4),
::testing::Values(2),
::testing::Values("bilinear")
);
INSTANTIATE_TEST_CASE_P(smoke_PSROIPoolingAverageLayoutTest, PSROIPoolingLayerCPUTest,
::testing::Combine(
::testing::Combine(
psroiPoolingAverageParams,
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUSpecificParams(resCPUParams))),
PSROIPoolingLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_PSROIPoolingBilinearLayoutTest, PSROIPoolingLayerCPUTest,
::testing::Combine(
::testing::Combine(
psroiPoolingBilinearParams,
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUSpecificParams(resCPUParams))),
PSROIPoolingLayerCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions

View File

@ -34,7 +34,7 @@ namespace CPUTestUtils {
ncdhw = abcde,
nCdhw8c = aBcde8b,
nCdhw16c = aBcde16b,
ndhwc = acdeb,
ndhwc = acdeb
} cpu_memory_format_t;
using CPUSpecificParams = std::tuple<

View File

@ -7,124 +7,124 @@
namespace LayerTestsDefinitions {
std::string PSROIPoolingLayerTest::getTestCaseName(testing::TestParamInfo<psroiParams> obj) {
std::vector<size_t> inputShape;
std::vector<size_t> coordsShape;
size_t outputDim;
size_t groupSize;
float spatialScale;
size_t spatialBinsX;
size_t spatialBinsY;
std::string mode;
InferenceEngine::Precision netPrecision;
std::string targetDevice;
std::tie(inputShape, coordsShape, outputDim, groupSize, spatialScale, spatialBinsX, spatialBinsY, mode, netPrecision, targetDevice) = obj.param;
std::string PSROIPoolingLayerTest::getTestCaseName(testing::TestParamInfo<psroiParams> obj) {
std::vector<size_t> inputShape;
std::vector<size_t> coordsShape;
size_t outputDim;
size_t groupSize;
float spatialScale;
size_t spatialBinsX;
size_t spatialBinsY;
std::string mode;
InferenceEngine::Precision netPrecision;
std::string targetDevice;
std::tie(inputShape, coordsShape, outputDim, groupSize, spatialScale, spatialBinsX, spatialBinsY, mode, netPrecision, targetDevice) = obj.param;
std::ostringstream result;
std::ostringstream result;
result << "in_shape=" << CommonTestUtils::vec2str(inputShape) << "_";
result << "coord_shape=" << CommonTestUtils::vec2str(coordsShape) << "_";
result << "out_dim=" << outputDim << "_";
result << "group_size=" << groupSize << "_";
result << "scale=" << spatialScale << "_";
result << "bins_x=" << spatialBinsX << "_";
result << "bins_y=" << spatialBinsY << "_";
result << "mode=" << mode << "_";
result << "prec=" << netPrecision.name() << "_";
result << "dev=" << targetDevice;
return result.str();
result << "in_shape=" << CommonTestUtils::vec2str(inputShape) << "_";
result << "coord_shape=" << CommonTestUtils::vec2str(coordsShape) << "_";
result << "out_dim=" << outputDim << "_";
result << "group_size=" << groupSize << "_";
result << "scale=" << spatialScale << "_";
result << "bins_x=" << spatialBinsX << "_";
result << "bins_y=" << spatialBinsY << "_";
result << "mode=" << mode << "_";
result << "prec=" << netPrecision.name() << "_";
result << "dev=" << targetDevice;
return result.str();
}
static int randInt(int low, int high) {
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<int> dis(low, high);
return dis(gen);
}
static void fillROITensor(float* buffer, int numROIs, int batchSize,
int height, int width, int groupSize,
float spatialScale, int spatialBinsX, int spatialBinsY, const std::string& mode) {
int minRoiWidth = groupSize;
int maxRoiWidth = width / groupSize * groupSize;
int minRoiHeight = groupSize;
int maxRoiHeight = height / groupSize * groupSize;
float scaleX = spatialScale;
float scaleY = spatialScale;
if (mode == "bilinear") {
minRoiWidth = spatialBinsX;
maxRoiWidth = width / spatialBinsX * spatialBinsX;
minRoiHeight = spatialBinsY;
maxRoiHeight = height / spatialBinsY * spatialBinsY;
scaleX *= width;
scaleY *= height;
}
int batchId = 0;
for (int i = 0; i < numROIs; i++) {
int sizeX = std::min(width, randInt(minRoiWidth, maxRoiWidth));
int sizeY = std::min(height, randInt(minRoiHeight, maxRoiHeight));
int startX = randInt(0, std::max(1, width - sizeX - 1));
int startY = randInt(0, std::max(1, height - sizeY - 1));
static int randInt(int low, int high) {
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<int> dis(low, high);
return dis(gen);
float* roi = buffer + i * 5;
roi[0] = batchId;
roi[1] = startX / scaleX;
roi[2] = startY / scaleY;
roi[3] = (startX + sizeX - 1) / scaleX;
roi[4] = (startY + sizeY - 1) / scaleY;
batchId = (batchId + 1) % batchSize;
}
}
static void fillROITensor(float* buffer, int numROIs, int batchSize,
int height, int width, int groupSize,
float spatialScale, int spatialBinsX, int spatialBinsY, const std::string& mode) {
int minRoiWidth = groupSize;
int maxRoiWidth = width / groupSize * groupSize;
int minRoiHeight = groupSize;
int maxRoiHeight = height / groupSize * groupSize;
float scaleX = spatialScale;
float scaleY = spatialScale;
if (mode == "bilinear") {
minRoiWidth = spatialBinsX;
maxRoiWidth = width / spatialBinsX * spatialBinsX;
minRoiHeight = spatialBinsY;
maxRoiHeight = height / spatialBinsY * spatialBinsY;
scaleX *= width;
scaleY *= height;
}
int batchId = 0;
for (int i = 0; i < numROIs; i++) {
int sizeX = std::min(width, randInt(minRoiWidth, maxRoiWidth));
int sizeY = std::min(height, randInt(minRoiHeight, maxRoiHeight));
int startX = randInt(0, std::max(1, width - sizeX - 1));
int startY = randInt(0, std::max(1, height - sizeY - 1));
float* roi = buffer + i * 5;
roi[0] = batchId;
roi[1] = startX / scaleX;
roi[2] = startY / scaleY;
roi[3] = (startX + sizeX - 1) / scaleX;
roi[4] = (startY + sizeY - 1) / scaleY;
batchId = (batchId + 1) % batchSize;
void PSROIPoolingLayerTest::Infer() {
inferRequest = executableNetwork.CreateInferRequest();
inputs.clear();
auto inputShape = cnnNetwork.getInputShapes().begin()->second;
size_t it = 0;
for (const auto &input : cnnNetwork.getInputsInfo()) {
const auto &info = input.second;
InferenceEngine::Blob::Ptr blob;
if (it == 1) {
blob = make_blob_with_precision(info->getTensorDesc());
blob->allocate();
fillROITensor(blob->buffer(), blob->size() / 5,
inputShape[0], inputShape[2], inputShape[3], groupSize_,
spatialScale_, spatialBinsX_, spatialBinsY_, mode_);
} else {
blob = GenerateInput(*info);
}
inferRequest.SetBlob(info->name(), blob);
inputs.push_back(blob);
it++;
}
inferRequest.Infer();
}
void PSROIPoolingLayerTest::Infer() {
inferRequest = executableNetwork.CreateInferRequest();
inputs.clear();
void PSROIPoolingLayerTest::SetUp() {
std::vector<size_t> inputShape;
std::vector<size_t> coordsShape;
size_t outputDim;
InferenceEngine::Precision netPrecision;
std::tie(inputShape, coordsShape, outputDim, groupSize_, spatialScale_,
spatialBinsX_, spatialBinsY_, mode_, netPrecision, targetDevice) = this->GetParam();
auto inputShape = cnnNetwork.getInputShapes().begin()->second;
size_t it = 0;
for (const auto &input : cnnNetwork.getInputsInfo()) {
const auto &info = input.second;
InferenceEngine::Blob::Ptr blob;
if (it == 1) {
blob = make_blob_with_precision(info->getTensorDesc());
blob->allocate();
fillROITensor(blob->buffer(), blob->size() / 5,
inputShape[0], inputShape[2], inputShape[3], groupSize_,
spatialScale_, spatialBinsX_, spatialBinsY_, mode_);
} else {
blob = GenerateInput(*info);
}
inferRequest.SetBlob(info->name(), blob);
inputs.push_back(blob);
it++;
}
inferRequest.Infer();
}
void PSROIPoolingLayerTest::SetUp() {
std::vector<size_t> inputShape;
std::vector<size_t> coordsShape;
size_t outputDim;
InferenceEngine::Precision netPrecision;
std::tie(inputShape, coordsShape, outputDim, groupSize_, spatialScale_,
spatialBinsX_, spatialBinsY_, mode_, netPrecision, targetDevice) = this->GetParam();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape, coordsShape});
auto paramOuts = ngraph::helpers::convert2OutputVector(
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
std::shared_ptr<ngraph::Node> psroiPooling = std::make_shared<ngraph::op::v0::PSROIPooling>(paramOuts[0],
paramOuts[1],
outputDim,
groupSize_,
spatialScale_,
spatialBinsX_,
spatialBinsY_,
mode_);
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(psroiPooling)};
function = std::make_shared<ngraph::Function>(results, params, "psroi_pooling");
}
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape, coordsShape});
auto paramOuts = ngraph::helpers::convert2OutputVector(
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
std::shared_ptr<ngraph::Node> psroiPooling = std::make_shared<ngraph::op::v0::PSROIPooling>(paramOuts[0],
paramOuts[1],
outputDim,
groupSize_,
spatialScale_,
spatialBinsX_,
spatialBinsY_,
mode_);
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(psroiPooling)};
function = std::make_shared<ngraph::Function>(results, params, "psroi_pooling");
}
} // namespace LayerTestsDefinitions