[CPU] Improved Split layer (#3449)
* [CPU] Added more optimal Split implementation
Parent: 9509244729
Commit: 95f531e9e0
@@ -210,7 +210,7 @@ void BF16Transformer::optimizeToFloat(InferenceEngine::CNNNetwork &network) {
             }
             bool marked = tryToMarkFP32(inputTo.second->outData[o], immutable);
             if (marked) {
-                toAnalyzeTensors.insert(layer->outData[o]);
+                toAnalyzeTensors.insert(inputTo.second->outData[o]);
             }
         }
     }
@@ -28,7 +28,7 @@ class BF16Transformer {
         { "concat", "eltwise" };
     // prevent fallback to fp32 without considering both input and output nodes
     const InferenceEngine::details::caseless_set<std::string> _skipmarking =
-        { "memory" };
+        { "memory", "Split" };

     /**
      * Tries to mark tensor as FP32 by analyzing of local consumers of the tensor. Do not mark if
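Note on the `_skipmarking` change: layers listed in this set are skipped when the BF16 transformer propagates FP32 marks, so the new in-place Split is not forced back to an FP32 fallback. A minimal sketch of the case-insensitive membership test this relies on — assuming `caseless_set` behaves like a lowercase-normalized `std::set`; the `skipMarking` helper below is hypothetical and for illustration only:

```cpp
#include <algorithm>
#include <cctype>
#include <set>
#include <string>

// Hypothetical illustration: caseless_set membership reduced to a
// lowercase-normalized std::set lookup. The real IE type is richer.
static bool skipMarking(const std::set<std::string>& skip, std::string type) {
    std::transform(type.begin(), type.end(), type.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    return skip.count(type) > 0;   // "Split" and "split" both match "split"
}
```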
@@ -93,7 +93,7 @@ bool MKLDNNEdge::needReorder() {
     };

     const auto portChildEdges = getParent()->getChildEdgesAtPort(inNumber);
-    if (in_place && detectInPlaceChildsNum(portChildEdges) > 1 && childCanChangeMem)
+    if (in_place && childCanChangeMem && portChildEdges.size() > 1 && detectInPlaceChildsNum(portChildEdges) > 1)
         canBeInPlaceConflicts = true;
     if (!canBeInPlaceConflicts && in_place && !getParent()->getChildEdges().empty()) {
         for (auto &p_edge_peer : portChildEdges) {
@@ -288,7 +288,6 @@ bool MKLDNNMemory::IsGroupedFormat(memory::format format) {
 memory::format MKLDNNMemory::GetPlainFormat(memory::dims dims) {
     switch (dims.size()) {
     case 0:
-        return memory::x;
     case 1:
         return memory::x;
     case 2:
@@ -576,6 +575,7 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
         blkDims = dims;
         break;
     case memory::tnc:
+    case memory::ncw:
         layout = Layout::CHW;
         order = {0, 1, 2};
         blkDims = dims;
@@ -587,6 +587,13 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
                    static_cast<size_t>(dims[0]),
                    static_cast<size_t>(dims[2])};
         break;
+    case memory::nwc:
+        layout = Layout::CHW;
+        order = {0, 2, 1};
+        blkDims = {static_cast<size_t>(dims[0]),
+                   static_cast<size_t>(dims[2]),
+                   static_cast<size_t>(dims[1])};
+        break;
     case memory::oihw:
     case memory::nchw:
         layout = Layout::NCHW;
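The two hunks above map mkldnn's `ncw`/`nwc` formats onto the IE `CHW` layout: `order` records the permutation from logical dims to memory order, and `blkDims` holds the dimension sizes in that memory order. A minimal sketch of the relationship (a standalone helper, assuming nothing beyond the STL):

```cpp
#include <cstddef>
#include <vector>

// Given logical dims and a memory-order permutation, produce the dims as they
// appear in memory - this mirrors how the nwc case fills blkDims above.
static std::vector<size_t> dimsInMemoryOrder(const std::vector<size_t>& dims,
                                             const std::vector<size_t>& order) {
    std::vector<size_t> blkDims(order.size());
    for (size_t i = 0; i < order.size(); ++i)
        blkDims[i] = dims[order[i]];   // e.g. {N, C, W} with {0, 2, 1} -> {N, W, C}
    return blkDims;
}
```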
@@ -3,136 +3,206 @@
 //

 #include "mkldnn_split_node.h"
+#include "common/cpu_memcpy.h"
 #include <legacy/ie_layers.h>
 #include <string>
 #include <vector>
 #include <map>
 #include <mkldnn_types.h>
 #include <mkldnn_extension_utils.h>
 #include <limits>
 #include <ie_parallel.hpp>

+#define THROW_ERROR THROW_IE_EXCEPTION << "Split layer with name '" << getName() << "' "
+
 using namespace mkldnn;
 using namespace MKLDNNPlugin;
 using namespace InferenceEngine;

+static TensorDesc makePlainTensorDesc(const Precision& precision, const SizeVector& srcDims) {
+    SizeVector order(srcDims.size());
+    std::iota(order.begin(), order.end(), 0);
+    return TensorDesc(precision, srcDims, {srcDims, order});
+}
+
+static TensorDesc makePerChannelTensorDesc(const Precision& precision, const SizeVector& srcDims) {
+    constexpr size_t channelsPos = 1lu;
+    SizeVector order(srcDims.size());
+    std::iota(order.begin(), order.end(), 0);
+    SizeVector blkDims = srcDims;
+    if (srcDims.size() > 2) {
+        auto moveElementBack = [](SizeVector& vector, size_t indx) {
+            auto itr = vector.begin() + indx;
+            std::rotate(itr, itr + 1, vector.end());
+        };
+
+        moveElementBack(order, channelsPos);
+        moveElementBack(blkDims, channelsPos);
+    }
+
+    return TensorDesc(precision, srcDims, {blkDims, order});
+}
+
+static TensorDesc makeChannelBlockedTensorDesc(const Precision& precision, const SizeVector& srcDims, size_t blockSize) {
+    if (srcDims.size() < 2) {
+        THROW_IE_EXCEPTION << "Can't create blocked tensor descriptor!";
+    }
+
+    constexpr size_t channelsPos = 1lu;
+    SizeVector order(srcDims.size());
+    std::iota(order.begin(), order.end(), 0);
+    order.push_back(channelsPos);
+
+    SizeVector blkDims = srcDims;
+    blkDims[1] = blkDims[1] / blockSize + (blkDims[1] % blockSize ? 1 : 0);
+    blkDims.push_back(blockSize);
+
+    return TensorDesc(precision, srcDims, {blkDims, order});
+}
+
+static inline uint8_t* getDataPtr(const MKLDNNMemory& memoryPtr) {
+    return reinterpret_cast<uint8_t*>(memoryPtr.GetData()) + memoryPtr.GetDescriptor().data.layout_desc.blocking.offset_padding *
+            MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(memoryPtr.GetDescriptor().data.data_type));
+}
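The helpers above build the three descriptor families the node advertises: plain, per-channel, and channel-blocked. For the blocked case, a worked example with values derived directly from the code above (assuming an FP32 NCHW input):

```cpp
// makeChannelBlockedTensorDesc(Precision::FP32, {2, 24, 5, 5}, 8) describes nChw8c:
//   order   = {0, 1, 2, 3, 1}   // the channel dim (index 1) repeats -> blocked
//   blkDims = {2, 3, 5, 5, 8}   // ceil(24 / 8) = 3 channel blocks of size 8
```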
 MKLDNNSplitNode::MKLDNNSplitNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
         MKLDNNNode(layer, eng, cache) {}

 void MKLDNNSplitNode::getSupportedDescriptors() {
-    auto * splitLayer = dynamic_cast<SplitLayer*>(getCnnLayer().get());
+    auto splitLayer = dynamic_cast<SplitLayer*>(getCnnLayer().get());

     if (splitLayer == nullptr)
-        THROW_IE_EXCEPTION << "Cannot convert split layer.";
+        THROW_ERROR << "can not convert from CNN layer.";

     if (getParentEdges().size() != 1)
-        THROW_IE_EXCEPTION << "Incorrect number of input nodes.";
+        THROW_ERROR << "has incorrect number of input nodes.";
     if (getChildEdges().empty())
-        THROW_IE_EXCEPTION << "Incorrect number of output nodes.";
+        THROW_ERROR << "has incorrect number of output nodes.";

     axis = splitLayer->_axis;
     if (axis >= getParentEdgeAt(0)->getDims().ndims())
-        THROW_IE_EXCEPTION << "Invalid value of axis parameter in split layer";
+        THROW_ERROR << "has invalid value of axis parameter.";
 }
 void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
+    using TensorDescFactory = std::function<TensorDesc(const Precision&, const SizeVector&)>;
+    constexpr size_t channelsPos = 1lu;
+    // perform guard checks
     if (!supportedPrimitiveDescriptors.empty())
         return;

-    InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
-    if (precision != InferenceEngine::Precision::FP32)
-        precision = InferenceEngine::Precision::FP32;
-    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
-    precision = getCnnLayer()->outData[0]->getPrecision();
-    if (precision != InferenceEngine::Precision::FP32)
-        precision = InferenceEngine::Precision::FP32;
-    auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
+    if (getCnnLayer()->insData.empty()) {
+        THROW_ERROR << "has an empty input in the CNN layer";
+    }
+
+    auto inpData = getCnnLayer()->insData[0].lock();
+    if (!inpData) {
+        THROW_ERROR << "input data is empty";
+    }

     auto srcDims = getParentEdgeAt(0)->getDims();

-    InferenceEngine::LayerConfig config;
-    config.dynBatchSupport = true;
-    config.inConfs.resize(1);
-    config.inConfs[0].inPlace = -1;
-    config.inConfs[0].constant = false;
-    config.inConfs[0].desc = MKLDNNMemoryDesc(srcDims, inputDataType, memory::format::any);
-    config.outConfs.resize(outDims.size());
-
-    std::vector<memory::format> outFormats;
-
     auto axis_size = 0;
     auto dstFirstDims = getChildEdgeAt(0)->getDims();
     for (size_t i = 0; i < outDims.size(); i++) {
         auto o_Dims = outDims[i];
         if (dstFirstDims.ndims() != o_Dims.ndims()) {
-            THROW_IE_EXCEPTION << "Split " << getName() << " supports only output blob with equal number of dimensions";
+            THROW_ERROR << "only supports output blobs with equal number of dimensions";
         }

-        config.outConfs[i].inPlace = -1;
-        config.outConfs[i].constant = false;
-        config.outConfs[i].desc = MKLDNNMemoryDesc(o_Dims, outputDataType, memory::format::any);
-        outFormats.push_back(memory::format::any);
-
         axis_size += o_Dims[axis];
         for (size_t j = 0; j < dstFirstDims.ndims(); j++) {
             if (j == axis)
                 continue;
             if (o_Dims[j] != dstFirstDims[j])
-                THROW_IE_EXCEPTION << "Split " << getName() << " has incorrect output dimensions";
+                THROW_ERROR << "has incorrect output dimensions";
         }
     }
     dstFirstDims[axis] = axis_size;
     if (dstFirstDims.size() != srcDims.size())
-        THROW_IE_EXCEPTION << "The sizes of input blob and sum of output blobs are not equal.";
-    supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, outFormats);
+        THROW_ERROR << "sizes of input blob and sum of output blobs are not equal.";

-    auto numOfDim = static_cast<size_t>(srcDims.ndims());
-
-    SizeVector order;
-    SizeVector offsets(numOfDim, 0lu);
-    size_t offset = (std::numeric_limits<size_t>::max)();
-    for (size_t i = 0; i < numOfDim; i++) {
-        order.push_back(i);
-    }
+    InferenceEngine::Precision inpPrecision = inpData->getPrecision();
+    auto outPrecision = inpPrecision; // the split layer doesn't convert precisions
+
+    // make primitive descriptor factory function for different configurations
+    bool dynBatchSupport = true;
+    if (axis < 1) {
+        dynBatchSupport = false;
+    }
+    auto makePdInfo = [dynBatchSupport](TensorDescFactory getTensorDesc, const Precision& precision, const MKLDNNDims& srcDims,
+                                        const std::vector<MKLDNNDims>& outDims, impl_desc_type type) -> PrimitiveDescInfo {
+        InferenceEngine::LayerConfig config;

-    SizeVector strides(numOfDim);
-    strides[numOfDim - 1] = 1;
-    for (size_t i = 2; i <= numOfDim; i++) {
-        if (numOfDim - i < axis) {
-            strides[numOfDim - i] = (std::numeric_limits<size_t>::max)();
-        } else {
-            strides[numOfDim - i] = strides[numOfDim - i + 1] * srcDims[numOfDim - i + 1];
-        }
-    }
+        config.dynBatchSupport = dynBatchSupport;
+        config.inConfs.resize(1);
+        config.inConfs[0].inPlace = -1;
+        config.inConfs[0].constant = false;
+        config.inConfs[0].desc = getTensorDesc(precision, srcDims.ToSizeVector());
+        config.outConfs.resize(outDims.size());
+
+        std::vector<memory::format> outFormats;
+
+        for (size_t i = 0; i < outDims.size(); i++) {
+            auto o_Dims = outDims[i];
+
+            config.outConfs[i].inPlace = -1;
+            config.outConfs[i].constant = false;
+            config.outConfs[i].desc = getTensorDesc(precision, o_Dims.ToSizeVector());
+            outFormats.push_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat());
+        }
+        return {config, type, outFormats};
+    };
+
+    // Set plain format
+    supportedPrimitiveDescriptors.push_back(makePdInfo(&makePlainTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref));
+
+    // Set per channel format.
+    supportedPrimitiveDescriptors.push_back(makePdInfo(&makePerChannelTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref));
+
+    // Support channel blocked format
+    std::vector<size_t> blockedPdIndexes;
+    if (srcDims.ndims() > channelsPos) {
+        for (size_t sizeS : {8lu, 16lu}) {
+            SizeVector blkDims = srcDims.ToSizeVector();
+            if (blkDims[channelsPos] % sizeS)
+                continue;
+
+            bool blocked = true;
+            for (size_t i = 0; i < outDims.size(); i++) {
+                if (outDims[i].ToSizeVector()[channelsPos] % sizeS) {
+                    blocked = false;
+                    break;
+                }
+            }
+            if (blocked) {
+                using std::placeholders::_1;
+                using std::placeholders::_2;
+                supportedPrimitiveDescriptors.push_back(makePdInfo(std::bind(&makeChannelBlockedTensorDesc, _1, _2, sizeS),
+                                                                   inpPrecision, srcDims, outDims, impl_desc_type::ref));
+                blockedPdIndexes.push_back(supportedPrimitiveDescriptors.size() - 1);
+            }
+        }
+    }

-    config.inConfs[0].desc = TensorDesc(Precision::FP32, srcDims.ToSizeVector(), {srcDims.ToSizeVector(), order, offset, offsets, strides});
-    outFormats.clear();
-    for (size_t i = 0; i < outDims.size(); i++) {
-        auto dims = outDims[i].ToSizeVector();
-        config.outConfs[i].inPlace = 0;
-        config.outConfs[i].desc = TensorDesc(Precision::FP32, dims,
-                                             {dims, order, offset, offsets, strides});
-        outFormats.push_back(MKLDNNMemory::Convert(config.outConfs[i].desc.getLayout()));
-    }
-    supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFormats);
+    // Optimized inplace case
+    std::vector<size_t> pdIndexesToReuse(1, 0); // at least the first plain layout can be optimized inplace.
+    if (axis < 2) {
+        pdIndexesToReuse.insert(pdIndexesToReuse.end(), blockedPdIndexes.begin(), blockedPdIndexes.end());
+    }

-    if ((numOfDim != 4 && numOfDim != 5) || axis != 1)
-        return;
+    for (auto refPdIndex : pdIndexesToReuse) {
+        const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig();
+        auto config = refConfig;

-    order.push_back(1);
-    numOfDim = order.size();
-    offsets = SizeVector(numOfDim, 0lu);
+        const auto& order = refConfig.inConfs[0].desc.getBlockingDesc().getOrder();
+        const auto& blkDims = refConfig.inConfs[0].desc.getBlockingDesc().getBlockDims();
+        auto numOfDim = blkDims.size();

-    // nChw8c and nChw16c
-    for (size_t sizeS : {8lu, 16lu}) {
-        SizeVector blkDims = srcDims.ToSizeVector();
-        if (blkDims[1] % sizeS)
-            continue;
-        blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu);
-        blkDims.push_back(sizeS);
+        std::vector<memory::format> outFormats;
+        SizeVector offsets(numOfDim, 0lu);
+        SizeVector strides(numOfDim);
+        strides.back() = 1lu;
+        size_t offset = (std::numeric_limits<size_t>::max)();

-        strides.resize(numOfDim);
-        strides[numOfDim - 1] = 1lu;
         for (size_t i = 2; i <= numOfDim; i++) {
             if (numOfDim - i < axis) {
                 strides[numOfDim - i] = (std::numeric_limits<size_t>::max)();
@@ -140,318 +210,60 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
             } else {
                 strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1];
             }
         }
-        config.inConfs[0].desc = TensorDesc(Precision::FP32, srcDims.ToSizeVector(), {blkDims, order, offset, offsets, strides});

-        outFormats.clear();
-        bool canInplace = true;
+        config.inConfs[0].desc = TensorDesc(inpPrecision, srcDims.ToSizeVector(), {blkDims, order, offset, offsets, strides});

         for (size_t i = 0; i < outDims.size(); i++) {
-            auto dims = outDims[i].ToSizeVector();
-            blkDims = dims;
+            const auto& outBlkDims = refConfig.outConfs[i].desc.getBlockingDesc().getBlockDims();
+            const auto& dims = refConfig.outConfs[i].desc.getDims();

-            if (blkDims[1] % sizeS) {
-                canInplace = false;
-                break;
-            }
-            blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu);
-            blkDims.push_back(sizeS);
-            config.outConfs[i].desc = TensorDesc(Precision::FP32, dims, {blkDims, order, offset, offsets, strides});
-
-            outFormats.emplace_back(MKLDNNMemory::Convert(config.outConfs[i].desc.getLayout()));
+            config.outConfs[i].inPlace = 0;
+            config.outConfs[i].desc = TensorDesc(outPrecision, dims, {outBlkDims, order, offset, offsets, strides});
+            outFormats.emplace_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat());
         }
-        if (canInplace)
-            supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFormats);
+        supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFormats);
     }
 }
 void MKLDNNSplitNode::createPrimitive() {
     auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
     if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
-        THROW_IE_EXCEPTION << "Input memory didn't allocate.";
+        THROW_ERROR << "Input memory has not been allocated.";
     for (size_t i = 0; i < getChildEdges().size(); i++) {
         if (!getChildEdgeAt(i)->getMemoryPtr() || !getChildEdgeAt(i)->getMemory().GetPrimitivePtr())
-            THROW_IE_EXCEPTION << "Destination memory didn't allocate.";
+            THROW_ERROR << "Destination memory has not been allocated.";
     }
     if (getSelectedPrimitiveDescriptor() == nullptr)
-        THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set.";
+        THROW_ERROR << "Preferable primitive descriptor is not set.";

-    canUseOptimizedImpl = true;
-    if (axis != 1)
-        canUseOptimizedImpl = false;
-
-    if (getParentEdgeAt(0)->getBlob()->getTensorDesc().getLayout() != NHWC &&
-        getParentEdgeAt(0)->getBlob()->getTensorDesc().getLayout() != NDHWC)
-        canUseOptimizedImpl = false;
-
-    for (size_t i = 0; i < getChildEdges().size(); i++) {
-        if (getChildEdgeAt(i)->getBlob()->getTensorDesc().getLayout() != NCHW &&
-            getChildEdgeAt(i)->getBlob()->getTensorDesc().getLayout() != NCDHW)
-            canUseOptimizedImpl = false;
-    }
-}
-
-void MKLDNNSplitNode::optimizedImpl(size_t MB) {
-    const int ndims = getParentEdgeAt(0)->getDims().ndims();
-    const size_t IC = getParentEdgeAt(0)->getDims()[1];
-    const size_t D = ndims == 5 ? getParentEdgeAt(0)->getDims()[ndims - 3] : 1;
-    const size_t H = getParentEdgeAt(0)->getDims()[ndims - 2];
-    const size_t W = getParentEdgeAt(0)->getDims()[ndims - 1];
-
-    auto srcBlob = getParentEdgeAt(0)->getBlob();
-    const auto *srcData = srcBlob->cbuffer().as<const float *>();
-    for (size_t i = 0, sIdx = 0; i < getChildEdges().size(); i++) {
-        auto dstBlob = getChildEdgeAt(i)->getBlob();
-        auto *dstData = dstBlob->buffer().as<float *>();
-
-        const size_t OC = getChildEdgeAt(i)->getDims()[1];
-
-        size_t innerSize = 1;
-        for (size_t j = axis; j < dstBlob->getTensorDesc().getDims().size(); j++) {
-            innerSize *= dstBlob->getTensorDesc().getDims()[j];
-        }
-
-        auto srcPtr = srcData + srcBlob->getTensorDesc().offset(sIdx);
-
-        parallel_for4d(MB, D, H, W, [&](size_t b, size_t d, size_t h, size_t w) {
-            for (size_t c = 0; c < OC; c++) {
-                size_t srcOff = b*D*H*W*IC + d*H*W*IC + h*W*IC + w*IC + c;
-                size_t dstOff = b*OC*D*H*W + c*D*H*W + d*H*W + h*W + w;
-
-                dstData[dstOff] = srcPtr[srcOff];
-            }
-        });
-
-        sIdx += innerSize;
-    }
+    if (!isOptimized())
+        prepareOptimizedParams();
 }
 void MKLDNNSplitNode::execute(mkldnn::stream strm) {
     if (isOptimized())
         return;

-    // FIXME: add more optimal implementation
-    MKLDNNDims par_dims = getParentEdgeAt(0)->getDims();
     int MB = batchToProcess();
-    auto srcBlob = getParentEdgeAt(0)->getBlob();
-    const auto *srcData = srcBlob->cbuffer().as<const float *>();
+    uint8_t* srcData = getDataPtr(this->getParentEdgeAt(0)->getMemory());
+    size_t batch = this->getParentEdgeAt(0)->getDims()[0];

-    size_t outerSize = 1;
-    for (int i = 0; i < axis; i++) {
-        if (i == 0)
-            outerSize *= MB;
-        else
-            outerSize *= srcBlob->getTensorDesc().getDims()[i];
-    }
+    if (batch != MB)
+        optimizedParams.countStrides = optimizedParams.countStrides / batch * MB;

-    if (canUseOptimizedImpl) {
-        optimizedImpl(MB);
-        return;
-    }
+    parallel_for2d(this->getChildEdges().size(), optimizedParams.countStrides, [&](size_t i, size_t j) {
+        uint8_t* dstData = optimizedParams.dstMemPtrs[i];

-    size_t srcSize = getParentEdgeAt(0)->getMemory().GetSize();
-    size_t src_batch_off = srcBlob->getTensorDesc().offset(srcBlob->size() / outerSize)
-            - srcBlob->getTensorDesc().offset(0);
-
-    for (size_t i = 0, sIdx = 0; i < getChildEdges().size(); i++) {
-        auto dstBlob = getChildEdgeAt(i)->getBlob();
-        auto *dstData = dstBlob->buffer().as<float *>();
-
-        size_t innerSize = 1;
-        for (size_t j = axis; j < dstBlob->getTensorDesc().getDims().size(); j++) {
-            innerSize *= dstBlob->getTensorDesc().getDims()[j];
-        }
-
-        size_t dst_batch_off = dstBlob->getTensorDesc().offset(innerSize) - dstBlob->getTensorDesc().offset(0);
-
-        for (size_t dIdx = 0; dIdx < innerSize; dIdx++, sIdx++) {
-            for (unsigned b = 0; b < outerSize; b++) {
-                if (sIdx + b*src_batch_off >= srcSize)
-                    THROW_IE_EXCEPTION << "Incorrect configuration of split layer " << getName() << "!";
-                dstData[b * dst_batch_off + dstBlob->getTensorDesc().offset(dIdx)] =
-                        srcData[b * src_batch_off + srcBlob->getTensorDesc().offset(sIdx)];
-            }
-        }
-    }
+        cpu_memcpy(&dstData[j * optimizedParams.dataSize[i]],
+                   &srcData[optimizedParams.srcDataOffsets[i] + j * optimizedParams.srcDataStride],
+                   optimizedParams.dataSize[i]);
+    });
 }
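With the parameters precomputed in prepareOptimizedParams(), execute() collapses the old element-by-element reference loop into a 2D grid of contiguous copies. A simplified sketch of the copy scheme — plain arrays and std::memcpy standing in for MKLDNNMemory and cpu_memcpy:

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

// Simplified sketch: the input is viewed as countStrides "rows"; each row is
// the concatenation of the per-output chunks, so every (output, row) pair is
// one contiguous copy.
void splitCopy(const uint8_t* src, std::vector<uint8_t*>& dst,
               const std::vector<size_t>& dataSize,      // bytes per output per row
               const std::vector<size_t>& srcOffsets,    // start of each output in a row
               size_t srcStride,                         // total row size in bytes
               size_t countStrides) {                    // number of rows
    for (size_t i = 0; i < dst.size(); ++i)
        for (size_t j = 0; j < countStrides; ++j)
            std::memcpy(dst[i] + j * dataSize[i],
                        src + srcOffsets[i] + j * srcStride,
                        dataSize[i]);
}
```

Each (output, row) pair touches a disjoint region, which is what makes the `parallel_for2d` over both indices in the real code safe.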
 bool MKLDNNSplitNode::created() const {
     return getType() == Split;
 }
-void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
-    if (implPriorities.size() > 0 && implPriorities[0] == impl_desc_type::ref) {
-        selectPrimitiveDescriptorByIndex(0);
-        return;
-    }
-    InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
-    if (precision != InferenceEngine::Precision::FP32)
-        precision = InferenceEngine::Precision::FP32;
-    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
-    precision = getCnnLayer()->outData[0]->getPrecision();
-    if (precision != InferenceEngine::Precision::FP32)
-        precision = InferenceEngine::Precision::FP32;
-    auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
-
-    bool hasUnknown = false;
-    std::vector<size_t> canSelectPrimitive;
-    for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
-        bool hasAny = true;
-        auto &primDescInfo = supportedPrimitiveDescriptors[i];
-        if (primDescInfo.getImplementationType() != impl_desc_type::unknown ||
-            primDescInfo.getConfig().outConfs[0].inPlace < 0)
-            continue;
-        hasUnknown = true;
-        for (auto iInfo : primDescInfo.getConfig().inConfs) {
-            if (iInfo.desc.getLayout() != InferenceEngine::Layout::ANY) {
-                hasAny = false;
-                break;
-            }
-        }
-
-        if (hasAny) {
-            for (auto oInfo : primDescInfo.getConfig().outConfs) {
-                if (oInfo.desc.getLayout() != InferenceEngine::Layout::ANY) {
-                    hasAny = false;
-                    break;
-                }
-            }
-        }
-
-        if (!hasAny) {
-            canSelectPrimitive.push_back(i);
-        }
-    }
-
-    bool canOptimize = false;
-    if (hasUnknown) {
-        canOptimize = true;
-
-        if (canSelectPrimitive.size() == 1) {
-            selectPrimitiveDescriptorByIndex(static_cast<int>(canSelectPrimitive[0]));
-            return;
-        }
-    }
-
-    std::map<mkldnn::memory::format, size_t> formatFrequency;
-    for (size_t i = 0; i < getParentEdges().size(); i++) {
-        auto parentEdge = getParentEdgeAt(i);
-        auto parent = parentEdge->getParent();
-
-        if (parent->getSelectedPrimitiveDescriptor() == nullptr)
-            continue;
-
-        int outputIndex = parentEdge->getOutputNum();
-        if (outputIndex < 0)
-            THROW_IE_EXCEPTION << "Cannot find index of output node";
-        if (outputIndex >= parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size())
-            outputIndex = 0;
-        auto outDesc = MKLDNNMemoryDesc(parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIndex].desc);
-        if (!outDesc)
-            continue;
-        if (formatFrequency.find(outDesc.getFormat()) != formatFrequency.end())
-            formatFrequency[outDesc.getFormat()] += 1;
-        else
-            formatFrequency[outDesc.getFormat()] = 1;
-    }
-    for (size_t i = 0; i < getChildEdges().size(); i++) {
-        auto childEdge = getChildEdgeAt(i);
-        auto child = childEdge->getChild();
-        if (child->getSelectedPrimitiveDescriptor() == nullptr)
-            continue;
-        int inputIndex = childEdge->getOutputNum();
-        if (inputIndex < 0)
-            THROW_IE_EXCEPTION << "Cannot find index of output node";
-        if (inputIndex >= child->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size())
-            inputIndex = 0;
-        auto outDesc = MKLDNNMemoryDesc(child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIndex].desc);
-        if (!outDesc)
-            continue;
-        if (formatFrequency.find(outDesc.getFormat()) != formatFrequency.end())
-            formatFrequency[outDesc.getFormat()] += 1;
-        else
-            formatFrequency[outDesc.getFormat()] = 1;
-    }
-
-    size_t maxCount = 0;
-    mkldnn::memory::format convertTo = MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims());
-    for (auto &it : formatFrequency) {
-        if (it.second > maxCount && !MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, it.first).blocksExtended()) {
-            maxCount = it.second;
-            convertTo = it.first;
-        }
-    }
-
-    // This logic is needed to cover cases when Split node cannot be optimized out for particular block size
-    // In general it is significantly better to have additional reorders in graph than to use reference Split implementation
-    if (convertTo == memory::nChw16c || convertTo == memory::nCdhw16c ||
-        convertTo == memory::nChw8c || convertTo == memory::nCdhw8c) {
-        int blockSize = convertTo == memory::nChw16c || convertTo == memory::nCdhw16c ? 16 : 8;
-        bool shouldDecreaseBlockSize = false;
-        for (auto& parentEdge : getParentEdges()) {
-            if (parentEdge.lock()->getDims()[1] % blockSize != 0)
-                shouldDecreaseBlockSize = true;
-        }
-
-        for (auto& childEdge : getChildEdges()) {
-            if (childEdge.lock()->getDims()[1] % blockSize != 0)
-                shouldDecreaseBlockSize = true;
-        }
-
-        if (shouldDecreaseBlockSize) {
-            int decreasedBlockSize = 8;
-            bool canDecreaseBlockSize = true;
-            for (auto &parentEdge : getParentEdges()) {
-                if (parentEdge.lock()->getDims()[1] % decreasedBlockSize != 0)
-                    canDecreaseBlockSize = false;
-            }
-
-            for (auto &childEdge : getChildEdges()) {
-                if (childEdge.lock()->getDims()[1] % decreasedBlockSize != 0)
-                    canDecreaseBlockSize = false;
-            }
-
-            if (canDecreaseBlockSize)
-                convertTo = getParentEdgeAt(0)->getDims().ndims() == 5 ? memory::nCdhw8c : memory::nChw8c;
-            else
-                convertTo = MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims());
-        }
-    }
-
-    if (canOptimize && MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, convertTo).blocksExtended())
-        canOptimize = false;
-    for (size_t i = 0; canOptimize && i < getChildEdges().size(); i++) {
-        if (MKLDNNMemoryDesc(getChildEdgeAt(i)->getDims(), outputDataType, convertTo).blocksExtended())
-            canOptimize = false;
-    }
-
-    if (canOptimize) {
-        for (auto supportedPdIndex : canSelectPrimitive) {
-            if (MKLDNNMemoryDesc(supportedPrimitiveDescriptors[supportedPdIndex].getConfig().inConfs[0].desc).getFormat() == convertTo) {
-                selectPrimitiveDescriptorByIndex(static_cast<int>(supportedPdIndex));
-                return;
-            }
-        }
-    }
-
-    for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
-        auto &primDescInfo = supportedPrimitiveDescriptors[i];
-        if (primDescInfo.getImplementationType() == impl_desc_type::unknown)
-            continue;
-        if (convertTo == MKLDNNMemoryDesc(supportedPrimitiveDescriptors[i].getConfig().outConfs[0].desc).getFormat()) {
-            size_t num = 0;
-            for (num = 0; num < getParentEdges().size(); num++) {
-                if (MKLDNNMemoryDesc(getParentEdgeAt(num)->getDims(), inputDataType, convertTo).blocksExtended())
-                    break;
-            }
-            if (num == getParentEdges().size()) {
-                selectPrimitiveDescriptorByIndex(i);
-                return;
-            }
-        }
-    }
-
-    selectPrimitiveDescriptorByIndex(0);
-}
-
 bool MKLDNNSplitNode::isOptimized() {
     return getSelectedPrimitiveDescriptor() && getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].inPlace >= 0;
 }
@@ -464,7 +276,7 @@ void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() {
     auto selected_pd = getSelectedPrimitiveDescriptor();
     if (selected_pd == nullptr)
-        THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set.";
+        THROW_ERROR << "Preferable primitive descriptor is not set.";
     auto config = selected_pd->getConfig();
     if (isInitConfig(config))
         return;
@@ -497,12 +309,11 @@ void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() {
     }
     const auto& cnnLayer = getCnnLayer();
     if (!cnnLayer)
-        THROW_IE_EXCEPTION << "Cannot create Split layer " << getName() << " without CNNLayer!";
+        THROW_ERROR << "cannot be created without CNNLayer!";
     if (config.outConfs.size() != outDims.size())
-        THROW_IE_EXCEPTION << "Invalid config for Split layer " << getName();
+        THROW_ERROR << "has invalid config";
     size_t offset = 0;
     for (size_t i = 0; i < cnnLayer->outData.size(); i++) {
-        size_t confNum = i;
         config.outConfs[i].desc = InferenceEngine::TensorDesc(config.outConfs[i].desc.getPrecision(),
                                                               config.outConfs[i].desc.getDims(), {
                                                                   config.outConfs[i].desc.getBlockingDesc().getBlockDims(),
@@ -512,21 +323,119 @@ void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() {
                                                                   config.inConfs[0].desc.getBlockingDesc().getStrides()
                                                               });
         size_t axisSize = 1;
-        for (size_t j = axis; j < config.outConfs[confNum].desc.getBlockingDesc().getBlockDims().size(); j++) {
-            axisSize *= config.outConfs[confNum].desc.getBlockingDesc().getBlockDims()[j];
+        for (size_t j = axis; j < config.outConfs[i].desc.getBlockingDesc().getBlockDims().size(); j++) {
+            axisSize *= config.outConfs[i].desc.getBlockingDesc().getBlockDims()[j];
         }
         offset += axisSize;
     }
     initDescriptor(config);
 }

+void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
+    if (implPriorities.size() > 0 && implPriorities[0] == impl_desc_type::ref) {
+        selectPrimitiveDescriptorByIndex(0);
+        return;
+    }
+
+    // check the descriptors and select the ones that have the same data format as the input
+    std::vector<size_t> canSelectPrimitive;
+    for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
+        auto parentEdge = getParentEdgeAt(0);
+        auto parentPtr = parentEdge->getParent();
+        auto parent_spd = parentPtr->getSelectedPrimitiveDescriptor();
+
+        if (parent_spd != nullptr && !parent_spd->getConfig().outConfs.empty()) {
+            int inNum = parentEdge->getInputNum();
+            if (inNum < 0 || inNum >= parent_spd->getConfig().outConfs.size()) {
+                inNum = 0;
+            }
+            if (MKLDNNExtensionUtils::initTensorsAreEqual(
+                    getSupportedPrimitiveDescriptors()[i].getConfig().inConfs[0].desc,
+                    parent_spd->getConfig().outConfs[inNum].desc)) {
+                canSelectPrimitive.push_back(i);
+            }
+        }
+    }
+    if (canSelectPrimitive.size() == 1) {
+        selectPrimitiveDescriptorByIndex(static_cast<int>(canSelectPrimitive[0]));
+        return;
+    }
+    // if there is more than one PD with similar data layouts - select the optimized one
+    for (auto indx : canSelectPrimitive) {
+        if (supportedPrimitiveDescriptors[indx].getImplementationType() == impl_desc_type::unknown) {
+            selectPrimitiveDescriptorByIndex(static_cast<int>(indx));
+            return;
+        }
+    }
+
+    // if there are no matching data layouts, select the first optimized implementation
+    for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
+        if (supportedPrimitiveDescriptors[i].getImplementationType() == impl_desc_type::unknown) {
+            selectPrimitiveDescriptorByIndex(static_cast<int>(i));
+            return;
+        }
+    }
+
+    selectPrimitiveDescriptorByIndex(0);
+}
+
 void MKLDNNSplitNode::setDynamicBatchLim(int lim) {
     if (axis == 0)
-        THROW_IE_EXCEPTION << "Dynamic batch is not supported by split layer with axis == 0 parameter";
+        THROW_ERROR << "Dynamic batch is not supported by split layer with axis == 0 parameter";

     dynBatchLim = lim;
     if (prim) {
         prim.setBatchLimit(batchToProcess(), getParentEdges().size(), getChildEdges().size());
     }
 }

+void MKLDNNSplitNode::prepareOptimizedParams() {
+    const auto& inpTensorDesc = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc;
+
+    // find axis order position
+    const auto& order = inpTensorDesc.getBlockingDesc().getOrder();
+    unsigned axisOrderPos = UINT_MAX;
+    for (size_t i = 0; i < order.size(); ++i) {
+        if (order[i] == axis) {
+            axisOrderPos = i;
+            break;
+        }
+    }
+    if (UINT_MAX == axisOrderPos) {
+        THROW_ERROR << "Can't find the axis in the input tensor order list";
+    }
+
+    uint8_t srcDataSize = inpTensorDesc.getPrecision().size();
+    const auto& srcDims = inpTensorDesc.getBlockingDesc().getBlockDims();
+    int nDims = srcDims.size();
+
+    optimizedParams.countStrides = 1;
+    for (int i = 0; i < axisOrderPos; i++)
+        optimizedParams.countStrides *= srcDims[i];
+
+    optimizedParams.srcDataStride = 0;
+    optimizedParams.dataSize.resize(this->getChildEdges().size());
+    optimizedParams.dstMemPtrs.clear();
+    for (int i = 0; i < this->getChildEdges().size(); i++) {
+        if (uint8_t* dstData = getDataPtr(this->getChildEdgeAt(i)->getMemory())) {
+            optimizedParams.dstMemPtrs.push_back(dstData);
+        } else {
+            THROW_ERROR << "can't get child edge indx " << i << " data.";
+        }
+
+        optimizedParams.dataSize[i] = srcDataSize;
+
+        for (int j = axisOrderPos; j < nDims; j++)
+            optimizedParams.dataSize[i] *= this->getChildEdgeAt(i)->getDesc().getBlockingDesc().getBlockDims()[j];
+
+        optimizedParams.srcDataStride += optimizedParams.dataSize[i];
+    }
+
+    optimizedParams.srcDataOffsets.resize(this->getChildEdges().size());
+    optimizedParams.srcDataOffsets[0] = 0;
+    for (int i = 1; i < this->getChildEdges().size(); i++) {
+        optimizedParams.srcDataOffsets[i] = optimizedParams.srcDataOffsets[i - 1] + optimizedParams.dataSize[i - 1];
+    }
+}
+
 REG_MKLDNN_PRIM_FOR(MKLDNNSplitNode, Split);
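A worked example of what prepareOptimizedParams() computes, assuming an FP32 NCHW input of {2, 24, 5, 5} with axis = 1 split into outputs of 16 and 8 channels (all numbers derived from the code above):

```cpp
// countStrides   = 2                      // product of dims before the axis (N)
// dataSize       = {16*5*5*4, 8*5*5*4}    // = {1600, 800} bytes per outer stride
// srcDataStride  = 1600 + 800             // = 2400 bytes, one full input row
// srcDataOffsets = {0, 1600}              // where each output starts inside a row
```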
@@ -28,10 +28,17 @@ public:
     void setDynamicBatchLim(int lim) override;

 private:
-    void optimizedImpl(size_t MB);
+    void prepareOptimizedParams();

-    bool canUseOptimizedImpl = true;
     size_t axis = 1;

+    struct {
+        std::vector<size_t> dataSize;
+        std::vector<size_t> srcDataOffsets;
+        std::vector<uint8_t *> dstMemPtrs;
+        size_t srcDataStride;
+        size_t countStrides;
+    } optimizedParams;
 };

 } // namespace MKLDNNPlugin
@@ -2,7 +2,6 @@
 // SPDX-License-Identifier: Apache-2.0
 //

 #include <shared_test_classes/single_layer/region_yolo.hpp>
-#include "ngraph_functions/builders.hpp"
 #include "test_utils/cpu_test_utils.hpp"
@@ -0,0 +1,239 @@ (new file)
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ngraph_functions/builders.hpp"
#include "test_utils/cpu_test_utils.hpp"

using namespace InferenceEngine;
using namespace CPUTestUtils;

namespace CPULayerTestsDefinitions {

typedef std::tuple<
        size_t,                      // Num splits
        int64_t,                     // Axis
        InferenceEngine::Precision,  // Net precision
        std::vector<size_t>,         // Input shapes
        std::vector<size_t>,         // Used outputs indices
        std::string,                 // Target device name
        CPUSpecificParams
> splitCPUTestParams;

class SplitLayerCPUTest : public testing::WithParamInterface<splitCPUTestParams>,
                          virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
public:
    static std::string getTestCaseName(testing::TestParamInfo<splitCPUTestParams> obj) {
        size_t numSplits;
        int64_t axis;
        InferenceEngine::Precision netPrecision;
        InferenceEngine::SizeVector inputShape, outIndices;
        std::string targetDevice;
        CPUSpecificParams cpuParams;
        std::tie(numSplits, axis, netPrecision, inputShape, outIndices, targetDevice, cpuParams) = obj.param;

        std::ostringstream result;
        result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
        result << "numSplits=" << numSplits << "_";
        result << "axis=" << axis << "_";
        if (!outIndices.empty()) {
            result << "outIndices" << CommonTestUtils::vec2str(outIndices) << "_";
        }
        result << "netPRC=" << netPrecision.name() << "_";
        result << "trgDev=" << targetDevice;
        result << CPUTestsBase::getTestCaseName(cpuParams);
        return result.str();
    }

protected:
    void SetUp() override {
        SetRefMode(LayerTestsUtils::RefMode::CONSTANT_FOLDING);
        size_t axis, numSplits;
        std::vector<size_t> inputShape, outIndices;
        InferenceEngine::Precision netPrecision;
        CPUSpecificParams cpuParams;
        std::tie(numSplits, axis, netPrecision, inputShape, outIndices, targetDevice, cpuParams) = this->GetParam();
        inPrc = outPrc = netPrecision;
        if (outIndices.empty()) {
            for (int i = 0; i < numSplits; ++i) {
                outIndices.push_back(i);
            }
        }

        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
        selectedType += std::string("_") + inPrc.name();

        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
        auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
        auto paramOuts = ngraph::helpers::convert2OutputVector(
                ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
        auto split = std::dynamic_pointer_cast<ngraph::opset5::Split>(ngraph::builder::makeSplit(paramOuts[0],
                                                                                                 ngPrc, numSplits, axis));
        ngraph::ResultVector results;
        for (int i = 0; i < outIndices.size(); i++) {
            results.push_back(std::make_shared<ngraph::opset5::Result>(split->output(outIndices[i])));
        }
        split->get_rt_info() = getCPUInfo();
        function = std::make_shared<ngraph::Function>(results, params, "split");
    }
};

TEST_P(SplitLayerCPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    Run();
    CheckCPUImpl(executableNetwork, "Split");
}

namespace {
const auto planar_4D_ref = CPUSpecificParams{{nchw}, {nchw}, {"ref"}, "ref"};
const auto planar_5D_ref = CPUSpecificParams{{ncdhw}, {ncdhw}, {"ref"}, "ref"};

const auto planar_4D = CPUSpecificParams{{nchw}, {nchw}, {}, "unknown"};
const auto planar_5D = CPUSpecificParams{{ncdhw}, {ncdhw}, {}, "unknown"};

const auto planarChannels_4D = CPUSpecificParams{{nhwc}, {nhwc}, {}, "ref"};
const auto planarChannels_5D = CPUSpecificParams{{ndhwc}, {ndhwc}, {}, "ref"};

const auto blocked8_4D = CPUSpecificParams{{nChw8c}, {nChw8c}, {}, "unknown"};
const auto blocked8_5D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {}, "unknown"};

const auto blocked8_4D_ref = CPUSpecificParams{{nChw8c}, {nChw8c}, {}, "ref"};
const auto blocked8_5D_ref = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {}, "ref"};

const auto blocked16_4D = CPUSpecificParams{{nChw16c}, {nChw16c}, {}, "unknown"};
const auto blocked16_5D = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {}, "unknown"};

const auto blocked16_4D_ref = CPUSpecificParams{{nChw16c}, {nChw16c}, {}, "ref"};
const auto blocked16_5D_ref = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {}, "ref"};

// List of precisions natively supported by mkldnn.
const std::vector<Precision> netPrecisions = {
        Precision::I8,
        Precision::I16,
        Precision::I32,
        Precision::FP32,
        Precision::BF16
};

INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block8inPlace, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(3),
                                ::testing::Values(0, 1),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(std::vector<size_t>({3, 24, 24, 9})),
                                ::testing::Values(std::vector<size_t>({})),
                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(planar_4D, planar_4D_ref, planarChannels_4D, blocked8_4D)),
                        SplitLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block8, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(3),
                                ::testing::Values(2, 3),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(std::vector<size_t>({3, 24, 24, 9})),
                                ::testing::Values(std::vector<size_t>({})),
                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(planar_4D, planar_4D_ref, planarChannels_4D, blocked8_4D_ref)),
                        SplitLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block16inPlace, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(4),
                                ::testing::Values(0, 1),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(std::vector<size_t>({4, 64, 32, 12})),
                                ::testing::Values(std::vector<size_t>({})),
                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(blocked16_4D)),
                        SplitLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block16, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(4),
                                ::testing::Values(2, 3),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(std::vector<size_t>({4, 64, 32, 12})),
                                ::testing::Values(std::vector<size_t>({})),
                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(blocked16_4D_ref)),
                        SplitLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block8inPlace, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(3),
                                ::testing::Values(0, 1),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(std::vector<size_t>({3, 24, 24, 9, 15})),
                                ::testing::Values(std::vector<size_t>({})),
                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(planar_5D, planar_5D_ref, planarChannels_5D, blocked8_5D)),
                        SplitLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block8, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(3),
                                ::testing::Values(2, 3, 4),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(std::vector<size_t>({3, 24, 24, 9, 15})),
                                ::testing::Values(std::vector<size_t>({})),
                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(planar_5D, planar_5D_ref, planarChannels_5D, blocked8_5D_ref)),
                        SplitLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block16inPlace, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(4),
                                ::testing::Values(0, 1),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(std::vector<size_t>({4, 64, 32, 12, 20})),
                                ::testing::Values(std::vector<size_t>({})),
                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(blocked16_5D)),
                        SplitLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block16, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(4),
                                ::testing::Values(2, 3, 4),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(std::vector<size_t>({4, 64, 32, 12, 20})),
                                ::testing::Values(std::vector<size_t>({})),
                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(blocked16_5D_ref)),
                        SplitLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(smoke_Split3D, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(7),
                                ::testing::Values(0, 1, 2),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(std::vector<size_t>({14, 42, 21})),
                                ::testing::Values(std::vector<size_t>({})),
                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
                        SplitLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(smoke_Split2D, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(2),
                                ::testing::Values(0, 1),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(std::vector<size_t>({6, 12})),
                                ::testing::Values(std::vector<size_t>({})),
                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
                        SplitLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(smoke_Split1D, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(5),
                                ::testing::Values(0),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(std::vector<size_t>({10})),
                                ::testing::Values(std::vector<size_t>({})),
                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
                        SplitLayerCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions
@@ -230,171 +230,75 @@ INSTANTIATE_TEST_CASE_P(
         split_test_params {
                 {1, 24, 2, 5},
                 {{1, 16, 2, 5}, {1, 8, 2, 5}},
-                1, 3, MKLDNNPlugin::impl_desc_type::unknown, {}, {
-                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
-                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
-                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
-                            ASSERT_EQ(2, impl.getConfig().outConfs.size());
-                            ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().inConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(1).desc.getLayout());
-                        },
-                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
-                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
-                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
-                            ASSERT_EQ(2, impl.getConfig().outConfs.size());
-                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(1).desc.getLayout());
-                        },
-                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
-                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
-                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
-                            ASSERT_EQ(2, impl.getConfig().outConfs.size());
-                            ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().inConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().outConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().outConfs.at(1).desc.getLayout());
-                        }
-                }
+                1, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
-        split_test_params {
-                {1, 20, 2, 5},
-                {{1, 13, 2, 5}, {1, 7, 2, 5}},
-                1, 2, MKLDNNPlugin::impl_desc_type::unknown, {}, {
-                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
-                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
-                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
-                            ASSERT_EQ(2, impl.getConfig().outConfs.size());
-                            ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().inConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(1).desc.getLayout());
-                        },
-                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
-                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
-                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
-                            ASSERT_EQ(2, impl.getConfig().outConfs.size());
-                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(1).desc.getLayout());
-                        }
-                }
-        },
-        split_test_params {
-                {1, 20, 2, 5},
-                {{1, 10, 2, 5}, {1, 10, 2, 5}},
-                1, 2, MKLDNNPlugin::impl_desc_type::unknown, {}, {
-                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
-                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
-                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
-                            ASSERT_EQ(2, impl.getConfig().outConfs.size());
-                            ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().inConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(1).desc.getLayout());
-                        },
-                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
-                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
-                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
-                            ASSERT_EQ(2, impl.getConfig().outConfs.size());
-                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(1).desc.getLayout());
-                        }
-                }
-        },
-        split_test_params {
-                {2, 20, 2, 5},
-                {{2, 10, 2, 5}, {2, 10, 2, 5}},
-                1, 2, MKLDNNPlugin::impl_desc_type::unknown, {}, {
-                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
-                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
-                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
-                            ASSERT_EQ(2, impl.getConfig().outConfs.size());
-                            ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().inConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(1).desc.getLayout());
-                        },
-                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
-                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
-                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
-                            ASSERT_EQ(2, impl.getConfig().outConfs.size());
-                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(1).desc.getLayout());
-                        }
-                }
-        },
         split_test_params {
                 {1, 24, 2, 5},
                 {{1, 16, 2, 5}, {1, 8, 2, 5}},
                 1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {1, 20, 2, 5},
                 {{1, 13, 2, 5}, {1, 7, 2, 5}},
                 1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {1, 20, 2, 5},
                 {{1, 10, 2, 5}, {1, 10, 2, 5}},
-                1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {2, 20, 2, 5},
                 {{2, 10, 2, 5}, {2, 10, 2, 5}},
-                1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {2, 20, 2, 5},
                 {{2, 15, 2, 5}, {2, 5, 2, 5}},
-                1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {9, 11, 7, 5},
                 {{3, 11, 7, 5}, {6, 11, 7, 5}},
-                0, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                0, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {3, 11, 7, 5},
                 {{3, 11, 4, 5}, {3, 11, 3, 5}},
-                2, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                2, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {3, 11, 7, 5},
                 {{3, 11, 7, 1}, {3, 11, 7, 4}},
-                3, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                3, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {5, 6, 7, 15},
                 {{1, 6, 7, 15}, {2, 6, 7, 15}, {1, 6, 7, 15}, {1, 6, 7, 15}},
-                0, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                0, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {5, 6, 7, 15},
                 {{5, 1, 7, 15}, {5, 2, 7, 15}, {5, 1, 7, 15}, {5, 2, 7, 15}},
-                1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {5, 6, 7, 15},
                 {{5, 6, 3, 15}, {5, 6, 1, 15}, {5, 6, 2, 15}, {5, 6, 1, 15}},
-                2, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                2, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {5, 6, 7, 15},
                 {{5, 6, 7, 5}, {5, 6, 7, 3}, {5, 6, 7, 4}, {5, 6, 7, 3}},
-                3, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                3, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {5, 6, 7, 15},
                 {{5, 6, 7, 15}},
                 1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}},
         split_test_params {
                 {1, 32, 16, 16, 16},
                 {{1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}},
-                1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}},
-        split_test_params {
-                {1, 32, 16, 16, 16},
-                {{1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}},
-                1, 3, MKLDNNPlugin::impl_desc_type::unknown, {}}));
+                1, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}},
+        split_test_params {
+                {1, 32, 16, 16, 16},
+                {{1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}},
+                1, 5, MKLDNNPlugin::impl_desc_type::unknown, {}}));

 class MKLDNNGraphDynBatchSplitTests: public MKLDNNGraphSplitTests {
 protected:
@@ -544,32 +448,10 @@ INSTANTIATE_TEST_CASE_P(
         //                }
         //            }
         //        },
-        split_test_params {
-                {2, 20, 2, 5},
-                {{2, 10, 2, 5}, {2, 10, 2, 5}},
-                1, 2, MKLDNNPlugin::impl_desc_type::unknown, {}, {
-                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
-                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
-                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
-                            ASSERT_EQ(2, impl.getConfig().outConfs.size());
-                            ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().inConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(1).desc.getLayout());
-                        },
-                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
-                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
-                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
-                            ASSERT_EQ(2, impl.getConfig().outConfs.size());
-                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
-                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(1).desc.getLayout());
-                        }
-                }
-        },
         split_test_params {
                 {2, 24, 2, 5},
                 {{2, 16, 2, 5}, {2, 8, 2, 5}},
-                1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                1, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         // TODO: rewrite to ngraph to have reshape functionality
         // split_test_params {

@@ -586,34 +468,34 @@ INSTANTIATE_TEST_CASE_P(
         split_test_params {
                 {2, 20, 2, 5},
                 {{2, 10, 2, 5}, {2, 10, 2, 5}},
-                1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {2, 20, 2, 5},
                 {{2, 15, 2, 5}, {2, 5, 2, 5}},
-                1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {3, 11, 7, 5},
                 {{3, 11, 4, 5}, {3, 11, 3, 5}},
-                2, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                2, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {3, 11, 7, 5},
                 {{3, 11, 7, 1}, {3, 11, 7, 4}},
-                3, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                3, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {5, 6, 7, 15},
                 {{5, 1, 7, 15}, {5, 2, 7, 15}, {5, 1, 7, 15}, {5, 2, 7, 15}},
-                1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {5, 6, 7, 15},
                 {{5, 6, 3, 15}, {5, 6, 1, 15}, {5, 6, 2, 15}, {5, 6, 1, 15}},
-                2, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                2, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {5, 6, 7, 15},
                 {{5, 6, 7, 5}, {5, 6, 7, 3}, {5, 6, 7, 4}, {5, 6, 7, 3}},
-                3, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}}));
+                3, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}}));