[CPU] Improved Split layer (#3449)

* [CPU] Added more optimal Split implementation
Maksim Kutakov 2020-12-16 16:51:01 +03:00 committed by GitHub
parent 9509244729
commit 95f531e9e0
9 changed files with 551 additions and 508 deletions

View File

@ -210,7 +210,7 @@ void BF16Transformer::optimizeToFloat(InferenceEngine::CNNNetwork &network) {
}
bool marked = tryToMarkFP32(inputTo.second->outData[o], immutable);
if (marked) {
toAnalyzeTensors.insert(layer->outData[o]);
toAnalyzeTensors.insert(inputTo.second->outData[o]);
}
}
}

View File

@ -28,7 +28,7 @@ class BF16Transformer {
{ "concat", "eltwise" };
// prevent fallback to fp32 without considering both input and output nodes
const InferenceEngine::details::caseless_set<std::string> _skipmarking =
{ "memory" };
{ "memory", "Split" };
/**
* Tries to mark tensor as FP32 by analyzing local consumers of the tensor. Do not mark if

View File

@ -93,7 +93,7 @@ bool MKLDNNEdge::needReorder() {
};
const auto portChildEdges = getParent()->getChildEdgesAtPort(inNumber);
if (in_place && detectInPlaceChildsNum(portChildEdges) > 1 && childCanChangeMem)
if (in_place && childCanChangeMem && portChildEdges.size() > 1 && detectInPlaceChildsNum(portChildEdges) > 1)
canBeInPlaceConflicts = true;
if (!canBeInPlaceConflicts && in_place && !getParent()->getChildEdges().empty()) {
for (auto &p_edge_peer : portChildEdges) {

View File

@ -288,7 +288,6 @@ bool MKLDNNMemory::IsGroupedFormat(memory::format format) {
memory::format MKLDNNMemory::GetPlainFormat(memory::dims dims) {
switch (dims.size()) {
case 0:
return memory::x;
case 1:
return memory::x;
case 2:
@ -576,6 +575,7 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
blkDims = dims;
break;
case memory::tnc:
case memory::ncw:
layout = Layout::CHW;
order = {0, 1, 2};
blkDims = dims;
@ -587,6 +587,13 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
static_cast<size_t>(dims[0]),
static_cast<size_t>(dims[2])};
break;
case memory::nwc:
layout = Layout::CHW;
order = {0, 2, 1};
blkDims = {static_cast<size_t>(dims[0]),
static_cast<size_t>(dims[2]),
static_cast<size_t>(dims[1])};
break;
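// Illustrative sketch (not part of this patch): how the nwc mapping added
// above translates into an IE descriptor, assuming a logical {N, C, W} =
// {2, 3, 5} memory. order[i] names the logical dim stored at physical
// position i, so {0, 2, 1} places channels innermost ({N, W, C}).
#include <cstddef>
#include <vector>

int main() {
    std::vector<std::size_t> dims = {2, 3, 5};     // logical {N, C, W}
    std::vector<std::size_t> order = {0, 2, 1};    // physical layout {N, W, C}
    std::vector<std::size_t> blkDims = {dims[0], dims[2], dims[1]};  // {2, 5, 3}
    return blkDims[2] == dims[1] ? 0 : 1;  // channels are last in memory
}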
case memory::oihw:
case memory::nchw:
layout = Layout::NCHW;

View File

@ -3,136 +3,206 @@
//
#include "mkldnn_split_node.h"
#include "common/cpu_memcpy.h"
#include <legacy/ie_layers.h>
#include <string>
#include <vector>
#include <map>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include <limits>
#include <ie_parallel.hpp>
#define THROW_ERROR THROW_IE_EXCEPTION << "Split layer with name '" << getName() << "' "
using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
static TensorDesc makePlainTensorDesc(const Precision& precision, const SizeVector& srcDims) {
SizeVector order(srcDims.size());
std::iota(order.begin(), order.end(), 0);
return TensorDesc(precision, srcDims, {srcDims, order});
}
static TensorDesc makePerChannelTensorDesc(const Precision& precision, const SizeVector& srcDims) {
constexpr size_t channelsPos = 1lu;
SizeVector order(srcDims.size());
std::iota(order.begin(), order.end(), 0);
SizeVector blkDims = srcDims;
if (srcDims.size() > 2) {
auto moveElementBack = [](SizeVector& vector, size_t indx) {
auto itr = vector.begin() + indx;
std::rotate(itr, itr + 1, vector.end());
};
moveElementBack(order, channelsPos);
moveElementBack(blkDims, channelsPos);
}
return TensorDesc(precision, srcDims, {blkDims, order});
}
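// Illustrative sketch (not from this patch) of the rotation above, with
// assumed dims: moving the channel entry to the back turns the plain order
// {0, 1, 2, 3} into the per-channel order {0, 2, 3, 1}, i.e. an NHWC-like
// layout with channels innermost.
#include <algorithm>
#include <cstddef>
#include <vector>

int main() {
    std::vector<std::size_t> order = {0, 1, 2, 3};    // plain NCHW order
    std::vector<std::size_t> blkDims = {2, 8, 3, 4};  // hypothetical {N, C, H, W}
    auto moveElementBack = [](std::vector<std::size_t>& v, std::size_t idx) {
        auto itr = v.begin() + idx;
        std::rotate(itr, itr + 1, v.end());
    };
    moveElementBack(order, 1);    // -> {0, 2, 3, 1}
    moveElementBack(blkDims, 1);  // -> {2, 3, 4, 8}, channels last
    return order.back() == 1 ? 0 : 1;
}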
static TensorDesc makeChannelBlockedTensorDesc(const Precision& precision, const SizeVector& srcDims, size_t blockSize) {
if (srcDims.size() < 2) {
THROW_IE_EXCEPTION << "Can't create blocked tensor descriptor!";
}
constexpr size_t channelsPos = 1lu;
SizeVector order(srcDims.size());
std::iota(order.begin(), order.end(), 0);
order.push_back(channelsPos);
SizeVector blkDims = srcDims;
blkDims[1] = blkDims[1] / blockSize + (blkDims[1] % blockSize ? 1 : 0);
blkDims.push_back(blockSize);
return TensorDesc(precision, srcDims, {blkDims, order});
}
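// Illustrative sketch (not from this patch) of the blocking arithmetic above,
// with assumed dims: channels are rounded up to whole blocks and the block
// becomes an extra innermost dimension (nChw8c-style).
#include <cstddef>
#include <vector>

int main() {
    std::vector<std::size_t> srcDims = {1, 24, 2, 5};  // hypothetical NCHW shape
    const std::size_t blockSize = 8;
    std::vector<std::size_t> order = {0, 1, 2, 3};
    order.push_back(1);  // the channel dim is split: order becomes {0, 1, 2, 3, 1}
    std::vector<std::size_t> blkDims = srcDims;
    blkDims[1] = blkDims[1] / blockSize + (blkDims[1] % blockSize ? 1 : 0);  // 24 -> 3
    blkDims.push_back(blockSize);  // blkDims == {1, 3, 2, 5, 8}
    return blkDims[1] == 3 ? 0 : 1;
}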
static inline uint8_t* getDataPtr(const MKLDNNMemory& memoryPtr) {
return reinterpret_cast<uint8_t*>(memoryPtr.GetData()) + memoryPtr.GetDescriptor().data.layout_desc.blocking.offset_padding *
MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(memoryPtr.GetDescriptor().data.data_type));
}
MKLDNNSplitNode::MKLDNNSplitNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
MKLDNNNode(layer, eng, cache) {}
void MKLDNNSplitNode::getSupportedDescriptors() {
auto * splitLayer = dynamic_cast<SplitLayer*>(getCnnLayer().get());
auto splitLayer = dynamic_cast<SplitLayer*>(getCnnLayer().get());
if (splitLayer == nullptr)
THROW_IE_EXCEPTION << "Cannot convert split layer.";
THROW_ERROR << "can not convert from CNN layer.";
if (getParentEdges().size() != 1)
THROW_IE_EXCEPTION << "Incorrect number of input nodes.";
THROW_ERROR << "has incorrect number of input nodes.";
if (getChildEdges().empty())
THROW_IE_EXCEPTION << "Incorrect number of output nodes.";
THROW_ERROR << "has incorrect number of output nodes.";
axis = splitLayer->_axis;
if (axis >= getParentEdgeAt(0)->getDims().ndims())
THROW_IE_EXCEPTION << "Invalid value of axis parameter in split layer";
THROW_ERROR << "has invalid value of axis parameter.";
}
void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
using TensorDescFactory = std::function<TensorDesc(const Precision&, const SizeVector&)>;
constexpr size_t channelsPos = 1lu;
// perform guard checks
if (!supportedPrimitiveDescriptors.empty())
return;
InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
if (precision != InferenceEngine::Precision::FP32)
precision = InferenceEngine::Precision::FP32;
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
precision = getCnnLayer()->outData[0]->getPrecision();
if (precision != InferenceEngine::Precision::FP32)
precision = InferenceEngine::Precision::FP32;
auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
if (getCnnLayer()->insData.empty()) {
THROW_ERROR << "has an empty input in the CNN layer";
}
auto inpData = getCnnLayer()->insData[0].lock();
if (!inpData) {
THROW_ERROR << "input data is empty";
}
auto srcDims = getParentEdgeAt(0)->getDims();
InferenceEngine::LayerConfig config;
config.dynBatchSupport = true;
config.inConfs.resize(1);
config.inConfs[0].inPlace = -1;
config.inConfs[0].constant = false;
config.inConfs[0].desc = MKLDNNMemoryDesc(srcDims, inputDataType, memory::format::any);
config.outConfs.resize(outDims.size());
std::vector<memory::format> outFormats;
auto axis_size = 0;
auto dstFirstDims = getChildEdgeAt(0)->getDims();
for (size_t i = 0; i < outDims.size(); i++) {
auto o_Dims = outDims[i];
if (dstFirstDims.ndims() != o_Dims.ndims()) {
THROW_IE_EXCEPTION << "Split " << getName() << " supports only output blob with equal number of dimensions";
THROW_ERROR << "only supports output blobs with equal number of dimensions";
}
config.outConfs[i].inPlace = -1;
config.outConfs[i].constant = false;
config.outConfs[i].desc = MKLDNNMemoryDesc(o_Dims, outputDataType, memory::format::any);
outFormats.push_back(memory::format::any);
axis_size += o_Dims[axis];
for (size_t j = 0; j < dstFirstDims.ndims(); j++) {
if (j == axis)
continue;
if (o_Dims[j] != dstFirstDims[j])
THROW_IE_EXCEPTION << "Split " << getName() << " has incorrect output dimensions";
THROW_ERROR << "has incorrect output dimensions";
}
}
dstFirstDims[axis] = axis_size;
if (dstFirstDims.size() != srcDims.size())
THROW_IE_EXCEPTION << "The sizes of input blob and sum of output blobs are not equal.";
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, outFormats);
THROW_ERROR << "sizes of input blob and sum of output blobs are not equal.";
auto numOfDim = static_cast<size_t>(srcDims.ndims());
SizeVector order;
SizeVector offsets(numOfDim, 0lu);
size_t offset = (std::numeric_limits<size_t>::max)();
for (size_t i = 0; i < numOfDim; i++) {
order.push_back(i);
InferenceEngine::Precision inpPrecision = inpData->getPrecision();
auto outPrecision = inpPrecision; // the split layer doesn't convert precisions
// make primitive descriptor factory function for different configurations
bool dynBatchSupport = true;
if (axis < 1) {
dynBatchSupport = false;
}
auto makePdInfo = [dynBatchSupport](TensorDescFactory getTensorDesc, const Precision& precision, const MKLDNNDims& srcDims,
const std::vector<MKLDNNDims>& outDims, impl_desc_type type) -> PrimitiveDescInfo {
InferenceEngine::LayerConfig config;
SizeVector strides(numOfDim);
strides[numOfDim - 1] = 1;
for (size_t i = 2; i <= numOfDim; i++) {
if (numOfDim - i < axis) {
strides[numOfDim - i] = (std::numeric_limits<size_t>::max)();
} else {
strides[numOfDim - i] = strides[numOfDim - i + 1] * srcDims[numOfDim - i + 1];
config.dynBatchSupport = dynBatchSupport;
config.inConfs.resize(1);
config.inConfs[0].inPlace = -1;
config.inConfs[0].constant = false;
config.inConfs[0].desc = getTensorDesc(precision, srcDims.ToSizeVector());
config.outConfs.resize(outDims.size());
std::vector<memory::format> outFormats;
for (size_t i = 0; i < outDims.size(); i++) {
auto o_Dims = outDims[i];
config.outConfs[i].inPlace = -1;
config.outConfs[i].constant = false;
config.outConfs[i].desc = getTensorDesc(precision, o_Dims.ToSizeVector());
outFormats.push_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat());
}
return {config, type, outFormats};
};
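// Illustrative sketch (not from this patch): makeChannelBlockedTensorDesc
// takes an extra block-size argument, so std::bind below fixes it to match
// the two-argument TensorDescFactory signature. A standalone equivalent with
// a stand-in factory:
#include <cstddef>
#include <functional>

static int makeBlockedStandIn(int precision, int dims, std::size_t blockSize) {
    return precision + dims + static_cast<int>(blockSize);  // stand-in body
}

int main() {
    using Factory = std::function<int(int, int)>;
    using namespace std::placeholders;
    Factory blocked8 = std::bind(&makeBlockedStandIn, _1, _2, 8lu);  // trailing arg bound
    return blocked8(0, 0) == 8 ? 0 : 1;
}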
// Set plain format.
supportedPrimitiveDescriptors.push_back(makePdInfo(&makePlainTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref));
// Set per channel format.
supportedPrimitiveDescriptors.push_back(makePdInfo(&makePerChannelTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref));
// Support channel blocked format.
std::vector<size_t> blockedPdIndexes;
if (srcDims.ndims() > channelsPos) {
for (size_t sizeS : {8lu, 16lu}) {
SizeVector blkDims = srcDims.ToSizeVector();
if (blkDims[channelsPos] % sizeS)
continue;
bool blocked = true;
for (size_t i = 0; i < outDims.size(); i++) {
if (outDims[i].ToSizeVector()[channelsPos] % sizeS) {
blocked = false;
break;
}
}
if (blocked) {
using std::placeholders::_1;
using std::placeholders::_2;
supportedPrimitiveDescriptors.push_back(makePdInfo(std::bind(&makeChannelBlockedTensorDesc, _1, _2, sizeS),
inpPrecision, srcDims, outDims, impl_desc_type::ref));
blockedPdIndexes.push_back(supportedPrimitiveDescriptors.size() - 1);
}
}
}
config.inConfs[0].desc = TensorDesc(Precision::FP32, srcDims.ToSizeVector(), {srcDims.ToSizeVector(), order, offset, offsets, strides});
outFormats.clear();
for (size_t i = 0; i < outDims.size(); i++) {
auto dims = outDims[i].ToSizeVector();
config.outConfs[i].inPlace = 0;
config.outConfs[i].desc = TensorDesc(Precision::FP32, dims,
{dims, order, offset, offsets, strides});
outFormats.push_back(MKLDNNMemory::Convert(config.outConfs[i].desc.getLayout()));
// Optimized inplace case
std::vector<size_t> pdIndexesToReuse(1, 0); // at least the first plain layout can be optimized inplace.
if (axis < 2) {
pdIndexesToReuse.insert(pdIndexesToReuse.end(), blockedPdIndexes.begin(), blockedPdIndexes.end());
}
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFormats);
if ((numOfDim != 4 && numOfDim != 5) || axis != 1)
return;
for (auto refPdIndex : pdIndexesToReuse) {
const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig();
auto config = refConfig;
order.push_back(1);
numOfDim = order.size();
offsets = SizeVector(numOfDim, 0lu);
const auto& order = refConfig.inConfs[0].desc.getBlockingDesc().getOrder();
const auto& blkDims = refConfig.inConfs[0].desc.getBlockingDesc().getBlockDims();
auto numOfDim = blkDims.size();
// nChw8c and nChw16c
for (size_t sizeS : {8lu, 16lu}) {
SizeVector blkDims = srcDims.ToSizeVector();
if (blkDims[1] % sizeS)
continue;
blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu);
blkDims.push_back(sizeS);
std::vector<memory::format> outFormats;
SizeVector offsets(numOfDim, 0lu);
SizeVector strides(numOfDim);
strides.back() = 1lu;
size_t offset = (std::numeric_limits<size_t>::max)();
strides.resize(numOfDim);
strides[numOfDim - 1] = 1lu;
for (size_t i = 2; i <= numOfDim; i++) {
if (numOfDim - i < axis) {
strides[numOfDim - i] = (std::numeric_limits<size_t>::max)();
@ -140,318 +210,60 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1];
}
}
config.inConfs[0].desc = TensorDesc(Precision::FP32, srcDims.ToSizeVector(), {blkDims, order, offset, offsets, strides});
outFormats.clear();
bool canInplace = true;
config.inConfs[0].desc = TensorDesc(inpPrecision, srcDims.ToSizeVector(), {blkDims, order, offset, offsets, strides});
for (size_t i = 0; i < outDims.size(); i++) {
auto dims = outDims[i].ToSizeVector();
blkDims = dims;
const auto& outBlkDims = refConfig.outConfs[i].desc.getBlockingDesc().getBlockDims();
const auto& dims = refConfig.outConfs[i].desc.getDims();
if (blkDims[1] % sizeS) {
canInplace = false;
break;
}
blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu);
blkDims.push_back(sizeS);
config.outConfs[i].desc = TensorDesc(Precision::FP32, dims, {blkDims, order, offset, offsets, strides});
outFormats.emplace_back(MKLDNNMemory::Convert(config.outConfs[i].desc.getLayout()));
config.outConfs[i].inPlace = 0;
config.outConfs[i].desc = TensorDesc(outPrecision, dims, {outBlkDims, order, offset, offsets, strides});
outFormats.emplace_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat());
}
if (canInplace)
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFormats);
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFormats);
}
}
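// Illustrative sketch (not from this patch), assuming blocked dims
// {N, C, H, W} = {2, 24, 3, 5} and split axis 1: the in-place configs above
// compute exact strides only for the axis and inner dimensions, while outer
// dimensions get a SIZE_MAX sentinel, which appears to leave them unspecified
// so each output can be matched as a strided view into the parent's buffer.
#include <cstddef>
#include <limits>
#include <vector>

int main() {
    std::vector<std::size_t> blkDims = {2, 24, 3, 5};
    const std::size_t axis = 1, numOfDim = blkDims.size();
    std::vector<std::size_t> strides(numOfDim);
    strides[numOfDim - 1] = 1;
    for (std::size_t i = 2; i <= numOfDim; i++) {
        if (numOfDim - i < axis)  // dims outside the axis: sentinel
            strides[numOfDim - i] = (std::numeric_limits<std::size_t>::max)();
        else                      // axis and inner dims: dense strides
            strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1];
    }
    // strides == {SIZE_MAX, 15, 5, 1}
    return strides[1] == 15 ? 0 : 1;
}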
void MKLDNNSplitNode::createPrimitive() {
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
THROW_IE_EXCEPTION << "Input memory didn't allocate.";
THROW_ERROR << "Input memory has not been allocated.";
for (size_t i = 0; i < getChildEdges().size(); i++) {
if (!getChildEdgeAt(i)->getMemoryPtr() || !getChildEdgeAt(i)->getMemory().GetPrimitivePtr())
THROW_IE_EXCEPTION << "Destination memory didn't allocate.";
THROW_ERROR << "Destination memory has not been allocated.";
}
if (getSelectedPrimitiveDescriptor() == nullptr)
THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set.";
THROW_ERROR << "Preferable primitive descriptor is not set.";
canUseOptimizedImpl = true;
if (axis != 1)
canUseOptimizedImpl = false;
if (getParentEdgeAt(0)->getBlob()->getTensorDesc().getLayout() != NHWC &&
getParentEdgeAt(0)->getBlob()->getTensorDesc().getLayout() != NDHWC)
canUseOptimizedImpl = false;
for (size_t i = 0; i < getChildEdges().size(); i++) {
if (getChildEdgeAt(i)->getBlob()->getTensorDesc().getLayout() != NCHW &&
getChildEdgeAt(i)->getBlob()->getTensorDesc().getLayout() != NCDHW)
canUseOptimizedImpl = false;
}
}
void MKLDNNSplitNode::optimizedImpl(size_t MB) {
const int ndims = getParentEdgeAt(0)->getDims().ndims();
const size_t IC = getParentEdgeAt(0)->getDims()[1];
const size_t D = ndims == 5 ? getParentEdgeAt(0)->getDims()[ndims - 3] : 1;
const size_t H = getParentEdgeAt(0)->getDims()[ndims - 2];
const size_t W = getParentEdgeAt(0)->getDims()[ndims - 1];
auto srcBlob = getParentEdgeAt(0)->getBlob();
const auto *srcData = srcBlob->cbuffer().as<const float *>();
for (size_t i = 0, sIdx = 0; i < getChildEdges().size(); i++) {
auto dstBlob = getChildEdgeAt(i)->getBlob();
auto *dstData = dstBlob->buffer().as<float *>();
const size_t OC = getChildEdgeAt(i)->getDims()[1];
size_t innerSize = 1;
for (size_t j = axis; j < dstBlob->getTensorDesc().getDims().size(); j++) {
innerSize *= dstBlob->getTensorDesc().getDims()[j];
}
auto srcPtr = srcData + srcBlob->getTensorDesc().offset(sIdx);
parallel_for4d(MB, D, H, W, [&](size_t b, size_t d, size_t h, size_t w) {
for (size_t c = 0; c < OC; c++) {
size_t srcOff = b*D*H*W*IC + d*H*W*IC + h*W*IC + w*IC + c;
size_t dstOff = b*OC*D*H*W + c*D*H*W + d*H*W + h*W + w;
dstData[dstOff] = srcPtr[srcOff];
}
});
sIdx += innerSize;
}
if (!isOptimized())
prepareOptimizedParams();
}
void MKLDNNSplitNode::execute(mkldnn::stream strm) {
if (isOptimized())
return;
// FIXME: add more optimal implementation
MKLDNNDims par_dims = getParentEdgeAt(0)->getDims();
int MB = batchToProcess();
auto srcBlob = getParentEdgeAt(0)->getBlob();
const auto *srcData = srcBlob->cbuffer().as<const float *>();
uint8_t* srcData = getDataPtr(this->getParentEdgeAt(0)->getMemory());
size_t batch = this->getParentEdgeAt(0)->getDims()[0];
size_t outerSize = 1;
for (int i = 0; i < axis; i++) {
if (i == 0)
outerSize *= MB;
else
outerSize *= srcBlob->getTensorDesc().getDims()[i];
}
if (batch != MB)
optimizedParams.countStrides = optimizedParams.countStrides / batch * MB;
if (canUseOptimizedImpl) {
optimizedImpl(MB);
return;
}
parallel_for2d(this->getChildEdges().size(), optimizedParams.countStrides, [&](size_t i, size_t j) {
uint8_t* dstData = optimizedParams.dstMemPtrs[i];
size_t srcSize = getParentEdgeAt(0)->getMemory().GetSize();
size_t src_batch_off = srcBlob->getTensorDesc().offset(srcBlob->size() / outerSize)
- srcBlob->getTensorDesc().offset(0);
for (size_t i = 0, sIdx = 0; i < getChildEdges().size(); i++) {
auto dstBlob = getChildEdgeAt(i)->getBlob();
auto *dstData = dstBlob->buffer().as<float *>();
size_t innerSize = 1;
for (size_t j = axis; j < dstBlob->getTensorDesc().getDims().size(); j++) {
innerSize *= dstBlob->getTensorDesc().getDims()[j];
}
size_t dst_batch_off = dstBlob->getTensorDesc().offset(innerSize) - dstBlob->getTensorDesc().offset(0);
for (size_t dIdx = 0; dIdx < innerSize; dIdx++, sIdx++) {
for (unsigned b = 0; b < outerSize; b++) {
if (sIdx + b*src_batch_off >= srcSize)
THROW_IE_EXCEPTION << "Incorrect configuration of split layer " << getName() << "!";
dstData[b * dst_batch_off + dstBlob->getTensorDesc().offset(dIdx)] =
srcData[b * src_batch_off + srcBlob->getTensorDesc().offset(sIdx)];
}
}
}
cpu_memcpy(&dstData[j * optimizedParams.dataSize[i]],
&srcData[optimizedParams.srcDataOffsets[i] + j * optimizedParams.srcDataStride],
optimizedParams.dataSize[i]);
});
}
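// Illustrative sketch (not from this patch) of the optimized copy scheme used
// above, with assumed shapes: splitting a plain {2, 6} float tensor along
// axis 1 into {2, 4} and {2, 2}. The source is treated as countStrides rows,
// each holding one contiguous chunk per output; dataSize, srcDataOffsets and
// srcDataStride are byte quantities exactly as in prepareOptimizedParams().
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

int main() {
    std::vector<float> src(2 * 6);
    for (std::size_t i = 0; i < src.size(); ++i) src[i] = static_cast<float>(i);
    std::vector<float> dst0(2 * 4), dst1(2 * 2);

    const std::size_t countStrides = 2;                     // product of dims before the axis
    const std::size_t dataSize[] = {4 * sizeof(float),      // bytes per output per iteration
                                    2 * sizeof(float)};
    const std::size_t srcDataOffsets[] = {0, dataSize[0]};  // prefix sums of dataSize
    const std::size_t srcDataStride = dataSize[0] + dataSize[1];
    std::uint8_t* dstMemPtrs[] = {reinterpret_cast<std::uint8_t*>(dst0.data()),
                                  reinterpret_cast<std::uint8_t*>(dst1.data())};
    const std::uint8_t* srcData = reinterpret_cast<const std::uint8_t*>(src.data());

    for (std::size_t i = 0; i < 2; ++i)          // outputs (parallel_for2d in the node)
        for (std::size_t j = 0; j < countStrides; ++j)
            std::memcpy(dstMemPtrs[i] + j * dataSize[i],
                        srcData + srcDataOffsets[i] + j * srcDataStride,
                        dataSize[i]);
    // dst0 == {0,1,2,3, 6,7,8,9}, dst1 == {4,5, 10,11}
    return dst1[0] == 4.f ? 0 : 1;
}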
bool MKLDNNSplitNode::created() const {
return getType() == Split;
}
void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
if (implPriorities.size() > 0 && implPriorities[0] == impl_desc_type::ref) {
selectPrimitiveDescriptorByIndex(0);
return;
}
InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
if (precision != InferenceEngine::Precision::FP32)
precision = InferenceEngine::Precision::FP32;
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
precision = getCnnLayer()->outData[0]->getPrecision();
if (precision != InferenceEngine::Precision::FP32)
precision = InferenceEngine::Precision::FP32;
auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
bool hasUnknown = false;
std::vector<size_t> canSelectPrimitive;
for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
bool hasAny = true;
auto &primDescInfo = supportedPrimitiveDescriptors[i];
if (primDescInfo.getImplementationType() != impl_desc_type::unknown ||
primDescInfo.getConfig().outConfs[0].inPlace < 0)
continue;
hasUnknown = true;
for (auto iInfo : primDescInfo.getConfig().inConfs) {
if (iInfo.desc.getLayout() != InferenceEngine::Layout::ANY) {
hasAny = false;
break;
}
}
if (hasAny) {
for (auto oInfo : primDescInfo.getConfig().outConfs) {
if (oInfo.desc.getLayout() != InferenceEngine::Layout::ANY) {
hasAny = false;
break;
}
}
}
if (!hasAny) {
canSelectPrimitive.push_back(i);
}
}
bool canOptimize = false;
if (hasUnknown) {
canOptimize = true;
if (canSelectPrimitive.size() == 1) {
selectPrimitiveDescriptorByIndex(static_cast<int>(canSelectPrimitive[0]));
return;
}
}
std::map<mkldnn::memory::format, size_t> formatFrequency;
for (size_t i = 0; i < getParentEdges().size(); i++) {
auto parentEdge = getParentEdgeAt(i);
auto parent = parentEdge->getParent();
if (parent->getSelectedPrimitiveDescriptor() == nullptr)
continue;
int outputIndex = parentEdge->getOutputNum();
if (outputIndex < 0)
THROW_IE_EXCEPTION << "Cannot find index of output node";
if (outputIndex >= parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size())
outputIndex = 0;
auto outDesc = MKLDNNMemoryDesc(parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIndex].desc);
if (!outDesc)
continue;
if (formatFrequency.find(outDesc.getFormat()) != formatFrequency.end())
formatFrequency[outDesc.getFormat()] += 1;
else
formatFrequency[outDesc.getFormat()] = 1;
}
for (size_t i = 0; i < getChildEdges().size(); i++) {
auto childEdge = getChildEdgeAt(i);
auto child = childEdge->getChild();
if (child->getSelectedPrimitiveDescriptor() == nullptr)
continue;
int inputIndex = childEdge->getOutputNum();
if (inputIndex < 0)
THROW_IE_EXCEPTION << "Cannot find index of output node";
if (inputIndex >= child->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size())
inputIndex = 0;
auto outDesc = MKLDNNMemoryDesc(child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIndex].desc);
if (!outDesc)
continue;
if (formatFrequency.find(outDesc.getFormat()) != formatFrequency.end())
formatFrequency[outDesc.getFormat()] += 1;
else
formatFrequency[outDesc.getFormat()] = 1;
}
size_t maxCount = 0;
mkldnn::memory::format convertTo = MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims());
for (auto &it : formatFrequency) {
if (it.second > maxCount && !MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, it.first).blocksExtended()) {
maxCount = it.second;
convertTo = it.first;
}
}
// This logic is needed to cover cases when the Split node cannot be optimized out for a particular block size.
// In general it is significantly better to have additional reorders in the graph than to use the reference Split implementation.
if (convertTo == memory::nChw16c || convertTo == memory::nCdhw16c ||
convertTo == memory::nChw8c || convertTo == memory::nCdhw8c) {
int blockSize = convertTo == memory::nChw16c || convertTo == memory::nCdhw16c ? 16 : 8;
bool shouldDecreaseBlockSize = false;
for (auto& parentEdge : getParentEdges()) {
if (parentEdge.lock()->getDims()[1] % blockSize != 0)
shouldDecreaseBlockSize = true;
}
for (auto& childEdge : getChildEdges()) {
if (childEdge.lock()->getDims()[1] % blockSize != 0)
shouldDecreaseBlockSize = true;
}
if (shouldDecreaseBlockSize) {
int decreasedBlockSize = 8;
bool canDecreaseBlockSize = true;
for (auto &parentEdge : getParentEdges()) {
if (parentEdge.lock()->getDims()[1] % decreasedBlockSize != 0)
canDecreaseBlockSize = false;
}
for (auto &childEdge : getChildEdges()) {
if (childEdge.lock()->getDims()[1] % decreasedBlockSize != 0)
canDecreaseBlockSize = false;
}
if (canDecreaseBlockSize)
convertTo = getParentEdgeAt(0)->getDims().ndims() == 5 ? memory::nCdhw8c : memory::nChw8c;
else
convertTo = MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims());
}
}
if (canOptimize && MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, convertTo).blocksExtended())
canOptimize = false;
for (size_t i = 0; canOptimize && i < getChildEdges().size(); i++) {
if (MKLDNNMemoryDesc(getChildEdgeAt(i)->getDims(), outputDataType, convertTo).blocksExtended())
canOptimize = false;
}
if (canOptimize) {
for (auto supportedPdIndex : canSelectPrimitive) {
if (MKLDNNMemoryDesc(supportedPrimitiveDescriptors[supportedPdIndex].getConfig().inConfs[0].desc).getFormat() == convertTo) {
selectPrimitiveDescriptorByIndex(static_cast<int>(supportedPdIndex));
return;
}
}
}
for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
auto &primDescInfo = supportedPrimitiveDescriptors[i];
if (primDescInfo.getImplementationType() == impl_desc_type::unknown)
continue;
if (convertTo == MKLDNNMemoryDesc(supportedPrimitiveDescriptors[i].getConfig().outConfs[0].desc).getFormat()) {
size_t num = 0;
for (num = 0; num < getParentEdges().size(); num++) {
if (MKLDNNMemoryDesc(getParentEdgeAt(num)->getDims(), inputDataType, convertTo).blocksExtended())
break;
}
if (num == getParentEdges().size()) {
selectPrimitiveDescriptorByIndex(i);
return;
}
}
}
selectPrimitiveDescriptorByIndex(0);
}
bool MKLDNNSplitNode::isOptimized() {
return getSelectedPrimitiveDescriptor() && getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].inPlace >= 0;
}
@ -464,7 +276,7 @@ void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() {
auto selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set.";
THROW_ERROR << "Preferable primitive descriptor is not set.";
auto config = selected_pd->getConfig();
if (isInitConfig(config))
return;
@ -497,12 +309,11 @@ void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() {
}
const auto& cnnLayer = getCnnLayer();
if (!cnnLayer)
THROW_IE_EXCEPTION << "Cannot create Split layer " << getName() << " without CNNLayer!";
THROW_ERROR << "cannot be created without CNNLayer!";
if (config.outConfs.size() != outDims.size())
THROW_IE_EXCEPTION << "Invalid config for Split layer " << getName();
THROW_ERROR << "has invalid config";
size_t offset = 0;
for (size_t i = 0; i < cnnLayer->outData.size(); i++) {
size_t confNum = i;
config.outConfs[i].desc = InferenceEngine::TensorDesc(config.outConfs[i].desc.getPrecision(),
config.outConfs[i].desc.getDims(), {
config.outConfs[i].desc.getBlockingDesc().getBlockDims(),
@ -512,21 +323,119 @@ void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() {
config.inConfs[0].desc.getBlockingDesc().getStrides()
});
size_t axisSize = 1;
for (size_t j = axis; j < config.outConfs[confNum].desc.getBlockingDesc().getBlockDims().size(); j++) {
axisSize *= config.outConfs[confNum].desc.getBlockingDesc().getBlockDims()[j];
for (size_t j = axis; j < config.outConfs[i].desc.getBlockingDesc().getBlockDims().size(); j++) {
axisSize *= config.outConfs[i].desc.getBlockingDesc().getBlockDims()[j];
}
offset += axisSize;
}
initDescriptor(config);
}
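// Illustrative sketch (not from this patch), assuming an in-place split of
// {1, 24, 2, 5} along axis 1 into 8 and 16 channels: the loop above gives
// each output an offset equal to the number of elements owned by all previous
// outputs, so the views tile the parent's buffer back to back.
#include <cstddef>
#include <vector>

int main() {
    std::vector<std::vector<std::size_t>> outBlkDims = {{1, 8, 2, 5}, {1, 16, 2, 5}};
    const std::size_t axis = 1;
    std::size_t offset = 0;
    std::vector<std::size_t> outOffsets;
    for (const auto& blkDims : outBlkDims) {
        outOffsets.push_back(offset);
        std::size_t axisSize = 1;
        for (std::size_t j = axis; j < blkDims.size(); j++)
            axisSize *= blkDims[j];  // elements owned by this output
        offset += axisSize;
    }
    // outOffsets == {0, 80}: output 1 starts 8 * 2 * 5 = 80 elements in
    return outOffsets[1] == 80 ? 0 : 1;
}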
void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
if (implPriorities.size() > 0 && implPriorities[0] == impl_desc_type::ref) {
selectPrimitiveDescriptorByIndex(0);
return;
}
// check the descriptors and select the ones that have the same data format as the input
std::vector<size_t> canSelectPrimitive;
for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
auto parentEdge = getParentEdgeAt(0);
auto parentPtr = parentEdge->getParent();
auto parent_spd = parentPtr->getSelectedPrimitiveDescriptor();
if (parent_spd != nullptr && !parent_spd->getConfig().outConfs.empty()) {
int inNum = parentEdge->getInputNum();
if (inNum < 0 || inNum >= parent_spd->getConfig().outConfs.size()) {
inNum = 0;
}
if (MKLDNNExtensionUtils::initTensorsAreEqual(
getSupportedPrimitiveDescriptors()[i].getConfig().inConfs[0].desc,
parent_spd->getConfig().outConfs[inNum].desc)) {
canSelectPrimitive.push_back(i);
}
}
}
if (canSelectPrimitive.size() == 1) {
selectPrimitiveDescriptorByIndex(static_cast<int>(canSelectPrimitive[0]));
return;
}
// if there is more than one PD with a similar data layout, select the optimized one
for (auto indx : canSelectPrimitive) {
if (supportedPrimitiveDescriptors[indx].getImplementationType() == impl_desc_type::unknown) {
selectPrimitiveDescriptorByIndex(static_cast<int>(indx));
return;
}
}
// if there are no matching data layouts, select the first optimized implementation
for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
if (supportedPrimitiveDescriptors[i].getImplementationType() == impl_desc_type::unknown) {
selectPrimitiveDescriptorByIndex(static_cast<int>(i));
return;
}
}
selectPrimitiveDescriptorByIndex(0);
}
void MKLDNNSplitNode::setDynamicBatchLim(int lim) {
if (axis == 0)
THROW_IE_EXCEPTION << "Dynamic batch is not supported by split layer with axis == 0 parameter";
THROW_ERROR << "Dynamic batch is not supported by split layer with axis == 0 parameter";
dynBatchLim = lim;
if (prim) {
prim.setBatchLimit(batchToProcess(), getParentEdges().size(), getChildEdges().size());
}
}
void MKLDNNSplitNode::prepareOptimizedParams() {
const auto& inpTensorDesc = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc;
// find the axis position in the order list
const auto& order = inpTensorDesc.getBlockingDesc().getOrder();
unsigned axisOrderPos = UINT_MAX;
for (size_t i = 0; i < order.size(); ++i) {
if (order[i] == axis) {
axisOrderPos = i;
break;
}
}
if (UINT_MAX == axisOrderPos) {
THROW_ERROR << "Can't find the axis in the input tensor order list";
}
uint8_t srcDataSize = inpTensorDesc.getPrecision().size();
const auto& srcDims = inpTensorDesc.getBlockingDesc().getBlockDims();
int nDims = srcDims.size();
optimizedParams.countStrides = 1;
for (int i = 0; i < axisOrderPos; i++)
optimizedParams.countStrides *= srcDims[i];
optimizedParams.srcDataStride = 0;
optimizedParams.dataSize.resize(this->getChildEdges().size());
optimizedParams.dstMemPtrs.clear();
for (int i = 0; i < this->getChildEdges().size(); i++) {
if (uint8_t* dstData = getDataPtr(this->getChildEdgeAt(i)->getMemory())) {
optimizedParams.dstMemPtrs.push_back(dstData);
} else {
THROW_ERROR << "can't get child edge indx " << i << "data.";
}
optimizedParams.dataSize[i] = srcDataSize;
for (int j = axisOrderPos; j < nDims; j++)
optimizedParams.dataSize[i] *= this->getChildEdgeAt(i)->getDesc().getBlockingDesc().getBlockDims()[j];
optimizedParams.srcDataStride += optimizedParams.dataSize[i];
}
optimizedParams.srcDataOffsets.resize(this->getChildEdges().size());
optimizedParams.srcDataOffsets[0] = 0;
for (int i = 1; i < this->getChildEdges().size(); i++) {
optimizedParams.srcDataOffsets[i] = optimizedParams.srcDataOffsets[i - 1] + optimizedParams.dataSize[i - 1];
}
}
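// Illustrative sketch (not from this patch) of how the generic parameters
// above specialize for a per-channel (NHWC) input, assuming shape
// {1, 6, 2, 2} split into 3 + 3 channels: the axis lands at the last position
// of the order {0, 2, 3, 1}, so the copy degenerates to N*H*W iterations of
// small per-channel chunks.
#include <cstddef>
#include <vector>

int main() {
    std::vector<std::size_t> order = {0, 2, 3, 1};    // NHWC
    std::vector<std::size_t> blkDims = {1, 2, 2, 6};  // logical dims permuted by 'order'
    const std::size_t axis = 1;
    std::size_t axisOrderPos = 0;
    for (std::size_t i = 0; i < order.size(); ++i)
        if (order[i] == axis) { axisOrderPos = i; break; }  // -> 3
    std::size_t countStrides = 1;
    for (std::size_t i = 0; i < axisOrderPos; ++i)
        countStrides *= blkDims[i];  // 1 * 2 * 2 = 4 copy iterations
    // each iteration copies 3 contiguous channel values to each output
    return countStrides == 4 ? 0 : 1;
}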
REG_MKLDNN_PRIM_FOR(MKLDNNSplitNode, Split);

View File

@ -28,10 +28,17 @@ public:
void setDynamicBatchLim(int lim) override;
private:
void optimizedImpl(size_t MB);
void prepareOptimizedParams();
bool canUseOptimizedImpl = true;
size_t axis = 1;
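// Parameters precomputed by prepareOptimizedParams() for the optimized
// execute() path: per-output copy sizes in bytes, their offsets within a
// single source stride, raw destination pointers, the combined source
// stride, and the number of outer copy iterations.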
struct {
std::vector<size_t> dataSize;
std::vector<size_t> srcDataOffsets;
std::vector<uint8_t *> dstMemPtrs;
size_t srcDataStride;
size_t countStrides;
} optimizedParams;
};
} // namespace MKLDNNPlugin

View File

@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <shared_test_classes/single_layer/region_yolo.hpp>
#include "ngraph_functions/builders.hpp"
#include "test_utils/cpu_test_utils.hpp"

View File

@ -0,0 +1,239 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph_functions/builders.hpp"
#include "test_utils/cpu_test_utils.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
typedef std::tuple<
size_t, // Num splits
int64_t, // Axis
InferenceEngine::Precision, // Net precision
std::vector<size_t>, // Input shapes
std::vector<size_t>, // Used outputs indices
std::string, // Target device name
CPUSpecificParams
> splitCPUTestParams;
class SplitLayerCPUTest : public testing::WithParamInterface<splitCPUTestParams>,
virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
public:
static std::string getTestCaseName(testing::TestParamInfo<splitCPUTestParams> obj) {
size_t numSplits;
int64_t axis;
InferenceEngine::Precision netPrecision;
InferenceEngine::SizeVector inputShape, outIndices;
std::string targetDevice;
CPUSpecificParams cpuParams;
std::tie(numSplits, axis, netPrecision, inputShape, outIndices, targetDevice, cpuParams) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
result << "numSplits=" << numSplits << "_";
result << "axis=" << axis << "_";
if (!outIndices.empty()) {
result << "outIndices" << CommonTestUtils::vec2str(outIndices) << "_";
}
result << "netPRC=" << netPrecision.name() << "_";
result << "trgDev=" << targetDevice;
result << CPUTestsBase::getTestCaseName(cpuParams);
return result.str();
}
protected:
void SetUp() override {
SetRefMode(LayerTestsUtils::RefMode::CONSTANT_FOLDING);
size_t axis, numSplits;
std::vector<size_t> inputShape, outIndices;
InferenceEngine::Precision netPrecision;
CPUSpecificParams cpuParams;
std::tie(numSplits, axis, netPrecision, inputShape, outIndices, targetDevice, cpuParams) = this->GetParam();
inPrc = outPrc = netPrecision;
if (outIndices.empty()) {
for (int i = 0; i < numSplits; ++i) {
outIndices.push_back(i);
}
}
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
selectedType += std::string("_") + inPrc.name();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
auto paramOuts = ngraph::helpers::convert2OutputVector(
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
auto split = std::dynamic_pointer_cast<ngraph::opset5::Split>(ngraph::builder::makeSplit(paramOuts[0],
ngPrc, numSplits, axis));
ngraph::ResultVector results;
for (int i = 0; i < outIndices.size(); i++) {
results.push_back(std::make_shared<ngraph::opset5::Result>(split->output(outIndices[i])));
}
split->get_rt_info() = getCPUInfo();
function = std::make_shared<ngraph::Function>(results, params, "split");
}
};
TEST_P(SplitLayerCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
CheckCPUImpl(executableNetwork, "Split");
}
namespace {
const auto planar_4D_ref = CPUSpecificParams{{nchw}, {nchw}, {"ref"}, "ref"};
const auto planar_5D_ref = CPUSpecificParams{{ncdhw}, {ncdhw}, {"ref"}, "ref"};
const auto planar_4D = CPUSpecificParams{{nchw}, {nchw}, {}, "unknown"};
const auto planar_5D = CPUSpecificParams{{ncdhw}, {ncdhw}, {}, "unknown"};
const auto planarChannels_4D = CPUSpecificParams{{nhwc}, {nhwc}, {}, "ref"};
const auto planarChannels_5D = CPUSpecificParams{{ndhwc}, {ndhwc}, {}, "ref"};
const auto blocked8_4D = CPUSpecificParams{{nChw8c}, {nChw8c}, {}, "unknown"};
const auto blocked8_5D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {}, "unknown"};
const auto blocked8_4D_ref = CPUSpecificParams{{nChw8c}, {nChw8c}, {}, "ref"};
const auto blocked8_5D_ref = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {}, "ref"};
const auto blocked16_4D = CPUSpecificParams{{nChw16c}, {nChw16c}, {}, "unknown"};
const auto blocked16_5D = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {}, "unknown"};
const auto blocked16_4D_ref = CPUSpecificParams{{nChw16c}, {nChw16c}, {}, "ref"};
const auto blocked16_5D_ref = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {}, "ref"};
// List of precisions natively supported by mkldnn.
const std::vector<Precision> netPrecisions = {
Precision::I8,
Precision::I16,
Precision::I32,
Precision::FP32,
Precision::BF16
};
INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block8inPlace, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(3),
::testing::Values(0, 1),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({3, 24, 24, 9})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(planar_4D, planar_4D_ref, planarChannels_4D, blocked8_4D)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block8, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(3),
::testing::Values(2, 3),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({3, 24, 24, 9})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(planar_4D, planar_4D_ref, planarChannels_4D, blocked8_4D_ref)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block16inPlace, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(4),
::testing::Values(0, 1),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({4, 64, 32, 12})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(blocked16_4D)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block16, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(4),
::testing::Values(2, 3),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({4, 64, 32, 12})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(blocked16_4D_ref)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block8inPlace, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(3),
::testing::Values(0, 1),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({3, 24, 24, 9, 15})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(planar_5D, planar_5D_ref, planarChannels_5D, blocked8_5D)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block8, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(3),
::testing::Values(2, 3, 4),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({3, 24, 24, 9, 15})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(planar_5D, planar_5D_ref, planarChannels_5D, blocked8_5D_ref)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block16inPlace, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(4),
::testing::Values(0, 1),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({4, 64, 32, 12, 20})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(blocked16_5D)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block16, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(4),
::testing::Values(2, 3, 4),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({4, 64, 32, 12, 20})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(blocked16_5D_ref)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Split3D, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(7),
::testing::Values(0, 1, 2),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({14, 42, 21})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Split2D, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(2),
::testing::Values(0, 1),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({6, 12})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Split1D, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(5),
::testing::Values(0),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({10})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
SplitLayerCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions

View File

@ -230,171 +230,75 @@ INSTANTIATE_TEST_CASE_P(
split_test_params {
{1, 24, 2, 5},
{{1, 16, 2, 5}, {1, 8, 2, 5}},
1, 3, MKLDNNPlugin::impl_desc_type::unknown, {}, {
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(2, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(1).desc.getLayout());
},
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(2, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(1).desc.getLayout());
},
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(2, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().outConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().outConfs.at(1).desc.getLayout());
}
}
1, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{1, 20, 2, 5},
{{1, 13, 2, 5}, {1, 7, 2, 5}},
1, 2, MKLDNNPlugin::impl_desc_type::unknown, {}, {
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(2, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(1).desc.getLayout());
},
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(2, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(1).desc.getLayout());
}
}
},
split_test_params {
{1, 20, 2, 5},
{{1, 10, 2, 5}, {1, 10, 2, 5}},
1, 2, MKLDNNPlugin::impl_desc_type::unknown, {}, {
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(2, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(1).desc.getLayout());
},
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(2, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(1).desc.getLayout());
}
}
},
split_test_params {
{2, 20, 2, 5},
{{2, 10, 2, 5}, {2, 10, 2, 5}},
1, 2, MKLDNNPlugin::impl_desc_type::unknown, {}, {
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(2, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(1).desc.getLayout());
},
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(2, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(1).desc.getLayout());
}
}
},
split_test_params {
{1, 24, 2, 5},
{{1, 16, 2, 5}, {1, 8, 2, 5}},
1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{1, 20, 2, 5},
{{1, 13, 2, 5}, {1, 7, 2, 5}},
1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{1, 20, 2, 5},
{{1, 10, 2, 5}, {1, 10, 2, 5}},
1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{2, 20, 2, 5},
{{2, 10, 2, 5}, {2, 10, 2, 5}},
1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{2, 20, 2, 5},
{{2, 15, 2, 5}, {2, 5, 2, 5}},
1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{9, 11, 7, 5},
{{3, 11, 7, 5}, {6, 11, 7, 5}},
0, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
0, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{3, 11, 7, 5},
{{3, 11, 4, 5}, {3, 11, 3, 5}},
2, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
2, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{3, 11, 7, 5},
{{3, 11, 7, 1}, {3, 11, 7, 4}},
3, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
3, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{5, 6, 7, 15},
{{1, 6, 7, 15}, {2, 6, 7, 15}, {1, 6, 7, 15}, {1, 6, 7, 15}},
0, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
0, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{5, 6, 7, 15},
{{5, 1, 7, 15}, {5, 2, 7, 15}, {5, 1, 7, 15}, {5, 2, 7, 15}},
1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{5, 6, 7, 15},
{{5, 6, 3, 15}, {5, 6, 1, 15}, {5, 6, 2, 15}, {5, 6, 1, 15}},
2, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
2, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{5, 6, 7, 15},
{{5, 6, 7, 5}, {5, 6, 7, 3}, {5, 6, 7, 4}, {5, 6, 7, 3}},
3, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
3, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{5, 6, 7, 15},
{{5, 6, 7, 15}},
1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}},
split_test_params {
{1, 32, 16, 16, 16},
{{1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}},
1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}},
split_test_params {
{1, 32, 16, 16, 16},
{{1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}},
1, 3, MKLDNNPlugin::impl_desc_type::unknown, {}}));
1, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}},
split_test_params {
{1, 32, 16, 16, 16},
{{1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}},
1, 5, MKLDNNPlugin::impl_desc_type::unknown, {}}));
class MKLDNNGraphDynBatchSplitTests: public MKLDNNGraphSplitTests {
protected:
@ -544,32 +448,10 @@ INSTANTIATE_TEST_CASE_P(
// }
// }
// },
split_test_params {
{2, 20, 2, 5},
{{2, 10, 2, 5}, {2, 10, 2, 5}},
1, 2, MKLDNNPlugin::impl_desc_type::unknown, {}, {
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(2, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(1).desc.getLayout());
},
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(2, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(1).desc.getLayout());
}
}
},
split_test_params {
{2, 24, 2, 5},
{{2, 16, 2, 5}, {2, 8, 2, 5}},
1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
1, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
// TODO: rewrite to ngraph to have reshape functionality
// split_test_params {
@ -586,34 +468,34 @@ INSTANTIATE_TEST_CASE_P(
split_test_params {
{2, 20, 2, 5},
{{2, 10, 2, 5}, {2, 10, 2, 5}},
1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{2, 20, 2, 5},
{{2, 15, 2, 5}, {2, 5, 2, 5}},
1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{3, 11, 7, 5},
{{3, 11, 4, 5}, {3, 11, 3, 5}},
2, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
2, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{3, 11, 7, 5},
{{3, 11, 7, 1}, {3, 11, 7, 4}},
3, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
3, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{5, 6, 7, 15},
{{5, 1, 7, 15}, {5, 2, 7, 15}, {5, 1, 7, 15}, {5, 2, 7, 15}},
1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{5, 6, 7, 15},
{{5, 6, 3, 15}, {5, 6, 1, 15}, {5, 6, 2, 15}, {5, 6, 1, 15}},
2, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
2, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{5, 6, 7, 15},
{{5, 6, 7, 5}, {5, 6, 7, 3}, {5, 6, 7, 4}, {5, 6, 7, 3}},
3, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}}));
3, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}}));