[CPU] Split dynamism support (#8419)

Maxim Andronov 2021-11-16 14:13:02 +03:00 committed by GitHub
parent da0b8a84b4
commit 46fea0fe48
5 changed files with 984 additions and 290 deletions
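Summary: the guard that rejected dynamically shaped ops is removed from isSupportedOperation, and the shape-dependent setup moves out of createPrimitive() into the needPrepareParams()/prepareParams() pair, so Split can be re-prepared per inference. A minimal sketch of the contract this follows (method names are from the diff; the driver loop is an assumption, not plugin code):

    struct DynamicShapeNode {
        virtual bool needPrepareParams() const = 0; // true when input shapes changed since the last run
        virtual void prepareParams() = 0;           // rebuild shape-dependent state (executor, dst pointers)
        virtual void execute() = 0;                 // shape-agnostic hot path

        void runOneInference() {                    // assumed framework driver
            if (needPrepareParams())
                prepareParams();
            execute();
        }
    };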


@@ -21,11 +21,6 @@ using namespace InferenceEngine;
bool MKLDNNSplitNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
if (!MKLDNNPlugin::one_of(op->get_type_info(), ngraph::op::v1::Split::get_type_info_static(), ngraph::op::v1::VariadicSplit::get_type_info_static())) {
errorMessage = "Only opset1 Split and VariadicSplit operations are supported";
return false;
@@ -61,12 +56,13 @@ MKLDNNSplitNode::MKLDNNSplitNode(const std::shared_ptr<ngraph::Node>& op, const
INPUTS_NUM = 3;
}
const auto inRank = getInputShapeAtPort(0).getRank();
auto axisOp = ngraph::as_type_ptr<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
auto axis = axisOp->cast_vector<int64_t>()[0];
if (axis < 0) {
axis += op->get_input_shape(0).size();
axis += inRank;
}
if (axis >= op->get_input_shape(0).size()) {
if (axis >= inRank) {
THROW_ERROR << "Split node with name '" << op->get_friendly_name() << "' has invalid value of axis parameter: " << axis;
}
this->axis = axis;
@@ -81,26 +77,21 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
auto srcShape = getInputShapeAtPort(0);
auto axis_size = 0;
auto dstFirstDims = getOutputShapeAtPort(0).getStaticDims();
const auto &srcShape = getInputShapeAtPort(0);
const auto &dstFirstDims = getOutputShapeAtPort(0).getDims();
for (size_t i = 0; i < outputShapes.size(); i++) {
auto o_Dims = outputShapes[i].getStaticDims();
const auto &o_Dims = outputShapes[i].getDims();
if (dstFirstDims.size() != o_Dims.size()) {
THROW_ERROR << "only supports output blobs with equal number of dimensions";
}
axis_size += o_Dims[axis];
for (size_t j = 0; j < dstFirstDims.size(); j++) {
if (j == axis)
continue;
if (o_Dims[j] != dstFirstDims[j])
if (!dimsEqualWeak(o_Dims[j], dstFirstDims[j]))
THROW_ERROR << "has incorrect output dimensions";
}
}
dstFirstDims[axis] = axis_size;
if (std::accumulate(dstFirstDims.begin(), dstFirstDims.end(), 1, std::multiplies<size_t>()) != srcShape.getElementsCount())
THROW_ERROR << "sizes of input blob and sum of output blobs are not equal.";
InferenceEngine::Precision inpPrecision = getOriginalInputPrecisionAtPort(0);
const auto axisPrecision = getOriginalInputPrecisionAtPort(1);
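Editorial note: the per-dimension check above switches from operator== to dimsEqualWeak so that partially defined shapes still validate. A hedged sketch of the semantics this assumes (the real helper is defined elsewhere in the plugin):

    inline bool dimsEqualWeak(Dim lhs, Dim rhs) {
        // An undefined (dynamic) dimension is compatible with anything;
        // two defined dimensions must match exactly.
        return lhs == Shape::UNDEFINED_DIM || rhs == Shape::UNDEFINED_DIM || lhs == rhs;
    }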
@@ -111,19 +102,20 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
dynBatchSupport = false;
}
//Set plain and tailC formats
// Set plain and tailC formats
std::vector<LayoutType> tdCreatorTypes{ LayoutType::ncsp, LayoutType::nspc };
//Support channel blocked format
// Support channel blocked format
if (srcShape.getRank() > 2) {
for (auto item : { std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c) }) {
SizeVector blkDims = srcShape.getStaticDims();
if (blkDims[channelsPos] % item.first)
const auto &blkDims = srcShape.getDims();
if (blkDims[channelsPos] == Shape::UNDEFINED_DIM || blkDims[channelsPos] % item.first != 0)
continue;
bool blocked = true;
for (size_t i = 0; i < outputShapes.size(); i++) {
if (outputShapes[i].getStaticDims()[channelsPos] % item.first) {
const auto &outBlkDims = getOutputShapeAtPort(i).getDims();
if (outBlkDims[channelsPos] == Shape::UNDEFINED_DIM || outBlkDims[channelsPos] % item.first != 0) {
blocked = false;
break;
}
@@ -148,9 +140,9 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
config.inConfs[0].desc = std::make_shared<CpuBlockedMemoryDesc>(itr->second->createDesc(inpPrecision, srcShape));
config.inConfs[1].inPlace = -1;
config.inConfs[1].constant = true;
config.inConfs[1].desc = std::make_shared<CpuBlockedMemoryDesc>(axisPrecision, Shape(SizeVector {1}));
config.inConfs[1].desc = std::make_shared<CpuBlockedMemoryDesc>(axisPrecision, Shape(VectorDims{1}));
if (INPUTS_NUM == 3) {
config.inConfs[2].desc = std::make_shared<CpuBlockedMemoryDesc>(axisPrecision, Shape(SizeVector{outputShapes.size()}));
config.inConfs[2].desc = std::make_shared<CpuBlockedMemoryDesc>(axisPrecision, Shape(VectorDims{outputShapes.size()}));
config.inConfs[2].constant = true;
}
@@ -174,38 +166,41 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
}
// Optimized inplace case
for (auto refPdIndex : pdIndexesToReuse) {
const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig();
auto config = refConfig;
const auto inBlockingDesc = refConfig.inConfs[0].desc->as<CpuBlockedMemoryDesc>();
const auto& order = inBlockingDesc->getOrder();
const auto& blkDims = inBlockingDesc->getBlockDims();
auto numOfDim = blkDims.size();
// TODO [DS]: inplace
if (!isDynamicNode()) {
for (auto refPdIndex : pdIndexesToReuse) {
const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig();
auto config = refConfig;
const auto inBlockingDesc = refConfig.inConfs[0].desc->as<CpuBlockedMemoryDesc>();
const auto& order = inBlockingDesc->getOrder();
const auto& blkDims = inBlockingDesc->getBlockDims();
auto numOfDim = blkDims.size();
SizeVector offsets(numOfDim, 0lu);
SizeVector strides(numOfDim);
strides.back() = 1lu;
size_t offset = (std::numeric_limits<size_t>::max)();
SizeVector offsets(numOfDim, 0lu);
SizeVector strides(numOfDim);
strides.back() = 1lu;
size_t offset = (std::numeric_limits<size_t>::max)();
for (size_t i = 2; i <= numOfDim; i++) {
if (numOfDim - i < axis) {
strides[numOfDim - i] = (std::numeric_limits<size_t>::max)();
} else {
strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1];
for (size_t i = 2; i <= numOfDim; i++) {
if (numOfDim - i < axis) {
strides[numOfDim - i] = (std::numeric_limits<size_t>::max)();
} else {
strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1];
}
}
config.inConfs[0].desc = std::make_shared<CpuBlockedMemoryDesc>(inpPrecision, srcShape, blkDims, order, offset, offsets, strides);
for (size_t i = 0; i < outputShapes.size(); i++) {
auto outBlockingDesc = refConfig.outConfs[i].desc->as<CpuBlockedMemoryDesc>();
const auto& outBlkDims = outBlockingDesc->getBlockDims();
const auto& dims = outBlockingDesc->getShape().getStaticDims();
config.outConfs[i].inPlace = 0;
config.outConfs[i].desc = std::make_shared<CpuBlockedMemoryDesc>(outPrecision, Shape(dims), outBlkDims, order, offset, offsets, strides);
}
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);
}
config.inConfs[0].desc = std::make_shared<CpuBlockedMemoryDesc>(inpPrecision, srcShape, blkDims, order, offset, offsets, strides);
for (size_t i = 0; i < outputShapes.size(); i++) {
auto outBlockingDesc = refConfig.outConfs[i].desc->as<CpuBlockedMemoryDesc>();
const auto& outBlkDims = outBlockingDesc->getBlockDims();
const auto& dims = outBlockingDesc->getShape().getStaticDims();
config.outConfs[i].inPlace = 0;
config.outConfs[i].desc = std::make_shared<CpuBlockedMemoryDesc>(outPrecision, Shape(dims), outBlkDims, order, offset, offsets, strides);
}
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);
}
// Special nspc -> ncsp case when splitting channels
@@ -219,9 +214,9 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
config.inConfs[0].desc = creatorsMap.at(LayoutType::nspc)->createSharedDesc(inpPrecision, srcShape);
config.inConfs[1].inPlace = -1;
config.inConfs[1].constant = true;
config.inConfs[1].desc = std::make_shared<CpuBlockedMemoryDesc>(axisPrecision, Shape(SizeVector{1}));
config.inConfs[1].desc = std::make_shared<CpuBlockedMemoryDesc>(axisPrecision, Shape(VectorDims{1}));
if (INPUTS_NUM == 3) {
config.inConfs[2].desc = std::make_shared<CpuBlockedMemoryDesc>(axisPrecision, Shape(SizeVector{outputShapes.size()}));
config.inConfs[2].desc = std::make_shared<CpuBlockedMemoryDesc>(axisPrecision, Shape(VectorDims{outputShapes.size()}));
config.inConfs[2].constant = true;
}
config.outConfs.resize(outputShapes.size());
@@ -235,63 +230,75 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNSplitNode::createPrimitive() {
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
THROW_ERROR << "Input memory has not been allocated.";
for (size_t i = 0; i < getChildEdges().size(); i++) {
if (!getChildEdgeAt(i)->getMemoryPtr() || !getChildEdgeAt(i)->getMemory().GetPrimitivePtr())
THROW_ERROR << "Destination memory has not been allocated.";
bool MKLDNNSplitNode::needPrepareParams() const {
if (isOptimized()) {
return false;
}
if (getSelectedPrimitiveDescriptor() == nullptr)
THROW_ERROR << "Preferable primitive descriptor is not set.";
return MKLDNNNode::inputShapesModified();
}
auto& memDesc = getParentEdgeAt(0)->getMemoryPtr()->getDesc();
void MKLDNNSplitNode::prepareParams() {
const auto &srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr()) {
THROW_ERROR << "has not allocated input memory";
}
canUseOptimizedNspc2Ncsp = false;
if (axis == 1 && one_of(memDesc.getShape().getRank(), 4, 5) && memDesc.hasLayoutType(LayoutType::nspc)) {
canUseOptimizedNspc2Ncsp = true;
for (size_t i = 0; i < getChildEdges().size(); i++) {
auto& childMemDesc = getChildEdgeAt(i)->getMemoryPtr()->getDesc();
if (!childMemDesc.hasLayoutType(LayoutType::ncsp))
canUseOptimizedNspc2Ncsp = false;
dstMemPtrs.clear();
std::vector<BlockedMemoryDescCPtr> outDescs;
for (size_t i = 0; i < outputShapes.size(); ++i) {
const auto &outMemPtr = this->getChildEdgesAtPort(i)[0]->getMemoryPtr();
if (!outMemPtr || !outMemPtr->GetPrimitivePtr()) {
THROW_ERROR << "has not allocated destination memory";
}
if (uint8_t* dstData = reinterpret_cast<uint8_t*>(outMemPtr->GetPtr())) {
dstMemPtrs.push_back(dstData);
} else {
THROW_ERROR << "can't get child edge indx " << i << "data.";
}
if (!canUseOptimizedNspc2Ncsp) {
outDescs.push_back(outMemPtr->GetDescWithType<BlockedMemoryDesc>());
}
}
if (!isOptimized()) {
initializeDstMemPtrs();
if (!canUseOptimizedNspc2Ncsp)
prepareOptimizedParams();
if (!canUseOptimizedNspc2Ncsp) {
const auto inDesc = srcMemPtr->GetDescWithType<BlockedMemoryDesc>();
execPtr = std::make_shared<SplitOptimizedExecutor>(inDesc, outDescs, axis);
}
}
void MKLDNNSplitNode::createPrimitive() {
if (getSelectedPrimitiveDescriptor() == nullptr)
THROW_ERROR << "Preferable primitive descriptor is not set.";
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
void MKLDNNSplitNode::execute(mkldnn::stream strm) {
if (isOptimized())
if (isOptimized()) {
return;
}
if (dstMemPtrs.empty())
THROW_ERROR << "Output data pointers have not been initialized.";
int MB = batchToProcess();
const auto &srcMem = getParentEdgesAtPort(0)[0]->getMemory();
size_t batch = srcMem.getStaticDims()[0];
Dim MB = isDynamicNode() ? batch : batchToProcess();
if (canUseOptimizedNspc2Ncsp) {
optimizedNspc2Ncsp(MB);
return;
}
uint8_t* srcData = reinterpret_cast<uint8_t*>(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
size_t batch = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()[0];
if (batch != MB)
optimizedParams.countStrides = optimizedParams.countStrides / batch * MB;
parallel_for2d(dstMemPtrs.size(), optimizedParams.countStrides, [&](size_t i, size_t j) {
uint8_t* dstData = dstMemPtrs[i];
cpu_memcpy(&dstData[j * optimizedParams.dataSize[i]],
&srcData[optimizedParams.srcDataOffsets[i] + j * optimizedParams.srcDataStride],
optimizedParams.dataSize[i]);
});
uint8_t* srcData = reinterpret_cast<uint8_t*>(srcMem.GetPtr());
IE_ASSERT(execPtr != nullptr);
execPtr->exec(srcData, dstMemPtrs, batch, MB);
}
bool MKLDNNSplitNode::created() const {
@@ -303,61 +310,70 @@ bool MKLDNNSplitNode::isOptimized() const {
}
void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() {
if (!isOptimized()) {
MKLDNNNode::initOptimalPrimitiveDescriptor();
return;
}
auto selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
THROW_ERROR << "Preferable primitive descriptor is not set.";
auto config = selected_pd->getConfig();
if (isConfigDefined(config))
return;
for (size_t i = 0; i < config.inConfs.size(); i++) {
if (config.inConfs[i].desc->isDefined())
continue;
if (!isOptimized()) {
MKLDNNNode::initOptimalPrimitiveDescriptor();
} else if (!isConfigDefined(config)) {
for (size_t i = 0; i < config.inConfs.size(); i++) {
if (config.inConfs[i].desc->isDefined())
continue;
int num = getParentEdgeAt(i)->getOutputNum();
if (getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()) {
if (num >= 0) {
const auto& parentConfig = getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num];
if (!parentConfig.desc->isDefined() && parentConfig.inPlace >= 0)
getParentEdgeAt(i)->getParent()->initOptimalPrimitiveDescriptor();
if (parentConfig.desc->isDefined() && parentConfig.desc->isCompatible(*config.inConfs[i].desc)) {
config.inConfs[i].desc = parentConfig.desc;
continue;
int num = getParentEdgeAt(i)->getOutputNum();
if (getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()) {
if (num >= 0) {
const auto& parentConfig = getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num];
if (!parentConfig.desc->isDefined() && parentConfig.inPlace >= 0)
getParentEdgeAt(i)->getParent()->initOptimalPrimitiveDescriptor();
if (parentConfig.desc->isDefined() && parentConfig.desc->isCompatible(*config.inConfs[i].desc)) {
config.inConfs[i].desc = parentConfig.desc;
continue;
}
}
}
// reset undefined offsets
config.inConfs[i].desc = config.inConfs[i].desc->as<BlockedMemoryDesc>()->cloneWithDefaultStridesAndOffset();
}
if (config.outConfs.size() != outputShapes.size())
THROW_ERROR << "has invalid config";
// reset undefined offsets
config.inConfs[i].desc = config.inConfs[i].desc->as<BlockedMemoryDesc>()->cloneWithDefaultStridesAndOffset();
}
if (config.outConfs.size() != outputShapes.size())
THROW_ERROR << "has invalid config";
auto firstInBlockingDesc = config.inConfs[0].desc->as<BlockedMemoryDesc>();
size_t offset = 0;
for (size_t i = 0; i < outputShapes.size(); i++) {
auto oldDesc = config.outConfs[i].desc;
auto outBlockingDesc = oldDesc->as<BlockedMemoryDesc>();
config.outConfs[i].desc = std::make_shared<CpuBlockedMemoryDesc>(outBlockingDesc->getPrecision(),
outBlockingDesc->getShape(),
outBlockingDesc->getBlockDims(),
outBlockingDesc->getOrder(),
firstInBlockingDesc->getOffsetPadding() + offset,
firstInBlockingDesc->getOffsetPaddingToData(),
firstInBlockingDesc->getStrides());
auto firstInBlockingDesc = config.inConfs[0].desc->as<BlockedMemoryDesc>();
size_t offset = 0;
for (size_t i = 0; i < outputShapes.size(); i++) {
auto oldDesc = config.outConfs[i].desc;
auto outBlockingDesc = oldDesc->as<BlockedMemoryDesc>();
config.outConfs[i].desc = std::make_shared<CpuBlockedMemoryDesc>(outBlockingDesc->getPrecision(),
outBlockingDesc->getShape(),
outBlockingDesc->getBlockDims(),
outBlockingDesc->getOrder(),
firstInBlockingDesc->getOffsetPadding() + offset,
firstInBlockingDesc->getOffsetPaddingToData(),
firstInBlockingDesc->getStrides());
size_t axisSize = 1;
for (size_t j = axis; j < outBlockingDesc->getBlockDims().size(); j++) {
axisSize *= outBlockingDesc->getBlockDims()[j];
size_t axisSize = 1;
for (size_t j = axis; j < outBlockingDesc->getBlockDims().size(); j++) {
axisSize *= outBlockingDesc->getBlockDims()[j];
}
offset += axisSize;
}
initDescriptor(config);
}
config = selected_pd->getConfig();
canUseOptimizedNspc2Ncsp = false;
IE_ASSERT(config.inConfs.size() > 0);
const auto inConfDesc = config.inConfs[0].desc;
if (axis == 1 && one_of(inConfDesc->getShape().getRank(), 4, 5) && inConfDesc->hasLayoutType(LayoutType::nspc)) {
canUseOptimizedNspc2Ncsp = true;
for (size_t i = 0; i < config.outConfs.size(); i++) {
if (!config.outConfs[i].desc->hasLayoutType(LayoutType::ncsp))
canUseOptimizedNspc2Ncsp = false;
}
offset += axisSize;
}
initDescriptor(config);
}
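A note on the flag computed above: the optimized path applies only when the input is channels-last (nspc), every output is plain (ncsp), and the split axis is the channel axis. In that case optimizedNspc2Ncsp() de-interleaves channels in a single pass instead of going through the generic executor; roughly (a simplified 4D sketch, not the actual loop):

    // For batch n, spatial point s, and output i owning channels [c0, c0 + Ci):
    //   dst_i[n][c][s] = src[n][s][c0 + c];   // NHWC source, NCHW destinations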
void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
@@ -375,8 +391,7 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
}
}
//check the descriptors and select the ones that have the same data format as the input
// check the descriptors and select the ones that have the same data format as the input
std::vector<size_t> canSelectPrimitive;
for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
auto parentEdge = getParentEdgeAt(0);
@@ -397,6 +412,7 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
selectPrimitiveDescriptorByIndex(static_cast<int>(canSelectPrimitive[0]));
return;
}
// if there is more than one PD with similar data layouts - select the optimized one
for (auto indx : canSelectPrimitive) {
if (supportedPrimitiveDescriptors[indx].getImplementationType() == impl_desc_type::unknown) {
@@ -463,55 +479,6 @@ void MKLDNNSplitNode::setDynamicBatchLim(int lim) {
dynBatchLim = lim;
}
void MKLDNNSplitNode::prepareOptimizedParams() {
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
IE_THROW() << "CPU Split node with name '" << getName() << "' doesn't have primitive descriptors.";
const auto inpTensorDesc = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
const auto outputPortsCount = outputShapes.size();
//find axis order position
const auto& order = inpTensorDesc->getOrder();
unsigned axisOrderPos = std::numeric_limits<unsigned>::max();
for (size_t i = 0; i < order.size(); ++i) {
if (order[i] == axis) {
axisOrderPos = i;
break;
}
}
if (std::numeric_limits<unsigned>::max() == axisOrderPos) {
THROW_ERROR << "Can't find the axis in the input tensor order list";
}
uint8_t srcDataSize = inpTensorDesc->getPrecision().size();
const auto& srcDims = inpTensorDesc->getBlockDims();
const auto getRank = srcDims.size();
optimizedParams.countStrides = 1;
for (int i = 0; i < axisOrderPos; i++)
optimizedParams.countStrides *= srcDims[i];
optimizedParams.srcDataStride = 0;
optimizedParams.dataSize.resize(outputPortsCount);
for (size_t i = 0; i < outputPortsCount; i++) {
auto outputEdge = this->getChildEdgesAtPort(i).front();
optimizedParams.dataSize[i] = srcDataSize;
auto desc = outputEdge->getMemory().getDesc().as<CpuBlockedMemoryDesc>();
for (size_t j = axisOrderPos; j < getRank; j++)
optimizedParams.dataSize[i] *= desc->getBlockDims()[j];
optimizedParams.srcDataStride += optimizedParams.dataSize[i];
}
optimizedParams.srcDataOffsets.resize(outputPortsCount);
optimizedParams.srcDataOffsets[0] = 0;
for (size_t i = 1; i < outputPortsCount; i++) {
optimizedParams.srcDataOffsets[i] = optimizedParams.srcDataOffsets[i - 1] + optimizedParams.dataSize[i - 1];
}
}
void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) {
auto parentEdge = getParentEdgeAt(0);
const int rank = parentEdge->getMemory().GetShape().getRank();
@@ -534,7 +501,7 @@ void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) {
auto dstData = dstMemPtrs[i];
size_t innerSize = 1;
auto dims = outputShapes[i].getStaticDims();
auto dims = getChildEdgesAtPort(i)[0]->getMemory().getStaticDims();
for (size_t j = axis; j < dims.size(); j++) {
innerSize *= dims[j];
@@ -558,17 +525,62 @@ void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) {
}
}
void MKLDNNSplitNode::initializeDstMemPtrs() {
dstMemPtrs.clear();
for (size_t i = 0; i < outputShapes.size(); ++i) {
auto outputEdges = this->getChildEdgesAtPort(i);
if (uint8_t* dstData = reinterpret_cast<uint8_t*>(outputEdges.front()->getMemoryPtr()->GetPtr())) {
dstMemPtrs.push_back(dstData);
} else {
THROW_ERROR << "can't get child edge indx " << i << "data.";
MKLDNNSplitNode::SplitOptimizedExecutor::SplitOptimizedExecutor(BlockedMemoryDescCPtr inDesc, const std::vector<BlockedMemoryDescCPtr> &outDescs,
const size_t axis) {
// find axis order position
const auto& order = inDesc->getOrder();
unsigned axisOrderPos = std::numeric_limits<unsigned>::max();
for (size_t i = 0; i < order.size(); ++i) {
if (order[i] == axis) {
axisOrderPos = i;
break;
}
}
if (std::numeric_limits<unsigned>::max() == axisOrderPos) {
IE_THROW() << "Can't create split executor, because can't find the axis in the input tensor order list";
}
const auto outputPortsCount = outDescs.size();
uint8_t srcDataSize = inDesc->getPrecision().size();
const auto& srcDims = inDesc->getBlockDims();
const auto getRank = srcDims.size();
countStrides = 1;
for (int i = 0; i < axisOrderPos; i++)
countStrides *= srcDims[i];
srcDataStride = 0;
dataSize.resize(outputPortsCount);
for (size_t i = 0; i < outputPortsCount; i++) {
dataSize[i] = srcDataSize;
for (size_t j = axisOrderPos; j < getRank; j++)
dataSize[i] *= outDescs[i]->getBlockDims()[j];
srcDataStride += dataSize[i];
}
srcDataOffsets.resize(outputPortsCount);
srcDataOffsets[0] = 0;
for (size_t i = 1; i < outputPortsCount; i++) {
srcDataOffsets[i] = srcDataOffsets[i - 1] + dataSize[i - 1];
}
}
void MKLDNNSplitNode::SplitOptimizedExecutor::exec(const uint8_t* srcData, const std::vector<uint8_t*> &dstMemPtrs,
const Dim origBatch, const Dim perInferBatch) {
size_t execCountStrides = countStrides;
if (origBatch != perInferBatch)
execCountStrides = execCountStrides / origBatch * perInferBatch;
parallel_for2d(dstMemPtrs.size(), execCountStrides, [&](size_t i, size_t j) {
uint8_t* dstData = dstMemPtrs[i];
cpu_memcpy(&dstData[j * dataSize[i]],
&srcData[srcDataOffsets[i] + j * srcDataStride],
dataSize[i]);
});
}
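For intuition, a worked example of the values the executor caches (illustrative numbers, f32 planar NCHW, not taken from the tests):

    // Input {2, 16, 4, 4}, axis = 1, two outputs {2, 8, 4, 4}, f32 (4 bytes):
    //   axisOrderPos   = 1                     // position of the axis in the plain order {0, 1, 2, 3}
    //   countStrides   = 2                     // product of dims before the axis (here, the batch)
    //   dataSize[i]    = 4 * 8 * 4 * 4 = 512   // bytes one output receives per outer stride
    //   srcDataStride  = 512 + 512 = 1024      // bytes the source advances per outer stride
    //   srcDataOffsets = {0, 512}
    // exec() then issues dstMemPtrs.size() * countStrides = 4 copies of 512 bytes each.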
REG_MKLDNN_PRIM_FOR(MKLDNNSplitNode, Split);


@@ -30,9 +30,31 @@ public:
return !isOptimized();
}
bool needPrepareParams() const override;
void prepareParams() override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
private:
void prepareOptimizedParams();
void initializeDstMemPtrs();
struct SplitExecutor {
virtual void exec(const uint8_t* srcData, const std::vector<uint8_t*> &dstMemPtrs,
const Dim origBatch, const Dim perInferBatch) = 0;
virtual ~SplitExecutor() = default;
};
std::shared_ptr<SplitExecutor> execPtr = nullptr;
struct SplitOptimizedExecutor : public SplitExecutor {
public:
SplitOptimizedExecutor(BlockedMemoryDescCPtr inDesc, const std::vector<BlockedMemoryDescCPtr> &outDescs, const size_t axis);
void exec(const uint8_t* srcData, const std::vector<uint8_t*> &dstMemPtrs,
const Dim origBatch, const Dim perInferBatch) override;
private:
std::vector<size_t> dataSize;
std::vector<size_t> srcDataOffsets;
size_t srcDataStride;
size_t countStrides;
};
void optimizedNspc2Ncsp(size_t MB);
bool canUseOptimizedNspc2Ncsp;
@@ -40,13 +62,6 @@ private:
size_t axis = 1;
std::vector<uint8_t*> dstMemPtrs;
struct {
std::vector<size_t> dataSize;
std::vector<size_t> srcDataOffsets;
size_t srcDataStride;
size_t countStrides;
} optimizedParams;
size_t INPUTS_NUM = 2;
};


@@ -39,7 +39,7 @@ std::vector<std::string> disabledTestPatterns() {
// TODO: Issue 43417 sporadic issue, looks like an issue in test, reproducible only on Windows platform
R"(.*decomposition1_batch=5_hidden_size=10_input_size=30_.*tanh.relu.*_clip=0_linear_before_reset=1.*_targetDevice=CPU_.*)",
// Skip platforms that do not support BF16 (i.e. sse, avx, avx2)
R"(.*BF16.*(jit_avx(?!5)|jit_sse|ref).*)",
R"(.*(BF|bf)16.*(jit_avx(?!5)|jit_sse|ref).*)",
// TODO: Incorrect blob sizes for node BinaryConvolution_X
R"(.*BinaryConvolutionLayerTest.*)",
R"(.*ClampLayerTest.*netPrc=(I64|I32).*)",
@@ -165,7 +165,7 @@ std::vector<std::string> disabledTestPatterns() {
if (!InferenceEngine::with_cpu_x86_avx512_core()) {
// on platforms which do not support bfloat16, we are disabling bf16 tests since there are no bf16 primitives,
// tests are useless on such platforms
retVector.emplace_back(R"(.*BF16.*)");
retVector.emplace_back(R"(.*(BF|bf)16.*)");
retVector.emplace_back(R"(.*bfloat16.*)");
}


@@ -2,56 +2,63 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "ngraph_functions/builders.hpp"
#include "test_utils/cpu_test_utils.hpp"
using namespace InferenceEngine;
using namespace ov::test;
using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
typedef std::tuple<
size_t, // Num splits
int64_t, // Axis
InferenceEngine::Precision, // Net precision
std::vector<size_t>, // Input shapes
std::vector<size_t>, // Used outputs indices
std::string, // Target device name
size_t, // Num splits
int64_t, // Axis
ElementType, // Net precision
InputShape, // Input shapes
std::vector<size_t>, // Used outputs indices
CPUSpecificParams
> splitCPUTestParams;
class SplitLayerCPUTest : public testing::WithParamInterface<splitCPUTestParams>,
virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
virtual public SubgraphBaseTest, public CPUTestsBase {
public:
static std::string getTestCaseName(testing::TestParamInfo<splitCPUTestParams> obj) {
size_t numSplits;
int64_t axis;
InferenceEngine::Precision netPrecision;
InferenceEngine::SizeVector inputShape, outIndices;
std::string targetDevice;
ElementType netPrecision;
InputShape inputShapes;
InferenceEngine::SizeVector outIndices;
CPUSpecificParams cpuParams;
std::tie(numSplits, axis, netPrecision, inputShape, outIndices, targetDevice, cpuParams) = obj.param;
std::tie(numSplits, axis, netPrecision, inputShapes, outIndices, cpuParams) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
result << "IS=";
result << CommonTestUtils::partialShape2str({inputShapes.first}) << "_";
result << "TS=";
for (const auto& shape : inputShapes.second) {
result << CommonTestUtils::vec2str(shape) << "_";
}
result << "numSplits=" << numSplits << "_";
result << "axis=" << axis << "_";
if (!outIndices.empty()) {
result << "outIndices" << CommonTestUtils::vec2str(outIndices) << "_";
}
result << "netPRC=" << netPrecision.name() << "_";
result << "trgDev=" << targetDevice;
result << "netPRC=" << netPrecision << "_";
result << CPUTestsBase::getTestCaseName(cpuParams);
return result.str();
}
protected:
void SetUp() override {
targetDevice = CommonTestUtils::DEVICE_CPU;
size_t axis, numSplits;
std::vector<size_t> inputShape, outIndices;
InferenceEngine::Precision netPrecision;
ElementType netPrecision;
InputShape inputShapes;
InferenceEngine::SizeVector outIndices;
CPUSpecificParams cpuParams;
std::tie(numSplits, axis, netPrecision, inputShape, outIndices, targetDevice, cpuParams) = this->GetParam();
inPrc = outPrc = netPrecision;
std::tie(numSplits, axis, netPrecision, inputShapes, outIndices, cpuParams) = this->GetParam();
if (outIndices.empty()) {
for (int i = 0; i < numSplits; ++i) {
outIndices.push_back(i);
@@ -59,14 +66,15 @@ protected:
}
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
selectedType += std::string("_") + inPrc.name();
selectedType += std::string("_") + InferenceEngine::details::convertPrecision(netPrecision).name();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
init_input_shapes({inputShapes});
auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes);
auto paramOuts = ngraph::helpers::convert2OutputVector(
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
auto split = std::dynamic_pointer_cast<ngraph::opset5::Split>(ngraph::builder::makeSplit(paramOuts[0],
ngPrc, numSplits, axis));
netPrecision, numSplits, axis));
ngraph::ResultVector results;
for (int i = 0; i < outIndices.size(); i++) {
@@ -85,8 +93,8 @@ protected:
TEST_P(SplitLayerCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
CheckPluginRelatedResults(executableNetwork, "Split");
run();
// CheckPluginRelatedResults(executableNetwork, "Split");
}
namespace {
@@ -115,70 +123,150 @@ const auto blocked16_4D_ref = CPUSpecificParams{{nChw16c}, {nChw16c}, {}, "ref"}
const auto blocked16_5D_ref = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {}, "ref"};
// List of precisions natively supported by mkldnn.
const std::vector<Precision> netPrecisions = {
Precision::I8,
Precision::I32,
Precision::FP32,
Precision::BF16
const std::vector<ElementType> netPrecisions = {
ElementType::i8,
ElementType::i32,
ElementType::f32,
ElementType::bf16
};
const std::vector<std::vector<size_t>> outIndices3 = {{0, 1, 2}, {0, 1, 1, 0, 2}, {0, 0, 0, 2}};
const std::vector<std::vector<size_t>> outIndices4 = {{0, 1, 2, 3}, {0, 1, 1, 0, 2, 3}, {0, 0, 0, 2, 3}};
const std::vector<InputShape> inputShapes4D_Nspc2NcspSpecial = {
{ {}, {{3, 8, 11, 9}} },
{
// dynamic
{-1, -1, -1, -1},
// target
{
{1, 4, 5, 7},
{3, 8, 5, 9},
{5, 16, 1, 8}
}
},
{
// dynamic
{{1, 5}, {1, 64}, {1, 25}, {2, 10}},
// target
{
{2, 8, 5, 7},
{1, 4, 10, 2},
{3, 16, 5, 9}
}
},
};
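Each InputShape entry pairs a dynamic shape (-1 for an unbounded dimension, {min, max} for a bounded one) with the static target shapes that are actually inferred through it; an empty first member marks a fully static case. For example (illustrative, mirroring the first dynamic entry above):

    InputShape example{
        {-1, -1, -1, -1},             // the network input stays fully dynamic
        {{1, 4, 5, 7}, {3, 8, 5, 9}}  // static shapes fed at run time, in order
    };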
INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Nspc2NcspSpecial, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(4),
::testing::Values(1),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({3, 28, 24, 9})),
::testing::ValuesIn(inputShapes4D_Nspc2NcspSpecial),
::testing::ValuesIn(outIndices4),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(perChannelsToPlanar_4D)),
SplitLayerCPUTest::getTestCaseName);
const std::vector<InputShape> inputShapes5D_Nspc2NcspSpecial = {
{ {}, {{3, 9, 5, 9, 11}} },
{
// dynamic
{-1, -1, -1, -1, -1},
// target
{
{1, 12, 5, 7, 5},
{3, 6, 8, 9, 1},
{5, 9, 1, 8, 2}
}
},
{
// dynamic
{{1, 5}, {1, 64}, {1, 25}, {2, 10}, {1, 64}},
// target
{
{2, 6, 5, 7, 7},
{1, 3, 10, 2, 11},
{3, 9, 4, 9, 8}
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Nspc2NcspSpecial, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(3),
::testing::Values(1),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({3, 21, 24, 9, 15})),
::testing::ValuesIn(inputShapes5D_Nspc2NcspSpecial),
::testing::ValuesIn(outIndices3),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(perChannelsToPlanar_5D)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block8inPlace, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(3),
::testing::Values(0, 1),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({3, 24, 24, 9})),
::testing::ValuesIn(outIndices3),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(planar_4D, planar_4D_ref, perChannels_4D, blocked8_4D)),
SplitLayerCPUTest::getTestCaseName);
const std::vector<InputShape> inputShapes4D_planar = {
{ {}, {{3, 24, 24, 9}} },
{
// dynamic
{-1, -1, -1, -1},
// target
{
{1, 15, 12, 9},
{3, 1, 9, 12},
{5, 5, 6, 6}
}
},
{
// dynamic
{{1, 5}, {1, 64}, {1, 48}, {2, 48}},
// target
{
{2, 5, 6, 9},
{1, 7, 12, 6},
{3, 11, 9, 3}
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_planar, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(3),
::testing::Values(2, 3),
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(inputShapes4D_planar),
::testing::ValuesIn(outIndices3),
::testing::Values(planar_4D, planar_4D_ref, perChannels_4D)),
SplitLayerCPUTest::getTestCaseName);
const std::vector<InputShape> inputShapes4D_block = {
{ {}, {{3, 16, 12, 12}} },
{
// dynamic
{-1, 16, -1, -1},
// target
{
{1, 16, 12, 12},
{3, 16, 12, 12},
{5, 16, 12, 12}
}
},
{
// dynamic
{{1, 5}, 16, {1, 48}, {2, 24}},
// target
{
{2, 16, 12, 12},
{1, 16, 12, 12},
{3, 16, 12, 12}
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block8, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(3),
::testing::Values(2, 3),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({3, 24, 24, 9})),
::testing::ValuesIn(inputShapes4D_block),
::testing::ValuesIn(outIndices3),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(planar_4D, planar_4D_ref, perChannels_4D, blocked8_4D_ref)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block16inPlace, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(4),
::testing::Values(0, 1),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({4, 64, 32, 12})),
::testing::ValuesIn(outIndices3),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(blocked16_4D)),
::testing::Values(blocked8_4D_ref)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block16, SplitLayerCPUTest,
@@ -186,43 +274,77 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block16, SplitLayerCPUTest,
::testing::Values(4),
::testing::Values(2, 3),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({4, 64, 32, 12})),
::testing::ValuesIn(inputShapes4D_block),
::testing::ValuesIn(outIndices4),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(blocked16_4D_ref)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block8inPlace, SplitLayerCPUTest,
const std::vector<InputShape> inputShapes5D_planar = {
{ {}, {{3, 5, 3, 6, 12}} },
{
// dynamic
{-1, -1, -1, -1, -1},
// target
{
{1, 15, 12, 3, 9},
{3, 1, 6, 12, 3},
{5, 5, 6, 6, 6}
}
},
{
// dynamic
{{1, 5}, {1, 64}, {1, 48}, {2, 48}, {1, 40}},
// target
{
{2, 5, 12, 3, 6},
{1, 7, 12, 6, 9},
{3, 11, 9, 3, 30}
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_planar, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(3),
::testing::Values(0, 1),
::testing::Values(2, 3, 4),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({3, 24, 24, 9, 15})),
::testing::ValuesIn(inputShapes5D_planar),
::testing::ValuesIn(outIndices3),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(planar_5D, planar_5D_ref, perChannels_5D, blocked8_5D)),
::testing::Values(planar_5D, planar_5D_ref, perChannels_5D)),
SplitLayerCPUTest::getTestCaseName);
const std::vector<InputShape> inputShapes5D_block = {
{ {}, {{3, 16, 24, 12, 36}} },
{
// dynamic
{-1, 16, -1, -1, -1},
// target
{
{1, 16, 12, 24, 24},
{3, 16, 12, 12, 12},
{5, 16, 12, 12, 24}
}
},
{
// dynamic
{{1, 5}, 16, {1, 48}, {2, 24}, {3, 64}},
// target
{
{2, 16, 12, 12, 24},
{1, 16, 12, 12, 24},
{3, 16, 12, 12, 12}
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block8, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(3),
::testing::Values(2, 3, 4),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({3, 24, 24, 9, 15})),
::testing::ValuesIn(inputShapes5D_block),
::testing::ValuesIn(outIndices3),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(planar_5D, planar_5D_ref, perChannels_5D, blocked8_5D_ref)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block16inPlace, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(4),
::testing::Values(0, 1),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({4, 64, 32, 12, 20})),
::testing::ValuesIn(outIndices4),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(blocked16_5D)),
::testing::Values(blocked8_5D_ref)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block16, SplitLayerCPUTest,
@@ -230,43 +352,154 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block16, SplitLayerCPUTest,
::testing::Values(4),
::testing::Values(2, 3, 4),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({4, 64, 32, 12, 20})),
::testing::ValuesIn(inputShapes5D_block),
::testing::ValuesIn(outIndices4),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(blocked16_5D_ref)),
SplitLayerCPUTest::getTestCaseName);
const std::vector<InputShape> inputShapes3D = {
{ {}, {{14, 28, 21}} },
{
// dynamic
{-1, -1, -1},
// target
{
{7, 21, 14},
{21, 7, 14},
{21, 14, 7},
}
},
{
// dynamic
{{1, 60}, {1, 50}, {1, 48}},
// target
{
{14, 21, 7},
{21, 7, 14},
{7, 14, 21},
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_Split3D, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(7),
::testing::Values(0, 1, 2),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({14, 42, 21})),
::testing::ValuesIn(inputShapes3D),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
SplitLayerCPUTest::getTestCaseName);
const std::vector<InputShape> inputShapes2D = {
{ {}, {{6, 12}} },
{
// dynamic
{-1, -1},
// target
{
{2, 8},
{10, 4},
{2, 6},
}
},
{
// dynamic
{{1, 60}, {1, 50}},
// target
{
{2, 4},
{4, 4},
{6, 12},
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_Split2D, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(2),
::testing::Values(0, 1),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({6, 12})),
::testing::ValuesIn(inputShapes2D),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
SplitLayerCPUTest::getTestCaseName);
const std::vector<InputShape> inputShapes1D = {
{ {}, {{10}} },
{
// dynamic
{-1},
// target
{
{5},
{15},
{10},
}
},
{
// dynamic
{{1, 60}},
// target
{
{15},
{5},
{10},
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_Split1D, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(5),
::testing::Values(0),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({10})),
::testing::ValuesIn(inputShapes1D),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
SplitLayerCPUTest::getTestCaseName);
// ============================================== inPlace cases ============================================
INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block8inPlace, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(3),
::testing::Values(0, 1),
::testing::ValuesIn(netPrecisions),
::testing::Values(InputShape{ {}, {{3, 24, 24, 9}} }),
::testing::ValuesIn(outIndices3),
::testing::Values(planar_4D, planar_4D_ref, perChannels_4D, blocked8_4D)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block16inPlace, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(4),
::testing::Values(0, 1),
::testing::ValuesIn(netPrecisions),
::testing::Values(InputShape{ {}, {{4, 64, 32, 12}} }),
::testing::ValuesIn(outIndices3),
::testing::Values(blocked16_4D)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block8inPlace, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(3),
::testing::Values(0, 1),
::testing::ValuesIn(netPrecisions),
::testing::Values(InputShape{ {}, {{3, 24, 24, 9, 15}} }),
::testing::ValuesIn(outIndices3),
::testing::Values(planar_5D, planar_5D_ref, perChannels_5D, blocked8_5D)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block16inPlace, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(4),
::testing::Values(0, 1),
::testing::ValuesIn(netPrecisions),
::testing::Values(InputShape{ {}, {{4, 64, 32, 12, 20}} }),
::testing::ValuesIn(outIndices4),
::testing::Values(blocked16_5D)),
SplitLayerCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions


@@ -0,0 +1,434 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "ngraph_functions/builders.hpp"
#include "test_utils/cpu_test_utils.hpp"
using namespace ov::test;
using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
typedef std::tuple<
InputShape,
int64_t, // Axis
std::vector<int>, // Split lengths
ElementType, // Net precision
CPUSpecificParams
> varSplitCPUTestParams;
class VariadicSplitLayerCPUTest : public testing::WithParamInterface<varSplitCPUTestParams>,
virtual public SubgraphBaseTest, public CPUTestsBase {
public:
static std::string getTestCaseName(testing::TestParamInfo<varSplitCPUTestParams> obj) {
InputShape shapes;
int64_t axis;
std::vector<int> splitLengths;
ElementType netPrecision;
CPUSpecificParams cpuParams;
std::tie(shapes, axis, splitLengths, netPrecision, cpuParams) = obj.param;
std::ostringstream result;
result << "IS=";
result << CommonTestUtils::partialShape2str({shapes.first}) << "_";
result << "TS=";
for (const auto& shape : shapes.second) {
result << CommonTestUtils::vec2str(shape) << "_";
}
result << "axis=" << axis << "_";
result << "splitLenght=" << CommonTestUtils::vec2str(splitLenght) << "_";
result << "netPRC=" << netPrecision << "_";
result << CPUTestsBase::getTestCaseName(cpuParams);
return result.str();
}
protected:
void SetUp() override {
targetDevice = CommonTestUtils::DEVICE_CPU;
InputShape inputShapes;
int64_t axis;
std::vector<int> splitLengths;
ElementType netPrecision;
CPUSpecificParams cpuParams;
std::tie(inputShapes, axis, splitLengths, netPrecision, cpuParams) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
selectedType += std::string("_") + InferenceEngine::details::convertPrecision(netPrecision).name();
init_input_shapes({inputShapes});
auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes);
auto paramOuts = ngraph::helpers::convert2OutputVector(
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
auto splitAxisOp = std::make_shared<ngraph::opset3::Constant>(ngraph::element::i64, ngraph::Shape{}, std::vector<int64_t>{axis});
auto splitLengthsOp = std::make_shared<ngraph::opset3::Constant>(ngraph::element::i32, ngraph::Shape{splitLengths.size()}, splitLengths);
auto varSplit = std::make_shared<ngraph::opset3::VariadicSplit>(paramOuts[0], splitAxisOp, splitLengthsOp);
varSplit->get_rt_info() = getCPUInfo();
function = std::make_shared<ngraph::Function>(varSplit, params, "VariadicSplitCPU");
}
};
TEST_P(VariadicSplitLayerCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
run();
// CheckPluginRelatedResults(executableNetwork, "Split");
}
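The split-length vectors used in the cases below contain a -1 entry; per the VariadicSplit specification a single length may be -1, and that output's size is inferred from the remainder of the axis. For example:

    // axis size 9, splitLengths {1, 3, -1}  ->  output sizes {1, 3, 5} along the axis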
namespace {
const auto planar_4D_ref = CPUSpecificParams{{nchw}, {nchw}, {"ref"}, "ref"};
const auto planar_5D_ref = CPUSpecificParams{{ncdhw}, {ncdhw}, {"ref"}, "ref"};
const auto planar_4D = CPUSpecificParams{{nchw}, {nchw}, {}, "unknown"};
const auto planar_5D = CPUSpecificParams{{ncdhw}, {ncdhw}, {}, "unknown"};
const auto perChannels_4D = CPUSpecificParams{{nhwc}, {nhwc}, {}, "ref"};
const auto perChannels_5D = CPUSpecificParams{{ndhwc}, {ndhwc}, {}, "ref"};
const auto perChannelsToPlanar_4D = CPUSpecificParams{{nhwc}, {nchw}, {}, "ref"};
const auto perChannelsToPlanar_5D = CPUSpecificParams{{ndhwc}, {ncdhw}, {}, "ref"};
const auto blocked8_4D = CPUSpecificParams{{nChw8c}, {nChw8c}, {}, "unknown"};
const auto blocked8_5D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {}, "unknown"};
const auto blocked8_4D_ref = CPUSpecificParams{{nChw8c}, {nChw8c}, {}, "ref"};
const auto blocked8_5D_ref = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {}, "ref"};
const auto blocked16_4D = CPUSpecificParams{{nChw16c}, {nChw16c}, {}, "unknown"};
const auto blocked16_5D = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {}, "unknown"};
const auto blocked16_4D_ref = CPUSpecificParams{{nChw16c}, {nChw16c}, {}, "ref"};
const auto blocked16_5D_ref = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {}, "ref"};
// List of precisions natively supported by mkldnn.
const std::vector<ElementType> netPrecisions = {
ElementType::i8,
ElementType::i32,
ElementType::f32,
ElementType::bf16
};
const std::vector<InputShape> inputShapes4D_Nspc2NcspSpecial = {
{ {}, {{3, 5, 24, 9}} },
{
// dynamic
{-1, -1, -1, -1},
// target
{
{1, 8, 5, 7},
{3, 9, 7, 9},
{5, 6, 1, 8}
}
},
{
// dynamic
{{1, 5}, {1, 64}, {1, 25}, {2, 10}},
// target
{
{2, 7, 5, 7},
{1, 10, 10, 2},
{3, 5, 6, 9}
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit4D_CPU_Nspc2NcspSpecial, VariadicSplitLayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapes4D_Nspc2NcspSpecial),
::testing::Values(1),
::testing::Values(std::vector<int>{1, 2, -1, 1}),
::testing::ValuesIn(netPrecisions),
::testing::Values(perChannelsToPlanar_4D)),
VariadicSplitLayerCPUTest::getTestCaseName);
const std::vector<InputShape> inputShapes5D_Nspc2NcspSpecial = {
{ {}, {{3, 4, 7, 9, 3}} },
{
// dynamic
{-1, -1, -1, -1, -1},
// target
{
{1, 6, 5, 7, 5},
{3, 8, 6, 9, 1},
{5, 9, 1, 8, 2}
}
},
{
// dynamic
{{1, 5}, {1, 64}, {1, 25}, {2, 10}, {1, 64}},
// target
{
{2, 5, 5, 7, 7},
{1, 4, 10, 2, 11},
{3, 7, 5, 9, 8}
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit5D_CPU_Nspc2NcspSpecial, VariadicSplitLayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapes5D_Nspc2NcspSpecial),
::testing::Values(1),
::testing::Values(std::vector<int>{2, 1, -1}),
::testing::ValuesIn(netPrecisions),
::testing::Values(perChannelsToPlanar_5D)),
VariadicSplitLayerCPUTest::getTestCaseName);
const std::vector<InputShape> inputShapes4D_planar = {
{ {}, {{3, 6, 5, 6}} },
{
// dynamic
{-1, -1, -1, -1},
// target
{
{1, 9, 8, 7},
{3, 8, 6, 5},
{5, 3, 7, 6}
}
},
{
// dynamic
{{1, 5}, {1, 64}, {1, 48}, {2, 48}},
// target
{
{2, 9, 5, 6},
{1, 6, 9, 8},
{3, 1, 6, 7}
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit4D_CPU_planar, VariadicSplitLayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapes4D_planar),
::testing::Values(2, 3),
::testing::Values(std::vector<int>{1, 3, -1}),
::testing::ValuesIn(netPrecisions),
::testing::Values(planar_4D, planar_4D_ref, perChannels_4D)),
VariadicSplitLayerCPUTest::getTestCaseName);
const std::vector<InputShape> inputShapes4D_block = {
{ {}, {{3, 16, 6, 7}} },
{
// dynamic
{-1, 16, -1, -1},
// target
{
{1, 16, 8, 7},
{3, 16, 7, 8},
{5, 16, 9, 8}
}
},
{
// dynamic
{{1, 5}, 16, {1, 48}, {2, 24}},
// target
{
{2, 16, 12, 6},
{1, 16, 6, 9},
{3, 16, 7, 6}
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit4D_CPU_Block8, VariadicSplitLayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapes4D_block),
::testing::Values(2, 3),
::testing::Values(std::vector<int>{2, 2, -1}),
::testing::ValuesIn(netPrecisions),
::testing::Values(blocked8_4D_ref)),
VariadicSplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit4D_CPU_Block16, VariadicSplitLayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapes4D_block),
::testing::Values(2, 3),
::testing::Values(std::vector<int>{2, 2, -1, 1}),
::testing::ValuesIn(netPrecisions),
::testing::Values(blocked16_4D_ref)),
VariadicSplitLayerCPUTest::getTestCaseName);
const std::vector<InputShape> inputShapes5D_planar = {
{ {}, {{3, 24, 4, 5, 6}} },
{
// dynamic
{-1, -1, -1, -1, -1},
// target
{
{1, 2, 4, 6, 5},
{3, 1, 6, 4, 5},
{5, 6, 5, 7, 4}
}
},
{
// dynamic
{{1, 5}, {1, 64}, {1, 48}, {2, 48}, {2, 40}},
// target
{
{2, 5, 4, 5, 6},
{1, 7, 5, 4, 7},
{3, 3, 5, 6, 4}
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit5D_CPU_planar, VariadicSplitLayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapes5D_planar),
::testing::Values(2, 3, 4),
::testing::Values(std::vector<int>{2, 1, -1}),
::testing::ValuesIn(netPrecisions),
::testing::Values(planar_5D, planar_5D_ref, perChannels_5D)),
VariadicSplitLayerCPUTest::getTestCaseName);
const std::vector<InputShape> inputShapes5D_block = {
{ {}, {{3, 16, 8, 5, 6}} },
{
// dynamic
{-1, 16, -1, -1, -1},
// target
{
{1, 16, 5, 6, 7},
{3, 16, 24, 5, 8},
{5, 16, 6, 7, 5}
}
},
{
// dynamic
{{1, 5}, 16, {1, 48}, {2, 24}, {2, 64}},
// target
{
{2, 16, 7, 6, 5},
{1, 16, 6, 5, 7},
{3, 16, 5, 7, 6}
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit5D_CPU_Block8, VariadicSplitLayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapes5D_block),
::testing::Values(2, 3, 4),
::testing::Values(std::vector<int>{1, 2, -1}),
::testing::ValuesIn(netPrecisions),
::testing::Values(blocked8_5D_ref)),
VariadicSplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit5D_CPU_Block16, VariadicSplitLayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapes5D_block),
::testing::Values(2, 3, 4),
::testing::Values(std::vector<int>{2, 1, -1, 1}),
::testing::ValuesIn(netPrecisions),
::testing::Values(blocked16_5D_ref)),
VariadicSplitLayerCPUTest::getTestCaseName);
const std::vector<InputShape> inputShapes3D = {
{ {}, {{14, 7, 21}} },
{
// dynamic
{-1, -1, -1},
// target
{
{7, 21, 14},
{21, 7, 14},
{21, 14, 7},
}
},
{
// dynamic
{{1, 60}, {1, 50}, {1, 48}},
// target
{
{14, 21, 7},
{21, 7, 14},
{7, 14, 21},
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit3D, VariadicSplitLayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapes3D),
::testing::Values(0, 1, 2),
::testing::Values(std::vector<int>{2, 4, -1}),
::testing::ValuesIn(netPrecisions),
::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
VariadicSplitLayerCPUTest::getTestCaseName);
const std::vector<InputShape> inputShapes2D = {
{ {}, {{6, 12}} },
{
// dynamic
{-1, -1},
// target
{
{3, 8},
{10, 4},
{3, 6},
}
},
{
// dynamic
{{1, 60}, {1, 50}},
// target
{
{3, 4},
{4, 4},
{6, 12},
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit2D, VariadicSplitLayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapes2D),
::testing::Values(0, 1),
::testing::Values(std::vector<int>{2, -1}),
::testing::ValuesIn(netPrecisions),
::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
VariadicSplitLayerCPUTest::getTestCaseName);
const std::vector<InputShape> inputShapes1D = {
{ {}, {{10}} },
{
// dynamic
{-1},
// target
{
{5},
{15},
{10},
}
},
{
// dynamic
{{1, 60}},
// target
{
{15},
{5},
{10},
}
},
};
INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit1D, VariadicSplitLayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapes1D),
::testing::Values(0),
::testing::Values(std::vector<int>{2, 1, 1, -1}),
::testing::ValuesIn(netPrecisions),
::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
VariadicSplitLayerCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions