[CPU] Split dynamism support (#8419)
This commit is contained in:
parent da0b8a84b4
commit 46fea0fe48
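In brief, this commit teaches the CPU Split node to run with dynamic shapes: the static-shape rejection in `isSupportedOperation` goes away, shape-dependent setup moves out of `createPrimitive` into the new `needPrepareParams`/`prepareParams` hooks, the optimized copy loop becomes a reusable `SplitOptimizedExecutor`, and the layer tests migrate to the dynamic-shape `SubgraphBaseTest` API. The sketch below is a minimal, self-contained illustration of the prepare-once-per-shape pattern the node adopts (the names `SplitLikeNode` and `PreparedParams` are hypothetical; the real node compares cached dims via `MKLDNNNode::inputShapesModified()`):

#include <cassert>
#include <iostream>
#include <vector>

// Cached, shape-dependent state: rebuilt only when the input dims change.
struct PreparedParams {
    std::vector<size_t> dims;
};

class SplitLikeNode {
public:
    // Mirrors needPrepareParams(): skip the rebuild if shapes did not change.
    bool needPrepareParams(const std::vector<size_t>& dims) const {
        return dims != lastDims_;
    }
    void prepareParams(const std::vector<size_t>& dims) {
        params_.dims = dims;   // in the real node: rebuild offsets/strides/executor
        lastDims_ = dims;
    }
    void execute(const std::vector<size_t>& dims) {
        if (needPrepareParams(dims))
            prepareParams(dims);
        assert(params_.dims == dims);
        std::cout << "executed with rank " << dims.size() << "\n";
    }
private:
    PreparedParams params_;
    std::vector<size_t> lastDims_;
};

int main() {
    SplitLikeNode node;
    node.execute({2, 16, 8, 8});   // first shape: prepares
    node.execute({2, 16, 8, 8});   // same shape: reuses cached params
    node.execute({4, 16, 8, 8});   // shape changed: prepares again
}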
@@ -21,11 +21,6 @@ using namespace InferenceEngine;

bool MKLDNNSplitNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
    try {
-        if (isDynamicNgraphNode(op)) {
-            errorMessage = "Doesn't support op with dynamic shapes";
-            return false;
-        }
-
        if (!MKLDNNPlugin::one_of(op->get_type_info(), ngraph::op::v1::Split::get_type_info_static(), ngraph::op::v1::VariadicSplit::get_type_info_static())) {
            errorMessage = "Only opset1 Split and VariadicSplit operations are supported";
            return false;
@@ -61,12 +56,13 @@ MKLDNNSplitNode::MKLDNNSplitNode(const std::shared_ptr<ngraph::Node>& op, const
        INPUTS_NUM = 3;
    }

+    const auto inRank = getInputShapeAtPort(0).getRank();
    auto axisOp = ngraph::as_type_ptr<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
    auto axis = axisOp->cast_vector<int64_t>()[0];
    if (axis < 0) {
-        axis += op->get_input_shape(0).size();
+        axis += inRank;
    }
-    if (axis >= op->get_input_shape(0).size()) {
+    if (axis >= inRank) {
        THROW_ERROR << "Split node with name '" << op->get_friendly_name() << "' has invalid value of axis parameter: " << axis;
    }
    this->axis = axis;
@@ -81,26 +77,21 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;

-    auto srcShape = getInputShapeAtPort(0);
-    auto axis_size = 0;
-    auto dstFirstDims = getOutputShapeAtPort(0).getStaticDims();
+    const auto &srcShape = getInputShapeAtPort(0);
+    const auto &dstFirstDims = getOutputShapeAtPort(0).getDims();
    for (size_t i = 0; i < outputShapes.size(); i++) {
-        auto o_Dims = outputShapes[i].getStaticDims();
+        const auto &o_Dims = outputShapes[i].getDims();
        if (dstFirstDims.size() != o_Dims.size()) {
            THROW_ERROR << "only supports output blobs with equal number of dimensions";
        }

-        axis_size += o_Dims[axis];
        for (size_t j = 0; j < dstFirstDims.size(); j++) {
            if (j == axis)
                continue;
-            if (o_Dims[j] != dstFirstDims[j])
+            if (!dimsEqualWeak(o_Dims[j], dstFirstDims[j]))
                THROW_ERROR << "has incorrect output dimensions";
        }
    }
-    dstFirstDims[axis] = axis_size;
-    if (std::accumulate(dstFirstDims.begin(), dstFirstDims.end(), 1, std::multiplies<size_t>()) != srcShape.getElementsCount())
-        THROW_ERROR << "sizes of input blob and sum of output blobs are not equal.";

    InferenceEngine::Precision inpPrecision = getOriginalInputPrecisionAtPort(0);
    const auto axisPrecision = getOriginalInputPrecisionAtPort(1);
@@ -111,19 +102,20 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
        dynBatchSupport = false;
    }

-    //Set plain and tailC formats
+    // Set plain and tailC formats
    std::vector<LayoutType> tdCreatorTypes{ LayoutType::ncsp, LayoutType::nspc };

-    //Support channel blocked format
+    // Support channel blocked format
    if (srcShape.getRank() > 2) {
        for (auto item : { std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c) }) {
-            SizeVector blkDims = srcShape.getStaticDims();
-            if (blkDims[channelsPos] % item.first)
+            const auto &blkDims = srcShape.getDims();
+            if (blkDims[channelsPos] == Shape::UNDEFINED_DIM || blkDims[channelsPos] % item.first != 0)
                continue;

            bool blocked = true;
            for (size_t i = 0; i < outputShapes.size(); i++) {
-                if (outputShapes[i].getStaticDims()[channelsPos] % item.first) {
+                const auto &outBlkDims = getOutputShapeAtPort(i).getDims();
+                if (outBlkDims[channelsPos] == Shape::UNDEFINED_DIM || outBlkDims[channelsPos] % item.first != 0) {
                    blocked = false;
                    break;
                }
@@ -148,9 +140,9 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
        config.inConfs[0].desc = std::make_shared<CpuBlockedMemoryDesc>(itr->second->createDesc(inpPrecision, srcShape));
        config.inConfs[1].inPlace = -1;
        config.inConfs[1].constant = true;
-        config.inConfs[1].desc = std::make_shared<CpuBlockedMemoryDesc>(axisPrecision, Shape(SizeVector {1}));
+        config.inConfs[1].desc = std::make_shared<CpuBlockedMemoryDesc>(axisPrecision, Shape(VectorDims{1}));
        if (INPUTS_NUM == 3) {
-            config.inConfs[2].desc = std::make_shared<CpuBlockedMemoryDesc>(axisPrecision, Shape(SizeVector{outputShapes.size()}));
+            config.inConfs[2].desc = std::make_shared<CpuBlockedMemoryDesc>(axisPrecision, Shape(VectorDims{outputShapes.size()}));
            config.inConfs[2].constant = true;
        }

@@ -174,38 +166,41 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
    }

    // Optimized inplace case
-    for (auto refPdIndex : pdIndexesToReuse) {
-        const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig();
-        auto config = refConfig;
-        const auto inBlockingDesc = refConfig.inConfs[0].desc->as<CpuBlockedMemoryDesc>();
-        const auto& order = inBlockingDesc->getOrder();
-        const auto& blkDims = inBlockingDesc->getBlockDims();
-        auto numOfDim = blkDims.size();
+    // TODO [DS]: inplace
+    if (!isDynamicNode()) {
+        for (auto refPdIndex : pdIndexesToReuse) {
+            const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig();
+            auto config = refConfig;
+            const auto inBlockingDesc = refConfig.inConfs[0].desc->as<CpuBlockedMemoryDesc>();
+            const auto& order = inBlockingDesc->getOrder();
+            const auto& blkDims = inBlockingDesc->getBlockDims();
+            auto numOfDim = blkDims.size();

-        SizeVector offsets(numOfDim, 0lu);
-        SizeVector strides(numOfDim);
-        strides.back() = 1lu;
-        size_t offset = (std::numeric_limits<size_t>::max)();
+            SizeVector offsets(numOfDim, 0lu);
+            SizeVector strides(numOfDim);
+            strides.back() = 1lu;
+            size_t offset = (std::numeric_limits<size_t>::max)();

-        for (size_t i = 2; i <= numOfDim; i++) {
-            if (numOfDim - i < axis) {
-                strides[numOfDim - i] = (std::numeric_limits<size_t>::max)();
-            } else {
-                strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1];
-            }
-        }
+            for (size_t i = 2; i <= numOfDim; i++) {
+                if (numOfDim - i < axis) {
+                    strides[numOfDim - i] = (std::numeric_limits<size_t>::max)();
+                } else {
+                    strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1];
+                }
+            }

-    config.inConfs[0].desc = std::make_shared<CpuBlockedMemoryDesc>(inpPrecision, srcShape, blkDims, order, offset, offsets, strides);
-
-    for (size_t i = 0; i < outputShapes.size(); i++) {
-        auto outBlockingDesc = refConfig.outConfs[i].desc->as<CpuBlockedMemoryDesc>();
-        const auto& outBlkDims = outBlockingDesc->getBlockDims();
-        const auto& dims = outBlockingDesc->getShape().getStaticDims();
-
-        config.outConfs[i].inPlace = 0;
-        config.outConfs[i].desc = std::make_shared<CpuBlockedMemoryDesc>(outPrecision, Shape(dims), outBlkDims, order, offset, offsets, strides);
-    }
-    supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);
-    }
+            config.inConfs[0].desc = std::make_shared<CpuBlockedMemoryDesc>(inpPrecision, srcShape, blkDims, order, offset, offsets, strides);
+
+            for (size_t i = 0; i < outputShapes.size(); i++) {
+                auto outBlockingDesc = refConfig.outConfs[i].desc->as<CpuBlockedMemoryDesc>();
+                const auto& outBlkDims = outBlockingDesc->getBlockDims();
+                const auto& dims = outBlockingDesc->getShape().getStaticDims();
+
+                config.outConfs[i].inPlace = 0;
+                config.outConfs[i].desc = std::make_shared<CpuBlockedMemoryDesc>(outPrecision, Shape(dims), outBlkDims, order, offset, offsets, strides);
+            }
+            supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);
+        }
+    }

    // Special nspc -> ncsp case when splitting channels
@@ -219,9 +214,9 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
        config.inConfs[0].desc = creatorsMap.at(LayoutType::nspc)->createSharedDesc(inpPrecision, srcShape);
        config.inConfs[1].inPlace = -1;
        config.inConfs[1].constant = true;
-        config.inConfs[1].desc = std::make_shared<CpuBlockedMemoryDesc>(axisPrecision, Shape(SizeVector{1}));
+        config.inConfs[1].desc = std::make_shared<CpuBlockedMemoryDesc>(axisPrecision, Shape(VectorDims{1}));
        if (INPUTS_NUM == 3) {
-            config.inConfs[2].desc = std::make_shared<CpuBlockedMemoryDesc>(axisPrecision, Shape(SizeVector{outputShapes.size()}));
+            config.inConfs[2].desc = std::make_shared<CpuBlockedMemoryDesc>(axisPrecision, Shape(VectorDims{outputShapes.size()}));
            config.inConfs[2].constant = true;
        }
        config.outConfs.resize(outputShapes.size());
@@ -235,63 +230,75 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
    }
}

-void MKLDNNSplitNode::createPrimitive() {
-    auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
-    if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
-        THROW_ERROR << "Input memory has not been allocated.";
-    for (size_t i = 0; i < getChildEdges().size(); i++) {
-        if (!getChildEdgeAt(i)->getMemoryPtr() || !getChildEdgeAt(i)->getMemory().GetPrimitivePtr())
-            THROW_ERROR << "Destination memory has not been allocated.";
+bool MKLDNNSplitNode::needPrepareParams() const {
+    if (isOptimized()) {
+        return false;
    }
-    if (getSelectedPrimitiveDescriptor() == nullptr)
-        THROW_ERROR << "Preferable primitive descriptor is not set.";
+    return MKLDNNNode::inputShapesModified();
}

-    auto& memDesc = getParentEdgeAt(0)->getMemoryPtr()->getDesc();
+void MKLDNNSplitNode::prepareParams() {
+    const auto &srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
+    if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr()) {
+        THROW_ERROR << "has not allocated input memory";
+    }

-    canUseOptimizedNspc2Ncsp = false;
-    if (axis == 1 && one_of(memDesc.getShape().getRank(), 4, 5) && memDesc.hasLayoutType(LayoutType::nspc)) {
-        canUseOptimizedNspc2Ncsp = true;
-        for (size_t i = 0; i < getChildEdges().size(); i++) {
-            auto& childMemDesc = getChildEdgeAt(i)->getMemoryPtr()->getDesc();
-            if (!childMemDesc.hasLayoutType(LayoutType::ncsp))
-                canUseOptimizedNspc2Ncsp = false;
+    dstMemPtrs.clear();
+    std::vector<BlockedMemoryDescCPtr> outDescs;
+    for (size_t i = 0; i < outputShapes.size(); ++i) {
+        const auto &outMemPtr = this->getChildEdgesAtPort(i)[0]->getMemoryPtr();
+        if (!outMemPtr || !outMemPtr->GetPrimitivePtr()) {
+            THROW_ERROR << "has not allocated destination memory";
+        }

+        if (uint8_t* dstData = reinterpret_cast<uint8_t*>(outMemPtr->GetPtr())) {
+            dstMemPtrs.push_back(dstData);
+        } else {
+            THROW_ERROR << "can't get child edge indx " << i << "data.";
+        }

+        if (!canUseOptimizedNspc2Ncsp) {
+            outDescs.push_back(outMemPtr->GetDescWithType<BlockedMemoryDesc>());
+        }
    }

-    if (!isOptimized()) {
-        initializeDstMemPtrs();
-        if (!canUseOptimizedNspc2Ncsp)
-            prepareOptimizedParams();
+    if (!canUseOptimizedNspc2Ncsp) {
+        const auto inDesc = srcMemPtr->GetDescWithType<BlockedMemoryDesc>();
+        execPtr = std::make_shared<SplitOptimizedExecutor>(inDesc, outDescs, axis);
    }
}

+void MKLDNNSplitNode::createPrimitive() {
+    if (getSelectedPrimitiveDescriptor() == nullptr)
+        THROW_ERROR << "Preferable primitive descriptor is not set.";
+
+    if (inputShapesDefined()) {
+        if (needPrepareParams())
+            prepareParams();
+        updateLastInputDims();
+    }
+}
+
void MKLDNNSplitNode::execute(mkldnn::stream strm) {
-    if (isOptimized())
+    if (isOptimized()) {
        return;
+    }

    if (dstMemPtrs.empty())
        THROW_ERROR << "Output data pointers have not been initialized.";

-    int MB = batchToProcess();
+    const auto &srcMem = getParentEdgesAtPort(0)[0]->getMemory();
+    size_t batch = srcMem.getStaticDims()[0];
+    Dim MB = isDynamicNode() ? batch : batchToProcess();

    if (canUseOptimizedNspc2Ncsp) {
        optimizedNspc2Ncsp(MB);
        return;
    }

-    uint8_t* srcData = reinterpret_cast<uint8_t*>(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
-    size_t batch = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()[0];
-
-    if (batch != MB)
-        optimizedParams.countStrides = optimizedParams.countStrides / batch * MB;
-
-    parallel_for2d(dstMemPtrs.size(), optimizedParams.countStrides, [&](size_t i, size_t j) {
-        uint8_t* dstData = dstMemPtrs[i];
-
-        cpu_memcpy(&dstData[j * optimizedParams.dataSize[i]],
-                   &srcData[optimizedParams.srcDataOffsets[i] + j * optimizedParams.srcDataStride],
-                   optimizedParams.dataSize[i]);
-    });
+    uint8_t* srcData = reinterpret_cast<uint8_t*>(srcMem.GetPtr());
+    IE_ASSERT(execPtr != nullptr);
+    execPtr->exec(srcData, dstMemPtrs, batch, MB);
}

bool MKLDNNSplitNode::created() const {
@@ -303,61 +310,70 @@ bool MKLDNNSplitNode::isOptimized() const {
}

void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() {
-    if (!isOptimized()) {
-        MKLDNNNode::initOptimalPrimitiveDescriptor();
-        return;
-    }
-
    auto selected_pd = getSelectedPrimitiveDescriptor();
    if (selected_pd == nullptr)
        THROW_ERROR << "Preferable primitive descriptor is not set.";
    auto config = selected_pd->getConfig();
-    if (isConfigDefined(config))
-        return;

-    for (size_t i = 0; i < config.inConfs.size(); i++) {
-        if (config.inConfs[i].desc->isDefined())
-            continue;
+    if (!isOptimized()) {
+        MKLDNNNode::initOptimalPrimitiveDescriptor();
+    } else if (!isConfigDefined(config)) {
+        for (size_t i = 0; i < config.inConfs.size(); i++) {
+            if (config.inConfs[i].desc->isDefined())
+                continue;

-        int num = getParentEdgeAt(i)->getOutputNum();
-        if (getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()) {
-            if (num >= 0) {
-                const auto& parentConfig = getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num];
-                if (!parentConfig.desc->isDefined() && parentConfig.inPlace >= 0)
-                    getParentEdgeAt(i)->getParent()->initOptimalPrimitiveDescriptor();
-                if (parentConfig.desc->isDefined() && parentConfig.desc->isCompatible(*config.inConfs[i].desc)) {
-                    config.inConfs[i].desc = parentConfig.desc;
-                    continue;
-                }
-            }
-        }
+            int num = getParentEdgeAt(i)->getOutputNum();
+            if (getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()) {
+                if (num >= 0) {
+                    const auto& parentConfig = getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num];
+                    if (!parentConfig.desc->isDefined() && parentConfig.inPlace >= 0)
+                        getParentEdgeAt(i)->getParent()->initOptimalPrimitiveDescriptor();
+                    if (parentConfig.desc->isDefined() && parentConfig.desc->isCompatible(*config.inConfs[i].desc)) {
+                        config.inConfs[i].desc = parentConfig.desc;
+                        continue;
+                    }
+                }
+            }

-        // reset undefined offsets
-        config.inConfs[i].desc = config.inConfs[i].desc->as<BlockedMemoryDesc>()->cloneWithDefaultStridesAndOffset();
-    }
-    if (config.outConfs.size() != outputShapes.size())
-        THROW_ERROR << "has invalid config";
+            // reset undefined offsets
+            config.inConfs[i].desc = config.inConfs[i].desc->as<BlockedMemoryDesc>()->cloneWithDefaultStridesAndOffset();
+        }
+        if (config.outConfs.size() != outputShapes.size())
+            THROW_ERROR << "has invalid config";

-    auto firstInBlockingDesc = config.inConfs[0].desc->as<BlockedMemoryDesc>();
-    size_t offset = 0;
-    for (size_t i = 0; i < outputShapes.size(); i++) {
-        auto oldDesc = config.outConfs[i].desc;
-        auto outBlockingDesc = oldDesc->as<BlockedMemoryDesc>();
-        config.outConfs[i].desc = std::make_shared<CpuBlockedMemoryDesc>(outBlockingDesc->getPrecision(),
-                                                                         outBlockingDesc->getShape(),
-                                                                         outBlockingDesc->getBlockDims(),
-                                                                         outBlockingDesc->getOrder(),
-                                                                         firstInBlockingDesc->getOffsetPadding() + offset,
-                                                                         firstInBlockingDesc->getOffsetPaddingToData(),
-                                                                         firstInBlockingDesc->getStrides());
+        auto firstInBlockingDesc = config.inConfs[0].desc->as<BlockedMemoryDesc>();
+        size_t offset = 0;
+        for (size_t i = 0; i < outputShapes.size(); i++) {
+            auto oldDesc = config.outConfs[i].desc;
+            auto outBlockingDesc = oldDesc->as<BlockedMemoryDesc>();
+            config.outConfs[i].desc = std::make_shared<CpuBlockedMemoryDesc>(outBlockingDesc->getPrecision(),
+                                                                             outBlockingDesc->getShape(),
+                                                                             outBlockingDesc->getBlockDims(),
+                                                                             outBlockingDesc->getOrder(),
+                                                                             firstInBlockingDesc->getOffsetPadding() + offset,
+                                                                             firstInBlockingDesc->getOffsetPaddingToData(),
+                                                                             firstInBlockingDesc->getStrides());

-        size_t axisSize = 1;
-        for (size_t j = axis; j < outBlockingDesc->getBlockDims().size(); j++) {
-            axisSize *= outBlockingDesc->getBlockDims()[j];
-        }
-        offset += axisSize;
-    }
-    initDescriptor(config);
-}
+            size_t axisSize = 1;
+            for (size_t j = axis; j < outBlockingDesc->getBlockDims().size(); j++) {
+                axisSize *= outBlockingDesc->getBlockDims()[j];
+            }
+            offset += axisSize;
+        }
+        initDescriptor(config);
+    }

+    config = selected_pd->getConfig();
+    canUseOptimizedNspc2Ncsp = false;
+    IE_ASSERT(config.inConfs.size() > 0);
+    const auto inConfDesc = config.inConfs[0].desc;
+    if (axis == 1 && one_of(inConfDesc->getShape().getRank(), 4, 5) && inConfDesc->hasLayoutType(LayoutType::nspc)) {
+        canUseOptimizedNspc2Ncsp = true;
+        for (size_t i = 0; i < config.outConfs.size(); i++) {
+            if (!config.outConfs[i].desc->hasLayoutType(LayoutType::ncsp))
+                canUseOptimizedNspc2Ncsp = false;
+        }
+    }
+}

void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
@@ -375,8 +391,7 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
        }
    }

-    //check the descriptors and select the ones that have the same data format as the input
-
+    // check the descriptors and select the ones that have the same data format as the input
    std::vector<size_t> canSelectPrimitive;
    for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
        auto parentEdge = getParentEdgeAt(0);
@@ -397,6 +412,7 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
        selectPrimitiveDescriptorByIndex(static_cast<int>(canSelectPrimitive[0]));
        return;
    }
+
    // if there are more then one PD with similar data layouts - select the optimized one
    for (auto indx : canSelectPrimitive) {
        if (supportedPrimitiveDescriptors[indx].getImplementationType() == impl_desc_type::unknown) {
@@ -463,55 +479,6 @@ void MKLDNNSplitNode::setDynamicBatchLim(int lim) {
    dynBatchLim = lim;
}

-void MKLDNNSplitNode::prepareOptimizedParams() {
-    auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
-    if (!selectedPrimitiveDescriptor)
-        IE_THROW() << "CPU Split node with name '" << getName() << "' doesn't have primitive descriptors.";
-    const auto inpTensorDesc = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
-    const auto outputPortsCount = outputShapes.size();
-
-    //find axis order position
-    const auto& order = inpTensorDesc->getOrder();
-    unsigned axisOrderPos = std::numeric_limits<unsigned>::max();
-    for (size_t i = 0; i < order.size(); ++i) {
-        if (order[i] == axis) {
-            axisOrderPos = i;
-            break;
-        }
-    }
-    if (std::numeric_limits<unsigned>::max() == axisOrderPos) {
-        THROW_ERROR << "Can't find the axis in the input tensor order list";
-    }
-
-    uint8_t srcDataSize = inpTensorDesc->getPrecision().size();
-    const auto& srcDims = inpTensorDesc->getBlockDims();
-    const auto getRank = srcDims.size();
-
-    optimizedParams.countStrides = 1;
-    for (int i = 0; i < axisOrderPos; i++)
-        optimizedParams.countStrides *= srcDims[i];
-
-    optimizedParams.srcDataStride = 0;
-    optimizedParams.dataSize.resize(outputPortsCount);
-
-    for (size_t i = 0; i < outputPortsCount; i++) {
-        auto outputEdge = this->getChildEdgesAtPort(i).front();
-        optimizedParams.dataSize[i] = srcDataSize;
-
-        auto desc = outputEdge->getMemory().getDesc().as<CpuBlockedMemoryDesc>();
-        for (size_t j = axisOrderPos; j < getRank; j++)
-            optimizedParams.dataSize[i] *= desc->getBlockDims()[j];
-
-        optimizedParams.srcDataStride += optimizedParams.dataSize[i];
-    }
-
-    optimizedParams.srcDataOffsets.resize(outputPortsCount);
-    optimizedParams.srcDataOffsets[0] = 0;
-    for (size_t i = 1; i < outputPortsCount; i++) {
-        optimizedParams.srcDataOffsets[i] = optimizedParams.srcDataOffsets[i - 1] + optimizedParams.dataSize[i - 1];
-    }
-}
-
void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) {
    auto parentEdge = getParentEdgeAt(0);
    const int rank = parentEdge->getMemory().GetShape().getRank();
@@ -534,7 +501,7 @@ void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) {
        auto dstData = dstMemPtrs[i];

        size_t innerSize = 1;
-        auto dims = outputShapes[i].getStaticDims();
+        auto dims = getChildEdgesAtPort(i)[0]->getMemory().getStaticDims();

        for (size_t j = axis; j < dims.size(); j++) {
            innerSize *= dims[j];
@@ -558,17 +525,62 @@ void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) {
    }
}

-void MKLDNNSplitNode::initializeDstMemPtrs() {
-    dstMemPtrs.clear();
-
-    for (size_t i = 0; i < outputShapes.size(); ++i) {
-        auto outputEdges = this->getChildEdgesAtPort(i);
-        if (uint8_t* dstData = reinterpret_cast<uint8_t*>(outputEdges.front()->getMemoryPtr()->GetPtr())) {
-            dstMemPtrs.push_back(dstData);
-        } else {
-            THROW_ERROR << "can't get child edge indx " << i << "data.";
-        }
-    }
-}
+MKLDNNSplitNode::SplitOptimizedExecutor::SplitOptimizedExecutor(BlockedMemoryDescCPtr inDesc, const std::vector<BlockedMemoryDescCPtr> &outDescs,
+                                                                const size_t axis) {
+    // find axis order position
+    const auto& order = inDesc->getOrder();
+    unsigned axisOrderPos = std::numeric_limits<unsigned>::max();
+    for (size_t i = 0; i < order.size(); ++i) {
+        if (order[i] == axis) {
+            axisOrderPos = i;
+            break;
+        }
+    }
+    if (std::numeric_limits<unsigned>::max() == axisOrderPos) {
+        IE_THROW() << "Can't create split executor, because can't find the axis in the input tensor order list";
+    }
+
+    const auto outputPortsCount = outDescs.size();
+
+    uint8_t srcDataSize = inDesc->getPrecision().size();
+    const auto& srcDims = inDesc->getBlockDims();
+    const auto getRank = srcDims.size();
+
+    countStrides = 1;
+    for (int i = 0; i < axisOrderPos; i++)
+        countStrides *= srcDims[i];
+
+    srcDataStride = 0;
+    dataSize.resize(outputPortsCount);
+
+    for (size_t i = 0; i < outputPortsCount; i++) {
+        dataSize[i] = srcDataSize;
+        for (size_t j = axisOrderPos; j < getRank; j++)
+            dataSize[i] *= outDescs[i]->getBlockDims()[j];
+
+        srcDataStride += dataSize[i];
+    }
+
+    srcDataOffsets.resize(outputPortsCount);
+    srcDataOffsets[0] = 0;
+    for (size_t i = 1; i < outputPortsCount; i++) {
+        srcDataOffsets[i] = srcDataOffsets[i - 1] + dataSize[i - 1];
+    }
+}
+
+void MKLDNNSplitNode::SplitOptimizedExecutor::exec(const uint8_t* srcData, const std::vector<uint8_t*> &dstMemPtrs,
+                                                   const Dim origBatch, const Dim perInferBatch) {
+    size_t execCountStrides = countStrides;
+    if (origBatch != perInferBatch)
+        execCountStrides = execCountStrides / origBatch * perInferBatch;
+
+    parallel_for2d(dstMemPtrs.size(), execCountStrides, [&](size_t i, size_t j) {
+        uint8_t* dstData = dstMemPtrs[i];
+
+        cpu_memcpy(&dstData[j * dataSize[i]],
+                   &srcData[srcDataOffsets[i] + j * srcDataStride],
+                   dataSize[i]);
+    });
+}

REG_MKLDNN_PRIM_FOR(MKLDNNSplitNode, Split);
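The core of SplitOptimizedExecutor is precomputed offset arithmetic: countStrides counts the outer slices ahead of the split axis, each output i owns dataSize[i] contiguous bytes per slice starting at source offset srcDataOffsets[i], and srcDataStride is the per-slice sum over all outputs. Below is a standalone re-derivation of that math (illustrative only, not the plugin code), specialized to a plain non-blocked layout and 1-byte elements, with the parallel_for2d copy body serialized:

#include <cstdint>
#include <cstring>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
    const std::vector<size_t> srcDims = {2, 3, 4};    // input shape
    const size_t axis = 1;
    const std::vector<size_t> parts = {1, 2};         // split 3 -> 1 + 2 along axis

    // Bytes per unit of the axis dimension, and number of outer slices.
    const size_t inner = std::accumulate(srcDims.begin() + axis + 1, srcDims.end(),
                                         size_t{1}, std::multiplies<size_t>());
    const size_t countStrides = std::accumulate(srcDims.begin(), srcDims.begin() + axis,
                                                size_t{1}, std::multiplies<size_t>());

    std::vector<size_t> dataSize, srcDataOffsets;
    size_t srcDataStride = 0;
    for (size_t p : parts) {
        srcDataOffsets.push_back(srcDataStride);      // running prefix sum of sizes
        dataSize.push_back(p * inner);
        srcDataStride += p * inner;
    }

    std::vector<uint8_t> src(countStrides * srcDataStride);
    std::iota(src.begin(), src.end(), 0);

    std::vector<std::vector<uint8_t>> dst(parts.size());
    for (size_t i = 0; i < parts.size(); ++i) {
        dst[i].resize(countStrides * dataSize[i]);
        for (size_t j = 0; j < countStrides; ++j)     // the parallel_for2d body, serialized
            std::memcpy(&dst[i][j * dataSize[i]],
                        &src[srcDataOffsets[i] + j * srcDataStride],
                        dataSize[i]);
    }

    for (const auto& out : dst) {                     // prints {0..3,12..15} and {4..11,16..23}
        for (unsigned v : out) std::cout << v << ' ';
        std::cout << '\n';
    }
}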
@@ -30,9 +30,31 @@ public:
        return !isOptimized();
    }

+    bool needPrepareParams() const override;
+    void prepareParams() override;
+    void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
+
private:
-    void prepareOptimizedParams();
-    void initializeDstMemPtrs();
+    struct SplitExecutor {
+        virtual void exec(const uint8_t* srcData, const std::vector<uint8_t*> &dstMemPtrs,
+                          const Dim origBatch, const Dim perInferBatch) = 0;
+        virtual ~SplitExecutor() = default;
+    };
+    std::shared_ptr<SplitExecutor> execPtr = nullptr;
+
+    struct SplitOptimizedExecutor : public SplitExecutor {
+    public:
+        SplitOptimizedExecutor(BlockedMemoryDescCPtr inDesc, const std::vector<BlockedMemoryDescCPtr> &outDescs, const size_t axis);
+        void exec(const uint8_t* srcData, const std::vector<uint8_t*> &dstMemPtrs,
+                  const Dim origBatch, const Dim perInferBatch) override;
+
+    private:
+        std::vector<size_t> dataSize;
+        std::vector<size_t> srcDataOffsets;
+        size_t srcDataStride;
+        size_t countStrides;
+    };
+
    void optimizedNspc2Ncsp(size_t MB);

    bool canUseOptimizedNspc2Ncsp;
@@ -40,13 +62,6 @@ private:
    size_t axis = 1;
    std::vector<uint8_t*> dstMemPtrs;

-    struct {
-        std::vector<size_t> dataSize;
-        std::vector<size_t> srcDataOffsets;
-        size_t srcDataStride;
-        size_t countStrides;
-    } optimizedParams;
-
    size_t INPUTS_NUM = 2;
};
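The header-side pattern is an abstract SplitExecutor with one exec entry point, a concrete SplitOptimizedExecutor holding the precomputed arrays, and executeDynamicImpl simply forwarding to execute so static and dynamic shapes share one code path. A compilable miniature of that polymorphic arrangement (all names here are illustrative, not the plugin API):

#include <iostream>
#include <memory>

// Abstract strategy: rebuilt in prepareParams(), invoked in execute().
struct ExecutorBase {
    virtual void exec() = 0;
    virtual ~ExecutorBase() = default;
};

// Concrete strategy; others (e.g. a reference path) could be added later.
struct OptimizedExecutor final : ExecutorBase {
    void exec() override { std::cout << "optimized copy\n"; }
};

int main() {
    std::shared_ptr<ExecutorBase> execPtr = std::make_shared<OptimizedExecutor>();
    execPtr->exec();   // mirrors execPtr->exec(srcData, dstMemPtrs, batch, MB)
}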
@@ -39,7 +39,7 @@ std::vector<std::string> disabledTestPatterns() {
        // TODO: Issue 43417 sporadic issue, looks like an issue in test, reproducible only on Windows platform
        R"(.*decomposition1_batch=5_hidden_size=10_input_size=30_.*tanh.relu.*_clip=0_linear_before_reset=1.*_targetDevice=CPU_.*)",
        // Skip platforms that do not support BF16 (i.e. sse, avx, avx2)
-        R"(.*BF16.*(jit_avx(?!5)|jit_sse|ref).*)",
+        R"(.*(BF|bf)16.*(jit_avx(?!5)|jit_sse|ref).*)",
        // TODO: Incorrect blob sizes for node BinaryConvolution_X
        R"(.*BinaryConvolutionLayerTest.*)",
        R"(.*ClampLayerTest.*netPrc=(I64|I32).*)",
@@ -165,7 +165,7 @@ std::vector<std::string> disabledTestPatterns() {
    if (!InferenceEngine::with_cpu_x86_avx512_core()) {
        // on platforms which do not support bfloat16, we are disabling bf16 tests since there are no bf16 primitives,
        // tests are useless on such platforms
-        retVector.emplace_back(R"(.*BF16.*)");
+        retVector.emplace_back(R"(.*(BF|bf)16.*)");
        retVector.emplace_back(R"(.*bfloat16.*)");
    }
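The skip-pattern tweak widens `BF16` to `(BF|bf)16` so that names produced by the new ElementType-based suites, which print the lowercase `bf16`, are also filtered on machines without bf16 support. A quick illustrative check with std::regex (the test names are made up):

#include <iostream>
#include <regex>
#include <string>

int main() {
    const std::regex oldPat(".*BF16.*");
    const std::regex newPat(".*(BF|bf)16.*");
    // The old pattern misses the lowercase spelling; the new one catches both.
    for (const std::string name : {"SplitLayerCPUTest_netPRC=BF16", "splitCPUTest_netPRC=bf16"}) {
        std::cout << name << ": old=" << std::regex_match(name, oldPat)
                  << " new=" << std::regex_match(name, newPat) << '\n';
    }
}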
@@ -2,56 +2,63 @@
// SPDX-License-Identifier: Apache-2.0
//

+#include "shared_test_classes/base/ov_subgraph.hpp"
#include "ngraph_functions/builders.hpp"
#include "test_utils/cpu_test_utils.hpp"

using namespace InferenceEngine;
+using namespace ov::test;
using namespace CPUTestUtils;

namespace CPULayerTestsDefinitions {

typedef std::tuple<
-        size_t,                         // Num splits
-        int64_t,                        // Axis
-        InferenceEngine::Precision,     // Net precision
-        std::vector<size_t>,            // Input shapes
-        std::vector<size_t>,            // Used outputs indices
-        std::string,                    // Target device name
+        size_t,                         // Num splits
+        int64_t,                        // Axis
+        ElementType,                    // Net precision
+        InputShape,                     // Input shapes
+        std::vector<size_t>,            // Used outputs indices
        CPUSpecificParams
> splitCPUTestParams;

class SplitLayerCPUTest : public testing::WithParamInterface<splitCPUTestParams>,
-                          virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
+                          virtual public SubgraphBaseTest, public CPUTestsBase {
public:
    static std::string getTestCaseName(testing::TestParamInfo<splitCPUTestParams> obj) {
        size_t numSplits;
        int64_t axis;
-        InferenceEngine::Precision netPrecision;
-        InferenceEngine::SizeVector inputShape, outIndices;
-        std::string targetDevice;
+        ElementType netPrecision;
+        InputShape inputShapes;
+        InferenceEngine::SizeVector outIndices;
        CPUSpecificParams cpuParams;
-        std::tie(numSplits, axis, netPrecision, inputShape, outIndices, targetDevice, cpuParams) = obj.param;
+        std::tie(numSplits, axis, netPrecision, inputShapes, outIndices, cpuParams) = obj.param;

        std::ostringstream result;
-        result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
+        result << "IS=";
+        result << CommonTestUtils::partialShape2str({inputShapes.first}) << "_";
+        result << "TS=";
+        for (const auto& shape : inputShapes.second) {
+            result << CommonTestUtils::vec2str(shape) << "_";
+        }
        result << "numSplits=" << numSplits << "_";
        result << "axis=" << axis << "_";
        if (!outIndices.empty()) {
            result << "outIndices" << CommonTestUtils::vec2str(outIndices) << "_";
        }
-        result << "netPRC=" << netPrecision.name() << "_";
-        result << "trgDev=" << targetDevice;
+        result << "netPRC=" << netPrecision << "_";
        result << CPUTestsBase::getTestCaseName(cpuParams);
        return result.str();
    }

protected:
    void SetUp() override {
+        targetDevice = CommonTestUtils::DEVICE_CPU;
+
        size_t axis, numSplits;
-        std::vector<size_t> inputShape, outIndices;
-        InferenceEngine::Precision netPrecision;
+        ElementType netPrecision;
+        InputShape inputShapes;
+        InferenceEngine::SizeVector outIndices;
        CPUSpecificParams cpuParams;
-        std::tie(numSplits, axis, netPrecision, inputShape, outIndices, targetDevice, cpuParams) = this->GetParam();
-        inPrc = outPrc = netPrecision;
+        std::tie(numSplits, axis, netPrecision, inputShapes, outIndices, cpuParams) = this->GetParam();
        if (outIndices.empty()) {
            for (int i = 0; i < numSplits; ++i) {
                outIndices.push_back(i);
|
||||
}
|
||||
|
||||
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
|
||||
selectedType += std::string("_") + inPrc.name();
|
||||
selectedType += std::string("_") + InferenceEngine::details::convertPrecision(netPrecision).name();
|
||||
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
|
||||
init_input_shapes({inputShapes});
|
||||
|
||||
auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes);
|
||||
auto paramOuts = ngraph::helpers::convert2OutputVector(
|
||||
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
|
||||
auto split = std::dynamic_pointer_cast<ngraph::opset5::Split>(ngraph::builder::makeSplit(paramOuts[0],
|
||||
ngPrc, numSplits, axis));
|
||||
netPrecision, numSplits, axis));
|
||||
ngraph::ResultVector results;
|
||||
|
||||
for (int i = 0; i < outIndices.size(); i++) {
|
||||
@@ -85,8 +93,8 @@ protected:
TEST_P(SplitLayerCPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

-    Run();
-    CheckPluginRelatedResults(executableNetwork, "Split");
+    run();
+    // CheckPluginRelatedResults(executableNetwork, "Split");
}

namespace {
@@ -115,70 +123,150 @@ const auto blocked16_4D_ref = CPUSpecificParams{{nChw16c}, {nChw16c}, {}, "ref"}
const auto blocked16_5D_ref = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {}, "ref"};

// List of precisions natively supported by mkldnn.
-const std::vector<Precision> netPrecisions = {
-        Precision::I8,
-        Precision::I32,
-        Precision::FP32,
-        Precision::BF16
+const std::vector<ElementType> netPrecisions = {
+        ElementType::i8,
+        ElementType::i32,
+        ElementType::f32,
+        ElementType::bf16
};

const std::vector<std::vector<size_t>> outIndices3 = {{0, 1, 2}, {0, 1, 1, 0, 2}, {0, 0, 0, 2}};
const std::vector<std::vector<size_t>> outIndices4 = {{0, 1, 2, 3}, {0, 1, 1, 0, 2, 3}, {0, 0, 0, 2, 3}};

+const std::vector<InputShape> inputShapes4D_Nspc2NcspSpecial = {
+        { {}, {{3, 8, 11, 9}} },
+        {
+            // dynamic
+            {-1, -1, -1, -1},
+            // target
+            {
+                {1, 4, 5, 7},
+                {3, 8, 5, 9},
+                {5, 16, 1, 8}
+            }
+        },
+        {
+            // dynamic
+            {{1, 5}, {1, 64}, {1, 25}, {2, 10}},
+            // target
+            {
+                {2, 8, 5, 7},
+                {1, 4, 10, 2},
+                {3, 16, 5, 9}
+            }
+        },
+};
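Each InputShape entry above pairs a dynamic shape (first member; `{}` means fully static, `-1` an unbounded dimension, `{min, max}` an interval) with the static target shapes the test actually infers with; every target must satisfy the declared bounds. A small self-contained checker using a stand-in type (not the actual ov::test::InputShape):

#include <iostream>
#include <utility>
#include <vector>

// Hypothetical stand-in for the dynamic half of an InputShape:
// per-dimension {min, max} bounds, where {-1, -1} means "any size".
using Bounds = std::vector<std::pair<long, long>>;
using Target = std::vector<long>;

bool satisfies(const Bounds& b, const Target& t) {
    if (b.size() != t.size()) return false;           // rank must match
    for (size_t i = 0; i < b.size(); ++i) {
        if (b[i].first == -1) continue;               // unbounded dimension
        if (t[i] < b[i].first || t[i] > b[i].second) return false;
    }
    return true;
}

int main() {
    const Bounds bounds = {{1, 5}, {1, 64}, {1, 25}, {2, 10}};  // from the list above
    for (const Target& t : {Target{2, 8, 5, 7}, Target{1, 4, 10, 2}, Target{3, 16, 5, 9}})
        std::cout << satisfies(bounds, t) << '\n';    // all three print 1
}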

INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Nspc2NcspSpecial, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(4),
                                ::testing::Values(1),
                                ::testing::ValuesIn(netPrecisions),
-                                ::testing::Values(std::vector<size_t>({3, 28, 24, 9})),
+                                ::testing::ValuesIn(inputShapes4D_Nspc2NcspSpecial),
                                ::testing::ValuesIn(outIndices4),
-                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(perChannelsToPlanar_4D)),
                        SplitLayerCPUTest::getTestCaseName);

+const std::vector<InputShape> inputShapes5D_Nspc2NcspSpecial = {
+        { {}, {{3, 9, 5, 9, 11}} },
+        {
+            // dynamic
+            {-1, -1, -1, -1, -1},
+            // target
+            {
+                {1, 12, 5, 7, 5},
+                {3, 6, 8, 9, 1},
+                {5, 9, 1, 8, 2}
+            }
+        },
+        {
+            // dynamic
+            {{1, 5}, {1, 64}, {1, 25}, {2, 10}, {1, 64}},
+            // target
+            {
+                {2, 6, 5, 7, 7},
+                {1, 3, 10, 2, 11},
+                {3, 9, 4, 9, 8}
+            }
+        },
+};

INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Nspc2NcspSpecial, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(3),
                                ::testing::Values(1),
                                ::testing::ValuesIn(netPrecisions),
-                                ::testing::Values(std::vector<size_t>({3, 21, 24, 9, 15})),
+                                ::testing::ValuesIn(inputShapes5D_Nspc2NcspSpecial),
                                ::testing::ValuesIn(outIndices3),
-                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(perChannelsToPlanar_5D)),
                        SplitLayerCPUTest::getTestCaseName);

-INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block8inPlace, SplitLayerCPUTest,
-                        ::testing::Combine(
-                                ::testing::Values(3),
-                                ::testing::Values(0, 1),
-                                ::testing::ValuesIn(netPrecisions),
-                                ::testing::Values(std::vector<size_t>({3, 24, 24, 9})),
-                                ::testing::ValuesIn(outIndices3),
-                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                ::testing::Values(planar_4D, planar_4D_ref, perChannels_4D, blocked8_4D)),
-                        SplitLayerCPUTest::getTestCaseName);
+const std::vector<InputShape> inputShapes4D_planar = {
+        { {}, {{3, 24, 24, 9}} },
+        {
+            // dynamic
+            {-1, -1, -1, -1},
+            // target
+            {
+                {1, 15, 12, 9},
+                {3, 1, 9, 12},
+                {5, 5, 6, 6}
+            }
+        },
+        {
+            // dynamic
+            {{1, 5}, {1, 64}, {1, 48}, {2, 48}},
+            // target
+            {
+                {2, 5, 6, 9},
+                {1, 7, 12, 6},
+                {3, 11, 9, 3}
+            }
+        },
+};

INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_planar, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(3),
                                ::testing::Values(2, 3),
                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::ValuesIn(inputShapes4D_planar),
                                ::testing::ValuesIn(outIndices3),
                                ::testing::Values(planar_4D, planar_4D_ref, perChannels_4D)),
                        SplitLayerCPUTest::getTestCaseName);

+const std::vector<InputShape> inputShapes4D_block = {
+        { {}, {{3, 16, 12, 12}} },
+        {
+            // dynamic
+            {-1, 16, -1, -1},
+            // target
+            {
+                {1, 16, 12, 12},
+                {3, 16, 12, 12},
+                {5, 16, 12, 12}
+            }
+        },
+        {
+            // dynamic
+            {{1, 5}, 16, {1, 48}, {2, 24}},
+            // target
+            {
+                {2, 16, 12, 12},
+                {1, 16, 12, 12},
+                {3, 16, 12, 12}
+            }
+        },
+};

INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block8, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(3),
                                ::testing::Values(2, 3),
                                ::testing::ValuesIn(netPrecisions),
-                                ::testing::Values(std::vector<size_t>({3, 24, 24, 9})),
+                                ::testing::ValuesIn(inputShapes4D_block),
                                ::testing::ValuesIn(outIndices3),
-                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(planar_4D, planar_4D_ref, perChannels_4D, blocked8_4D_ref)),
                        SplitLayerCPUTest::getTestCaseName);

-INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block16inPlace, SplitLayerCPUTest,
-                        ::testing::Combine(
-                                ::testing::Values(4),
-                                ::testing::Values(0, 1),
-                                ::testing::ValuesIn(netPrecisions),
-                                ::testing::Values(std::vector<size_t>({4, 64, 32, 12})),
-                                ::testing::ValuesIn(outIndices3),
-                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                ::testing::Values(blocked16_4D)),
+                                ::testing::Values(blocked8_4D_ref)),
                        SplitLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block16, SplitLayerCPUTest,
@@ -186,43 +274,77 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block16, SplitLayerCPUTest,
                                ::testing::Values(4),
                                ::testing::Values(2, 3),
                                ::testing::ValuesIn(netPrecisions),
-                                ::testing::Values(std::vector<size_t>({4, 64, 32, 12})),
+                                ::testing::ValuesIn(inputShapes4D_block),
                                ::testing::ValuesIn(outIndices4),
-                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(blocked16_4D_ref)),
                        SplitLayerCPUTest::getTestCaseName);

-INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block8inPlace, SplitLayerCPUTest,
+const std::vector<InputShape> inputShapes5D_planar = {
+        { {}, {{3, 5, 3, 6, 12}} },
+        {
+            // dynamic
+            {-1, -1, -1, -1, -1},
+            // target
+            {
+                {1, 15, 12, 3, 9},
+                {3, 1, 6, 12, 3},
+                {5, 5, 6, 6, 6}
+            }
+        },
+        {
+            // dynamic
+            {{1, 5}, {1, 64}, {1, 48}, {2, 48}, {1, 40}},
+            // target
+            {
+                {2, 5, 12, 3, 6},
+                {1, 7, 12, 6, 9},
+                {3, 11, 9, 3, 30}
+            }
+        },
+};

INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_planar, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(3),
-                                ::testing::Values(0, 1),
+                                ::testing::Values(2, 3, 4),
                                ::testing::ValuesIn(netPrecisions),
-                                ::testing::Values(std::vector<size_t>({3, 24, 24, 9, 15})),
+                                ::testing::ValuesIn(inputShapes5D_planar),
                                ::testing::ValuesIn(outIndices3),
-                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                ::testing::Values(planar_5D, planar_5D_ref, perChannels_5D, blocked8_5D)),
+                                ::testing::Values(planar_5D, planar_5D_ref, perChannels_5D)),
                        SplitLayerCPUTest::getTestCaseName);

+const std::vector<InputShape> inputShapes5D_block = {
+        { {}, {{3, 16, 24, 12, 36}} },
+        {
+            // dynamic
+            {-1, 16, -1, -1, -1},
+            // target
+            {
+                {1, 16, 12, 24, 24},
+                {3, 16, 12, 12, 12},
+                {5, 16, 12, 12, 24}
+            }
+        },
+        {
+            // dynamic
+            {{1, 5}, 16, {1, 48}, {2, 24}, {3, 64}},
+            // target
+            {
+                {2, 16, 12, 12, 24},
+                {1, 16, 12, 12, 24},
+                {3, 16, 12, 12, 12}
+            }
+        },
+};

INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block8, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(3),
                                ::testing::Values(2, 3, 4),
                                ::testing::ValuesIn(netPrecisions),
-                                ::testing::Values(std::vector<size_t>({3, 24, 24, 9, 15})),
+                                ::testing::ValuesIn(inputShapes5D_block),
                                ::testing::ValuesIn(outIndices3),
-                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(planar_5D, planar_5D_ref, perChannels_5D, blocked8_5D_ref)),
                        SplitLayerCPUTest::getTestCaseName);

-INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block16inPlace, SplitLayerCPUTest,
-                        ::testing::Combine(
-                                ::testing::Values(4),
-                                ::testing::Values(0, 1),
-                                ::testing::ValuesIn(netPrecisions),
-                                ::testing::Values(std::vector<size_t>({4, 64, 32, 12, 20})),
-                                ::testing::ValuesIn(outIndices4),
-                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                ::testing::Values(blocked16_5D)),
+                                ::testing::Values(blocked8_5D_ref)),
                        SplitLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block16, SplitLayerCPUTest,
@@ -230,43 +352,154 @@ INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block16, SplitLayerCPUTest,
                                ::testing::Values(4),
                                ::testing::Values(2, 3, 4),
                                ::testing::ValuesIn(netPrecisions),
-                                ::testing::Values(std::vector<size_t>({4, 64, 32, 12, 20})),
+                                ::testing::ValuesIn(inputShapes5D_block),
                                ::testing::ValuesIn(outIndices4),
-                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(blocked16_5D_ref)),
                        SplitLayerCPUTest::getTestCaseName);

+const std::vector<InputShape> inputShapes3D = {
+        { {}, {{14, 28, 21}} },
+        {
+            // dynamic
+            {-1, -1, -1},
+            // target
+            {
+                {7, 21, 14},
+                {21, 7, 14},
+                {21, 14, 7},
+            }
+        },
+        {
+            // dynamic
+            {{1, 60}, {1, 50}, {1, 48}},
+            // target
+            {
+                {14, 21, 7},
+                {21, 7, 14},
+                {7, 14, 21},
+            }
+        },
+};

INSTANTIATE_TEST_SUITE_P(smoke_Split3D, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(7),
                                ::testing::Values(0, 1, 2),
                                ::testing::ValuesIn(netPrecisions),
-                                ::testing::Values(std::vector<size_t>({14, 42, 21})),
+                                ::testing::ValuesIn(inputShapes3D),
                                ::testing::Values(std::vector<size_t>({})),
-                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
                        SplitLayerCPUTest::getTestCaseName);

+const std::vector<InputShape> inputShapes2D = {
+        { {}, {{6, 12}} },
+        {
+            // dynamic
+            {-1, -1},
+            // target
+            {
+                {2, 8},
+                {10, 4},
+                {2, 6},
+            }
+        },
+        {
+            // dynamic
+            {{1, 60}, {1, 50}},
+            // target
+            {
+                {2, 4},
+                {4, 4},
+                {6, 12},
+            }
+        },
+};

INSTANTIATE_TEST_SUITE_P(smoke_Split2D, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(2),
                                ::testing::Values(0, 1),
                                ::testing::ValuesIn(netPrecisions),
-                                ::testing::Values(std::vector<size_t>({6, 12})),
+                                ::testing::ValuesIn(inputShapes2D),
                                ::testing::Values(std::vector<size_t>({})),
-                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
                        SplitLayerCPUTest::getTestCaseName);

+const std::vector<InputShape> inputShapes1D = {
+        { {}, {{10}} },
+        {
+            // dynamic
+            {-1},
+            // target
+            {
+                {5},
+                {15},
+                {10},
+            }
+        },
+        {
+            // dynamic
+            {{1, 60}},
+            // target
+            {
+                {15},
+                {5},
+                {10},
+            }
+        },
+};

INSTANTIATE_TEST_SUITE_P(smoke_Split1D, SplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(5),
                                ::testing::Values(0),
                                ::testing::ValuesIn(netPrecisions),
-                                ::testing::Values(std::vector<size_t>({10})),
+                                ::testing::ValuesIn(inputShapes1D),
                                ::testing::Values(std::vector<size_t>({})),
-                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
                        SplitLayerCPUTest::getTestCaseName);

+// ============================================== inPlace cases ============================================
+INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block8inPlace, SplitLayerCPUTest,
+                        ::testing::Combine(
+                                ::testing::Values(3),
+                                ::testing::Values(0, 1),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(InputShape{ {}, {{3, 24, 24, 9}} }),
+                                ::testing::ValuesIn(outIndices3),
+                                ::testing::Values(planar_4D, planar_4D_ref, perChannels_4D, blocked8_4D)),
+                        SplitLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_Split4D_CPU_Block16inPlace, SplitLayerCPUTest,
+                        ::testing::Combine(
+                                ::testing::Values(4),
+                                ::testing::Values(0, 1),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(InputShape{ {}, {{4, 64, 32, 12}} }),
+                                ::testing::ValuesIn(outIndices3),
+                                ::testing::Values(blocked16_4D)),
+                        SplitLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block8inPlace, SplitLayerCPUTest,
+                        ::testing::Combine(
+                                ::testing::Values(3),
+                                ::testing::Values(0, 1),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(InputShape{ {}, {{3, 24, 24, 9, 15}} }),
+                                ::testing::ValuesIn(outIndices3),
+                                ::testing::Values(planar_5D, planar_5D_ref, perChannels_5D, blocked8_5D)),
+                        SplitLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_Split5D_CPU_Block16inPlace, SplitLayerCPUTest,
+                        ::testing::Combine(
+                                ::testing::Values(4),
+                                ::testing::Values(0, 1),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(InputShape{ {}, {{4, 64, 32, 12, 20}} }),
+                                ::testing::ValuesIn(outIndices4),
+                                ::testing::Values(blocked16_5D)),
+                        SplitLayerCPUTest::getTestCaseName);

} // namespace

} // namespace CPULayerTestsDefinitions
@@ -0,0 +1,434 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "shared_test_classes/base/ov_subgraph.hpp"
#include "ngraph_functions/builders.hpp"
#include "test_utils/cpu_test_utils.hpp"

using namespace ov::test;
using namespace CPUTestUtils;

namespace CPULayerTestsDefinitions {

typedef std::tuple<
        InputShape,                     // Input shapes
        int64_t,                        // Axis
        std::vector<int>,               // Split lengths
        ElementType,                    // Net precision
        CPUSpecificParams
> varSplitCPUTestParams;

class VariadicSplitLayerCPUTest : public testing::WithParamInterface<varSplitCPUTestParams>,
                                  virtual public SubgraphBaseTest, public CPUTestsBase {
public:
    static std::string getTestCaseName(testing::TestParamInfo<varSplitCPUTestParams> obj) {
        InputShape shapes;
        int64_t axis;
        std::vector<int> splitLenght;
        ElementType netPrecision;
        CPUSpecificParams cpuParams;
        std::tie(shapes, axis, splitLenght, netPrecision, cpuParams) = obj.param;

        std::ostringstream result;
        result << "IS=";
        result << CommonTestUtils::partialShape2str({shapes.first}) << "_";
        result << "TS=";
        for (const auto& shape : shapes.second) {
            result << CommonTestUtils::vec2str(shape) << "_";
        }
        result << "axis=" << axis << "_";
        result << "splitLenght=" << CommonTestUtils::vec2str(splitLenght) << "_";
        result << "netPRC=" << netPrecision << "_";
        result << CPUTestsBase::getTestCaseName(cpuParams);
        return result.str();
    }

protected:
    void SetUp() override {
        targetDevice = CommonTestUtils::DEVICE_CPU;

        InputShape inputShapes;
        int64_t axis;
        std::vector<int> splitLenght;
        ElementType netPrecision;
        CPUSpecificParams cpuParams;
        std::tie(inputShapes, axis, splitLenght, netPrecision, cpuParams) = this->GetParam();

        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
        selectedType += std::string("_") + InferenceEngine::details::convertPrecision(netPrecision).name();

        init_input_shapes({inputShapes});

        auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes);
        auto paramOuts = ngraph::helpers::convert2OutputVector(
                ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));

        auto splitAxisOp = std::make_shared<ngraph::opset3::Constant>(ngraph::element::i64, ngraph::Shape{}, std::vector<int64_t>{axis});
        auto splitLengthsOp = std::make_shared<ngraph::opset3::Constant>(ngraph::element::i32, ngraph::Shape{splitLenght.size()}, splitLenght);
        auto varSplit = std::make_shared<ngraph::opset3::VariadicSplit>(paramOuts[0], splitAxisOp, splitLengthsOp);

        varSplit->get_rt_info() = getCPUInfo();

        function = std::make_shared<ngraph::Function>(varSplit, params, "VariadicSplitCPU");
    }
};

TEST_P(VariadicSplitLayerCPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    run();
    // CheckPluginRelatedResults(executableNetwork, "Split");
}

namespace {
const auto planar_4D_ref = CPUSpecificParams{{nchw}, {nchw}, {"ref"}, "ref"};
const auto planar_5D_ref = CPUSpecificParams{{ncdhw}, {ncdhw}, {"ref"}, "ref"};

const auto planar_4D = CPUSpecificParams{{nchw}, {nchw}, {}, "unknown"};
const auto planar_5D = CPUSpecificParams{{ncdhw}, {ncdhw}, {}, "unknown"};

const auto perChannels_4D = CPUSpecificParams{{nhwc}, {nhwc}, {}, "ref"};
const auto perChannels_5D = CPUSpecificParams{{ndhwc}, {ndhwc}, {}, "ref"};

const auto perChannelsToPlanar_4D = CPUSpecificParams{{nhwc}, {nchw}, {}, "ref"};
const auto perChannelsToPlanar_5D = CPUSpecificParams{{ndhwc}, {ncdhw}, {}, "ref"};

const auto blocked8_4D = CPUSpecificParams{{nChw8c}, {nChw8c}, {}, "unknown"};
const auto blocked8_5D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {}, "unknown"};

const auto blocked8_4D_ref = CPUSpecificParams{{nChw8c}, {nChw8c}, {}, "ref"};
const auto blocked8_5D_ref = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {}, "ref"};

const auto blocked16_4D = CPUSpecificParams{{nChw16c}, {nChw16c}, {}, "unknown"};
const auto blocked16_5D = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {}, "unknown"};

const auto blocked16_4D_ref = CPUSpecificParams{{nChw16c}, {nChw16c}, {}, "ref"};
const auto blocked16_5D_ref = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {}, "ref"};

// List of precisions natively supported by mkldnn.
const std::vector<ElementType> netPrecisions = {
        ElementType::i8,
        ElementType::i32,
        ElementType::f32,
        ElementType::bf16
};

const std::vector<InputShape> inputShapes4D_Nspc2NcspSpecial = {
        { {}, {{3, 5, 24, 9}} },
        {
            // dynamic
            {-1, -1, -1, -1},
            // target
            {
                {1, 8, 5, 7},
                {3, 9, 7, 9},
                {5, 6, 1, 8}
            }
        },
        {
            // dynamic
            {{1, 5}, {1, 64}, {1, 25}, {2, 10}},
            // target
            {
                {2, 7, 5, 7},
                {1, 10, 10, 2},
                {3, 5, 6, 9}
            }
        },
};

INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit4D_CPU_Nspc2NcspSpecial, VariadicSplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::ValuesIn(inputShapes4D_Nspc2NcspSpecial),
                                ::testing::Values(1),
                                ::testing::Values(std::vector<int>{1, 2, -1, 1}),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(perChannelsToPlanar_4D)),
                        VariadicSplitLayerCPUTest::getTestCaseName);

const std::vector<InputShape> inputShapes5D_Nspc2NcspSpecial = {
        { {}, {{3, 4, 7, 9, 3}} },
        {
            // dynamic
            {-1, -1, -1, -1, -1},
            // target
            {
                {1, 6, 5, 7, 5},
                {3, 8, 6, 9, 1},
                {5, 9, 1, 8, 2}
            }
        },
        {
            // dynamic
            {{1, 5}, {1, 64}, {1, 25}, {2, 10}, {1, 64}},
            // target
            {
                {2, 5, 5, 7, 7},
                {1, 4, 10, 2, 11},
                {3, 7, 5, 9, 8}
            }
        },
};

INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit5D_CPU_Nspc2NcspSpecial, VariadicSplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::ValuesIn(inputShapes5D_Nspc2NcspSpecial),
                                ::testing::Values(1),
                                ::testing::Values(std::vector<int>{2, 1, -1}),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(perChannelsToPlanar_5D)),
                        VariadicSplitLayerCPUTest::getTestCaseName);

const std::vector<InputShape> inputShapes4D_planar = {
        { {}, {{3, 6, 5, 6}} },
        {
            // dynamic
            {-1, -1, -1, -1},
            // target
            {
                {1, 9, 8, 7},
                {3, 8, 6, 5},
                {5, 3, 7, 6}
            }
        },
        {
            // dynamic
            {{1, 5}, {1, 64}, {1, 48}, {2, 48}},
            // target
            {
                {2, 9, 5, 6},
                {1, 6, 9, 8},
                {3, 1, 6, 7}
            }
        },
};

INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit4D_CPU_planar, VariadicSplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::ValuesIn(inputShapes4D_planar),
                                ::testing::Values(2, 3),
                                ::testing::Values(std::vector<int>{1, 3, -1}),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(planar_4D, planar_4D_ref, perChannels_4D)),
                        VariadicSplitLayerCPUTest::getTestCaseName);

const std::vector<InputShape> inputShapes4D_block = {
        { {}, {{3, 16, 6, 7}} },
        {
            // dynamic
            {-1, 16, -1, -1},
            // target
            {
                {1, 16, 8, 7},
                {3, 16, 7, 8},
                {5, 16, 9, 8}
            }
        },
        {
            // dynamic
            {{1, 5}, 16, {1, 48}, {2, 24}},
            // target
            {
                {2, 16, 12, 6},
                {1, 16, 6, 9},
                {3, 16, 7, 6}
            }
        },
};

INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit4D_CPU_Block8, VariadicSplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::ValuesIn(inputShapes4D_block),
                                ::testing::Values(2, 3),
                                ::testing::Values(std::vector<int>{2, 2, -1}),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(blocked8_4D_ref)),
                        VariadicSplitLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit4D_CPU_Block16, VariadicSplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::ValuesIn(inputShapes4D_block),
                                ::testing::Values(2, 3),
                                ::testing::Values(std::vector<int>{2, 2, -1, 1}),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(blocked16_4D_ref)),
                        VariadicSplitLayerCPUTest::getTestCaseName);

const std::vector<InputShape> inputShapes5D_planar = {
        { {}, {{3, 24, 4, 5, 6}} },
        {
            // dynamic
            {-1, -1, -1, -1, -1},
            // target
            {
                {1, 2, 4, 6, 5},
                {3, 1, 6, 4, 5},
                {5, 6, 5, 7, 4}
            }
        },
        {
            // dynamic
            {{1, 5}, {1, 64}, {1, 48}, {2, 48}, {2, 40}},
            // target
            {
                {2, 5, 4, 5, 6},
                {1, 7, 5, 4, 7},
                {3, 3, 5, 6, 4}
            }
        },
};

INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit5D_CPU_planar, VariadicSplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::ValuesIn(inputShapes5D_planar),
                                ::testing::Values(2, 3, 4),
                                ::testing::Values(std::vector<int>{2, 1, -1}),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(planar_5D, planar_5D_ref, perChannels_5D)),
                        VariadicSplitLayerCPUTest::getTestCaseName);

const std::vector<InputShape> inputShapes5D_block = {
        { {}, {{3, 16, 8, 5, 6}} },
        {
            // dynamic
            {-1, 16, -1, -1, -1},
            // target
            {
                {1, 16, 5, 6, 7},
                {3, 16, 24, 5, 8},
                {5, 16, 6, 7, 5}
            }
        },
        {
            // dynamic
            {{1, 5}, 16, {1, 48}, {2, 24}, {2, 64}},
            // target
            {
                {2, 16, 7, 6, 5},
                {1, 16, 6, 5, 7},
                {3, 16, 5, 7, 6}
            }
        },
};

INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit5D_CPU_Block8, VariadicSplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::ValuesIn(inputShapes5D_block),
                                ::testing::Values(2, 3, 4),
                                ::testing::Values(std::vector<int>{1, 2, -1}),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(blocked8_5D_ref)),
                        VariadicSplitLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit5D_CPU_Block16, VariadicSplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::ValuesIn(inputShapes5D_block),
                                ::testing::Values(2, 3, 4),
                                ::testing::Values(std::vector<int>{2, 1, -1, 1}),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(blocked16_5D_ref)),
                        VariadicSplitLayerCPUTest::getTestCaseName);

const std::vector<InputShape> inputShapes3D = {
        { {}, {{14, 7, 21}} },
        {
            // dynamic
            {-1, -1, -1},
            // target
            {
                {7, 21, 14},
                {21, 7, 14},
                {21, 14, 7},
            }
        },
        {
            // dynamic
            {{1, 60}, {1, 50}, {1, 48}},
            // target
            {
                {14, 21, 7},
                {21, 7, 14},
                {7, 14, 21},
            }
        },
};

INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit3D, VariadicSplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::ValuesIn(inputShapes3D),
                                ::testing::Values(0, 1, 2),
                                ::testing::Values(std::vector<int>{2, 4, -1}),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
                        VariadicSplitLayerCPUTest::getTestCaseName);

const std::vector<InputShape> inputShapes2D = {
        { {}, {{6, 12}} },
        {
            // dynamic
            {-1, -1},
            // target
            {
                {3, 8},
                {10, 4},
                {3, 6},
            }
        },
        {
            // dynamic
            {{1, 60}, {1, 50}},
            // target
            {
                {3, 4},
                {4, 4},
                {6, 12},
            }
        },
};

INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit2D, VariadicSplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::ValuesIn(inputShapes2D),
                                ::testing::Values(0, 1),
                                ::testing::Values(std::vector<int>{2, -1}),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
                        VariadicSplitLayerCPUTest::getTestCaseName);

const std::vector<InputShape> inputShapes1D = {
        { {}, {{10}} },
        {
            // dynamic
            {-1},
            // target
            {
                {5},
                {15},
                {10},
            }
        },
        {
            // dynamic
            {{1, 60}},
            // target
            {
                {15},
                {5},
                {10},
            }
        },
};

INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplit1D, VariadicSplitLayerCPUTest,
                        ::testing::Combine(
                                ::testing::ValuesIn(inputShapes1D),
                                ::testing::Values(0),
                                ::testing::Values(std::vector<int>{2, 1, 1, -1}),
                                ::testing::ValuesIn(netPrecisions),
                                ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"}, CPUSpecificParams{{}, {}, {"ref"}, "ref"})),
                        VariadicSplitLayerCPUTest::getTestCaseName);

} // namespace

} // namespace CPULayerTestsDefinitions
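The split_lengths vectors used throughout these instantiations, such as {1, 2, -1, 1}, rely on the VariadicSplit convention that a single -1 entry means "whatever remains along the axis". A small standalone illustration of how such a vector resolves against a concrete axis size:

#include <iostream>
#include <vector>

// Resolve a VariadicSplit-style lengths vector: at most one -1 entry is
// replaced by the remainder of the axis dimension.
std::vector<int> resolve(std::vector<int> lengths, int axisDim) {
    int known = 0;
    int* wildcard = nullptr;
    for (int& l : lengths) {
        if (l == -1) wildcard = &l;
        else known += l;
    }
    if (wildcard) *wildcard = axisDim - known;
    return lengths;
}

int main() {
    for (int v : resolve({1, 2, -1, 1}, 8))  // axis dim 8 -> prints 1 2 4 1
        std::cout << v << ' ';
    std::cout << '\n';
}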