[CPU] Split layer nspc -> ncsp special case put back. (#3839)

Author: Maksim Kutakov, 2021-02-02 15:40:50 +03:00 (committed by GitHub)
parent cc000e57e0
commit f0398212f8
4 changed files with 216 additions and 41 deletions

File 1 of 4: mkldnn_split_node.cpp

@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@@ -6,6 +6,7 @@
#include "common/cpu_memcpy.h"
#include <legacy/ie_layers.h>
#include <vector>
#include <queue>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include <climits>
@@ -80,7 +81,7 @@ void MKLDNNSplitNode::getSupportedDescriptors() {
void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
using TensorDescFactory = std::function<TensorDesc(const Precision&, const SizeVector&)>;
constexpr size_t channelsPos = 1lu;
// perform guard checks
if (!supportedPrimitiveDescriptors.empty())
return;
@@ -218,6 +219,16 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
}
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFormats);
}
// Special nspc -> ncsp case when splitting channels
if (axis == 1 && (dstFirstDims.ndims() == 4 || dstFirstDims.ndims() == 5)) {
auto plain = makePdInfo(&makePlainTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref);
auto perChannel = makePdInfo(&makePerChannelTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref);
plain.getConfig().inConfs[0].desc = perChannel.getConfig().inConfs[0].desc;
supportedPrimitiveDescriptors.push_back(plain);
}
}
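For context on the special case just added: with an nspc (NHWC/NDHWC) input, the channel values of one spatial point are contiguous, while an ncsp (NCHW/NCDHW) output stores each channel as a whole plane, so a single strided copy cannot serve both layouts. The stride difference is easiest to see in a minimal standalone sketch (plain C++; the shape matches the 4D test case below, but the names are illustrative, not the plugin API):

#include <array>
#include <cstddef>
#include <cstdio>

int main() {
    // Illustrative {N, C, H, W} = {3, 28, 24, 9} tensor (the 4D test shape used below)
    const size_t C = 28, H = 24, W = 9;
    // ncsp (NCHW): moving one channel jumps a whole H*W plane
    const std::array<size_t, 4> ncspStrides = {C * H * W, H * W, W, 1};
    // nspc (NHWC): channels are innermost (stride 1); one W step jumps C elements
    const std::array<size_t, 4> nspcStrides = {H * W * C, 1, W * C, C};
    // Linear element offset of (n, c, h, w) under each layout
    auto offset = [](const std::array<size_t, 4>& s, size_t n, size_t c, size_t h, size_t w) {
        return n * s[0] + c * s[1] + h * s[2] + w * s[3];
    };
    std::printf("ncsp: %zu, nspc: %zu\n",
                offset(ncspStrides, 1, 2, 3, 4), offset(nspcStrides, 1, 2, 3, 4));
    return 0;
}

Giving the plain configuration a per-channel input descriptor is what lets the graph feed this node an nspc tensor while the consumers still receive plain ncsp outputs.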
void MKLDNNSplitNode::createPrimitive() {
@@ -231,23 +242,49 @@
if (getSelectedPrimitiveDescriptor() == nullptr)
THROW_ERROR << "Preferable primitive descriptor is not set.";
-if (!isOptimized())
-prepareOptimizedParams();
+canUseOptimizedNspc2Ncsp = true;
+if (axis != 1)
+canUseOptimizedNspc2Ncsp = false;
+if (getParentEdgeAt(0)->getBlob()->getTensorDesc().getLayout() != NHWC &&
+getParentEdgeAt(0)->getBlob()->getTensorDesc().getLayout() != NDHWC)
+canUseOptimizedNspc2Ncsp = false;
+for (size_t i = 0; i < getChildEdges().size(); i++) {
+if (getChildEdgeAt(i)->getBlob()->getTensorDesc().getLayout() != NCHW &&
+getChildEdgeAt(i)->getBlob()->getTensorDesc().getLayout() != NCDHW)
+canUseOptimizedNspc2Ncsp = false;
+}
+if (!isOptimized()) {
+initializeDstMemPtrs();
+if (!canUseOptimizedNspc2Ncsp)
+prepareOptimizedParams();
+}
}
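The gating added here can be read as a single predicate: the fast path applies only when the channel axis is split, the input is nspc, and every output is ncsp. A self-contained restatement (an illustrative sketch with its own Layout enum, not the InferenceEngine type):

#include <cstddef>
#include <vector>

enum class Layout { NCHW, NHWC, NCDHW, NDHWC };  // illustrative stand-in

// Sketch of the gating logic: the fast nspc -> ncsp kernel applies only when
// splitting the channel axis of an interleaved input into planar outputs.
bool canUseNspc2Ncsp(size_t axis, Layout in, const std::vector<Layout>& outs) {
    if (axis != 1)  // only channel splits qualify
        return false;
    if (in != Layout::NHWC && in != Layout::NDHWC)  // input must be nspc
        return false;
    for (auto l : outs)  // every output must be ncsp
        if (l != Layout::NCHW && l != Layout::NCDHW)
            return false;
    return true;
}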
void MKLDNNSplitNode::execute(mkldnn::stream strm) {
if (isOptimized())
return;
+if (dstMemPtrs.empty())
+THROW_ERROR << "Output data pointers have not been initialized.";
int MB = batchToProcess();
+if (canUseOptimizedNspc2Ncsp) {
+optimizedNspc2Ncsp(MB);
+return;
+}
uint8_t* srcData = reinterpret_cast<uint8_t*>(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
size_t batch = this->getParentEdgeAt(0)->getDims()[0];
if (batch != MB)
optimizedParams.countStrides = optimizedParams.countStrides / batch * MB;
-parallel_for2d(this->getChildEdges().size(), optimizedParams.countStrides, [&](size_t i, size_t j) {
-uint8_t* dstData = optimizedParams.dstMemPtrs[i];
+parallel_for2d(dstMemPtrs.size(), optimizedParams.countStrides, [&](size_t i, size_t j) {
+uint8_t* dstData = dstMemPtrs[i];
cpu_memcpy(&dstData[j * optimizedParams.dataSize[i]],
&srcData[optimizedParams.srcDataOffsets[i] + j * optimizedParams.srcDataStride],
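For reference, the generic optimized path above copies, for each output i, countStrides contiguous chunks of dataSize[i] bytes, reading at offset srcDataOffsets[i] within consecutive source rows of srcDataStride bytes. A serial sketch of that pattern (plain C++; the plugin runs the two loops under parallel_for2d with cpu_memcpy):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Serial sketch of the generic split copy (illustrative, not the plugin code).
void splitCopy(const uint8_t* src,
               const std::vector<uint8_t*>& dst,      // one data pointer per output
               const std::vector<size_t>& dataSize,   // bytes per output per row
               const std::vector<size_t>& srcOffsets, // byte offset of each output in a row
               size_t srcStride,                      // total bytes per source row
               size_t countStrides) {                 // number of rows
    for (size_t i = 0; i < dst.size(); ++i)
        for (size_t j = 0; j < countStrides; ++j)
            std::memcpy(dst[i] + j * dataSize[i],
                        src + srcOffsets[i] + j * srcStride,
                        dataSize[i]);
}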
@@ -346,7 +383,7 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
inNum = 0;
}
if (MKLDNNExtensionUtils::initTensorsAreEqual(
-getSupportedPrimitiveDescriptors()[i].getConfig().inConfs[0].desc,
+supportedPrimitiveDescriptors[i].getConfig().inConfs[0].desc,
parent_spd->getConfig().outConfs[inNum].desc)) {
canSelectPrimitive.push_back(i);
}
@@ -364,6 +401,46 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
}
}
// if there is no inPlace candidate but more than one suitable configuration, select the one that matches the output layout
for (auto indx : canSelectPrimitive) {
bool outputDescFullMatch = true;
for (size_t i = 0; i < getChildEdges().size(); ++i) {
auto childEdge = getChildEdgeAt(i);
auto childPtr = childEdge->getChild();
auto& vecChildSpd = childPtr->getSupportedPrimitiveDescriptors();
const auto& outputDesc = supportedPrimitiveDescriptors[indx].getConfig().outConfs[i].desc;
if (!vecChildSpd.empty()) {
int inNum = childEdge->getOutputNum();
if (inNum < 0) {
inNum = 0;
}
bool hasMatchDesc = false;
for (auto& childSpd : vecChildSpd) {
if (inNum >= childSpd.getConfig().inConfs.size()) {
inNum = 0;
}
if (MKLDNNExtensionUtils::initTensorsAreEqual(outputDesc, childSpd.getConfig().inConfs[inNum].desc)) {
hasMatchDesc = true;
break;
}
}
if (!hasMatchDesc) {
outputDescFullMatch = false;
break;
}
}
}
if (outputDescFullMatch) {
selectPrimitiveDescriptorByIndex(static_cast<int>(indx));
return;
}
}
if (!canSelectPrimitive.empty()) {
selectPrimitiveDescriptorByIndex(static_cast<int>(canSelectPrimitive.front()));
return;
}
// if there are no matching data layouts, select the first optimized implementation
for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
if (supportedPrimitiveDescriptors[i].getImplementationType() == impl_desc_type::unknown) {
@@ -384,50 +461,119 @@ void MKLDNNSplitNode::setDynamicBatchLim(int lim) {
void MKLDNNSplitNode::prepareOptimizedParams() {
const auto& inpTensorDesc = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc;
const auto outputPortsCount = outDims.size();
// find axis order position
const auto& order = inpTensorDesc.getBlockingDesc().getOrder();
-unsigned axisOrderPos = UINT_MAX;
+unsigned axisOrderPos = std::numeric_limits<unsigned>::max();
for (size_t i = 0; i < order.size(); ++i) {
if (order[i] == axis) {
axisOrderPos = i;
break;
}
}
-if (UINT_MAX == axisOrderPos) {
+if (std::numeric_limits<unsigned>::max() == axisOrderPos) {
THROW_ERROR << "Can't find the axis in the input tensor order list";
}
uint8_t srcDataSize = inpTensorDesc.getPrecision().size();
const auto& srcDims = inpTensorDesc.getBlockingDesc().getBlockDims();
-int nDims = srcDims.size();
+const auto nDims = srcDims.size();
optimizedParams.countStrides = 1;
for (int i = 0; i < axisOrderPos; i++)
optimizedParams.countStrides *= srcDims[i];
optimizedParams.srcDataStride = 0;
-optimizedParams.dataSize.resize(this->getChildEdges().size());
-optimizedParams.dstMemPtrs.clear();
-for (int i = 0; i < this->getChildEdges().size(); i++) {
-if (uint8_t* dstData = reinterpret_cast<uint8_t*>(this->getChildEdgeAt(i)->getMemoryPtr()->GetPtr())) {
-optimizedParams.dstMemPtrs.push_back(dstData);
-} else {
-THROW_ERROR << "can't get child edge indx " << i << "data.";
-}
+optimizedParams.dataSize.resize(outputPortsCount);
+for (size_t i = 0; i < outputPortsCount; i++) {
+auto outputEdge = this->getChildEdgesAtPort(i).front();
optimizedParams.dataSize[i] = srcDataSize;
-for (int j = axisOrderPos; j < nDims; j++)
-optimizedParams.dataSize[i] *= this->getChildEdgeAt(i)->getDesc().getBlockingDesc().getBlockDims()[j];
+for (size_t j = axisOrderPos; j < nDims; j++)
+optimizedParams.dataSize[i] *= outputEdge->getDesc().getBlockingDesc().getBlockDims()[j];
optimizedParams.srcDataStride += optimizedParams.dataSize[i];
}
-optimizedParams.srcDataOffsets.resize(this->getChildEdges().size());
+optimizedParams.srcDataOffsets.resize(outputPortsCount);
optimizedParams.srcDataOffsets[0] = 0;
-for (int i = 1; i < this->getChildEdges().size(); i++) {
+for (size_t i = 1; i < outputPortsCount; i++) {
optimizedParams.srcDataOffsets[i] = optimizedParams.srcDataOffsets[i - 1] + optimizedParams.dataSize[i - 1];
}
}
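A worked example of these quantities, using the fp32 {1, 20, 2, 5} -> {1, 13, 2, 5} + {1, 7, 2, 5} case from the unit tests below (a standalone sketch, not the plugin code):

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    // fp32 NCHW input {1, 20, 2, 5} split along axis 1 into
    // {1, 13, 2, 5} and {1, 7, 2, 5} (one of the unit-test cases below)
    const size_t srcDataSize = 4;   // sizeof(float)
    const size_t axisOrderPos = 1;  // position of the split axis in plain order
    const std::vector<std::vector<size_t>> outDims = {{1, 13, 2, 5}, {1, 7, 2, 5}};

    const size_t countStrides = 1;  // product of dims before the axis: N = 1
    std::vector<size_t> dataSize(outDims.size());
    std::vector<size_t> srcDataOffsets(outDims.size(), 0);
    size_t srcDataStride = 0;
    for (size_t i = 0; i < outDims.size(); ++i) {
        dataSize[i] = srcDataSize;
        for (size_t j = axisOrderPos; j < outDims[i].size(); ++j)
            dataSize[i] *= outDims[i][j];  // bytes each output consumes per row
        srcDataStride += dataSize[i];
        if (i > 0)
            srcDataOffsets[i] = srcDataOffsets[i - 1] + dataSize[i - 1];
    }
    // Expected: dataSize = {520, 280}, srcDataStride = 800, offsets = {0, 520}
    std::printf("countStrides=%zu stride=%zu offsets={%zu, %zu}\n",
                countStrides, srcDataStride, srcDataOffsets[0], srcDataOffsets[1]);
    return 0;
}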
void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) {
auto parentEdge = getParentEdgeAt(0);
const int ndims = parentEdge->getDims().ndims();
const size_t IC = parentEdge->getDims()[1];
const size_t D = ndims == 5 ? parentEdge->getDims()[ndims - 3] : 1;
const size_t H = parentEdge->getDims()[ndims - 2];
const size_t W = parentEdge->getDims()[ndims - 1];
auto srcBlob = parentEdge->getBlob();
auto srcData = srcBlob->cbuffer().as<const uint8_t*>();
const auto dataSize = srcBlob->getTensorDesc().getPrecision().size();
const size_t DHW = D*H*W;
const size_t strideIB = DHW * IC * dataSize;
const size_t strideIW = IC*dataSize;
const size_t strideOC = DHW * dataSize;
for (size_t i = 0, sIdx = 0; i < outDims.size(); i++) {
auto dstData = dstMemPtrs[i];
size_t innerSize = 1;
auto dims = outDims[i].ToSizeVector();
for (size_t j = axis; j < dims.size(); j++) {
innerSize *= dims[j];
}
auto srcPtr = srcData + srcBlob->getTensorDesc().offset(sIdx) * dataSize;
const size_t OC = dims[1];
const size_t strideOB = OC * strideOC;
parallel_for2d(MB, DHW, [&](size_t b, size_t j) {
auto localSrcPtr = srcPtr + b*strideIB + j*strideIW;
auto localDstPtr = dstData + b*strideOB + j*dataSize;
for (size_t c = 0; c < OC; c++) {
cpu_memcpy(localDstPtr, localSrcPtr, dataSize);
localSrcPtr += dataSize;
localDstPtr += strideOC;
}
});
sIdx += innerSize;
}
}
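The same loop structure, reduced to a single output for clarity (a serial, illustrative sketch; the plugin walks all outputs via sIdx and parallelizes over batch and spatial points):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Serial sketch of the nspc -> ncsp copy for one output: for each batch item
// and spatial point, the OC channel values are contiguous in the source and
// scattered one channel plane (DHW elements) apart in the destination.
void nspc2NcspOne(const uint8_t* src, uint8_t* dst,
                  size_t MB, size_t IC, size_t DHW,
                  size_t channelOffset,  // first source channel of this output
                  size_t OC,             // channels in this output
                  size_t dataSize) {     // bytes per element
    const size_t strideIB = DHW * IC * dataSize;  // source batch stride
    const size_t strideIW = IC * dataSize;        // source stride per spatial point
    const size_t strideOC = DHW * dataSize;       // dest stride per channel plane
    const size_t strideOB = OC * strideOC;        // dest batch stride
    for (size_t b = 0; b < MB; ++b) {
        for (size_t j = 0; j < DHW; ++j) {
            const uint8_t* s = src + b * strideIB + j * strideIW + channelOffset * dataSize;
            uint8_t* d = dst + b * strideOB + j * dataSize;
            for (size_t c = 0; c < OC; ++c) {
                std::memcpy(d, s, dataSize);  // scatter one element per channel
                s += dataSize;
                d += strideOC;
            }
        }
    }
}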
void MKLDNNSplitNode::initializeDstMemPtrs() {
dstMemPtrs.clear();
// Here we have to place the output data pointers in the order that reflects the output edges order.
// This is important when several edges are connected to one port.
// This is a naive implementation; an indexed priority queue or a modified treap would be a more elegant solution.
std::unordered_map<uint8_t*, size_t> mapDstPtrs;
using pair_t = std::pair<uint8_t*, size_t>;
for (size_t i = 0; i < getChildEdges().size(); ++i) {
auto outputEdge = this->getChildEdgeAt(i);
if (uint8_t* dstData = reinterpret_cast<uint8_t*>(outputEdge->getMemoryPtr()->GetPtr())) {
mapDstPtrs[dstData] = i;
} else {
THROW_ERROR << "can't get child edge index " << i << " data.";
}
}
std::vector<uint8_t*> vecCountingSort(getChildEdges().size(), nullptr);
for (auto& item : mapDstPtrs) {
vecCountingSort[item.second] = item.first;
}
dstMemPtrs.reserve(vecCountingSort.size());
auto backInserter = std::back_inserter(dstMemPtrs);
std::copy_if(vecCountingSort.begin(), vecCountingSort.end(), backInserter, [](const uint8_t* x) {return x;});
dstMemPtrs.shrink_to_fit();
}
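The ordering trick above, extracted into a standalone sketch (illustrative names): map each distinct pointer to the last edge index that refers to it, scatter by index, then compact. Edges connected to the same port share one pointer, so duplicates collapse naturally:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <unordered_map>
#include <vector>

// Keep one entry per distinct destination pointer, ordered by the last edge
// index that uses it (the "counting sort" mentioned in the comment above).
std::vector<uint8_t*> orderUniquePtrs(const std::vector<uint8_t*>& edgePtrs) {
    std::unordered_map<uint8_t*, size_t> lastIndex;
    for (size_t i = 0; i < edgePtrs.size(); ++i)
        lastIndex[edgePtrs[i]] = i;        // later edges overwrite earlier ones

    std::vector<uint8_t*> slots(edgePtrs.size(), nullptr);
    for (const auto& item : lastIndex)
        slots[item.second] = item.first;   // scatter by edge index

    std::vector<uint8_t*> result;
    std::copy_if(slots.begin(), slots.end(), std::back_inserter(result),
                 [](const uint8_t* p) { return p != nullptr; });
    return result;
}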
REG_MKLDNN_PRIM_FOR(MKLDNNSplitNode, Split);

File 2 of 4: mkldnn_split_node.h

@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@@ -29,13 +29,17 @@ public:
private:
void prepareOptimizedParams();
+void initializeDstMemPtrs();
+void optimizedNspc2Ncsp(size_t MB);
+bool canUseOptimizedNspc2Ncsp;
size_t axis = 1;
+std::vector<uint8_t*> dstMemPtrs;
struct {
std::vector<size_t> dataSize;
std::vector<size_t> srcDataOffsets;
-std::vector<uint8_t *> dstMemPtrs;
size_t srcDataStride;
size_t countStrides;
} optimizedParams;

File 3 of 4: CPU single-layer Split test (split.cpp)

@@ -1,4 +1,4 @@
-// Copyright (C) 2020 Intel Corporation
+// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@@ -91,8 +91,11 @@ const auto planar_5D_ref = CPUSpecificParams{{ncdhw}, {ncdhw}, {"ref"}, "ref"};
const auto planar_4D = CPUSpecificParams{{nchw}, {nchw}, {}, "unknown"};
const auto planar_5D = CPUSpecificParams{{ncdhw}, {ncdhw}, {}, "unknown"};
-const auto planarChannels_4D = CPUSpecificParams{{nhwc}, {nhwc}, {}, "ref"};
-const auto planarChannels_5D = CPUSpecificParams{{ndhwc}, {ndhwc}, {}, "ref"};
+const auto perChannels_4D = CPUSpecificParams{{nhwc}, {nhwc}, {}, "ref"};
+const auto perChannels_5D = CPUSpecificParams{{ndhwc}, {ndhwc}, {}, "ref"};
+const auto perChannelsToPlanar_4D = CPUSpecificParams{{nhwc}, {nchw}, {}, "ref"};
+const auto perChannelsToPlanar_5D = CPUSpecificParams{{ndhwc}, {ncdhw}, {}, "ref"};
const auto blocked8_4D = CPUSpecificParams{{nChw8c}, {nChw8c}, {}, "unknown"};
const auto blocked8_5D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {}, "unknown"};
@@ -114,6 +117,28 @@ const std::vector<Precision> netPrecisions = {
Precision::BF16
};
INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Nspc2NcspSpecial, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(4),
::testing::Values(1),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({3, 28, 24, 9})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(perChannelsToPlanar_4D)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Nspc2NcspSpecial, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(3),
::testing::Values(1),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({3, 21, 24, 9, 15})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(perChannelsToPlanar_5D)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block8inPlace, SplitLayerCPUTest,
::testing::Combine(
::testing::Values(3),
@@ -122,7 +147,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block8inPlace, SplitLayerCPUTest,
::testing::Values(std::vector<size_t>({3, 24, 24, 9})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
-::testing::Values(planar_4D, planar_4D_ref, planarChannels_4D, blocked8_4D)),
+::testing::Values(planar_4D, planar_4D_ref, perChannels_4D, blocked8_4D)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block8, SplitLayerCPUTest,
@@ -133,7 +158,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block8, SplitLayerCPUTest,
::testing::Values(std::vector<size_t>({3, 24, 24, 9})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
-::testing::Values(planar_4D, planar_4D_ref, planarChannels_4D, blocked8_4D_ref)),
+::testing::Values(planar_4D, planar_4D_ref, perChannels_4D, blocked8_4D_ref)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block16inPlace, SplitLayerCPUTest,
@@ -166,7 +191,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block8inPlace, SplitLayerCPUTest,
::testing::Values(std::vector<size_t>({3, 24, 24, 9, 15})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
-::testing::Values(planar_5D, planar_5D_ref, planarChannels_5D, blocked8_5D)),
+::testing::Values(planar_5D, planar_5D_ref, perChannels_5D, blocked8_5D)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block8, SplitLayerCPUTest,
@@ -177,7 +202,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block8, SplitLayerCPUTest,
::testing::Values(std::vector<size_t>({3, 24, 24, 9, 15})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU),
-::testing::Values(planar_5D, planar_5D_ref, planarChannels_5D, blocked8_5D_ref)),
+::testing::Values(planar_5D, planar_5D_ref, perChannels_5D, blocked8_5D_ref)),
SplitLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block16inPlace, SplitLayerCPUTest,

File 4 of 4: MKLDNN graph Split layer unit tests

@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@@ -230,27 +230,27 @@ INSTANTIATE_TEST_CASE_P(
split_test_params {
{1, 24, 2, 5},
{{1, 16, 2, 5}, {1, 8, 2, 5}},
-1, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+1, 6, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{1, 20, 2, 5},
{{1, 13, 2, 5}, {1, 7, 2, 5}},
-1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{1, 20, 2, 5},
{{1, 10, 2, 5}, {1, 10, 2, 5}},
-1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{2, 20, 2, 5},
{{2, 10, 2, 5}, {2, 10, 2, 5}},
-1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{2, 20, 2, 5},
{{2, 15, 2, 5}, {2, 5, 2, 5}},
-1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{9, 11, 7, 5},
@@ -275,7 +275,7 @@ INSTANTIATE_TEST_CASE_P(
split_test_params {
{5, 6, 7, 15},
{{5, 1, 7, 15}, {5, 2, 7, 15}, {5, 1, 7, 15}, {5, 2, 7, 15}},
-1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
},
split_test_params {
{5, 6, 7, 15},
@@ -290,15 +290,15 @@ INSTANTIATE_TEST_CASE_P(
split_test_params {
{5, 6, 7, 15},
{{5, 6, 7, 15}},
-1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}},
+1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}},
split_test_params {
{1, 32, 16, 16, 16},
{{1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}},
-1, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}},
+1, 6, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}},
split_test_params {
{1, 32, 16, 16, 16},
{{1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}},
-1, 5, MKLDNNPlugin::impl_desc_type::unknown, {}}));
+1, 6, MKLDNNPlugin::impl_desc_type::unknown, {}}));
class MKLDNNGraphDynBatchSplitTests: public MKLDNNGraphSplitTests {
protected: