[CPU] Split layer nspc -> ncsp special case put back. (#3839)
parent cc000e57e0
commit f0398212f8
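This restores the special-case handling of Split over the channel axis when the input arrives in a channels-last layout (nspc: NHWC/NDHWC) and the outputs are plain (ncsp: NCHW/NCDHW). For axis == 1 splits of 4D/5D tensors the node now advertises an extra primitive descriptor with a per-channel input and planar outputs, and serves it through a dedicated optimizedNspc2Ncsp() routine instead of the generic strided copy, effectively folding the layout reorder into the split itself. Output pointers are collected once, in edge order, by the new initializeDstMemPtrs(). The diff touches the MKLDNN Split node source and header, the CPU single-layer Split tests (new Nspc2NcspSpecial suites), and the legacy graph tests (each affected case now expects one more primitive descriptor).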
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -6,6 +6,7 @@
 #include "common/cpu_memcpy.h"
 #include <legacy/ie_layers.h>
 #include <vector>
+#include <queue>
 #include <mkldnn_types.h>
 #include <mkldnn_extension_utils.h>
 #include <climits>
@@ -80,7 +81,7 @@ void MKLDNNSplitNode::getSupportedDescriptors() {
 void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
     using TensorDescFactory = std::function<TensorDesc(const Precision&, const SizeVector&)>;
     constexpr size_t channelsPos = 1lu;
     // perform guard checks
     if (!supportedPrimitiveDescriptors.empty())
         return;
@@ -218,6 +219,16 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
         }
         supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFormats);
     }
+
+    // Special nspc -> ncsp case when splitting channels
+    if (axis == 1 && (dstFirstDims.ndims() == 4 || dstFirstDims.ndims() == 5)) {
+        auto plain = makePdInfo(&makePlainTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref);
+        auto perChannel = makePdInfo(&makePerChannelTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref);
+
+        plain.getConfig().inConfs[0].desc = perChannel.getConfig().inConfs[0].desc;
+
+        supportedPrimitiveDescriptors.push_back(plain);
+    }
 }
@@ -231,23 +242,49 @@ void MKLDNNSplitNode::createPrimitive() {
     if (getSelectedPrimitiveDescriptor() == nullptr)
         THROW_ERROR << "Preferable primitive descriptor is not set.";

-    if (!isOptimized())
-        prepareOptimizedParams();
+    canUseOptimizedNspc2Ncsp = true;
+    if (axis != 1)
+        canUseOptimizedNspc2Ncsp = false;
+
+    if (getParentEdgeAt(0)->getBlob()->getTensorDesc().getLayout() != NHWC &&
+        getParentEdgeAt(0)->getBlob()->getTensorDesc().getLayout() != NDHWC)
+        canUseOptimizedNspc2Ncsp = false;
+
+    for (size_t i = 0; i < getChildEdges().size(); i++) {
+        if (getChildEdgeAt(i)->getBlob()->getTensorDesc().getLayout() != NCHW &&
+            getChildEdgeAt(i)->getBlob()->getTensorDesc().getLayout() != NCDHW)
+            canUseOptimizedNspc2Ncsp = false;
+    }
+
+    if (!isOptimized()) {
+        initializeDstMemPtrs();
+        if (!canUseOptimizedNspc2Ncsp)
+            prepareOptimizedParams();
+    }
 }

 void MKLDNNSplitNode::execute(mkldnn::stream strm) {
     if (isOptimized())
         return;

+    if (dstMemPtrs.empty())
+        THROW_ERROR << "Output data pointers have not been initialized.";
+
     int MB = batchToProcess();
+
+    if (canUseOptimizedNspc2Ncsp) {
+        optimizedNspc2Ncsp(MB);
+        return;
+    }
+
     uint8_t* srcData = reinterpret_cast<uint8_t*>(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
     size_t batch = this->getParentEdgeAt(0)->getDims()[0];

     if (batch != MB)
         optimizedParams.countStrides = optimizedParams.countStrides / batch * MB;

-    parallel_for2d(this->getChildEdges().size(), optimizedParams.countStrides, [&](size_t i, size_t j) {
-        uint8_t* dstData = optimizedParams.dstMemPtrs[i];
+    parallel_for2d(dstMemPtrs.size(), optimizedParams.countStrides, [&](size_t i, size_t j) {
+        uint8_t* dstData = dstMemPtrs[i];

         cpu_memcpy(&dstData[j * optimizedParams.dataSize[i]],
                    &srcData[optimizedParams.srcDataOffsets[i] + j * optimizedParams.srcDataStride],
@@ -346,7 +383,7 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
             inNum = 0;
         }
         if (MKLDNNExtensionUtils::initTensorsAreEqual(
-                getSupportedPrimitiveDescriptors()[i].getConfig().inConfs[0].desc,
+                supportedPrimitiveDescriptors[i].getConfig().inConfs[0].desc,
                 parent_spd->getConfig().outConfs[inNum].desc)) {
             canSelectPrimitive.push_back(i);
         }
@@ -364,6 +401,46 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() {
         }
     }

+    // if there are no inPlace, but more than one suitable configurations, select the one that matches the output layout
+    for (auto indx : canSelectPrimitive) {
+        bool outputDescFullMatch = true;
+        for (size_t i = 0; i < getChildEdges().size(); ++i) {
+            auto childEdge = getChildEdgeAt(i);
+            auto childPtr = childEdge->getChild();
+            auto& vecChildSpd = childPtr->getSupportedPrimitiveDescriptors();
+            const auto& outputDesc = supportedPrimitiveDescriptors[indx].getConfig().outConfs[i].desc;
+
+            if (!vecChildSpd.empty()) {
+                int inNum = childEdge->getOutputNum();
+                if (inNum < 0) {
+                    inNum = 0;
+                }
+                bool hasMatchDesc = false;
+                for (auto& childSpd : vecChildSpd) {
+                    if (inNum >= childSpd.getConfig().inConfs.size()) {
+                        inNum = 0;
+                    }
+                    if (MKLDNNExtensionUtils::initTensorsAreEqual(outputDesc, childSpd.getConfig().inConfs[inNum].desc)) {
+                        hasMatchDesc = true;
+                        break;
+                    }
+                }
+                if (!hasMatchDesc) {
+                    outputDescFullMatch = false;
+                    break;
+                }
+            }
+        }
+        if (outputDescFullMatch) {
+            selectPrimitiveDescriptorByIndex(static_cast<int>(indx));
+            return;
+        }
+    }
+    if (!canSelectPrimitive.empty()) {
+        selectPrimitiveDescriptorByIndex(static_cast<int>(canSelectPrimitive.front()));
+        return;
+    }
+
     // if there are no matching data layouts, select first optimized implementation
     for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) {
         if (supportedPrimitiveDescriptors[i].getImplementationType() == impl_desc_type::unknown) {
@@ -384,50 +461,119 @@ void MKLDNNSplitNode::setDynamicBatchLim(int lim) {

 void MKLDNNSplitNode::prepareOptimizedParams() {
     const auto& inpTensorDesc = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc;
+    const auto outputPortsCount = outDims.size();

     //find axis order position
     const auto& order = inpTensorDesc.getBlockingDesc().getOrder();
-    unsigned axisOrderPos = UINT_MAX;
+    unsigned axisOrderPos = std::numeric_limits<unsigned>::max();
     for (size_t i = 0; i < order.size(); ++i) {
         if (order[i] == axis) {
             axisOrderPos = i;
             break;
         }
     }
-    if (UINT_MAX == axisOrderPos) {
+    if (std::numeric_limits<unsigned>::max() == axisOrderPos) {
         THROW_ERROR << "Can't find the axis in the input tensor order list";
     }

     uint8_t srcDataSize = inpTensorDesc.getPrecision().size();
     const auto& srcDims = inpTensorDesc.getBlockingDesc().getBlockDims();
-    int nDims = srcDims.size();
+    const auto nDims = srcDims.size();

     optimizedParams.countStrides = 1;
     for (int i = 0; i < axisOrderPos; i++)
         optimizedParams.countStrides *= srcDims[i];

     optimizedParams.srcDataStride = 0;
-    optimizedParams.dataSize.resize(this->getChildEdges().size());
-    optimizedParams.dstMemPtrs.clear();
-    for (int i = 0; i < this->getChildEdges().size(); i++) {
-        if (uint8_t* dstData = reinterpret_cast<uint8_t*>(this->getChildEdgeAt(i)->getMemoryPtr()->GetPtr())) {
-            optimizedParams.dstMemPtrs.push_back(dstData);
-        } else {
-            THROW_ERROR << "can't get child edge indx " << i << "data.";
-        }
+    optimizedParams.dataSize.resize(outputPortsCount);
+
+    for (size_t i = 0; i < outputPortsCount; i++) {
+        auto outputEdge = this->getChildEdgesAtPort(i).front();
         optimizedParams.dataSize[i] = srcDataSize;

-        for (int j = axisOrderPos; j < nDims; j++)
-            optimizedParams.dataSize[i] *= this->getChildEdgeAt(i)->getDesc().getBlockingDesc().getBlockDims()[j];
+        for (size_t j = axisOrderPos; j < nDims; j++)
+            optimizedParams.dataSize[i] *= outputEdge->getDesc().getBlockingDesc().getBlockDims()[j];

         optimizedParams.srcDataStride += optimizedParams.dataSize[i];
     }

-    optimizedParams.srcDataOffsets.resize(this->getChildEdges().size());
+    optimizedParams.srcDataOffsets.resize(outputPortsCount);
     optimizedParams.srcDataOffsets[0] = 0;
-    for (int i = 1; i < this->getChildEdges().size(); i++) {
+    for (size_t i = 1; i < outputPortsCount; i++) {
         optimizedParams.srcDataOffsets[i] = optimizedParams.srcDataOffsets[i - 1] + optimizedParams.dataSize[i - 1];
     }
 }
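For intuition, here is a minimal standalone sketch (illustrative code, not the plugin's) of the stride arithmetic prepareOptimizedParams() sets up, worked on an assumed plain-layout FP32 split of {2, 20, 2, 5} into 15 + 5 channels, the same shapes the legacy graph tests below exercise; axisOrderPos is 1 because the plain order is NCHW:

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
    const std::vector<size_t> srcDims{2, 20, 2, 5};   // N, C, H, W block dims, plain layout
    const std::vector<std::vector<size_t>> outDims{{2, 15, 2, 5}, {2, 5, 2, 5}};
    const size_t prec = 4;          // FP32 element size in bytes
    const size_t axisOrderPos = 1;  // position of the split axis (C) in the NCHW order

    // countStrides: product of dims before the split axis (here just N).
    size_t countStrides = 1;
    for (size_t i = 0; i < axisOrderPos; ++i)
        countStrides *= srcDims[i];

    // dataSize[i]: bytes of output i per outer iteration;
    // srcDataStride: bytes to advance in the source between outer iterations.
    std::vector<size_t> dataSize(outDims.size());
    size_t srcDataStride = 0;
    for (size_t i = 0; i < outDims.size(); ++i) {
        dataSize[i] = prec;
        for (size_t j = axisOrderPos; j < srcDims.size(); ++j)
            dataSize[i] *= outDims[i][j];
        srcDataStride += dataSize[i];
    }

    // srcDataOffsets[i]: running byte offset of output i within one source stride.
    std::vector<size_t> srcDataOffsets(outDims.size(), 0);
    for (size_t i = 1; i < outDims.size(); ++i)
        srcDataOffsets[i] = srcDataOffsets[i - 1] + dataSize[i - 1];

    assert(countStrides == 2);           // N
    assert(dataSize[0] == 15 * 2 * 5 * 4);   // 600 bytes
    assert(dataSize[1] == 5 * 2 * 5 * 4);    // 200 bytes
    assert(srcDataOffsets[1] == 600);
    assert(srcDataStride == 800);            // 20 * 2 * 5 * 4
    return 0;
}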
+
+void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) {
+    auto parentEdge = getParentEdgeAt(0);
+    const int ndims = parentEdge->getDims().ndims();
+    const size_t IC = parentEdge->getDims()[1];
+    const size_t D = ndims == 5 ? parentEdge->getDims()[ndims - 3] : 1;
+    const size_t H = parentEdge->getDims()[ndims - 2];
+    const size_t W = parentEdge->getDims()[ndims - 1];
+
+    auto srcBlob = parentEdge->getBlob();
+    auto srcData = srcBlob->cbuffer().as<const uint8_t*>();
+    const auto dataSize = srcBlob->getTensorDesc().getPrecision().size();
+
+    const size_t DHW = D*H*W;
+    const size_t strideIB = DHW * IC * dataSize;
+    const size_t strideIW = IC*dataSize;
+    const size_t strideOC = DHW * dataSize;
+
+    for (size_t i = 0, sIdx = 0; i < outDims.size(); i++) {
+        auto dstData = dstMemPtrs[i];
+
+        size_t innerSize = 1;
+        auto dims = outDims[i].ToSizeVector();
+
+        for (size_t j = axis; j < dims.size(); j++) {
+            innerSize *= dims[j];
+        }
+        auto srcPtr = srcData + srcBlob->getTensorDesc().offset(sIdx) * dataSize;
+
+        const size_t OC = dims[1];
+        const size_t strideOB = OC * strideOC;
+
+        parallel_for2d(MB, DHW, [&](size_t b, size_t j) {
+            auto localSrcPtr = srcPtr + b*strideIB + j*strideIW;
+            auto localDstPtr = dstData + b*strideOB + j*dataSize;
+            for (size_t c = 0; c < OC; c++) {
+                cpu_memcpy(localDstPtr, localSrcPtr, dataSize);
+                localSrcPtr += dataSize;
+                localDstPtr += strideOC;
+            }
+        });
+
+        sIdx += innerSize;
+    }
+}
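The routine above walks the source in pixel order and scatters each pixel's channel slice into contiguous output planes; note that the node derives each output's source offset through the blob's TensorDesc::offset(sIdx), which the sketch below replaces with a plain channel offset. A self-contained sketch of the same stride math, assuming a toy NHWC tensor {N=1, H=2, W=2, C=3} split into 2 + 1 channels (names mirror the node's locals, but this is illustrative code, not the plugin's):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

int main() {
    const size_t N = 1, H = 2, W = 2, C = 3, dataSize = 1;  // 1-byte elements for brevity
    const std::vector<size_t> splitC{2, 1};

    // NHWC source: element (n, h, w, c) lives at ((n*H + h)*W + w)*C + c.
    std::vector<uint8_t> src(N * H * W * C);
    for (size_t i = 0; i < src.size(); ++i) src[i] = static_cast<uint8_t>(i);

    const size_t DHW = H * W;                 // D == 1 in the 4D case
    const size_t strideIB = DHW * C * dataSize;
    const size_t strideIW = C * dataSize;
    const size_t strideOC = DHW * dataSize;

    size_t cOff = 0;  // channel offset of the current output inside the source
    for (size_t i = 0; i < splitC.size(); ++i) {
        const size_t OC = splitC[i];
        const size_t strideOB = OC * strideOC;
        std::vector<uint8_t> dst(N * OC * DHW * dataSize);

        for (size_t b = 0; b < N; ++b)
            for (size_t j = 0; j < DHW; ++j) {   // the node runs this as parallel_for2d
                const uint8_t* s = src.data() + b * strideIB + j * strideIW + cOff * dataSize;
                uint8_t* d = dst.data() + b * strideOB + j * dataSize;
                for (size_t c = 0; c < OC; ++c) {
                    *d = *s;                      // cpu_memcpy of one element
                    s += dataSize;
                    d += strideOC;
                }
            }

        // Cross-check against direct NHWC -> NCHW indexing.
        for (size_t c = 0; c < OC; ++c)
            for (size_t j = 0; j < DHW; ++j)
                assert(dst[c * DHW + j] == src[j * C + cOff + c]);
        cOff += OC;
    }
    return 0;
}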
+
+void MKLDNNSplitNode::initializeDstMemPtrs() {
+    dstMemPtrs.clear();
+
+    //Here we have to place the output data pointers in the order that reflects the output edges order.
+    //It's important in case when several edges are connected to one port.
+    //This is a naive implementation, an indexed priority queue or modified treap would be a more elegant solution.
+    std::unordered_map<uint8_t*, size_t> mapDstPtrs;
+    using pair_t = std::pair<uint8_t*, size_t>;
+    for (size_t i = 0; i < getChildEdges().size(); ++i) {
+        auto outputEdge = this->getChildEdgeAt(i);
+        if (uint8_t* dstData = reinterpret_cast<uint8_t*>(outputEdge->getMemoryPtr()->GetPtr())) {
+            mapDstPtrs[dstData] = i;
+        } else {
+            THROW_ERROR << "can't get child edge indx " << i << "data.";
+        }
+    }
+
+    std::vector<uint8_t*> vecCountingSort(getChildEdges().size(), nullptr);
+    for (auto& item : mapDstPtrs) {
+        vecCountingSort[item.second] = item.first;
+    }
+
+    dstMemPtrs.reserve(vecCountingSort.size());
+    auto backInserter = std::back_inserter(dstMemPtrs);
+    std::copy_if(vecCountingSort.begin(), vecCountingSort.end(), backInserter, [](const uint8_t* x) {return x;});
+    dstMemPtrs.shrink_to_fit();
+}
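Why the map plus counting sort: as the comment above notes, several edges may be connected to one port and then share one destination pointer. Inserting into the unordered_map collapses such duplicates (the last edge index wins), the index-addressed vector restores edge order, and copy_if drops the nullptr slots the collapsed duplicates leave behind. A tiny standalone demo of that behaviour (illustrative buffers only, not plugin code):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <unordered_map>
#include <vector>

int main() {
    uint8_t bufA = 0, bufB = 0;
    // Edge i -> destination pointer; edges 1 and 2 share bufB (one port, two consumers).
    std::vector<uint8_t*> edgePtrs{&bufA, &bufB, &bufB};

    std::unordered_map<uint8_t*, size_t> mapDstPtrs;
    for (size_t i = 0; i < edgePtrs.size(); ++i)
        mapDstPtrs[edgePtrs[i]] = i;  // a duplicate pointer keeps its last edge index

    std::vector<uint8_t*> vecCountingSort(edgePtrs.size(), nullptr);
    for (auto& item : mapDstPtrs)
        vecCountingSort[item.second] = item.first;

    std::vector<uint8_t*> dstMemPtrs;
    std::copy_if(vecCountingSort.begin(), vecCountingSort.end(),
                 std::back_inserter(dstMemPtrs), [](const uint8_t* x) { return x; });

    // One pointer per distinct buffer, still in output-edge order.
    assert((dstMemPtrs == std::vector<uint8_t*>{&bufA, &bufB}));
    return 0;
}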

 REG_MKLDNN_PRIM_FOR(MKLDNNSplitNode, Split);
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

@@ -29,13 +29,17 @@ public:

 private:
     void prepareOptimizedParams();
+    void initializeDstMemPtrs();
+    void optimizedNspc2Ncsp(size_t MB);
+
+    bool canUseOptimizedNspc2Ncsp;

     size_t axis = 1;
+    std::vector<uint8_t*> dstMemPtrs;

     struct {
         std::vector<size_t> dataSize;
         std::vector<size_t> srcDataOffsets;
-        std::vector<uint8_t *> dstMemPtrs;
         size_t srcDataStride;
         size_t countStrides;
     } optimizedParams;
@@ -1,4 +1,4 @@
-// Copyright (C) 2020 Intel Corporation
+// Copyright (C) 2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

@@ -91,8 +91,11 @@ const auto planar_5D_ref = CPUSpecificParams{{ncdhw}, {ncdhw}, {"ref"}, "ref"};
 const auto planar_4D = CPUSpecificParams{{nchw}, {nchw}, {}, "unknown"};
 const auto planar_5D = CPUSpecificParams{{ncdhw}, {ncdhw}, {}, "unknown"};

-const auto planarChannels_4D = CPUSpecificParams{{nhwc}, {nhwc}, {}, "ref"};
-const auto planarChannels_5D = CPUSpecificParams{{ndhwc}, {ndhwc}, {}, "ref"};
+const auto perChannels_4D = CPUSpecificParams{{nhwc}, {nhwc}, {}, "ref"};
+const auto perChannels_5D = CPUSpecificParams{{ndhwc}, {ndhwc}, {}, "ref"};
+
+const auto perChannelsToPlanar_4D = CPUSpecificParams{{nhwc}, {nchw}, {}, "ref"};
+const auto perChannelsToPlanar_5D = CPUSpecificParams{{ndhwc}, {ncdhw}, {}, "ref"};

 const auto blocked8_4D = CPUSpecificParams{{nChw8c}, {nChw8c}, {}, "unknown"};
 const auto blocked8_5D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {}, "unknown"};
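The renamed perChannels_* params keep the nhwc/ndhwc in-and-out configuration, while the new perChannelsToPlanar_* params pair a channels-last input with planar outputs on the "ref" implementation type, which is exactly the layout combination the restored special case advertises; the Nspc2NcspSpecial suites added below instantiate them.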
@@ -114,6 +117,28 @@ const std::vector<Precision> netPrecisions = {
         Precision::BF16
 };

+INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Nspc2NcspSpecial, SplitLayerCPUTest,
+                        ::testing::Combine(
+                                ::testing::Values(4),
+                                ::testing::Values(1),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(std::vector<size_t>({3, 28, 24, 9})),
+                                ::testing::Values(std::vector<size_t>({})),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                                ::testing::Values(perChannelsToPlanar_4D)),
+                        SplitLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Nspc2NcspSpecial, SplitLayerCPUTest,
+                        ::testing::Combine(
+                                ::testing::Values(3),
+                                ::testing::Values(1),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(std::vector<size_t>({3, 21, 24, 9, 15})),
+                                ::testing::Values(std::vector<size_t>({})),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                                ::testing::Values(perChannelsToPlanar_5D)),
+                        SplitLayerCPUTest::getTestCaseName);
+
 INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block8inPlace, SplitLayerCPUTest,
                         ::testing::Combine(
                                 ::testing::Values(3),

@@ -122,7 +147,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block8inPlace, SplitLayerCPUTest,
                                 ::testing::Values(std::vector<size_t>({3, 24, 24, 9})),
                                 ::testing::Values(std::vector<size_t>({})),
                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                ::testing::Values(planar_4D, planar_4D_ref, planarChannels_4D, blocked8_4D)),
+                                ::testing::Values(planar_4D, planar_4D_ref, perChannels_4D, blocked8_4D)),
                         SplitLayerCPUTest::getTestCaseName);

 INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block8, SplitLayerCPUTest,

@@ -133,7 +158,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block8, SplitLayerCPUTest,
                                 ::testing::Values(std::vector<size_t>({3, 24, 24, 9})),
                                 ::testing::Values(std::vector<size_t>({})),
                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                ::testing::Values(planar_4D, planar_4D_ref, planarChannels_4D, blocked8_4D_ref)),
+                                ::testing::Values(planar_4D, planar_4D_ref, perChannels_4D, blocked8_4D_ref)),
                         SplitLayerCPUTest::getTestCaseName);

 INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block16inPlace, SplitLayerCPUTest,

@@ -166,7 +191,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block8inPlace, SplitLayerCPUTest,
                                 ::testing::Values(std::vector<size_t>({3, 24, 24, 9, 15})),
                                 ::testing::Values(std::vector<size_t>({})),
                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                ::testing::Values(planar_5D, planar_5D_ref, planarChannels_5D, blocked8_5D)),
+                                ::testing::Values(planar_5D, planar_5D_ref, perChannels_5D, blocked8_5D)),
                         SplitLayerCPUTest::getTestCaseName);

 INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block8, SplitLayerCPUTest,

@@ -177,7 +202,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block8, SplitLayerCPUTest,
                                 ::testing::Values(std::vector<size_t>({3, 24, 24, 9, 15})),
                                 ::testing::Values(std::vector<size_t>({})),
                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
-                                ::testing::Values(planar_5D, planar_5D_ref, planarChannels_5D, blocked8_5D_ref)),
+                                ::testing::Values(planar_5D, planar_5D_ref, perChannels_5D, blocked8_5D_ref)),
                         SplitLayerCPUTest::getTestCaseName);

 INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block16inPlace, SplitLayerCPUTest,
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
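In the split_test_params below, the scalar after the axis (the second number on the changed lines) is the expected count of supported primitive descriptors; it grows by one in the axis-1 4D/5D cases, matching the extra nspc -> ncsp descriptor the node now registers.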
@@ -230,27 +230,27 @@ INSTANTIATE_TEST_CASE_P(
         split_test_params {
                 {1, 24, 2, 5},
                 {{1, 16, 2, 5}, {1, 8, 2, 5}},
-                1, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                1, 6, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {1, 20, 2, 5},
                 {{1, 13, 2, 5}, {1, 7, 2, 5}},
-                1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {1, 20, 2, 5},
                 {{1, 10, 2, 5}, {1, 10, 2, 5}},
-                1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {2, 20, 2, 5},
                 {{2, 10, 2, 5}, {2, 10, 2, 5}},
-                1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {2, 20, 2, 5},
                 {{2, 15, 2, 5}, {2, 5, 2, 5}},
-                1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {9, 11, 7, 5},

@@ -275,7 +275,7 @@ INSTANTIATE_TEST_CASE_P(
         split_test_params {
                 {5, 6, 7, 15},
                 {{5, 1, 7, 15}, {5, 2, 7, 15}, {5, 1, 7, 15}, {5, 2, 7, 15}},
-                1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
+                1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}
         },
         split_test_params {
                 {5, 6, 7, 15},

@@ -290,15 +290,15 @@ INSTANTIATE_TEST_CASE_P(
         split_test_params {
                 {5, 6, 7, 15},
                 {{5, 6, 7, 15}},
-                1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}},
+                1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}},
         split_test_params {
                 {1, 32, 16, 16, 16},
                 {{1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}},
-                1, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}},
+                1, 6, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}},
         split_test_params {
                 {1, 32, 16, 16, 16},
                 {{1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}},
-                1, 5, MKLDNNPlugin::impl_desc_type::unknown, {}}));
+                1, 6, MKLDNNPlugin::impl_desc_type::unknown, {}}));

 class MKLDNNGraphDynBatchSplitTests: public MKLDNNGraphSplitTests {
 protected: