[CPU] Added dynamism support for DepthToSpace (#8375)

This commit is contained in:
Alexandra Sidorova 2021-11-15 12:06:41 +03:00 committed by GitHub
parent f3e1dc25b2
commit c981d2f0dd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 364 additions and 231 deletions

View File

@ -17,17 +17,11 @@
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
using namespace mkldnn;
using namespace mkldnn::impl;
using namespace mkldnn::impl::cpu::x64;
bool MKLDNNDepthToSpaceNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
const auto depthToSpace = std::dynamic_pointer_cast<const ngraph::opset1::DepthToSpace>(op);
auto depthToSpace = ov::as_type_ptr<const ngraph::opset1::DepthToSpace>(op);
if (!depthToSpace) {
errorMessage = "Only opset1 DepthToSpace operation is supported";
return false;
@ -46,75 +40,58 @@ bool MKLDNNDepthToSpaceNode::isSupportedOperation(const std::shared_ptr<const ng
// Builds the CPU DepthToSpace node from an ngraph operation.
//
// The constructor validates the operation and caches everything that can be derived
// without concrete dimensions: the mode, the block size and the block step
// (block_size ^ number_of_spatial_dims). Only the input/output *ranks* are inspected here.
//
// Throws NotImplemented when isSupportedOperation() rejects the op, and a node error
// (THROW_ERROR) when the port count, mode, block size or ranks are invalid.
MKLDNNDepthToSpaceNode::MKLDNNDepthToSpaceNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
        : MKLDNNNode(op, eng, cache) {
    std::string errorMessage;
    if (!isSupportedOperation(op, errorMessage)) {
        IE_THROW(NotImplemented) << errorMessage;
    }
    if (inputShapes.size() != 1 || outputShapes.size() != 1)
        THROW_ERROR << "has incorrect number of input/output edges!";

    auto depthToSpace = ov::as_type_ptr<const ngraph::opset1::DepthToSpace>(op);
    if (!depthToSpace)
        THROW_ERROR << "supports only opset1";

    const auto modeNgraph = depthToSpace->get_mode();
    if (modeNgraph == ngraph::op::v0::DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST) {
        attrs.mode = Mode::BLOCKS_FIRST;
    } else if (modeNgraph == ngraph::op::v0::DepthToSpace::DepthToSpaceMode::DEPTH_FIRST) {
        attrs.mode = Mode::DEPTH_FIRST;
    } else {
        THROW_ERROR << "doesn't support mode: " << ngraph::as_string(modeNgraph);
    }

    attrs.blockSize = depthToSpace->get_block_size();
    if (attrs.blockSize == 0)
        THROW_ERROR << "has incorrect block_size parameter is zero!";

    const size_t srcRank = getInputShapeAtPort(0).getRank();
    const size_t dstRank = getOutputShapeAtPort(0).getRank();

    if (srcRank < 3)
        THROW_ERROR << "has incorrect number of input dimensions";
    if (srcRank > 5)
        THROW_ERROR << "doesn't support dimensions with rank greater than 5";
    if (srcRank != dstRank)
        THROW_ERROR << "has incorrect number of input/output dimensions";

    // blockStep = blockSize ^ nSpatialDims. Computed with integer multiplication rather than
    // std::pow: the floating-point result is not guaranteed to be exact, and truncating a
    // value such as 8.999... to size_t would silently yield a wrong step.
    const size_t nSpatialDims = srcRank - 2;
    size_t blockStep = 1;
    for (size_t i = 0; i < nSpatialDims; ++i)
        blockStep *= attrs.blockSize;
    attrs.blockStep = blockStep;
}
// Intentionally empty: the port-count and rank checks are performed in the constructor.
void MKLDNNDepthToSpaceNode::getSupportedDescriptors() {}
void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0);
auto srcDims = getInputShapeAtPort(0).getStaticDims();
const size_t nDims = srcDims.size();
impl_desc_type impl_type;
if (mayiuse(impl::cpu::x64::avx512_common)) {
impl_desc_type impl_type = impl_desc_type::ref;
if (cpu::x64::mayiuse(cpu::x64::avx512_common)) {
impl_type = impl_desc_type::jit_avx512;
} else if (mayiuse(cpu::x64::avx2)) {
} else if (cpu::x64::mayiuse(cpu::x64::avx2)) {
impl_type = impl_desc_type::jit_avx2;
} else if (mayiuse(cpu::x64::sse41)) {
} else if (cpu::x64::mayiuse(cpu::x64::sse41)) {
impl_type = impl_desc_type::jit_sse42;
} else {
impl_type = impl_desc_type::ref;
}
NodeConfig config;
@ -126,11 +103,15 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() {
config.outConfs[0].inPlace = -1;
config.outConfs[0].constant = false;
const auto& inputDataShape = getInputShapeAtPort(0);
const auto& outputDataShape = getOutputShapeAtPort(0);
std::vector<LayoutType> supportedTypes;
if (nDims > 2) {
if (inputDataShape.getRank() > 2) {
const auto& srcDims = inputDataShape.getDims();
auto canUseBlocked = [=](const size_t block) {
return srcDims[1] % block == 0 && (srcDims[1] / block) % blockStep == 0 &&
(mode == Mode::DEPTH_FIRST ? block % blockStep == 0 : true);
return srcDims[1] != Shape::UNDEFINED_DIM && srcDims[1] % block == 0 && (srcDims[1] / block) % attrs.blockStep == 0 &&
(attrs.mode == Mode::DEPTH_FIRST ? block % attrs.blockStep == 0 : true);
};
supportedTypes.push_back(LayoutType::nspc);
@ -141,18 +122,18 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() {
}
supportedTypes.push_back(LayoutType::ncsp);
auto creators = BlockedDescCreator::getCommonCreators();
auto range = BlockedDescCreator::makeFilteredRange(creators, nDims, supportedTypes);
auto range = BlockedDescCreator::makeFilteredRange(creators, inputDataShape.getRank(), supportedTypes);
for (auto itr = range.first; itr != range.second; ++itr) {
config.inConfs[0].desc = itr->second->createSharedDesc(precision, getInputShapeAtPort(0));
config.outConfs[0].desc = itr->second->createSharedDesc(precision, getOutputShapeAtPort(0));
config.inConfs[0].desc = itr->second->createSharedDesc(precision, inputDataShape);
config.outConfs[0].desc = itr->second->createSharedDesc(precision, outputDataShape);
supportedPrimitiveDescriptors.emplace_back(config, impl_type);
}
}
void MKLDNNDepthToSpaceNode::createPrimitive() {
auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
THROW_ERROR << "has not allocated destination memory";
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
@ -160,24 +141,44 @@ void MKLDNNDepthToSpaceNode::createPrimitive() {
if (getSelectedPrimitiveDescriptor() == nullptr)
THROW_ERROR << "has unidentified preferable primitive descriptor";
VectorDims srcDims = srcMemPtr->getStaticDims();
const auto& memoryDesc = srcMemPtr->getDesc();
attrs.dataSize = memoryDesc.getPrecision().size();
attrs.nSpatialDims = memoryDesc.getShape().getRank() - 2;
attrs.layoutType = memoryDesc.hasLayoutType(LayoutType::nCsp16c) ? LayoutType::nCsp16c :
memoryDesc.hasLayoutType(LayoutType::nCsp8c) ? LayoutType::nCsp8c :
memoryDesc.hasLayoutType(LayoutType::nspc) ? LayoutType::nspc : LayoutType::ncsp;
size_t nDims = srcDims.size();
const size_t nSpatialDims = nDims - 2;
const bool isBlocked = getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c) ||
getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c);
const size_t reshapedRank = nDims + nSpatialDims + static_cast<int>(isBlocked) + static_cast<int>(isBlocked && mode == Mode::DEPTH_FIRST);
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
// Refreshes the shape-dependent part of `attrs` (the blocked dims of the source memory)
// and builds a new executor from the resulting attribute set.
void MKLDNNDepthToSpaceNode::prepareParams() {
    const auto& srcMemory = getParentEdgeAt(0)->getMemoryPtr();
    const auto srcBlockedDesc = srcMemory->GetDescWithType<BlockedMemoryDesc>();
    attrs.srcBlockedDims = srcBlockedDesc->getBlockDims();

    execPtr = std::make_shared<DepthToSpaceExecutor>(attrs);
}
MKLDNNDepthToSpaceNode::DepthToSpaceExecutor::DepthToSpaceExecutor(const DepthToSpaceAttrs& attrs) {
if (!MKLDNNPlugin::one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c, LayoutType::nspc, LayoutType::ncsp))
IE_THROW() << "DepthToSpace executor supports only 'nCsp16c', 'nCsp8c', 'nspc' or 'ncsp' layouts.";
const bool isBlocked = MKLDNNPlugin::one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c);
const bool isChannelsFirst = attrs.layoutType == LayoutType::nspc;
const size_t nDims = attrs.srcBlockedDims.size();
const size_t reshapedRank = nDims + attrs.nSpatialDims + static_cast<int>(isBlocked && attrs.mode == Mode::DEPTH_FIRST);
const size_t lastIdx = reshapedRank - 1;
size_t firstSpatialOrder = 2;
PermuteParams params;
params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size();
params.data_size = attrs.dataSize;
params.order.resize(reshapedRank, 0);
params.src_block_order.resize(reshapedRank);
params.dst_block_order.resize(reshapedRank);
params.dst_block_dims.resize(reshapedRank);
params.src_block_dims.resize(reshapedRank);
params.src_block_dims[0] = srcDims[0];
params.src_block_dims[0] = attrs.srcBlockedDims[0];
// reshaping of src dimensions and creating the permutation order for each layout:
// new shape: mode = blocks_first [N, block_size, block_size, ..., block_size, C / (block_size ^ K), D1, D2, ..., DK]
@ -186,62 +187,58 @@ void MKLDNNDepthToSpaceNode::createPrimitive() {
// mode = depth_first : [0, 1, K + 2, 2, K + 3, 3, K + 4, 4, ..., K + (K + 1), K + 1]
// where `k` is number of spatial dimensions
auto reshapeAndSetPermOrder = [&](const size_t idx1, const size_t idx2, const size_t shift, const SizeVector& dims) {
for (size_t i = 0; i < nSpatialDims; i++) {
auto reshapeAndSetPermOrder = [&](const size_t idx1, const size_t idx2, const size_t shift, const VectorDims& dims) {
for (size_t i = 0; i < attrs.nSpatialDims; i++) {
params.order[i * 2 + shift] = i + idx1;
params.order[i * 2 + shift + 1] = i + idx2;
params.src_block_dims[params.order[i * 2 + shift]] = dims[i + shift];
params.src_block_dims[params.order[i * 2 + shift + 1]] = blockSize;
params.src_block_dims[params.order[i * 2 + shift + 1]] = attrs.blockSize;
}
};
if (isBlocked) {
VectorDims srcBlockedDims = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>()->getBlockDims();
size_t orderShiftForBlocks, orderShiftForDims;
if (mode == Mode::BLOCKS_FIRST) {
if (attrs.mode == Mode::BLOCKS_FIRST) {
orderShiftForBlocks = 1;
orderShiftForDims = nSpatialDims + 2;
orderShiftForDims = attrs.nSpatialDims + 2;
params.src_block_dims[nSpatialDims + 1] = srcBlockedDims[1] / blockStep;
params.src_block_dims[lastIdx] = srcBlockedDims.back();
params.src_block_dims[attrs.nSpatialDims + 1] = attrs.srcBlockedDims[1] / attrs.blockStep;
params.src_block_dims[lastIdx] = attrs.srcBlockedDims.back();
params.order[1] = nSpatialDims + 1;
params.order[1] = attrs.nSpatialDims + 1;
params.order[lastIdx] = lastIdx;
} else {
orderShiftForBlocks = nSpatialDims + 4;
orderShiftForBlocks = attrs.nSpatialDims + 4;
orderShiftForDims = 3;
size_t newBlockSize = srcBlockedDims.back() / blockStep;
size_t newBlocksCount = srcBlockedDims[1] / blockStep;
size_t newBlockSize = attrs.srcBlockedDims.back() / attrs.blockStep;
size_t newBlocksCount = attrs.srcBlockedDims[1] / attrs.blockStep;
params.src_block_dims[1] = newBlocksCount;
params.src_block_dims[2] = srcBlockedDims[1] / newBlocksCount;
params.src_block_dims[lastIdx - nSpatialDims] = newBlockSize;
params.src_block_dims[2] = attrs.srcBlockedDims[1] / newBlocksCount;
params.src_block_dims[lastIdx - attrs.nSpatialDims] = newBlockSize;
params.order[1] = 1;
params.order[2] = 3;
params.order[lastIdx - 1] = 2;
params.order[lastIdx] = lastIdx - nSpatialDims;
params.order[lastIdx] = lastIdx - attrs.nSpatialDims;
}
reshapeAndSetPermOrder(orderShiftForDims, orderShiftForBlocks, firstSpatialOrder, srcBlockedDims);
} else if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) {
srcDims.push_back(srcDims[1]);
srcDims.erase(srcDims.begin() + 1);
reshapeAndSetPermOrder(orderShiftForDims, orderShiftForBlocks, firstSpatialOrder, attrs.srcBlockedDims);
} else if (isChannelsFirst) {
firstSpatialOrder = 1;
size_t shift = static_cast<size_t>(mode == DEPTH_FIRST) + nSpatialDims + 1;
params.order[lastIdx] = mode == Mode::DEPTH_FIRST ? nSpatialDims + 1 : lastIdx;
params.src_block_dims[params.order[lastIdx]] = srcDims.back() / blockStep;
size_t shift = static_cast<size_t>(attrs.mode == DEPTH_FIRST) + attrs.nSpatialDims + 1;
params.order[lastIdx] = attrs.mode == Mode::DEPTH_FIRST ? attrs.nSpatialDims + 1 : lastIdx;
params.src_block_dims[params.order[lastIdx]] = attrs.srcBlockedDims.back() / attrs.blockStep;
reshapeAndSetPermOrder(firstSpatialOrder, shift, firstSpatialOrder, srcDims);
reshapeAndSetPermOrder(firstSpatialOrder, shift, firstSpatialOrder, attrs.srcBlockedDims);
} else {
size_t shift = static_cast<size_t>(mode == DEPTH_FIRST) + 1;
params.order[1] = mode == DEPTH_FIRST ? 1 : nSpatialDims + 1;
params.src_block_dims[params.order[1]] = srcDims[1] / blockStep;
size_t shift = static_cast<size_t>(attrs.mode == DEPTH_FIRST) + 1;
params.order[1] = attrs.mode == DEPTH_FIRST ? 1 : attrs.nSpatialDims + 1;
params.src_block_dims[params.order[1]] = attrs.srcBlockedDims[1] / attrs.blockStep;
reshapeAndSetPermOrder(nSpatialDims + firstSpatialOrder, shift, firstSpatialOrder, srcDims);
reshapeAndSetPermOrder(attrs.nSpatialDims + firstSpatialOrder, shift, firstSpatialOrder, attrs.srcBlockedDims);
}
std::iota(params.src_block_order.begin(), params.src_block_order.end(), 0);
@ -252,11 +249,27 @@ void MKLDNNDepthToSpaceNode::createPrimitive() {
permuteKernel = std::unique_ptr<PermuteKernel>(new PermuteKernel(params));
}
void MKLDNNDepthToSpaceNode::execute(mkldnn::stream strm) {
const uint8_t* srcData = reinterpret_cast<const uint8_t*>(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
uint8_t* dstData = reinterpret_cast<uint8_t*>(this->getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
// Runs the compiled permute kernel on the given memory objects.
//
// srcMemPtr / dstMemPtr - source and destination memory; their raw pointers are handed to the kernel.
// MB                    - batch value forwarded to PermuteKernel::execute.
//
// Throws if the kernel has not been created.
void MKLDNNDepthToSpaceNode::DepthToSpaceExecutor::exec(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB) {
    if (!permuteKernel)
        IE_THROW() << "Could not execute. Kernel for DepthToSpace node was not compiled.";

    const uint8_t* srcData = reinterpret_cast<const uint8_t*>(srcMemPtr->GetPtr());
    uint8_t* dstData = reinterpret_cast<uint8_t*>(dstMemPtr->GetPtr());
    permuteKernel->execute(srcData, dstData, MB);
}
// Executes the node through the prepared executor.
// For a dynamic node the batch is taken from the first dimension of the source memory,
// otherwise from batchToProcess(). Throws if no executor has been built.
void MKLDNNDepthToSpaceNode::execute(mkldnn::stream strm) {
    if (execPtr == nullptr) {
        THROW_ERROR << "doesn't have a compiled executor.";
    }

    auto& srcMemory = getParentEdgeAt(0)->getMemoryPtr();
    auto& dstMemory = getChildEdgeAt(0)->getMemoryPtr();

    int batch = 0;
    if (isDynamicNode()) {
        batch = srcMemory->getStaticDims()[0];
    } else {
        batch = batchToProcess();
    }

    execPtr->exec(srcMemory, dstMemory, batch);
}
// Dynamic-shape entry point: simply delegates to execute(), which selects the batch
// value itself depending on isDynamicNode().
void MKLDNNDepthToSpaceNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
bool MKLDNNDepthToSpaceNode::created() const {

View File

@ -15,13 +15,17 @@ class MKLDNNDepthToSpaceNode : public MKLDNNNode {
public:
MKLDNNDepthToSpaceNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
void prepareParams() override;
protected:
void executeDynamicImpl(mkldnn::stream strm) override;
private:
enum Mode {
@ -29,11 +33,26 @@ private:
DEPTH_FIRST = 1
};
Mode mode;
size_t blockSize;
size_t blockStep;
// Everything the executor needs to build the permutation for one DepthToSpace call.
// Every member has a default so a freshly constructed node never holds indeterminate values.
struct DepthToSpaceAttrs {
    LayoutType layoutType = LayoutType::ncsp;  // layout of the source memory; chosen in createPrimitive()
    Mode mode = Mode::BLOCKS_FIRST;            // taken from the ngraph op in the constructor
    size_t blockSize = 0lu;                    // block_size attribute of the op
    size_t blockStep = 0lu;                    // blockSize ^ nSpatialDims
    size_t dataSize = 1lu;                     // size in bytes of the source precision
    size_t nSpatialDims = 0lu;                 // source rank - 2
    VectorDims srcBlockedDims;                 // blocked dims of the source memory; refreshed in prepareParams()
} attrs;
std::unique_ptr<PermuteKernel> permuteKernel;
// Owns the permute kernel built for one concrete set of DepthToSpace attributes.
struct DepthToSpaceExecutor {
    // Builds the kernel from `attrs`. `explicit` prevents an attribute set from being
    // silently converted into an executor.
    explicit DepthToSpaceExecutor(const DepthToSpaceAttrs& attrs);

    // Runs the kernel on the given source/destination memory; MB is the batch value
    // forwarded to the kernel.
    void exec(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB);

    ~DepthToSpaceExecutor() = default;

private:
    std::unique_ptr<PermuteKernel> permuteKernel;
};
using executorPtr = std::shared_ptr<DepthToSpaceExecutor>;
executorPtr execPtr = nullptr;
};
} // namespace MKLDNNPlugin

View File

@ -2,56 +2,78 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <shared_test_classes/single_layer/depth_to_space.hpp>
#include "shared_test_classes/single_layer/depth_to_space.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
using namespace ngraph::opset3;
using namespace ov::test;
namespace CPULayerTestsDefinitions {
typedef std::tuple<
LayerTestsDefinitions::depthToSpaceParamsTuple,
using DepthToSpaceLayerCPUTestParamSet = std::tuple<
InputShape, // Input shape
ElementType, // Input element type
DepthToSpace::DepthToSpaceMode, // Mode
std::size_t, // Block size
CPUSpecificParams
> DepthToSpaceLayerCPUTestParamSet;
>;
class DepthToSpaceLayerCPUTest : public testing::WithParamInterface<DepthToSpaceLayerCPUTestParamSet>,
virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
virtual public ov::test::SubgraphBaseTest, public CPUTestsBase {
public:
// Composes the test-case name from the parameter tuple:
// input (partial) shape, target shapes, element type, mode, block size and CPU-specific params.
// Throws std::runtime_error for a mode other than BLOCKS_FIRST / DEPTH_FIRST.
static std::string getTestCaseName(testing::TestParamInfo<DepthToSpaceLayerCPUTestParamSet> obj) {
    InputShape inputShape;
    ElementType elemType;
    DepthToSpace::DepthToSpaceMode d2sMode;
    std::size_t blkSize;
    CPUSpecificParams cpuSpecific;
    std::tie(inputShape, elemType, d2sMode, blkSize, cpuSpecific) = obj.param;

    std::string modeTag;
    if (d2sMode == DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST) {
        modeTag = "BLOCKS_FIRST_";
    } else if (d2sMode == DepthToSpace::DepthToSpaceMode::DEPTH_FIRST) {
        modeTag = "DEPTH_FIRST_";
    } else {
        throw std::runtime_error("Unsupported DepthToSpaceMode");
    }

    std::ostringstream name;
    name << "IS=" << CommonTestUtils::partialShape2str({inputShape.first}) << "_";
    name << "TS=";
    for (const auto& targetShape : inputShape.second) {
        name << CommonTestUtils::vec2str(targetShape) << "_";
    }
    name << "Prc=" << elemType << "_";
    name << modeTag;
    name << "BS=" << blkSize << "_";
    name << CPUTestsBase::getTestCaseName(cpuSpecific);

    return name.str();
}
protected:
void SetUp() override {
LayerTestsDefinitions::depthToSpaceParamsTuple basicParamsSet;
CPUSpecificParams cpuParams;
std::tie(basicParamsSet, cpuParams) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
std::vector<size_t> inputShape;
InputShape shapes;
ElementType inType;
DepthToSpace::DepthToSpaceMode mode;
std::size_t blockSize;
InferenceEngine::Precision netPrecision;
std::tie(inputShape, netPrecision, mode, blockSize, targetDevice) = basicParamsSet;
CPUSpecificParams cpuParams;
std::tie(shapes, inType, mode, blockSize, cpuParams) = this->GetParam();
inPrc = outPrc = netPrecision;
selectedType = getPrimitiveType() + "_" + inPrc.name();
auto inPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(inPrc, {inputShape});
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
auto d2s = ngraph::builder::makeDepthToSpace(paramOuts[0], mode, blockSize);
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
if (selectedType.empty()) {
selectedType = getPrimitiveType();
}
selectedType = selectedType + "_" + InferenceEngine::details::convertPrecision(inType).name();
targetDevice = CommonTestUtils::DEVICE_CPU;
init_input_shapes({shapes});
auto params = ngraph::builder::makeDynamicParams(inType, inputDynamicShapes);
auto d2s = ngraph::builder::makeDepthToSpace(params[0], mode, blockSize);
d2s->get_rt_info() = getCPUInfo();
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(d2s)};
function = std::make_shared<ngraph::Function>(results, params, "DepthToSpaceCPU");
@ -61,35 +83,51 @@ protected:
TEST_P(DepthToSpaceLayerCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
CheckPluginRelatedResults(executableNetwork, "DepthToSpace");
run();
// TODO: uncomment the check below once CheckPluginRelatedResults has been updated
// CheckPluginRelatedResults(executableNetwork, "DepthToSpace");
}
namespace {
const auto cpuParams_nChw16c = CPUSpecificParams {{nChw16c}, {nChw16c}, {"jit_avx512"}, {"jit_avx512"}};
const auto cpuParams_nCdhw16c = CPUSpecificParams {{nCdhw16c}, {nCdhw16c}, {"jit_avx512"}, {"jit_avx512"}};
const auto cpuParams_nChw16c = CPUSpecificParams {{nChw16c}, {nChw16c}, {}, {}};
const auto cpuParams_nCdhw16c = CPUSpecificParams {{nCdhw16c}, {nCdhw16c}, {}, {}};
const auto cpuParams_nChw8c_avx2 = CPUSpecificParams {{nChw8c}, {nChw8c}, {"jit_avx2"}, {"jit_avx2"}};
const auto cpuParams_nCdhw8c_avx2 = CPUSpecificParams {{nCdhw8c}, {nCdhw8c}, {"jit_avx2"}, {"jit_avx2"}};
const auto cpuParams_nChw8c = CPUSpecificParams {{nChw8c}, {nChw8c}, {}, {}};
const auto cpuParams_nCdhw8c = CPUSpecificParams {{nCdhw8c}, {nCdhw8c}, {}, {}};
const auto cpuParams_nChw8c_sse42 = CPUSpecificParams {{nChw8c}, {nChw8c}, {"jit_sse42"}, {"jit_sse42"}};
const auto cpuParams_nCdhw8c_sse42 = CPUSpecificParams {{nCdhw8c}, {nCdhw8c}, {"jit_sse42"}, {"jit_sse42"}};
const auto cpuParams_nhwc = CPUSpecificParams {{nhwc}, {nhwc}, {}, {}};
const auto cpuParams_ndhwc = CPUSpecificParams {{ndhwc}, {ndhwc}, {}, {}};
const auto cpuParams_nhwc_avx2 = CPUSpecificParams {{nhwc}, {nhwc}, {"jit_avx2"}, {"jit_avx2"}};
const auto cpuParams_ndhwc_avx2 = CPUSpecificParams {{ndhwc}, {ndhwc}, {"jit_avx2"}, {"jit_avx2"}};
const auto cpuParams_nchw = CPUSpecificParams {{nchw}, {nchw}, {}, {}};
const auto cpuParams_ncdhw = CPUSpecificParams {{ncdhw}, {ncdhw}, {}, {}};
const auto cpuParams_nhwc_sse42 = CPUSpecificParams {{nhwc}, {nhwc}, {"jit_sse42"}, {"jit_sse42"}};
const auto cpuParams_ndhwc_sse42 = CPUSpecificParams {{ndhwc}, {ndhwc}, {"jit_sse42"}, {"jit_sse42"}};
const std::vector<CPUSpecificParams> CPUParams4D = {
cpuParams_nhwc,
cpuParams_nchw
};
const auto cpuParams_nhwc_ref = CPUSpecificParams {{nhwc}, {nhwc}, {"ref_any"}, {"ref_any"}};
const auto cpuParams_ndhwc_ref = CPUSpecificParams {{ndhwc}, {ndhwc}, {"ref_any"}, {"ref_any"}};
const std::vector<CPUSpecificParams> CPUParamsBlocked4D = {
cpuParams_nChw16c,
cpuParams_nChw8c,
cpuParams_nhwc
};
const std::vector<CPUSpecificParams> CPUParams5D = {
cpuParams_ndhwc,
cpuParams_ncdhw
};
const std::vector<InferenceEngine::Precision> inputPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::BF16,
InferenceEngine::Precision::I8
const std::vector<CPUSpecificParams> CPUParamsBlocked5D = {
cpuParams_nCdhw16c,
cpuParams_nCdhw8c,
cpuParams_ndhwc
};
const std::vector<ElementType> inputElementType = {
ElementType::f32,
ElementType::bf16,
ElementType::i8
};
const std::vector<DepthToSpace::DepthToSpaceMode> depthToSpaceModes = {
@ -97,99 +135,162 @@ const std::vector<DepthToSpace::DepthToSpaceMode> depthToSpaceModes = {
DepthToSpace::DepthToSpaceMode::DEPTH_FIRST
};
const std::vector<std::vector<size_t >> inputShapesBS2_4D = {
{1, 64, 1, 1}, {1, 64, 1, 3}, {1, 128, 3, 3}, {2, 128, 1, 1}, {1, 192, 2, 2}, {2, 256, 2, 3}, {1, 512, 2, 1}
/* *========================* Static Shapes Tests *========================* */
namespace static_shapes {
const std::vector<ov::Shape> inputShapesBS2_4D = {
{1, 64, 1, 1},
{1, 64, 1, 3},
{1, 128, 3, 3},
{2, 128, 1, 1},
{1, 192, 2, 2},
{2, 256, 2, 3},
{1, 512, 2, 1}
};
const std::vector<std::vector<size_t >> inputShapesBS3_4D = {
{1, 27, 1, 1}, {1, 27, 2, 3}, {1, 18, 2, 3}, {3, 18, 1, 1}, {2, 18, 3, 1}
const std::vector<ov::Shape> inputShapesBS3_4D = {
{1, 27, 1, 1},
{1, 27, 2, 3},
{1, 18, 2, 3},
{3, 18, 1, 1},
{2, 18, 3, 1}
};
const std::vector<CPUSpecificParams> CPUParamsBS2_4D = {
cpuParams_nChw16c,
cpuParams_nChw8c_avx2,
cpuParams_nChw8c_sse42,
cpuParams_nhwc_avx2,
cpuParams_nhwc_sse42,
cpuParams_nhwc_ref,
// Static-shape 4D cases with block size 1 and 2 on blocked / channels-last layouts.
// Named with the "Static" infix like the other static-shape suites in this namespace.
INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceStaticBS2_4D, DepthToSpaceLayerCPUTest,
                         testing::Combine(
                                 testing::ValuesIn(static_shapes_to_test_representation(inputShapesBS2_4D)),
                                 testing::ValuesIn(inputElementType),
                                 testing::ValuesIn(depthToSpaceModes),
                                 testing::Values(1, 2),
                                 testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBlocked4D))),
                         DepthToSpaceLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceStaticBS3_4D, DepthToSpaceLayerCPUTest,
testing::Combine(
testing::ValuesIn(static_shapes_to_test_representation(inputShapesBS3_4D)),
testing::ValuesIn(inputElementType),
testing::ValuesIn(depthToSpaceModes),
testing::Values(1, 3),
testing::ValuesIn(filterCPUInfoForDevice(CPUParams4D))),
DepthToSpaceLayerCPUTest::getTestCaseName);
const std::vector<ov::Shape> inputShapesBS2_5D = {
{1, 128, 1, 1, 1},
{1, 128, 2, 1, 2},
{1, 256, 2, 1, 3},
{2, 256, 3, 1, 1},
{1, 384, 1, 2, 2},
{2, 512, 1, 2, 1}
};
const auto depthToSpaceBS2_4DParams = testing::Combine(
testing::Combine(
testing::ValuesIn(inputShapesBS2_4D),
testing::ValuesIn(inputPrecisions),
testing::ValuesIn(depthToSpaceModes),
testing::Values(1, 2),
testing::Values(CommonTestUtils::DEVICE_CPU)),
testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBS2_4D))
);
INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceBS2_4D, DepthToSpaceLayerCPUTest, depthToSpaceBS2_4DParams, DepthToSpaceLayerCPUTest::getTestCaseName);
const std::vector<CPUSpecificParams> CPUParamsBS3_4D = {
cpuParams_nhwc_avx2,
cpuParams_nhwc_sse42,
cpuParams_nhwc_ref,
const std::vector<ov::Shape> inputShapesBS3_5D = {
{1, 54, 1, 1, 1},
{1, 54, 2, 1, 2},
{3, 54, 1, 1, 1},
{2, 54, 3, 1, 2},
{1, 54, 3, 2, 2}
};
const auto depthToSpaceBS3_4DParams = testing::Combine(
testing::Combine(
testing::ValuesIn(inputShapesBS3_4D),
testing::ValuesIn(inputPrecisions),
testing::ValuesIn(depthToSpaceModes),
testing::Values(1, 3),
testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBS3_4D))
);
INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceStaticBS2_5D, DepthToSpaceLayerCPUTest,
testing::Combine(
testing::ValuesIn(static_shapes_to_test_representation(inputShapesBS2_5D)),
testing::ValuesIn(inputElementType),
testing::ValuesIn(depthToSpaceModes),
testing::Values(1, 2),
testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBlocked5D))),
DepthToSpaceLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceBS3_4D, DepthToSpaceLayerCPUTest, depthToSpaceBS3_4DParams, DepthToSpaceLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceStaticBS3_5D, DepthToSpaceLayerCPUTest,
testing::Combine(
testing::ValuesIn(static_shapes_to_test_representation(inputShapesBS3_5D)),
testing::ValuesIn(inputElementType),
testing::ValuesIn(depthToSpaceModes),
testing::Values(1, 3),
testing::ValuesIn(filterCPUInfoForDevice(CPUParams5D))),
DepthToSpaceLayerCPUTest::getTestCaseName);
const std::vector<std::vector<size_t >> inputShapesBS2_5D = {
{1, 128, 1, 1, 1}, {1, 128, 2, 1, 2}, {1, 256, 2, 1, 3}, {2, 256, 3, 1, 1}, {1, 384, 1, 2, 2}, {2, 512, 1, 2, 1}
} // namespace static_shapes
/* *========================* *==================* *========================* */
/* *========================* Dynamic Shapes Tests *========================* */
namespace dynamic_shapes {
const std::vector<InputShape> inputShapes4D = {
{{-1, -1, -1 , -1}, // dynamic
{{2, 36, 1, 1}, {1, 36, 3, 1}, {1, 72, 1, 4}}}, // target
{{-1, 576, -1 , -1}, // dynamic
{{1, 576, 1, 1}, {1, 576, 2, 2}, {3, 576, 4, 1}}}, // target
{{{1, 5}, {36, 72}, {1, 16}, {1, 16}}, // dynamic
{{3, 36, 4, 4}, {1, 36, 16, 12}, {3, 72, 8, 8}}}, // target
};
const std::vector<std::vector<size_t >> inputShapesBS3_5D = {
{1, 54, 1, 1, 1}, {1, 54, 2, 1, 2}, {3, 54, 1, 1, 1}, {2, 54, 3, 1, 2}, {1, 54, 3, 2, 2}
const std::vector<InputShape> inputShapes5D = {
{{-1, -1, -1, -1, -1}, // dynamic
{{2, 216, 1, 1, 1}, {1, 216, 3, 1, 2}, {1, 432, 2, 3, 1}}}, // target
{{{1, 3}, {216, 432}, {1, 4}, {1, 4}, {1, 4}}, // dynamic
{{3, 216, 2, 2, 2}, {1, 432, 1, 1, 1}}}, // target
};
const std::vector<CPUSpecificParams> CPUParamsBS2_5D = {
cpuParams_nCdhw16c,
cpuParams_nCdhw8c_avx2,
cpuParams_nCdhw8c_sse42,
cpuParams_ndhwc_avx2,
cpuParams_ndhwc_sse42,
cpuParams_ndhwc_ref,
const std::vector<InputShape> inputShapesBlocked5D = {
{{-1, 256, -1, -1, -1}, // dynamic
{{1, 256, 1, 1, 1}, {1, 256, 2, 1, 4}, {3, 256, 4, 1, 2}}}, // target
{{{1, 3}, 256, {1, 3}, {1, 3}, {1, 3}}, // dynamic
{{1, 256, 1, 1, 1}, {1, 256, 2, 1, 3}, {3, 256, 3, 1, 2}}}, // target
};
const auto depthToSpaceBS2_5DParams = testing::Combine(
testing::Combine(
testing::ValuesIn(inputShapesBS2_5D),
testing::ValuesIn(inputPrecisions),
testing::ValuesIn(depthToSpaceModes),
testing::Values(1, 2),
testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBS2_5D))
);
INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceDynamic4D, DepthToSpaceLayerCPUTest,
testing::Combine(
testing::ValuesIn(inputShapes4D),
testing::ValuesIn(inputElementType),
testing::ValuesIn(depthToSpaceModes),
testing::Values(1, 2, 3),
testing::ValuesIn(filterCPUInfoForDevice(CPUParams4D))),
DepthToSpaceLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceBS2_5D, DepthToSpaceLayerCPUTest, depthToSpaceBS2_5DParams, DepthToSpaceLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceDynamicBlocksFirstBlocked4D, DepthToSpaceLayerCPUTest,
testing::Combine(
testing::Values(inputShapes4D[1]),
testing::ValuesIn(inputElementType),
testing::Values(DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST),
testing::Values(1, 2, 3),
testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBlocked4D))),
DepthToSpaceLayerCPUTest::getTestCaseName);
const std::vector<CPUSpecificParams> CPUParamsBS3_5D = {
cpuParams_ndhwc_avx2,
cpuParams_ndhwc_sse42,
cpuParams_ndhwc_ref,
};
INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceDynamicDepthFirstBlocked4D, DepthToSpaceLayerCPUTest,
testing::Combine(
testing::Values(inputShapes4D[1]),
testing::ValuesIn(inputElementType),
testing::Values(DepthToSpace::DepthToSpaceMode::DEPTH_FIRST),
testing::Values(1, 2),
testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBlocked4D))),
DepthToSpaceLayerCPUTest::getTestCaseName);
const auto depthToSpaceBS3_5DParams = testing::Combine(
testing::Combine(
testing::ValuesIn(inputShapesBS3_5D),
testing::ValuesIn(inputPrecisions),
testing::ValuesIn(depthToSpaceModes),
testing::Values(1, 3),
testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBS3_5D))
);
INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceDynamic5D, DepthToSpaceLayerCPUTest,
testing::Combine(
testing::ValuesIn(inputShapes5D),
testing::ValuesIn(inputElementType),
testing::ValuesIn(depthToSpaceModes),
testing::Values(1, 2, 3),
testing::ValuesIn(filterCPUInfoForDevice(CPUParams5D))),
DepthToSpaceLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceBS3_5D, DepthToSpaceLayerCPUTest, depthToSpaceBS3_5DParams, DepthToSpaceLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceDynamicCPUSpecific5D, DepthToSpaceLayerCPUTest,
testing::Combine(
testing::ValuesIn(inputShapesBlocked5D),
testing::ValuesIn(inputElementType),
testing::ValuesIn(depthToSpaceModes),
testing::Values(1, 2),
testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBlocked5D))),
DepthToSpaceLayerCPUTest::getTestCaseName);
} // namespace dynamic_shapes
/* *========================* *==================* *========================* */
} // namespace
} // namespace CPULayerTestsDefinitions