[CPU] Added dynamism support for DepthToSpace (#8375)

Alexandra Sidorova 2021-11-15 12:06:41 +03:00 committed by GitHub
parent f3e1dc25b2
commit c981d2f0dd
3 changed files with 364 additions and 231 deletions


@@ -17,17 +17,11 @@
 using namespace MKLDNNPlugin;
 using namespace InferenceEngine;
-using namespace mkldnn;
 using namespace mkldnn::impl;
-using namespace mkldnn::impl::cpu::x64;
 
 bool MKLDNNDepthToSpaceNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     try {
-        if (isDynamicNgraphNode(op)) {
-            errorMessage = "Doesn't support op with dynamic shapes";
-            return false;
-        }
-        const auto depthToSpace = std::dynamic_pointer_cast<const ngraph::opset1::DepthToSpace>(op);
+        auto depthToSpace = ov::as_type_ptr<const ngraph::opset1::DepthToSpace>(op);
         if (!depthToSpace) {
             errorMessage = "Only opset1 DepthToSpace operation is supported";
             return false;
@@ -46,75 +40,58 @@ bool MKLDNNDepthToSpaceNode::isSupportedOperation(const std::shared_ptr<const ng
 MKLDNNDepthToSpaceNode::MKLDNNDepthToSpaceNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
         : MKLDNNNode(op, eng, cache) {
     std::string errorMessage;
-    if (isSupportedOperation(op, errorMessage)) {
-        const auto depthToSpace = std::dynamic_pointer_cast<const ngraph::opset1::DepthToSpace>(op);
-
-        const auto modeNgraph = depthToSpace->get_mode();
-        if (modeNgraph == ngraph::op::v0::DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST) {
-            mode = Mode::BLOCKS_FIRST;
-        } else if (modeNgraph == ngraph::op::v0::DepthToSpace::DepthToSpaceMode::DEPTH_FIRST) {
-            mode = Mode::DEPTH_FIRST;
-        } else {
-            THROW_ERROR << "doesn't support mode: " << ngraph::as_string(modeNgraph);
-        }
-
-        blockSize = depthToSpace->get_block_size();
-        if (blockSize == 0)
-            THROW_ERROR << "has incorrect block_size parameter is zero!";
-
-        size_t nSpatialDims = inputShapes[0].getRank() - 2;
-        blockStep = static_cast<size_t>(std::pow(blockSize, nSpatialDims));
-    } else {
+    if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
     }
-}
+    if (inputShapes.size() != 1 || outputShapes.size() != 1)
+        THROW_ERROR << "has incorrect number of input/output edges!";
 
-void MKLDNNDepthToSpaceNode::getSupportedDescriptors() {
-    SizeVector srcDims = inputShapes[0].getStaticDims();
-    if (srcDims.size() < 3)
-        THROW_ERROR << "has incorrect number of input dimensions";
-    if (srcDims.size() > 5)
-        THROW_ERROR << "doesn't support dimensions with rank greater than 5";
+    auto depthToSpace = ov::as_type_ptr<const ngraph::opset1::DepthToSpace>(op);
+    if (!depthToSpace)
+        THROW_ERROR << "supports only opset1";
 
-    SizeVector dstDims = outputShapes[0].getStaticDims();
-    if (srcDims.size() != dstDims.size())
-        THROW_ERROR << "has incorrect number of input/output dimensions";
+    const auto modeNgraph = depthToSpace->get_mode();
+    if (modeNgraph == ngraph::op::v0::DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST) {
+        attrs.mode = Mode::BLOCKS_FIRST;
+    } else if (modeNgraph == ngraph::op::v0::DepthToSpace::DepthToSpaceMode::DEPTH_FIRST) {
+        attrs.mode = Mode::DEPTH_FIRST;
+    } else {
+        THROW_ERROR << "doesn't support mode: " << ngraph::as_string(modeNgraph);
+    }
 
-    if (srcDims[1] % blockStep)
-        THROW_ERROR << "has block_size parameter which is incompatible with input tensor channels dimension size";
+    attrs.blockSize = depthToSpace->get_block_size();
+    if (attrs.blockSize == 0)
+        THROW_ERROR << "has incorrect block_size parameter is zero!";
 
-    if (srcDims[1] / blockStep != dstDims[1])
-        THROW_ERROR << "has incompatible input/output channels";
-
-    size_t nSpatialDims = srcDims.size() - 2;
-    for (size_t i = 0; i < nSpatialDims; ++i) {
-        if (srcDims[i + 2] * blockSize != dstDims[i + 2])
-            THROW_ERROR << "has incompatible spatial dims";
-    }
+    const size_t srcRank = getInputShapeAtPort(0).getRank();
+    const size_t dstRank = getOutputShapeAtPort(0).getRank();
+    if (srcRank < 3)
+        THROW_ERROR << "has incorrect number of input dimensions";
+    if (srcRank > 5)
+        THROW_ERROR << "doesn't support dimensions with rank greater than 5";
+    if (srcRank != dstRank)
+        THROW_ERROR << "has incorrect number of input/output dimensions";
 
-    if (getParentEdges().size() != 1)
-        THROW_ERROR << "has incorrect number of input edges";
-    if (getChildEdges().empty())
-        THROW_ERROR << "has incorrect number of output edges";
+    const size_t nSpatialDims = srcRank - 2;
+    attrs.blockStep = static_cast<size_t>(std::pow(attrs.blockSize, nSpatialDims));
 }
 
+void MKLDNNDepthToSpaceNode::getSupportedDescriptors() {}
+
 void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
     InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0);
-    auto srcDims = getInputShapeAtPort(0).getStaticDims();
-    const size_t nDims = srcDims.size();
 
-    impl_desc_type impl_type;
-    if (mayiuse(impl::cpu::x64::avx512_common)) {
+    impl_desc_type impl_type = impl_desc_type::ref;
+    if (cpu::x64::mayiuse(cpu::x64::avx512_common)) {
         impl_type = impl_desc_type::jit_avx512;
-    } else if (mayiuse(cpu::x64::avx2)) {
+    } else if (cpu::x64::mayiuse(cpu::x64::avx2)) {
         impl_type = impl_desc_type::jit_avx2;
-    } else if (mayiuse(cpu::x64::sse41)) {
+    } else if (cpu::x64::mayiuse(cpu::x64::sse41)) {
         impl_type = impl_desc_type::jit_sse42;
-    } else {
-        impl_type = impl_desc_type::ref;
     }
 
     NodeConfig config;
@@ -126,11 +103,15 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() {
     config.outConfs[0].inPlace = -1;
     config.outConfs[0].constant = false;
 
+    const auto& inputDataShape = getInputShapeAtPort(0);
+    const auto& outputDataShape = getOutputShapeAtPort(0);
+
     std::vector<LayoutType> supportedTypes;
-    if (nDims > 2) {
+    if (inputDataShape.getRank() > 2) {
+        const auto& srcDims = inputDataShape.getDims();
         auto canUseBlocked = [=](const size_t block) {
-            return srcDims[1] % block == 0 && (srcDims[1] / block) % blockStep == 0 &&
-                   (mode == Mode::DEPTH_FIRST ? block % blockStep == 0 : true);
+            return srcDims[1] != Shape::UNDEFINED_DIM && srcDims[1] % block == 0 && (srcDims[1] / block) % attrs.blockStep == 0 &&
+                   (attrs.mode == Mode::DEPTH_FIRST ? block % attrs.blockStep == 0 : true);
         };
 
         supportedTypes.push_back(LayoutType::nspc);
@@ -141,18 +122,18 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() {
     }
     supportedTypes.push_back(LayoutType::ncsp);
     auto creators = BlockedDescCreator::getCommonCreators();
-    auto range = BlockedDescCreator::makeFilteredRange(creators, nDims, supportedTypes);
+    auto range = BlockedDescCreator::makeFilteredRange(creators, inputDataShape.getRank(), supportedTypes);
 
     for (auto itr = range.first; itr != range.second; ++itr) {
-        config.inConfs[0].desc = itr->second->createSharedDesc(precision, getInputShapeAtPort(0));
-        config.outConfs[0].desc = itr->second->createSharedDesc(precision, getOutputShapeAtPort(0));
+        config.inConfs[0].desc = itr->second->createSharedDesc(precision, inputDataShape);
+        config.outConfs[0].desc = itr->second->createSharedDesc(precision, outputDataShape);
         supportedPrimitiveDescriptors.emplace_back(config, impl_type);
     }
 }
 
 void MKLDNNDepthToSpaceNode::createPrimitive() {
-    auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
-    auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
+    auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
+    auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
     if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
         THROW_ERROR << "has not allocated destination memory";
     if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
@@ -160,24 +141,44 @@ void MKLDNNDepthToSpaceNode::createPrimitive() {
     if (getSelectedPrimitiveDescriptor() == nullptr)
         THROW_ERROR << "has unidentified preferable primitive descriptor";
 
-    VectorDims srcDims = srcMemPtr->getStaticDims();
+    const auto& memoryDesc = srcMemPtr->getDesc();
+    attrs.dataSize = memoryDesc.getPrecision().size();
+    attrs.nSpatialDims = memoryDesc.getShape().getRank() - 2;
+    attrs.layoutType = memoryDesc.hasLayoutType(LayoutType::nCsp16c) ? LayoutType::nCsp16c :
+                       memoryDesc.hasLayoutType(LayoutType::nCsp8c) ? LayoutType::nCsp8c :
+                       memoryDesc.hasLayoutType(LayoutType::nspc) ? LayoutType::nspc : LayoutType::ncsp;
 
-    size_t nDims = srcDims.size();
-    const size_t nSpatialDims = nDims - 2;
-    const bool isBlocked = getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c) ||
-                           getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c);
-    const size_t reshapedRank = nDims + nSpatialDims + static_cast<int>(isBlocked) + static_cast<int>(isBlocked && mode == Mode::DEPTH_FIRST);
+    if (inputShapesDefined()) {
+        if (needPrepareParams())
+            prepareParams();
+        updateLastInputDims();
+    }
+}
+
+void MKLDNNDepthToSpaceNode::prepareParams() {
+    attrs.srcBlockedDims = getParentEdgeAt(0)->getMemoryPtr()->GetDescWithType<BlockedMemoryDesc>()->getBlockDims();
+    execPtr = std::make_shared<DepthToSpaceExecutor>(attrs);
+}
+
+MKLDNNDepthToSpaceNode::DepthToSpaceExecutor::DepthToSpaceExecutor(const DepthToSpaceAttrs& attrs) {
+    if (!MKLDNNPlugin::one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c, LayoutType::nspc, LayoutType::ncsp))
+        IE_THROW() << "DepthToSpace executor supports only 'nCsp16c', 'nCsp8c', 'nspc' or 'ncsp' layouts.";
+
+    const bool isBlocked = MKLDNNPlugin::one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c);
+    const bool isChannelsFirst = attrs.layoutType == LayoutType::nspc;
+    const size_t nDims = attrs.srcBlockedDims.size();
+    const size_t reshapedRank = nDims + attrs.nSpatialDims + static_cast<int>(isBlocked && attrs.mode == Mode::DEPTH_FIRST);
     const size_t lastIdx = reshapedRank - 1;
     size_t firstSpatialOrder = 2;
 
     PermuteParams params;
-    params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size();
+    params.data_size = attrs.dataSize;
     params.order.resize(reshapedRank, 0);
     params.src_block_order.resize(reshapedRank);
     params.dst_block_order.resize(reshapedRank);
     params.dst_block_dims.resize(reshapedRank);
     params.src_block_dims.resize(reshapedRank);
-    params.src_block_dims[0] = srcDims[0];
+    params.src_block_dims[0] = attrs.srcBlockedDims[0];
 
     // reshaping of src dimensions and creating the permutation order for each layout:
     // new shape: mode = blocks_first [N, block_size, block_size, ..., block_size, C / (block_size ^ K), D1, D2, ..., DK]
@@ -186,62 +187,58 @@ void MKLDNNDepthToSpaceNode::createPrimitive() {
     //    mode = depth_first  : [0, 1, K + 2, 2, K + 3, 3, K + 4, 4, ..., K + (K + 1), K + 1]
     // where `k` is number of spatial dimensions
 
-    auto reshapeAndSetPermOrder = [&](const size_t idx1, const size_t idx2, const size_t shift, const SizeVector& dims) {
-        for (size_t i = 0; i < nSpatialDims; i++) {
+    auto reshapeAndSetPermOrder = [&](const size_t idx1, const size_t idx2, const size_t shift, const VectorDims& dims) {
+        for (size_t i = 0; i < attrs.nSpatialDims; i++) {
             params.order[i * 2 + shift] = i + idx1;
             params.order[i * 2 + shift + 1] = i + idx2;
             params.src_block_dims[params.order[i * 2 + shift]] = dims[i + shift];
-            params.src_block_dims[params.order[i * 2 + shift + 1]] = blockSize;
+            params.src_block_dims[params.order[i * 2 + shift + 1]] = attrs.blockSize;
         }
     };
 
     if (isBlocked) {
-        VectorDims srcBlockedDims = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>()->getBlockDims();
         size_t orderShiftForBlocks, orderShiftForDims;
-        if (mode == Mode::BLOCKS_FIRST) {
+        if (attrs.mode == Mode::BLOCKS_FIRST) {
             orderShiftForBlocks = 1;
-            orderShiftForDims = nSpatialDims + 2;
-            params.src_block_dims[nSpatialDims + 1] = srcBlockedDims[1] / blockStep;
-            params.src_block_dims[lastIdx] = srcBlockedDims.back();
-            params.order[1] = nSpatialDims + 1;
+            orderShiftForDims = attrs.nSpatialDims + 2;
+            params.src_block_dims[attrs.nSpatialDims + 1] = attrs.srcBlockedDims[1] / attrs.blockStep;
+            params.src_block_dims[lastIdx] = attrs.srcBlockedDims.back();
+            params.order[1] = attrs.nSpatialDims + 1;
             params.order[lastIdx] = lastIdx;
         } else {
-            orderShiftForBlocks = nSpatialDims + 4;
+            orderShiftForBlocks = attrs.nSpatialDims + 4;
             orderShiftForDims = 3;
-            size_t newBlockSize = srcBlockedDims.back() / blockStep;
-            size_t newBlocksCount = srcBlockedDims[1] / blockStep;
+            size_t newBlockSize = attrs.srcBlockedDims.back() / attrs.blockStep;
+            size_t newBlocksCount = attrs.srcBlockedDims[1] / attrs.blockStep;
             params.src_block_dims[1] = newBlocksCount;
-            params.src_block_dims[2] = srcBlockedDims[1] / newBlocksCount;
-            params.src_block_dims[lastIdx - nSpatialDims] = newBlockSize;
+            params.src_block_dims[2] = attrs.srcBlockedDims[1] / newBlocksCount;
+            params.src_block_dims[lastIdx - attrs.nSpatialDims] = newBlockSize;
             params.order[1] = 1;
             params.order[2] = 3;
             params.order[lastIdx - 1] = 2;
-            params.order[lastIdx] = lastIdx - nSpatialDims;
+            params.order[lastIdx] = lastIdx - attrs.nSpatialDims;
         }
-        reshapeAndSetPermOrder(orderShiftForDims, orderShiftForBlocks, firstSpatialOrder, srcBlockedDims);
-    } else if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) {
-        srcDims.push_back(srcDims[1]);
-        srcDims.erase(srcDims.begin() + 1);
+        reshapeAndSetPermOrder(orderShiftForDims, orderShiftForBlocks, firstSpatialOrder, attrs.srcBlockedDims);
+    } else if (isChannelsFirst) {
         firstSpatialOrder = 1;
-        size_t shift = static_cast<size_t>(mode == DEPTH_FIRST) + nSpatialDims + 1;
-        params.order[lastIdx] = mode == Mode::DEPTH_FIRST ? nSpatialDims + 1 : lastIdx;
-        params.src_block_dims[params.order[lastIdx]] = srcDims.back() / blockStep;
-        reshapeAndSetPermOrder(firstSpatialOrder, shift, firstSpatialOrder, srcDims);
+        size_t shift = static_cast<size_t>(attrs.mode == DEPTH_FIRST) + attrs.nSpatialDims + 1;
+        params.order[lastIdx] = attrs.mode == Mode::DEPTH_FIRST ? attrs.nSpatialDims + 1 : lastIdx;
+        params.src_block_dims[params.order[lastIdx]] = attrs.srcBlockedDims.back() / attrs.blockStep;
+        reshapeAndSetPermOrder(firstSpatialOrder, shift, firstSpatialOrder, attrs.srcBlockedDims);
     } else {
-        size_t shift = static_cast<size_t>(mode == DEPTH_FIRST) + 1;
-        params.order[1] = mode == DEPTH_FIRST ? 1 : nSpatialDims + 1;
-        params.src_block_dims[params.order[1]] = srcDims[1] / blockStep;
-        reshapeAndSetPermOrder(nSpatialDims + firstSpatialOrder, shift, firstSpatialOrder, srcDims);
+        size_t shift = static_cast<size_t>(attrs.mode == DEPTH_FIRST) + 1;
+        params.order[1] = attrs.mode == DEPTH_FIRST ? 1 : attrs.nSpatialDims + 1;
+        params.src_block_dims[params.order[1]] = attrs.srcBlockedDims[1] / attrs.blockStep;
+        reshapeAndSetPermOrder(attrs.nSpatialDims + firstSpatialOrder, shift, firstSpatialOrder, attrs.srcBlockedDims);
     }
 
     std::iota(params.src_block_order.begin(), params.src_block_order.end(), 0);
@@ -252,11 +249,27 @@ void MKLDNNDepthToSpaceNode::createPrimitive() {
     permuteKernel = std::unique_ptr<PermuteKernel>(new PermuteKernel(params));
 }
 
-void MKLDNNDepthToSpaceNode::execute(mkldnn::stream strm) {
-    const uint8_t* srcData = reinterpret_cast<const uint8_t*>(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
-    uint8_t* dstData = reinterpret_cast<uint8_t*>(this->getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
-    permuteKernel->execute(srcData, dstData, batchToProcess());
+void MKLDNNDepthToSpaceNode::DepthToSpaceExecutor::exec(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB) {
+    if (!permuteKernel)
+        IE_THROW() << "Could not execute. Kernel for Transpose node was not compiled.";
+
+    const uint8_t* srcData = reinterpret_cast<const uint8_t*>(srcMemPtr->GetPtr());
+    uint8_t* dstData = reinterpret_cast<uint8_t*>(dstMemPtr->GetPtr());
+
+    permuteKernel->execute(srcData, dstData, MB);
+}
+
+void MKLDNNDepthToSpaceNode::execute(mkldnn::stream strm) {
+    if (!execPtr) {
+        THROW_ERROR << "doesn't have a compiled executor.";
+    }
+
+    int MB = isDynamicNode() ? getParentEdgeAt(0)->getMemoryPtr()->getStaticDims()[0] : batchToProcess();
+    execPtr->exec(getParentEdgeAt(0)->getMemoryPtr(), getChildEdgeAt(0)->getMemoryPtr(), MB);
+}
+
+void MKLDNNDepthToSpaceNode::executeDynamicImpl(mkldnn::stream strm) {
+    execute(strm);
 }
 
 bool MKLDNNDepthToSpaceNode::created() const {
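
For reference, the permutation that DepthToSpaceExecutor assembles above is the usual reshape-and-transpose view of the operation. The standalone sketch below is not taken from the commit; it only illustrates the plain NCHW index mapping the kernel ends up computing, and the helper name depth_to_space_ref is hypothetical.

#include <cstddef>
#include <cstdio>
#include <vector>

enum class Mode { BLOCKS_FIRST, DEPTH_FIRST };

// Plain NCHW DepthToSpace: out[N, C/(b*b), H*b, W*b] from in[N, C, H, W].
// BLOCKS_FIRST takes the block offsets from the outermost channel groups,
// DEPTH_FIRST from the innermost ones, which is why the executor above builds
// a different permutation order for each mode.
std::vector<float> depth_to_space_ref(const std::vector<float>& in,
                                      std::size_t N, std::size_t C, std::size_t H, std::size_t W,
                                      std::size_t b, Mode mode) {
    const std::size_t Co = C / (b * b);
    std::vector<float> out(N * Co * H * b * W * b);
    for (std::size_t n = 0; n < N; ++n)
        for (std::size_t c = 0; c < Co; ++c)
            for (std::size_t h = 0; h < H * b; ++h)
                for (std::size_t w = 0; w < W * b; ++w) {
                    const std::size_t bh = h % b, bw = w % b;
                    const std::size_t srcC = (mode == Mode::BLOCKS_FIRST)
                                                 ? (bh * b + bw) * Co + c      // channel layout [b, b, C']
                                                 : c * b * b + bh * b + bw;    // channel layout [C', b, b]
                    const std::size_t srcIdx = ((n * C + srcC) * H + h / b) * W + w / b;
                    const std::size_t dstIdx = ((n * Co + c) * H * b + h) * W * b + w;
                    out[dstIdx] = in[srcIdx];
                }
    return out;
}

int main() {
    // 1x4x1x1 -> 1x1x2x2 with block_size = 2: the four channels become one 2x2 patch.
    const std::vector<float> in = {0, 1, 2, 3};
    const auto out = depth_to_space_ref(in, 1, 4, 1, 1, 2, Mode::BLOCKS_FIRST);
    for (float v : out) std::printf("%g ", v);  // prints: 0 1 2 3
    std::printf("\n");
    return 0;
}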


@@ -15,13 +15,17 @@ class MKLDNNDepthToSpaceNode : public MKLDNNNode {
 public:
     MKLDNNDepthToSpaceNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
+    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
 
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
     void createPrimitive() override;
     void execute(mkldnn::stream strm) override;
     bool created() const override;
-    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
+    void prepareParams() override;
+
+protected:
+    void executeDynamicImpl(mkldnn::stream strm) override;
 
 private:
     enum Mode {
@@ -29,11 +33,26 @@
         DEPTH_FIRST = 1
     };
 
-    Mode mode;
-    size_t blockSize;
-    size_t blockStep;
+    struct DepthToSpaceAttrs {
+        LayoutType layoutType;
+        Mode mode;
+        size_t blockSize = 0lu;
+        size_t blockStep = 0lu;
+        size_t dataSize = 1lu;
+        size_t nSpatialDims = 0lu;
+        VectorDims srcBlockedDims;
+    } attrs;
 
-    std::unique_ptr<PermuteKernel> permuteKernel;
+    struct DepthToSpaceExecutor {
+        DepthToSpaceExecutor(const DepthToSpaceAttrs& attrs);
+        void exec(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB);
+        ~DepthToSpaceExecutor() = default;
+
+    private:
+        std::unique_ptr<PermuteKernel> permuteKernel;
+    };
+    using executorPtr = std::shared_ptr<DepthToSpaceExecutor>;
+    executorPtr execPtr = nullptr;
 };
 
 }  // namespace MKLDNNPlugin
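
The header above splits the node into shape-independent DepthToSpaceAttrs and a shape-dependent DepthToSpaceExecutor that prepareParams() rebuilds whenever the input shape changes. A compressed sketch of that pattern is shown below; the names (Attrs, Executor, Node) are illustrative stand-ins, not the plugin API.

#include <cstddef>
#include <memory>
#include <vector>

// Everything known at graph-compile time lives in the attributes struct.
struct Attrs {
    std::size_t blockSize = 1;   // taken from the ngraph op
    std::size_t blockStep = 1;   // blockSize ^ nSpatialDims
};

// Anything that depends on the concrete input shape is precomputed once per shape.
class Executor {
public:
    Executor(const Attrs& attrs, const std::vector<std::size_t>& srcDims)
        : attrs_(attrs), srcDims_(srcDims) {}          // heavy setup happens here, once per shape
    void exec(const float* src, float* dst) const {
        (void)src; (void)dst;                          // a real executor runs the precomputed kernel
    }
private:
    Attrs attrs_;
    std::vector<std::size_t> srcDims_;
};

class Node {
public:
    // Called once for static shapes, and again after every shape change for dynamic ones.
    void prepareParams(const std::vector<std::size_t>& srcDims) {
        execPtr_ = std::make_shared<Executor>(attrs_, srcDims);
    }
    // execute() only forwards to whatever executor the last prepareParams() produced.
    void execute(const float* src, float* dst) const { execPtr_->exec(src, dst); }
private:
    Attrs attrs_;
    std::shared_ptr<Executor> execPtr_;
};

int main() {
    Node node;
    node.prepareParams({1, 64, 2, 2});   // the runtime shape became known
    std::vector<float> src(1 * 64 * 2 * 2), dst(1 * 64 * 2 * 2);
    node.execute(src.data(), dst.data());
    return 0;
}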


@@ -2,56 +2,78 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <shared_test_classes/single_layer/depth_to_space.hpp>
+#include "shared_test_classes/single_layer/depth_to_space.hpp"
 #include "test_utils/cpu_test_utils.hpp"
+#include "shared_test_classes/base/ov_subgraph.hpp"
 
 using namespace InferenceEngine;
 using namespace CPUTestUtils;
 using namespace ngraph::opset3;
+using namespace ov::test;
 
 namespace CPULayerTestsDefinitions {
 
-typedef std::tuple<
-        LayerTestsDefinitions::depthToSpaceParamsTuple,
+using DepthToSpaceLayerCPUTestParamSet = std::tuple<
+        InputShape,                                     // Input shape
+        ElementType,                                    // Input element type
+        DepthToSpace::DepthToSpaceMode,                 // Mode
+        std::size_t,                                    // Block size
         CPUSpecificParams
-> DepthToSpaceLayerCPUTestParamSet;
+>;
 
 class DepthToSpaceLayerCPUTest : public testing::WithParamInterface<DepthToSpaceLayerCPUTestParamSet>,
-                                 virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
+                                 virtual public ov::test::SubgraphBaseTest, public CPUTestsBase {
 public:
     static std::string getTestCaseName(testing::TestParamInfo<DepthToSpaceLayerCPUTestParamSet> obj) {
-        LayerTestsDefinitions::depthToSpaceParamsTuple basicParamsSet;
+        InputShape shapes;
+        ElementType inType;
+        DepthToSpace::DepthToSpaceMode mode;
+        std::size_t blockSize;
         CPUSpecificParams cpuParams;
-        std::tie(basicParamsSet, cpuParams) = obj.param;
+        std::tie(shapes, inType, mode, blockSize, cpuParams) = obj.param;
 
-        std::ostringstream result;
-        result << LayerTestsDefinitions::DepthToSpaceLayerTest::getTestCaseName(
-                     testing::TestParamInfo<LayerTestsDefinitions::depthToSpaceParamsTuple>(basicParamsSet, 0));
-
-        result << CPUTestsBase::getTestCaseName(cpuParams);
-        return result.str();
+        std::ostringstream results;
+        results << "IS=" << CommonTestUtils::partialShape2str({shapes.first}) << "_";
+        results << "TS=";
+        for (const auto& item : shapes.second) {
+            results << CommonTestUtils::vec2str(item) << "_";
+        }
+        results << "Prc=" << inType << "_";
+        switch (mode) {
+            case DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST:
+                results << "BLOCKS_FIRST_";
+                break;
+            case DepthToSpace::DepthToSpaceMode::DEPTH_FIRST:
+                results << "DEPTH_FIRST_";
+                break;
+            default:
+                throw std::runtime_error("Unsupported DepthToSpaceMode");
+        }
+        results << "BS=" << blockSize << "_";
+        results << CPUTestsBase::getTestCaseName(cpuParams);
+
+        return results.str();
     }
 
 protected:
     void SetUp() override {
-        LayerTestsDefinitions::depthToSpaceParamsTuple basicParamsSet;
-        CPUSpecificParams cpuParams;
-        std::tie(basicParamsSet, cpuParams) = this->GetParam();
-        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
-
-        std::vector<size_t> inputShape;
+        InputShape shapes;
+        ElementType inType;
         DepthToSpace::DepthToSpaceMode mode;
         std::size_t blockSize;
-        InferenceEngine::Precision netPrecision;
-        std::tie(inputShape, netPrecision, mode, blockSize, targetDevice) = basicParamsSet;
-        inPrc = outPrc = netPrecision;
-        selectedType = getPrimitiveType() + "_" + inPrc.name();
-        auto inPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
-        auto params = ngraph::builder::makeParams(inPrc, {inputShape});
-        auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
-        auto d2s = ngraph::builder::makeDepthToSpace(paramOuts[0], mode, blockSize);
+        CPUSpecificParams cpuParams;
+        std::tie(shapes, inType, mode, blockSize, cpuParams) = this->GetParam();
+        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
+
+        if (selectedType.empty()) {
+            selectedType = getPrimitiveType();
+        }
+        selectedType = selectedType + "_" + InferenceEngine::details::convertPrecision(inType).name();
+        targetDevice = CommonTestUtils::DEVICE_CPU;
+
+        init_input_shapes({shapes});
+
+        auto params = ngraph::builder::makeDynamicParams(inType, inputDynamicShapes);
+        auto d2s = ngraph::builder::makeDepthToSpace(params[0], mode, blockSize);
         d2s->get_rt_info() = getCPUInfo();
         ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(d2s)};
         function = std::make_shared<ngraph::Function>(results, params, "DepthToSpaceCPU");
@@ -61,35 +83,51 @@ protected:
 TEST_P(DepthToSpaceLayerCPUTest, CompareWithRefs) {
     SKIP_IF_CURRENT_TEST_IS_DISABLED()
 
-    Run();
-    CheckPluginRelatedResults(executableNetwork, "DepthToSpace");
+    run();
+    // TODO: need to uncomment when this method will be updated
+    // CheckPluginRelatedResults(executableNetwork, "DepthToSpace");
 }
 
 namespace {
 
-const auto cpuParams_nChw16c = CPUSpecificParams {{nChw16c}, {nChw16c}, {"jit_avx512"}, {"jit_avx512"}};
-const auto cpuParams_nCdhw16c = CPUSpecificParams {{nCdhw16c}, {nCdhw16c}, {"jit_avx512"}, {"jit_avx512"}};
-
-const auto cpuParams_nChw8c_avx2 = CPUSpecificParams {{nChw8c}, {nChw8c}, {"jit_avx2"}, {"jit_avx2"}};
-const auto cpuParams_nCdhw8c_avx2 = CPUSpecificParams {{nCdhw8c}, {nCdhw8c}, {"jit_avx2"}, {"jit_avx2"}};
-
-const auto cpuParams_nChw8c_sse42 = CPUSpecificParams {{nChw8c}, {nChw8c}, {"jit_sse42"}, {"jit_sse42"}};
-const auto cpuParams_nCdhw8c_sse42 = CPUSpecificParams {{nCdhw8c}, {nCdhw8c}, {"jit_sse42"}, {"jit_sse42"}};
-
-const auto cpuParams_nhwc_avx2 = CPUSpecificParams {{nhwc}, {nhwc}, {"jit_avx2"}, {"jit_avx2"}};
-const auto cpuParams_ndhwc_avx2 = CPUSpecificParams {{ndhwc}, {ndhwc}, {"jit_avx2"}, {"jit_avx2"}};
-
-const auto cpuParams_nhwc_sse42 = CPUSpecificParams {{nhwc}, {nhwc}, {"jit_sse42"}, {"jit_sse42"}};
-const auto cpuParams_ndhwc_sse42 = CPUSpecificParams {{ndhwc}, {ndhwc}, {"jit_sse42"}, {"jit_sse42"}};
-
-const auto cpuParams_nhwc_ref = CPUSpecificParams {{nhwc}, {nhwc}, {"ref_any"}, {"ref_any"}};
-const auto cpuParams_ndhwc_ref = CPUSpecificParams {{ndhwc}, {ndhwc}, {"ref_any"}, {"ref_any"}};
-
-const std::vector<InferenceEngine::Precision> inputPrecisions = {
-        InferenceEngine::Precision::FP32,
-        InferenceEngine::Precision::BF16,
-        InferenceEngine::Precision::I8
+const auto cpuParams_nChw16c = CPUSpecificParams {{nChw16c}, {nChw16c}, {}, {}};
+const auto cpuParams_nCdhw16c = CPUSpecificParams {{nCdhw16c}, {nCdhw16c}, {}, {}};
+
+const auto cpuParams_nChw8c = CPUSpecificParams {{nChw8c}, {nChw8c}, {}, {}};
+const auto cpuParams_nCdhw8c = CPUSpecificParams {{nCdhw8c}, {nCdhw8c}, {}, {}};
+
+const auto cpuParams_nhwc = CPUSpecificParams {{nhwc}, {nhwc}, {}, {}};
+const auto cpuParams_ndhwc = CPUSpecificParams {{ndhwc}, {ndhwc}, {}, {}};
+
+const auto cpuParams_nchw = CPUSpecificParams {{nchw}, {nchw}, {}, {}};
+const auto cpuParams_ncdhw = CPUSpecificParams {{ncdhw}, {ncdhw}, {}, {}};
+
+const std::vector<CPUSpecificParams> CPUParams4D = {
+        cpuParams_nhwc,
+        cpuParams_nchw
+};
+
+const std::vector<CPUSpecificParams> CPUParamsBlocked4D = {
+        cpuParams_nChw16c,
+        cpuParams_nChw8c,
+        cpuParams_nhwc
+};
+
+const std::vector<CPUSpecificParams> CPUParams5D = {
+        cpuParams_ndhwc,
+        cpuParams_ncdhw
+};
+
+const std::vector<CPUSpecificParams> CPUParamsBlocked5D = {
+        cpuParams_nCdhw16c,
+        cpuParams_nCdhw8c,
+        cpuParams_ndhwc
+};
+
+const std::vector<ElementType> inputElementType = {
+        ElementType::f32,
+        ElementType::bf16,
+        ElementType::i8
 };
 
 const std::vector<DepthToSpace::DepthToSpaceMode> depthToSpaceModes = {
@@ -97,99 +135,162 @@ const std::vector<DepthToSpace::DepthToSpaceMode> depthToSpaceModes = {
         DepthToSpace::DepthToSpaceMode::DEPTH_FIRST
 };
 
-const std::vector<std::vector<size_t >> inputShapesBS2_4D = {
-        {1, 64, 1, 1}, {1, 64, 1, 3}, {1, 128, 3, 3}, {2, 128, 1, 1}, {1, 192, 2, 2}, {2, 256, 2, 3}, {1, 512, 2, 1}
+/* *========================* Static Shapes Tests *========================* */
+namespace static_shapes {
+
+const std::vector<ov::Shape> inputShapesBS2_4D = {
+        {1, 64, 1, 1},
+        {1, 64, 1, 3},
+        {1, 128, 3, 3},
+        {2, 128, 1, 1},
+        {1, 192, 2, 2},
+        {2, 256, 2, 3},
+        {1, 512, 2, 1}
 };
 
-const std::vector<std::vector<size_t >> inputShapesBS3_4D = {
-        {1, 27, 1, 1}, {1, 27, 2, 3}, {1, 18, 2, 3}, {3, 18, 1, 1}, {2, 18, 3, 1}
+const std::vector<ov::Shape> inputShapesBS3_4D = {
+        {1, 27, 1, 1},
+        {1, 27, 2, 3},
+        {1, 18, 2, 3},
+        {3, 18, 1, 1},
+        {2, 18, 3, 1}
 };
 
-const std::vector<CPUSpecificParams> CPUParamsBS2_4D = {
-        cpuParams_nChw16c,
-        cpuParams_nChw8c_avx2,
-        cpuParams_nChw8c_sse42,
-        cpuParams_nhwc_avx2,
-        cpuParams_nhwc_sse42,
-        cpuParams_nhwc_ref,
+INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceBS2_4D, DepthToSpaceLayerCPUTest,
+                         testing::Combine(
+                                 testing::ValuesIn(static_shapes_to_test_representation(inputShapesBS2_4D)),
+                                 testing::ValuesIn(inputElementType),
+                                 testing::ValuesIn(depthToSpaceModes),
+                                 testing::Values(1, 2),
+                                 testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBlocked4D))),
+                         DepthToSpaceLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceStaticBS3_4D, DepthToSpaceLayerCPUTest,
+                         testing::Combine(
+                                 testing::ValuesIn(static_shapes_to_test_representation(inputShapesBS3_4D)),
+                                 testing::ValuesIn(inputElementType),
+                                 testing::ValuesIn(depthToSpaceModes),
+                                 testing::Values(1, 3),
+                                 testing::ValuesIn(filterCPUInfoForDevice(CPUParams4D))),
+                         DepthToSpaceLayerCPUTest::getTestCaseName);
+
+const std::vector<ov::Shape> inputShapesBS2_5D = {
+        {1, 128, 1, 1, 1},
+        {1, 128, 2, 1, 2},
+        {1, 256, 2, 1, 3},
+        {2, 256, 3, 1, 1},
+        {1, 384, 1, 2, 2},
+        {2, 512, 1, 2, 1}
 };
 
-const auto depthToSpaceBS2_4DParams = testing::Combine(
-        testing::Combine(
-                testing::ValuesIn(inputShapesBS2_4D),
-                testing::ValuesIn(inputPrecisions),
-                testing::ValuesIn(depthToSpaceModes),
-                testing::Values(1, 2),
-                testing::Values(CommonTestUtils::DEVICE_CPU)),
-        testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBS2_4D))
-);
-
-INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceBS2_4D, DepthToSpaceLayerCPUTest, depthToSpaceBS2_4DParams, DepthToSpaceLayerCPUTest::getTestCaseName);
-
-const std::vector<CPUSpecificParams> CPUParamsBS3_4D = {
-        cpuParams_nhwc_avx2,
-        cpuParams_nhwc_sse42,
-        cpuParams_nhwc_ref,
+const std::vector<ov::Shape> inputShapesBS3_5D = {
+        {1, 54, 1, 1, 1},
+        {1, 54, 2, 1, 2},
+        {3, 54, 1, 1, 1},
+        {2, 54, 3, 1, 2},
+        {1, 54, 3, 2, 2}
 };
 
-const auto depthToSpaceBS3_4DParams = testing::Combine(
-        testing::Combine(
-                testing::ValuesIn(inputShapesBS3_4D),
-                testing::ValuesIn(inputPrecisions),
-                testing::ValuesIn(depthToSpaceModes),
-                testing::Values(1, 3),
-                testing::Values(CommonTestUtils::DEVICE_CPU)),
-        ::testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBS3_4D))
-);
-
-INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceBS3_4D, DepthToSpaceLayerCPUTest, depthToSpaceBS3_4DParams, DepthToSpaceLayerCPUTest::getTestCaseName);
-
-const std::vector<std::vector<size_t >> inputShapesBS2_5D = {
-        {1, 128, 1, 1, 1}, {1, 128, 2, 1, 2}, {1, 256, 2, 1, 3}, {2, 256, 3, 1, 1}, {1, 384, 1, 2, 2}, {2, 512, 1, 2, 1}
+INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceStaticBS2_5D, DepthToSpaceLayerCPUTest,
+                         testing::Combine(
+                                 testing::ValuesIn(static_shapes_to_test_representation(inputShapesBS2_5D)),
+                                 testing::ValuesIn(inputElementType),
+                                 testing::ValuesIn(depthToSpaceModes),
+                                 testing::Values(1, 2),
+                                 testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBlocked5D))),
+                         DepthToSpaceLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceStaticBS3_5D, DepthToSpaceLayerCPUTest,
+                         testing::Combine(
+                                 testing::ValuesIn(static_shapes_to_test_representation(inputShapesBS3_5D)),
+                                 testing::ValuesIn(inputElementType),
+                                 testing::ValuesIn(depthToSpaceModes),
+                                 testing::Values(1, 3),
+                                 testing::ValuesIn(filterCPUInfoForDevice(CPUParams5D))),
+                         DepthToSpaceLayerCPUTest::getTestCaseName);
+
+}  // namespace static_shapes
+/* *========================* *==================* *========================* */
+
+/* *========================* Dynamic Shapes Tests *========================* */
+namespace dynamic_shapes {
+
+const std::vector<InputShape> inputShapes4D = {
+        {{-1, -1, -1, -1},                                                // dynamic
+         {{2, 36, 1, 1}, {1, 36, 3, 1}, {1, 72, 1, 4}}},                  // target
+        {{-1, 576, -1, -1},                                               // dynamic
+         {{1, 576, 1, 1}, {1, 576, 2, 2}, {3, 576, 4, 1}}},               // target
+        {{{1, 5}, {36, 72}, {1, 16}, {1, 16}},                            // dynamic
+         {{3, 36, 4, 4}, {1, 36, 16, 12}, {3, 72, 8, 8}}},                // target
 };
 
-const std::vector<std::vector<size_t >> inputShapesBS3_5D = {
-        {1, 54, 1, 1, 1}, {1, 54, 2, 1, 2}, {3, 54, 1, 1, 1}, {2, 54, 3, 1, 2}, {1, 54, 3, 2, 2}
+const std::vector<InputShape> inputShapes5D = {
+        {{-1, -1, -1, -1, -1},                                            // dynamic
+         {{2, 216, 1, 1, 1}, {1, 216, 3, 1, 2}, {1, 432, 2, 3, 1}}},      // target
+        {{{1, 3}, {216, 432}, {1, 4}, {1, 4}, {1, 4}},                    // dynamic
+         {{3, 216, 2, 2, 2}, {1, 432, 1, 1, 1}}},                         // target
 };
 
-const std::vector<CPUSpecificParams> CPUParamsBS2_5D = {
-        cpuParams_nCdhw16c,
-        cpuParams_nCdhw8c_avx2,
-        cpuParams_nCdhw8c_sse42,
-        cpuParams_ndhwc_avx2,
-        cpuParams_ndhwc_sse42,
-        cpuParams_ndhwc_ref,
+const std::vector<InputShape> inputShapesBlocked5D = {
+        {{-1, 256, -1, -1, -1},                                           // dynamic
+         {{1, 256, 1, 1, 1}, {1, 256, 2, 1, 4}, {3, 256, 4, 1, 2}}},      // target
+
+        {{{1, 3}, 256, {1, 3}, {1, 3}, {1, 3}},                           // dynamic
+         {{1, 256, 1, 1, 1}, {1, 256, 2, 1, 3}, {3, 256, 3, 1, 2}}},      // target
 };
 
-const auto depthToSpaceBS2_5DParams = testing::Combine(
-        testing::Combine(
-                testing::ValuesIn(inputShapesBS2_5D),
-                testing::ValuesIn(inputPrecisions),
-                testing::ValuesIn(depthToSpaceModes),
-                testing::Values(1, 2),
-                testing::Values(CommonTestUtils::DEVICE_CPU)),
-        ::testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBS2_5D))
-);
-
-INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceBS2_5D, DepthToSpaceLayerCPUTest, depthToSpaceBS2_5DParams, DepthToSpaceLayerCPUTest::getTestCaseName);
-
-const std::vector<CPUSpecificParams> CPUParamsBS3_5D = {
-        cpuParams_ndhwc_avx2,
-        cpuParams_ndhwc_sse42,
-        cpuParams_ndhwc_ref,
-};
-
-const auto depthToSpaceBS3_5DParams = testing::Combine(
-        testing::Combine(
-                testing::ValuesIn(inputShapesBS3_5D),
-                testing::ValuesIn(inputPrecisions),
-                testing::ValuesIn(depthToSpaceModes),
-                testing::Values(1, 3),
-                testing::Values(CommonTestUtils::DEVICE_CPU)),
-        ::testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBS3_5D))
-);
-
-INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceBS3_5D, DepthToSpaceLayerCPUTest, depthToSpaceBS3_5DParams, DepthToSpaceLayerCPUTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceDynamic4D, DepthToSpaceLayerCPUTest,
+                         testing::Combine(
+                                 testing::ValuesIn(inputShapes4D),
+                                 testing::ValuesIn(inputElementType),
+                                 testing::ValuesIn(depthToSpaceModes),
+                                 testing::Values(1, 2, 3),
+                                 testing::ValuesIn(filterCPUInfoForDevice(CPUParams4D))),
+                         DepthToSpaceLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceDynamicBlocksFirstBlocked4D, DepthToSpaceLayerCPUTest,
+                         testing::Combine(
+                                 testing::Values(inputShapes4D[1]),
+                                 testing::ValuesIn(inputElementType),
+                                 testing::Values(DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST),
+                                 testing::Values(1, 2, 3),
+                                 testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBlocked4D))),
+                         DepthToSpaceLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceDynamicDepthFirstBlocked4D, DepthToSpaceLayerCPUTest,
+                         testing::Combine(
+                                 testing::Values(inputShapes4D[1]),
+                                 testing::ValuesIn(inputElementType),
+                                 testing::Values(DepthToSpace::DepthToSpaceMode::DEPTH_FIRST),
+                                 testing::Values(1, 2),
+                                 testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBlocked4D))),
+                         DepthToSpaceLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceDynamic5D, DepthToSpaceLayerCPUTest,
+                         testing::Combine(
+                                 testing::ValuesIn(inputShapes5D),
+                                 testing::ValuesIn(inputElementType),
+                                 testing::ValuesIn(depthToSpaceModes),
+                                 testing::Values(1, 2, 3),
+                                 testing::ValuesIn(filterCPUInfoForDevice(CPUParams5D))),
+                         DepthToSpaceLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_CPUDepthToSpaceDynamicCPUSpecific5D, DepthToSpaceLayerCPUTest,
+                         testing::Combine(
+                                 testing::ValuesIn(inputShapesBlocked5D),
+                                 testing::ValuesIn(inputElementType),
+                                 testing::ValuesIn(depthToSpaceModes),
+                                 testing::Values(1, 2),
+                                 testing::ValuesIn(filterCPUInfoForDevice(CPUParamsBlocked5D))),
+                         DepthToSpaceLayerCPUTest::getTestCaseName);
+
+}  // namespace dynamic_shapes
+/* *========================* *==================* *========================* */
+
 }  // namespace
 }  // namespace CPULayerTestsDefinitions
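
As a quick cross-check of the shape sets above: DepthToSpace divides the channel count by block_size ^ nSpatialDims and multiplies every spatial dimension by block_size, which is why the dynamic 4D cases use channel counts such as 36, 72 and 576 (divisible by 1, 4 and 9, so block sizes 1, 2 and 3 are all valid). The sketch below is only an illustration; the helper name d2sOutputShape is hypothetical.

#include <cassert>
#include <cstddef>
#include <vector>

// DepthToSpace output shape for an NC[D]HW input: channels shrink by
// block_size ^ nSpatialDims, every spatial dimension grows by block_size.
std::vector<std::size_t> d2sOutputShape(std::vector<std::size_t> dims, std::size_t blockSize) {
    const std::size_t nSpatialDims = dims.size() - 2;
    std::size_t blockStep = 1;
    for (std::size_t i = 0; i < nSpatialDims; ++i)
        blockStep *= blockSize;
    assert(dims[1] % blockStep == 0);  // C must be divisible by block_size ^ nSpatialDims
    dims[1] /= blockStep;
    for (std::size_t i = 2; i < dims.size(); ++i)
        dims[i] *= blockSize;
    return dims;
}

int main() {
    // A 4D dynamic target from above: {2, 36, 1, 1} with block_size = 3 -> {2, 4, 3, 3}.
    assert((d2sOutputShape({2, 36, 1, 1}, 3) == std::vector<std::size_t>{2, 4, 3, 3}));
    // A blocked 5D target: {1, 256, 1, 1, 1} with block_size = 2 -> {1, 32, 2, 2, 2}.
    assert((d2sOutputShape({1, 256, 1, 1, 1}, 2) == std::vector<std::size_t>{1, 32, 2, 2, 2}));
    return 0;
}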