[ARM CPU] Add ACL deconvolution operation (#18655)

This commit is contained in:
Nesterov Alexander 2023-08-02 23:02:01 +02:00 committed by GitHub
parent d51fc7adad
commit f3bafef128
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 657 additions and 50 deletions

View File

@ -23,6 +23,11 @@
#include <common/primitive_desc_iface.hpp> #include <common/primitive_desc_iface.hpp>
#include <utils/shape_inference/shape_inference_ngraph.hpp> #include <utils/shape_inference/shape_inference_ngraph.hpp>
#if defined(OV_CPU_WITH_ACL)
#include "executors/acl/acl_utils.hpp"
#include "utils/debug_capabilities.h"
#endif
#include <oneapi/dnnl/dnnl.hpp> #include <oneapi/dnnl/dnnl.hpp>
#include <string> #include <string>
@ -174,15 +179,15 @@ Deconvolution::Deconvolution(const std::shared_ptr<ngraph::Node>& op,
withGroups = false; withGroups = false;
for (size_t i = 0; i < convBackprop->get_strides().size(); i++) { for (size_t i = 0; i < convBackprop->get_strides().size(); i++) {
stride.push_back(static_cast<ptrdiff_t>(convBackprop->get_strides()[i])); deconvAttrs.stride.push_back(static_cast<ptrdiff_t>(convBackprop->get_strides()[i]));
} }
for (size_t i = 0; i < convBackprop->get_dilations().size(); i++) { for (size_t i = 0; i < convBackprop->get_dilations().size(); i++) {
dilation.push_back(static_cast<ptrdiff_t>(convBackprop->get_dilations()[i]) - 1); deconvAttrs.dilation.push_back(static_cast<ptrdiff_t>(convBackprop->get_dilations()[i]) - 1);
} }
paddingL = convBackprop->get_pads_begin(); deconvAttrs.paddingL = convBackprop->get_pads_begin();
paddingR = convBackprop->get_pads_end(); deconvAttrs.paddingR = convBackprop->get_pads_end();
outputPadding = convBackprop->get_output_padding(); deconvAttrs.outputPadding = convBackprop->get_output_padding();
autoPad = one_of(convBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER); autoPad = one_of(convBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER);
} else if (auto groupConvBackprop = std::dynamic_pointer_cast<const ngraph::opset1::GroupConvolutionBackpropData>(op)) { } else if (auto groupConvBackprop = std::dynamic_pointer_cast<const ngraph::opset1::GroupConvolutionBackpropData>(op)) {
@ -196,20 +201,20 @@ Deconvolution::Deconvolution(const std::shared_ptr<ngraph::Node>& op,
isDW = withGroups && groupNum == OC && groupNum == IC; isDW = withGroups && groupNum == OC && groupNum == IC;
for (size_t i = 0; i < groupConvBackprop->get_strides().size(); i++) { for (size_t i = 0; i < groupConvBackprop->get_strides().size(); i++) {
stride.push_back(static_cast<ptrdiff_t>(groupConvBackprop->get_strides()[i])); deconvAttrs.stride.push_back(static_cast<ptrdiff_t>(groupConvBackprop->get_strides()[i]));
} }
for (size_t i = 0; i < groupConvBackprop->get_dilations().size(); i++) { for (size_t i = 0; i < groupConvBackprop->get_dilations().size(); i++) {
dilation.push_back(static_cast<ptrdiff_t>(groupConvBackprop->get_dilations()[i]) - 1); deconvAttrs.dilation.push_back(static_cast<ptrdiff_t>(groupConvBackprop->get_dilations()[i]) - 1);
} }
paddingL = groupConvBackprop->get_pads_begin(); deconvAttrs.paddingL = groupConvBackprop->get_pads_begin();
paddingR = groupConvBackprop->get_pads_end(); deconvAttrs.paddingR = groupConvBackprop->get_pads_end();
outputPadding = groupConvBackprop->get_output_padding(); deconvAttrs.outputPadding = groupConvBackprop->get_output_padding();
autoPad = one_of(groupConvBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER); autoPad = one_of(groupConvBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER);
} }
for (size_t i = 0; i < dilation.size(); i++) { for (size_t i = 0; i < deconvAttrs.dilation.size(); i++) {
kernel.push_back(weightDims[withGroups + 2 + i]); deconvAttrs.kernel.push_back(weightDims[withGroups + 2 + i]);
} }
externOutShape = inputShapes.size() == 3; externOutShape = inputShapes.size() == 3;
@ -274,7 +279,7 @@ bool Deconvolution::canBeExecutedInInt8() const {
return false; return false;
} }
if (!withGroups && stride.back() > 3) if (!withGroups && deconvAttrs.stride.back() > 3)
return false; return false;
if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core)) { if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core)) {
const auto& inMaxDims = getOutputShapeAtPort(0).getMaxDims(); const auto& inMaxDims = getOutputShapeAtPort(0).getMaxDims();
@ -291,8 +296,8 @@ bool Deconvolution::canBeExecutedInInt8() const {
return false; return false;
} }
for (size_t i = 0; i < kernel.size(); i++) { for (size_t i = 0; i < deconvAttrs.kernel.size(); i++) {
if (kernel[i] < stride[i]) if (deconvAttrs.kernel[i] < deconvAttrs.stride[i])
return false; return false;
} }
@ -301,7 +306,7 @@ bool Deconvolution::canBeExecutedInInt8() const {
: impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) ? 8 : 4; : impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) ? 8 : 4;
if (withGroups && !isDW && (IC % channelBlock != 0 || OC % channelBlock != 0)) if (withGroups && !isDW && (IC % channelBlock != 0 || OC % channelBlock != 0))
return false; return false;
if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core) && stride.back() > 3) if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core) && deconvAttrs.stride.back() > 3)
return false; return false;
InferenceEngine::Precision inPrecision = getOriginalInputPrecisionAtPort(0); InferenceEngine::Precision inPrecision = getOriginalInputPrecisionAtPort(0);
@ -310,7 +315,7 @@ bool Deconvolution::canBeExecutedInInt8() const {
InferenceEngine::Precision weiPrecision = getOriginalInputPrecisionAtPort(1); InferenceEngine::Precision weiPrecision = getOriginalInputPrecisionAtPort(1);
auto weightsDataType = DnnlExtensionUtils::IEPrecisionToDataType(weiPrecision); auto weightsDataType = DnnlExtensionUtils::IEPrecisionToDataType(weiPrecision);
if (isDW && (inputDataType == dnnl_s8 || dilation.size() == 3)) if (isDW && (inputDataType == dnnl_s8 || deconvAttrs.dilation.size() == 3))
return false; return false;
return (inputDataType == dnnl_s8 || inputDataType == dnnl_u8) && weightsDataType == dnnl_s8; return (inputDataType == dnnl_s8 || inputDataType == dnnl_u8) && weightsDataType == dnnl_s8;
@ -351,10 +356,10 @@ std::pair<VectorDims, VectorDims> Deconvolution::makeDummyInOutShape() {
const auto& weightDims = getWeightDims(); const auto& weightDims = getWeightDims();
const size_t wghOffset = getAlgorithm() == Algorithm::DeconvolutionGrouped ? 1 : 0; const size_t wghOffset = getAlgorithm() == Algorithm::DeconvolutionGrouped ? 1 : 0;
VectorDims paddings(paddingL.size()); VectorDims paddings(deconvAttrs.paddingL.size());
if (!autoPad) { if (!autoPad) {
for (size_t i = 0; i < paddings.size(); ++i) { for (size_t i = 0; i < paddings.size(); ++i) {
paddings[i] = paddingL[i] + paddingR[i]; paddings[i] = deconvAttrs.paddingL[i] + deconvAttrs.paddingR[i];
} }
} else { } else {
for (size_t i = 0; i < origInDims.size() - 2; i++) { for (size_t i = 0; i < origInDims.size() - 2; i++) {
@ -363,17 +368,17 @@ std::pair<VectorDims, VectorDims> Deconvolution::makeDummyInOutShape() {
// if input shape is dynamic and bounded, paddings should be computed basing on the following limitations: // if input shape is dynamic and bounded, paddings should be computed basing on the following limitations:
// 1. paddings must not be negative // 1. paddings must not be negative
// 2. the result padding must have such a value to keep the dummy dimensions inside the predefined interval // 2. the result padding must have such a value to keep the dummy dimensions inside the predefined interval
auto c1 = lastOutputSpatialDims[i] - outputPadding[i] - 1 - auto c1 = lastOutputSpatialDims[i] - deconvAttrs.outputPadding[i] - 1 -
(dilation[i] + 1) * static_cast<int32_t>(weightDims[wghOffset + 2 + i] - 1); (deconvAttrs.dilation[i] + 1) * static_cast<int32_t>(weightDims[wghOffset + 2 + i] - 1);
if (origInMaxDims[i + 2] != Shape::UNDEFINED_DIM) { if (origInMaxDims[i + 2] != Shape::UNDEFINED_DIM) {
auto upper_bound = stride[i] * static_cast<int32_t>(origInMaxDims[i + 2] - 1) - c1; auto upper_bound = deconvAttrs.stride[i] * static_cast<int32_t>(origInMaxDims[i + 2] - 1) - c1;
if (upper_bound < 0) { if (upper_bound < 0) {
IE_THROW() << errorPrefix << ": paddings for dummy shapes can't be computed"; IE_THROW() << errorPrefix << ": paddings for dummy shapes can't be computed";
} }
} }
auto lower_bound = stride[i] * static_cast<int32_t>(origInMinDims[i + 2] - 1) - c1; auto lower_bound = deconvAttrs.stride[i] * static_cast<int32_t>(origInMinDims[i + 2] - 1) - c1;
if (lower_bound > 0) { if (lower_bound > 0) {
paddings[i] = lower_bound; paddings[i] = lower_bound;
} }
@ -383,16 +388,16 @@ std::pair<VectorDims, VectorDims> Deconvolution::makeDummyInOutShape() {
for (size_t i = 0; i < inputDims.size() - 2; i++) { for (size_t i = 0; i < inputDims.size() - 2; i++) {
if (origInDims[2 + i] == Shape::UNDEFINED_DIM) { if (origInDims[2 + i] == Shape::UNDEFINED_DIM) {
inputDims[2 + i] = (lastOutputSpatialDims[i] - (dilation[i] + 1) * inputDims[2 + i] = (lastOutputSpatialDims[i] - (deconvAttrs.dilation[i] + 1) *
(weightDims[wghOffset + 2 + i] - 1) - 1 + paddings[i] - outputPadding[i]) / (weightDims[wghOffset + 2 + i] - 1) - 1 + paddings[i] - deconvAttrs.outputPadding[i]) /
stride[i] + 1; deconvAttrs.stride[i] + 1;
} }
} }
} }
inShape = Shape(inputDims); inShape = Shape(inputDims);
outShape = Shape(shapeInferInternal(inShape.getStaticDims(), lastOutputSpatialDims)); outShape = Shape(shapeInferInternal(inShape.getStaticDims(), lastOutputSpatialDims));
paddingL = shapeInference->get_pads_begin(); deconvAttrs.paddingL = shapeInference->get_pads_begin();
paddingR = shapeInference->get_pads_end(); deconvAttrs.paddingR = shapeInference->get_pads_end();
} }
return {inShape.getStaticDims(), outShape.getStaticDims()}; return {inShape.getStaticDims(), outShape.getStaticDims()};
} }
@ -420,7 +425,7 @@ void Deconvolution::getSupportedDescriptors() {
if (!descs.empty()) if (!descs.empty())
return; return;
isInt8 = canBeExecutedInInt8(); isInt8 = canBeExecutedInInt8();
withBiases = externOutShape ? getOriginalInputsNumber() == 4 : getOriginalInputsNumber() == 3; deconvAttrs.withBiasesParam = withBiases = externOutShape ? getOriginalInputsNumber() == 4 : getOriginalInputsNumber() == 3;
//ONEDNN deconvolution_fwd_t primitive can support bias fusing. //ONEDNN deconvolution_fwd_t primitive can support bias fusing.
//ONEDNN convolution_data_bwd_t can't support bias fusing. //ONEDNN convolution_data_bwd_t can't support bias fusing.
//Current only int8 precision choose deconvolution_fwd_t. //Current only int8 precision choose deconvolution_fwd_t.
@ -463,6 +468,41 @@ void Deconvolution::getSupportedDescriptors() {
Shape outShape(outDims); Shape outShape(outDims);
initPaddingR(inShape, outShape); initPaddingR(inShape, outShape);
#if defined(OV_CPU_WITH_ACL)
NodeConfig config;
config.inConfs.resize(getParentEdges().size());
config.outConfs.resize(getOriginalOutputsNumber());
auto& creatorsMap = BlockedDescCreator::getCommonCreators();
for (size_t i = 0; i < getParentEdges().size(); ++i) {
auto checkDesc = [&](LayoutType format) -> bool {
NodeConfig config;
config.inConfs.resize(getParentEdges().size());
config.outConfs.resize(getOriginalOutputsNumber());
for (size_t i = 0; i < getParentEdges().size(); ++i) {
config.inConfs[i].setMemDesc(
creatorsMap.at(format)->createSharedDesc(getOriginalInputPrecisionAtPort(i), getInputShapeAtPort(i)));
}
config.outConfs[0].setMemDesc(
creatorsMap.at(format)->createSharedDesc(getOriginalOutputPrecisionAtPort(0), getOutputShapeAtPort(0)));
std::vector<MemoryDescPtr> srcMemoryDescs;
for (size_t i = 0; i < config.inConfs.size(); i++) {
srcMemoryDescs.push_back(config.inConfs[i].getMemDesc());
}
std::vector<MemoryDescPtr> dstMemoryDescs;
for (size_t i = 0; i < config.outConfs.size(); i++) {
dstMemoryDescs.push_back(config.outConfs[i].getMemDesc());
}
return AclDeconvExecutorBuilder::customIsSupported(deconvAttrs, srcMemoryDescs, dstMemoryDescs);
};
useACL = checkDesc(LayoutType::nspc) || checkDesc(LayoutType::ncsp);
}
if (useACL) return;
#endif
setPostOps(*attr, outShape.getStaticDims()); setPostOps(*attr, outShape.getStaticDims());
if (isInt8) { if (isInt8) {
@ -484,15 +524,14 @@ void Deconvolution::getSupportedDescriptors() {
} }
void Deconvolution::initPaddingR(const Shape &inShape, const Shape &outShape) { void Deconvolution::initPaddingR(const Shape &inShape, const Shape &outShape) {
for (size_t i = 0; i < paddingR.size(); i++) { for (size_t i = 0; i < deconvAttrs.paddingR.size(); i++) {
int with_group = getAlgorithm() == Algorithm::DeconvolutionGrouped ? 1 : 0; int with_group = getAlgorithm() == Algorithm::DeconvolutionGrouped ? 1 : 0;
const auto& weightDims = getWeightDims(); const auto& weightDims = getWeightDims();
int krn = weightDims[with_group + 2 + i]; int krn = weightDims[with_group + 2 + i];
int src = outShape.getStaticDims()[2 + i]; int src = outShape.getStaticDims()[2 + i];
int dst = inShape.getStaticDims()[2 + i]; int dst = inShape.getStaticDims()[2 + i];
krn = (krn - 1)*(deconvAttrs.dilation[i] + 1) + 1;
krn = (krn - 1)*(dilation[i] + 1) + 1; deconvAttrs.paddingR[i] = (dst - 1) * deconvAttrs.stride[i] - (src - krn + deconvAttrs.paddingL[i]);
paddingR[i] = (dst - 1) * stride[i] - (src - krn + paddingL[i]);
} }
} }
@ -594,6 +633,20 @@ VectorDims Deconvolution::shapeInferInternal(const VectorDims &inDims, std::vect
} }
void Deconvolution::execute(dnnl::stream strm) { void Deconvolution::execute(dnnl::stream strm) {
if (useACL) {
std::vector<MemoryCPtr> srcMemory;
for (size_t i = 0; i < getOriginalInputsNumber(); i++) {
srcMemory.push_back(getParentEdgeAt(i)->getMemoryPtr());
}
std::vector<MemoryPtr> dstMemory;
for (size_t i = 0; i < getOriginalOutputsNumber(); i++) {
dstMemory.push_back(getChildEdgeAt(i)->getMemoryPtr());
}
//TODO: need to pass post ops data
execPtrDeconv->exec(srcMemory, dstMemory, nullptr);
return;
}
if (!execPtr) { if (!execPtr) {
IE_THROW() << "Can't execute Deconvolution node with name: " << getName() << ", because executor is not compiled"; IE_THROW() << "Can't execute Deconvolution node with name: " << getName() << ", because executor is not compiled";
} }
@ -774,7 +827,7 @@ void Deconvolution::createPrimitive() {
const AttrPtr pAttr = makePrimitiveAttr(outDims); const AttrPtr pAttr = makePrimitiveAttr(outDims);
auto prim_desc = createInt8MkldnnDeconvDesc(inDesc->getDnnlDesc(), wgh_candidate, dnnlBiasDesc, outDesc->getDnnlDesc(), withBiases, auto prim_desc = createInt8MkldnnDeconvDesc(inDesc->getDnnlDesc(), wgh_candidate, dnnlBiasDesc, outDesc->getDnnlDesc(), withBiases,
stride, dilation, paddingL, paddingR, *pAttr, getEngine()); deconvAttrs.stride, deconvAttrs.dilation, deconvAttrs.paddingL, deconvAttrs.paddingR, *pAttr, getEngine());
const bool found = DnnlExtensionUtils::find_implementation(prim_desc, selectedImpl); const bool found = DnnlExtensionUtils::find_implementation(prim_desc, selectedImpl);
@ -803,10 +856,26 @@ void Deconvolution::prepareParams() {
IE_THROW() << "Input memory has not been allocated."; IE_THROW() << "Input memory has not been allocated.";
if (!wghMemPtr || !wghMemPtr->isAllocated()) if (!wghMemPtr || !wghMemPtr->isAllocated())
IE_THROW() << "Weight memory has not been allocated."; IE_THROW() << "Weight memory has not been allocated.";
const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); auto selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr) if (selected_pd == nullptr)
IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << "."; IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << ".";
if (useACL) {
std::vector<MemoryDescPtr> srcMemoryDescs;
for (size_t i = 0; i < getOriginalInputsNumber(); i++) {
srcMemoryDescs.push_back(getParentEdgesAtPort(i).front()->getMemory().getDescWithType<DnnlMemoryDesc>());
}
std::vector<MemoryDescPtr> dstMemoryDescs;
for (size_t i = 0; i < getOriginalOutputsNumber(); i++) {
dstMemoryDescs.push_back(getChildEdgesAtPort(i).front()->getMemory().getDescWithType<DnnlMemoryDesc>());
}
execPtrDeconv = selected_pd->getExecutorFactoryAs<DeconvExecutorFactory>()->makeExecutor(deconvAttrs, srcMemoryDescs,
dstMemoryDescs, *attr);
selected_pd->setImplementationType(execPtrDeconv->getImplType());
return;
}
auto inMemoryDesc = getParentEdgesAtPort(0).front()->getMemory().getDescWithType<DnnlMemoryDesc>(); auto inMemoryDesc = getParentEdgesAtPort(0).front()->getMemory().getDescWithType<DnnlMemoryDesc>();
auto outMemoryDesc = getChildEdgesAtPort(0).front()->getMemory().getDescWithType<DnnlMemoryDesc>(); auto outMemoryDesc = getChildEdgesAtPort(0).front()->getMemory().getDescWithType<DnnlMemoryDesc>();
@ -817,8 +886,8 @@ void Deconvolution::prepareParams() {
} }
pAttrLocal = pAttr; pAttrLocal = pAttr;
if (autoPad || externOutShape) { if (autoPad || externOutShape) {
paddingL = shapeInference->get_pads_begin(); deconvAttrs.paddingL = shapeInference->get_pads_begin();
paddingR = shapeInference->get_pads_end(); deconvAttrs.paddingR = shapeInference->get_pads_end();
} }
initPaddingR(inMemoryDesc->getShape(), outMemoryDesc->getShape()); initPaddingR(inMemoryDesc->getShape(), outMemoryDesc->getShape());
} else { } else {
@ -846,10 +915,10 @@ void Deconvolution::prepareParams() {
wghDesc, wghDesc,
biasDesc, biasDesc,
outMemoryDesc, outMemoryDesc,
stride, deconvAttrs.stride,
dilation, deconvAttrs.dilation,
paddingL, deconvAttrs.paddingL,
paddingR, deconvAttrs.paddingR,
isInt8, isInt8,
*pAttrLocal, *pAttrLocal,
selected_pd->getImplementationType()}; selected_pd->getImplementationType()};
@ -1006,14 +1075,16 @@ void Deconvolution::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc
} }
dnnl::memory::desc wgh_candidate(DnnlExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any); dnnl::memory::desc wgh_candidate(DnnlExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any);
descs.emplace_back(createDescriptorInternalInt8(in_candidate, wgh_candidate, bias_candidate, descs.emplace_back(createDescriptorInternalInt8(in_candidate, wgh_candidate, bias_candidate,
out_candidate, withBiases, stride, dilation, paddingL, paddingR, *attr, getEngine())); out_candidate, withBiases, deconvAttrs.stride, deconvAttrs.dilation,
deconvAttrs.paddingL, deconvAttrs.paddingR, *attr, getEngine()));
} else { } else {
dnnl::memory::desc wgh_candidate(DnnlExtensionUtils::convertToDnnlDims(getWeightDims()), dnnl::memory::desc wgh_candidate(DnnlExtensionUtils::convertToDnnlDims(getWeightDims()),
dnnlInDesc.getDataType(), memory::format_tag::any); dnnlInDesc.getDataType(), memory::format_tag::any);
convolution_backward_data::primitive_desc deconv_desc; convolution_backward_data::primitive_desc deconv_desc;
convolution_forward::primitive_desc fwd_conv_pd; convolution_forward::primitive_desc fwd_conv_pd;
std::tie(deconv_desc, fwd_conv_pd) = createDescriptorInternalDefault(in_candidate, wgh_candidate, out_candidate, dnnl::algorithm::convolution_direct, std::tie(deconv_desc, fwd_conv_pd) = createDescriptorInternalDefault(in_candidate, wgh_candidate, out_candidate, dnnl::algorithm::convolution_direct,
stride, dilation, paddingL, paddingR, *attr, getEngine()); deconvAttrs.stride, deconvAttrs.dilation, deconvAttrs.paddingL,
deconvAttrs.paddingR, *attr, getEngine());
IE_ASSERT(fwd_conv_pd && deconv_desc && deconv_desc.get(true) != nullptr) IE_ASSERT(fwd_conv_pd && deconv_desc && deconv_desc.get(true) != nullptr)
<< "Failed to create convolution_backward_data::primitive_desc: " << "Node: ##" << getName(); << "Failed to create convolution_backward_data::primitive_desc: " << "Node: ##" << getName();
fwdConvPD.push_back(fwd_conv_pd); // oneDNN requires forward pd to exists until primitive is created fwdConvPD.push_back(fwd_conv_pd); // oneDNN requires forward pd to exists until primitive is created
@ -1120,6 +1191,44 @@ bool Deconvolution::canFuseBias() const {
(externOutShape ? getParentEdges().size() == 3 : getParentEdges().size() == 2)); (externOutShape ? getParentEdges().size() == 3 : getParentEdges().size() == 2));
} }
// Registers supported primitive descriptors for this node.
// For the non-ACL path the generic Node implementation is reused; for the ACL
// path a single plain-layout (ncsp) config is built and bound to a
// DeconvExecutorFactory that will later create the ACL executor.
void Deconvolution::initSupportedPrimitiveDescriptors() {
    if (!useACL) {
        // Default oneDNN-based descriptor enumeration.
        Node::initSupportedPrimitiveDescriptors();
        return;
    }
    auto& creatorsMap = BlockedDescCreator::getCommonCreators();
    auto pushDesc = [&](LayoutType format) {
        NodeConfig config;
        config.inConfs.resize(getParentEdges().size());
        config.outConfs.resize(getOriginalOutputsNumber());
        for (size_t i = 0; i < getParentEdges().size(); ++i) {
            config.inConfs[i].setMemDesc(
                // ACL expects all ports to share one precision, so input port 0's
                // precision is applied to every input.
                creatorsMap.at(format)->createSharedDesc(getOriginalInputPrecisionAtPort(0), getInputShapeAtPort(i)));
        }
        config.outConfs[0].setMemDesc(
            // ACL expects equal precision: output also uses input port 0's precision.
            creatorsMap.at(format)->createSharedDesc(getOriginalInputPrecisionAtPort(0), getOutputShapeAtPort(0)));
        // Collect the raw memory descriptors the executor factory needs.
        std::vector<MemoryDescPtr> srcMemoryDescs;
        for (size_t i = 0; i < config.inConfs.size(); i++) {
            srcMemoryDescs.push_back(config.inConfs[i].getMemDesc());
        }
        std::vector<MemoryDescPtr> dstMemoryDescs;
        for (size_t i = 0; i < config.outConfs.size(); i++) {
            dstMemoryDescs.push_back(config.outConfs[i].getMemDesc());
        }
        auto factory = std::make_shared<DeconvExecutorFactory>(deconvAttrs, srcMemoryDescs, dstMemoryDescs,
                                                               std::make_shared<ExecutorContext>(context, getImplPriority()));
        supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::acl, factory);
    };
    // NOTE(review): only ncsp is registered here although the ACL support probe
    // earlier (getSupportedDescriptors) also checks nspc — confirm the nspc
    // omission is intentional.
    pushDesc(LayoutType::ncsp);
}
} // namespace node } // namespace node
} // namespace intel_cpu } // namespace intel_cpu

View File

@ -11,6 +11,8 @@
#include <vector> #include <vector>
#include "common/dnnl_executor.h" #include "common/dnnl_executor.h"
#include "executors/deconv_list.hpp"
namespace ov { namespace ov {
namespace intel_cpu { namespace intel_cpu {
namespace node { namespace node {
@ -20,6 +22,7 @@ public:
Deconvolution(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context); Deconvolution(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
void getSupportedDescriptors() override; void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createDescriptor(const std::vector<MemoryDescPtr>& inputDesc, void createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
const std::vector<MemoryDescPtr>& outputDesc) override; const std::vector<MemoryDescPtr>& outputDesc) override;
void createPrimitive() override; void createPrimitive() override;
@ -41,7 +44,7 @@ public:
bool canFuse(const NodePtr& node) const override; bool canFuse(const NodePtr& node) const override;
const VectorDims& getWeightDims() const { return getInputShapeAtPort(1).getStaticDims(); } const VectorDims& getWeightDims() const { return getInputShapeAtPort(1).getStaticDims(); }
const std::vector<ptrdiff_t>& getStride() const { return stride; } const std::vector<ptrdiff_t>& getStride() const { return deconvAttrs.stride; }
void prepareParams() override; void prepareParams() override;
void execute(dnnl::stream strm) override; void execute(dnnl::stream strm) override;
@ -55,6 +58,7 @@ protected:
AttrPtr initPrimitiveAttr() override; AttrPtr initPrimitiveAttr() override;
AttrPtr makePrimitiveAttr(const VectorDims& dims); AttrPtr makePrimitiveAttr(const VectorDims& dims);
std::vector<dnnl::memory::format_tag> getAvailableFormatsForDims(const Shape& dims) const override; std::vector<dnnl::memory::format_tag> getAvailableFormatsForDims(const Shape& dims) const override;
std::shared_ptr<DeconvExecutor> execPtrDeconv = nullptr;
private: private:
using executorPtr = std::shared_ptr<DnnlExecutor>; using executorPtr = std::shared_ptr<DnnlExecutor>;
@ -89,16 +93,13 @@ private:
size_t groupNum = 1; size_t groupNum = 1;
size_t IC = 0; size_t IC = 0;
size_t OC = 0; size_t OC = 0;
std::vector<ptrdiff_t> kernel;
std::vector<ptrdiff_t> stride;
std::vector<ptrdiff_t> dilation;
ov::CoordinateDiff paddingL;
ov::CoordinateDiff paddingR;
ov::CoordinateDiff outputPadding;
std::vector<int32_t> lastOutputSpatialDims; std::vector<int32_t> lastOutputSpatialDims;
VectorDims int8WeightDims; VectorDims int8WeightDims;
VectorDims expectedBiasDims {}; VectorDims expectedBiasDims {};
bool useACL = false;
DeconvAttrs deconvAttrs;
Shape inShape; Shape inShape;
AttrPtr pAttr; AttrPtr pAttr;

View File

@ -0,0 +1,248 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "acl_deconv.hpp"
#include "ie_parallel.hpp"
namespace ov {
namespace intel_cpu {
using namespace arm_compute;
// Translates OpenVINO deconvolution attributes and memory descriptors into the
// ACL tensor descriptions (src/weights/bias/dst) plus the PadStrideInfo that
// NEDeconvolutionLayer consumes.
ACLDeconvTensorInfo getACLDeconvTensorInfo(const DeconvAttrs& deconvAttrs,
                                           const std::vector<MemoryDescPtr>& srcDescs,
                                           const std::vector<MemoryDescPtr>& dstDescs) {
    auto srcDims = srcDescs[0]->getShape().getDims();
    auto dstDims = dstDescs[0]->getShape().getDims();
    auto weiDims = srcDescs[1]->getShape().getDims();
    // ACL expects a [O, I, H, W] weights tensor while OV provides [I, O, H, W],
    // so the two outermost dimensions are exchanged in the descriptor.
    std::swap(weiDims[0], weiDims[1]);

    TensorInfo srcTensorInfo = TensorInfo(shapeCast(srcDims), 1,
        precisionToAclDataType(srcDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[0]));
    TensorInfo weiTensorInfo = TensorInfo(shapeCast(weiDims), 1,
        precisionToAclDataType(srcDescs[1]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[1]));
    TensorInfo dstTensorInfo = TensorInfo(shapeCast(dstDims), 1,
        precisionToAclDataType(dstDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(dstDescs[0]));

    // Bias description is only meaningful when the bias input exists.
    VectorDims biasDims;
    TensorInfo biasTensorInfo;
    if (deconvAttrs.withBiasesParam) {
        biasDims = srcDescs[2]->getShape().getStaticDims();
        biasTensorInfo = TensorInfo(shapeCast(biasDims), 1,
            precisionToAclDataType(srcDescs[2]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[2]));
    }

    // For 1D (single spatial dim) cases the lone value serves both axes.
    const auto& padL = deconvAttrs.paddingL;
    const auto& padR = deconvAttrs.paddingR;
    unsigned int pad_l = static_cast<unsigned int>((padL.size() > 1) ? padL.at(1) : padL.at(0));
    unsigned int pad_r = static_cast<unsigned int>((padR.size() > 1) ? padR.at(1) : padR.at(0));
    unsigned int pad_t = static_cast<unsigned int>(padL.at(0));
    unsigned int pad_b = static_cast<unsigned int>(padR.at(0));
    unsigned int stride_x = (deconvAttrs.stride.size() > 1) ? deconvAttrs.stride.at(1) : deconvAttrs.stride.at(0);
    unsigned int stride_y = deconvAttrs.stride.at(0);
    PadStrideInfo deconv_info(stride_x, stride_y, pad_l, pad_r, pad_t, pad_b, DimensionRoundingType::FLOOR);

    return ACLDeconvTensorInfo{srcTensorInfo, weiTensorInfo, biasTensorInfo, dstTensorInfo, deconv_info};
}
// Delegates context bookkeeping to the DeconvExecutor base; all real setup happens in init().
AclDeconvExecutor::AclDeconvExecutor(const ExecutorContext::CPtr context) : DeconvExecutor(context) {}
// Validates the configuration against ACL and, on success, prepares the ACL
// tensors, the NEDeconvolutionLayer object and the weights-repack scratch
// buffer. Returns false when ACL cannot execute this deconvolution.
bool AclDeconvExecutor::init(const DeconvAttrs& deconvAttrs,
                             const std::vector<MemoryDescPtr>& srcDescs,
                             const std::vector<MemoryDescPtr>& dstDescs,
                             const dnnl::primitive_attr &attr) {
    this->deconvAttrs = deconvAttrs;

    // Derive ACL tensor descriptions and the pad/stride info from the attrs.
    ACLDeconvTensorInfo tensorInfo = getACLDeconvTensorInfo(deconvAttrs, srcDescs, dstDescs);
    TensorInfo srcTensorInfo = tensorInfo.srcTensorInfo;
    TensorInfo weiTensorInfo = tensorInfo.weiTensorInfo;
    TensorInfo biasTensorInfo = tensorInfo.biasTensorInfo;
    TensorInfo dstTensorInfo = tensorInfo.dstTensorInfo;
    PadStrideInfo deconv_info = tensorInfo.deconv_info;

    // Let ACL reject unsupported configurations before any tensor is touched.
    arm_compute::Status status = arm_compute::NEDeconvolutionLayer::validate(
        &srcTensorInfo,
        &weiTensorInfo,
        deconvAttrs.withBiasesParam ? &biasTensorInfo : nullptr,
        &dstTensorInfo,
        deconv_info);
    if (!status) {
        DEBUG_LOG("NEDeconvolutionLayer validation failed: ", status.error_description());
        return false;
    }

    // Attach the tensor descriptions; actual buffers are imported in exec().
    srcTensor.allocator()->init(srcTensorInfo);
    weiTensor.allocator()->init(weiTensorInfo);
    dstTensor.allocator()->init(dstTensorInfo);
    if (deconvAttrs.withBiasesParam)
        biasTensor.allocator()->init(biasTensorInfo);

    deconv = std::make_unique<arm_compute::NEDeconvolutionLayer>();
    deconv->configure(&srcTensor, &weiTensor, deconvAttrs.withBiasesParam ? &biasTensor : nullptr, &dstTensor, deconv_info);

    // Scratch buffer for repacked weights: ACL expects [O, I, H, W] while OV
    // stores [I, O, H, W]; the transpose into this buffer happens per exec().
    const auto& weiShape = srcDescs[1]->getShape().getStaticDims();
    size_t weiElementCount = 1;
    for (size_t d = 0; d < 4; ++d) {
        weiElementCount *= weiShape[d];
    }
    weiBuffer = std::vector<float>(weiElementCount);
    return true;
}
// Repacks a 4D float tensor from [D0, D1, D2, D3] into [D1, D0, D2, D3] order,
// i.e. swaps the two outermost dimensions while keeping the inner [D2, D3]
// block layout intact. Used to convert OV [I, O, H, W] weights to ACL's
// [O, I, H, W] expectation.
static void transpose_to_1023(const MemoryCPtr& srcMemPtr, std::vector<float>& dst_data) {
    const auto src_data = reinterpret_cast<float*>(srcMemPtr->getData());
    const auto& dims = srcMemPtr->getStaticDims();
    const int D0 = dims[0];
    const int D1 = dims[1];
    const int D2 = dims[2];
    const int D3 = dims[3];
    const int innerBlock = D2 * D3;  // size of one contiguous [D2, D3] slab
    parallel_for3d(D0, D1, D2, [&](const int d0, const int d1, const int d2) {
        // Base offsets differ only in how (d0, d1) are combined.
        const int srcBase = (d0 * D1 + d1) * innerBlock + d2 * D3;
        const int dstBase = (d1 * D0 + d0) * innerBlock + d2 * D3;
        for (int d3 = 0; d3 < D3; ++d3) {
            dst_data[dstBase + d3] = src_data[srcBase + d3];
        }
    });
}
// Runs the configured NEDeconvolutionLayer on the given src/dst memories.
// Weights are repacked on every call (see TODO); post_ops_data_ is currently
// unused by the ACL path.
void AclDeconvExecutor::exec(const std::vector<MemoryCPtr>& src, const std::vector<MemoryPtr>& dst, const void *post_ops_data_) {
    // OV stores weights as [I, O, H, W] but ACL needs [O, I, H, W]:
    // repack into the scratch buffer allocated in init().
    // TODO: Remove transpose from exec
    transpose_to_1023(src[1], weiBuffer);

    // Bind caller-owned buffers to the ACL tensors (no copies are made).
    weiTensor.allocator()->import_memory(weiBuffer.data());
    srcTensor.allocator()->import_memory(src[0]->getData());
    if (deconvAttrs.withBiasesParam)
        biasTensor.allocator()->import_memory(src[2]->getData());
    dstTensor.allocator()->import_memory(dst[0]->getData());

    deconv->run();

    // Detach the imported buffers; ownership remains with the caller.
    srcTensor.allocator()->free();
    weiTensor.allocator()->free();
    dstTensor.allocator()->free();
    if (deconvAttrs.withBiasesParam)
        biasTensor.allocator()->free();
}
// Static capability check for the ACL deconvolution executor: returns true only
// when arm_compute::NEDeconvolutionLayer can execute a deconvolution with the
// given attributes and memory descriptors. Combines rank/precision/layout
// filters, heuristic performance cut-offs, and ACL's own validate().
bool AclDeconvExecutorBuilder::customIsSupported(const DeconvAttrs &deconvAttrs,
                                                 const std::vector<MemoryDescPtr> &srcDescs,
                                                 const std::vector<MemoryDescPtr> &dstDescs) {
    // Only 3D/4D activations with matching dst rank and 4D weights are handled.
    if ((srcDescs[0]->getShape().getDims().size() != 3 && srcDescs[0]->getShape().getDims().size() != 4) ||
        dstDescs[0]->getShape().getDims().size() != srcDescs[0]->getShape().getDims().size() ||
        srcDescs[1]->getShape().getDims().size() != 4) {
        DEBUG_LOG("AclDeconvExecutor does not support dimension:",
                  " src[0]=", srcDescs[0]->getShape().getDims().size(),
                  " src[1]=", srcDescs[1]->getShape().getDims().size(),
                  " dst[0]=", dstDescs[0]->getShape().getDims().size());
        return false;
    }
    // All ports must share one (FP32) precision.
    // TODO: Ticket CVS-114087 - enable FP16 when the FP16 scope is checked
    if (!(one_of(srcDescs[0]->getPrecision(), /*InferenceEngine::Precision::FP16, */InferenceEngine::Precision::FP32) &&
          srcDescs[0]->getPrecision() == srcDescs[1]->getPrecision() &&
          srcDescs[1]->getPrecision() == dstDescs[0]->getPrecision())) {
        DEBUG_LOG("AclDeconvExecutor does not support precisions:",
                  " src[0]=", srcDescs[0]->getPrecision(),
                  " src[1]=", srcDescs[1]->getPrecision(),
                  " dst[0]=", dstDescs[0]->getPrecision());
        return false;
    }
    if (deconvAttrs.withBiasesParam && srcDescs[2]->getPrecision() != srcDescs[0]->getPrecision()) {
        DEBUG_LOG("AclDeconvExecutor does not support precisions:",
                  " src[2]=", srcDescs[2]->getPrecision());
        return false;
    }
    // Src, weights and dst must all be plain (ncsp) or all channels-last (nspc).
    if (!(srcDescs[0]->hasLayoutType(LayoutType::ncsp) &&
          srcDescs[1]->hasLayoutType(LayoutType::ncsp) &&
          dstDescs[0]->hasLayoutType(LayoutType::ncsp)) &&
        !(srcDescs[0]->hasLayoutType(LayoutType::nspc) &&
          srcDescs[1]->hasLayoutType(LayoutType::nspc) &&
          dstDescs[0]->hasLayoutType(LayoutType::nspc))) {
        DEBUG_LOG("AclDeconvExecutor does not support layouts:",
                  " src[0]=", srcDescs[0]->serializeFormat(),
                  " src[1]=", srcDescs[1]->serializeFormat(),
                  " dst=", dstDescs[0]->serializeFormat());
        return false;
    }
    if (deconvAttrs.withBiasesParam &&
        !(srcDescs[2]->hasLayoutType(LayoutType::ncsp)) &&
        !(srcDescs[2]->hasLayoutType(LayoutType::nspc))) {
        DEBUG_LOG("AclDeconvExecutor does not support layouts:",
                  " src[0]=", srcDescs[0]->serializeFormat(),
                  " src[1]=", srcDescs[1]->serializeFormat(),
                  " src[2]=", srcDescs[2]->serializeFormat(),
                  " dst=", dstDescs[0]->serializeFormat());
        return false;
    }
    ACLDeconvTensorInfo aclDeconvTensorInfo = getACLDeconvTensorInfo(deconvAttrs, srcDescs, dstDescs);
    TensorInfo srcTensorInfo = aclDeconvTensorInfo.srcTensorInfo;
    TensorInfo weiTensorInfo = aclDeconvTensorInfo.weiTensorInfo;
    TensorInfo biasTensorInfo = aclDeconvTensorInfo.biasTensorInfo;
    TensorInfo dstTensorInfo = aclDeconvTensorInfo.dstTensorInfo;
    PadStrideInfo deconv_info = aclDeconvTensorInfo.deconv_info;
    // For 1D cases the lone spatial value is reused for both axes.
    unsigned int kernel_x = (deconvAttrs.kernel.size() > 1) ? deconvAttrs.kernel.at(1) : deconvAttrs.kernel.at(0);
    unsigned int kernel_y = deconvAttrs.kernel.at(0);
    // After stride=8 up-sampling in ACL Deconvolution layer slower than reference
    if (deconv_info.stride().first >= 8 || deconv_info.stride().second >= 8) return false;
    // NOTE(review): deconvAttrs.dilation stores (dilation - 1) per the node
    // constructor, so 0/1 here correspond to actual dilations 1/2 — confirm
    // that allowing stored value 1 (dilation 2) is intended for ACL.
    unsigned int dilation_x = (deconvAttrs.dilation.size() > 1) ? deconvAttrs.dilation.at(1) : deconvAttrs.dilation.at(0);
    unsigned int dilation_y = deconvAttrs.dilation.at(0);
    if (!one_of(dilation_x, static_cast<unsigned int >(0), static_cast<unsigned int >(1)) ||
        !one_of(dilation_y, static_cast<unsigned int >(0), static_cast<unsigned int >(1))) return false;
    // NOTE(review): for 3D (single-spatial-dim) inputs getDims() has only 3
    // elements, so indexing [3] below looks out of range — confirm whether 3D
    // shapes can actually reach this point after the checks above.
    size_t in_h = srcDescs[0]->hasLayoutType(LayoutType::ncsp) ? srcDescs[0]->getShape().getDims()[2] : srcDescs[0]->getShape().getDims()[1];
    size_t in_w = srcDescs[0]->hasLayoutType(LayoutType::ncsp) ? srcDescs[0]->getShape().getDims()[3] : srcDescs[0]->getShape().getDims()[2];
    // Validate function has bug (https://github.com/ARM-software/ComputeLibrary/issues/1061) with error exception.
    // We copy deconvolution_output_dimensions function for get correct validation
    // TODO: remove after fix
    if (validate_deconvolution_output_dimensions(in_w, in_h, kernel_x, kernel_y, deconv_info)) {
        DEBUG_LOG("NEDeconvolutionLayer arm_compute::deconvolution_output_dimensions failed");
        return false;
    }
    // Finally defer to ACL's own validation for everything not covered above.
    arm_compute::Status status = arm_compute::NEDeconvolutionLayer::validate(&srcTensorInfo,
                                                                             &weiTensorInfo,
                                                                             deconvAttrs.withBiasesParam ? &biasTensorInfo : nullptr,
                                                                             &dstTensorInfo,
                                                                             deconv_info);
    if (!status) {
        DEBUG_LOG("NEDeconvolutionLayer validation failed: ", status.error_description());
        return false;
    }
    return true;
}
bool AclDeconvExecutorBuilder::validate_deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height,
                                                                        unsigned int kernel_width,
                                                                        unsigned int kernel_height,
                                                                        const PadStrideInfo &pad_stride_info) {
    // Mirrors the dimension checks of arm_compute::deconvolution_output_dimensions().
    // Returns true when the configuration is INVALID (callers reject it instead of
    // hitting the ACL exception, see https://github.com/ARM-software/ComputeLibrary/issues/1061).
    const unsigned int stride_x = pad_stride_info.stride().first;
    const unsigned int stride_y = pad_stride_info.stride().second;
    const unsigned int total_pad_x = pad_stride_info.pad_left() + pad_stride_info.pad_right();
    const unsigned int total_pad_y = pad_stride_info.pad_top() + pad_stride_info.pad_bottom();

    // Degenerate spatial input sizes are invalid.
    if (in_width < 1 || in_height < 1) {
        return true;
    }
    // The up-sampled extent plus kernel must cover the total padding in each dimension,
    // otherwise the computed output size would underflow.
    if ((in_width - 1) * stride_x + kernel_width < total_pad_x) {
        return true;
    }
    if ((in_height - 1) * stride_y + kernel_height < total_pad_y) {
        return true;
    }
    return false;
}
} // namespace intel_cpu
} // namespace ov

View File

@ -0,0 +1,78 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "nodes/executors/deconv.hpp"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "utils/debug_capabilities.h"
#include "acl_utils.hpp"
namespace ov {
namespace intel_cpu {
// Aggregated ACL tensor metadata needed to validate/configure arm_compute::NEDeconvolutionLayer.
struct ACLDeconvTensorInfo {
    arm_compute::TensorInfo srcTensorInfo;   // source (input) tensor description
    arm_compute::TensorInfo weiTensorInfo;   // weights tensor description
    arm_compute::TensorInfo biasTensorInfo;  // bias tensor description (used only when a bias input is present)
    arm_compute::TensorInfo dstTensorInfo;   // destination (output) tensor description
    arm_compute::PadStrideInfo deconv_info;  // stride/padding configuration passed to the ACL layer
};

// Builds the ACL tensor descriptors and pad/stride info from the framework-level
// deconvolution attributes and the src/dst memory descriptors.
ACLDeconvTensorInfo getACLDeconvTensorInfo(const DeconvAttrs& deconvAttrs,
                                           const std::vector<MemoryDescPtr>& srcDescs,
                                           const std::vector<MemoryDescPtr>& dstDescs);
// Deconvolution executor backed by arm_compute::NEDeconvolutionLayer (Arm Compute Library).
class AclDeconvExecutor : public DeconvExecutor {
public:
    explicit AclDeconvExecutor(const ExecutorContext::CPtr context);
    // Validates and configures the underlying ACL layer for the given attributes and
    // memory descriptors. Returns false when the ACL backend cannot handle them.
    bool init(const DeconvAttrs& deconvAttrs,
              const std::vector<MemoryDescPtr>& srcDescs,
              const std::vector<MemoryDescPtr>& dstDescs,
              const dnnl::primitive_attr &attr) override;
    // Executes the deconvolution on the given source/destination memory.
    void exec(const std::vector<MemoryCPtr>& src,
              const std::vector<MemoryPtr>& dst,
              const void *post_ops_data_) override;

    impl_desc_type getImplType() const override {
        return implType;
    }

private:
    DeconvAttrs deconvAttrs;
    impl_desc_type implType = impl_desc_type::acl;

    // ACL tensor handles the executor binds to OpenVINO memory.
    arm_compute::Tensor srcTensor;
    arm_compute::Tensor weiTensor;
    arm_compute::Tensor biasTensor;
    arm_compute::Tensor dstTensor;
    std::unique_ptr<arm_compute::NEDeconvolutionLayer> deconv = nullptr;
    // NOTE(review): presumably holds repacked/converted weights — confirm against init()/exec().
    std::vector<float> weiBuffer;
};
// Builder that reports whether the ACL backend supports a deconvolution configuration
// and creates AclDeconvExecutor instances.
class AclDeconvExecutorBuilder : public DeconvExecutorBuilder {
public:
    // Static support check so it can also be queried without a builder instance.
    static bool customIsSupported(const DeconvAttrs& deconvAttrs,
                                  const std::vector<MemoryDescPtr>& srcDescs,
                                  const std::vector<MemoryDescPtr>& dstDescs);

    bool isSupported(const DeconvAttrs& deconvAttrs,
                     const std::vector<MemoryDescPtr>& srcDescs,
                     const std::vector<MemoryDescPtr>& dstDescs) const override {
        return customIsSupported(deconvAttrs, srcDescs, dstDescs);
    }

    DeconvExecutorPtr makeExecutor(const ExecutorContext::CPtr context) const override {
        return std::make_shared<AclDeconvExecutor>(context);
    }

private:
    // Re-implementation of the arm_compute::deconvolution_output_dimensions() checks;
    // returns true when the dimensions are invalid. Workaround for
    // https://github.com/ARM-software/ComputeLibrary/issues/1061 — remove once fixed upstream.
    static bool validate_deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height,
                                                         unsigned int kernel_width, unsigned int kernel_height,
                                                         const arm_compute::PadStrideInfo &pad_stride_info);
};
} // namespace intel_cpu
} // namespace ov

View File

@ -0,0 +1,13 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "deconv.hpp"
namespace ov {
namespace intel_cpu {
// NOTE(review): this translation unit contains no definitions and the using-directive
// below appears unused here — confirm it is needed before removing.
using namespace InferenceEngine;
} // namespace intel_cpu
} // namespace ov

View File

@ -0,0 +1,60 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "cpu_memory.h"
#include "onednn/iml_type_mapper.h"
#include "executor.hpp"
#include <common/primitive_hashing_utils.hpp>
#include "nodes/common/dnnl_executor.h"
namespace ov {
namespace intel_cpu {
// Backend-agnostic deconvolution parameters shared by all DeconvExecutor implementations.
struct DeconvAttrs {
    std::vector<ptrdiff_t> kernel;     // spatial kernel sizes
    std::vector<ptrdiff_t> stride;     // spatial strides
    std::vector<ptrdiff_t> dilation;   // stored as (original dilation - 1), oneDNN-style
    std::vector<ptrdiff_t> paddingL;   // pads_begin
    std::vector<ptrdiff_t> paddingR;   // pads_end
    ov::CoordinateDiff outputPadding;  // extra output padding from ConvolutionBackpropData
    bool withBiasesParam = false;      // true when a bias input is present
};
// Abstract interface for a deconvolution executor implementation (e.g. ACL).
class DeconvExecutor {
public:
    explicit DeconvExecutor(const ExecutorContext::CPtr context) : context(context) {}

    // Prepares the executor for the given attributes and memory descriptors.
    // Returns false when this backend cannot execute the configuration.
    virtual bool init(const DeconvAttrs& deconvAttrs,
                      const std::vector<MemoryDescPtr>& srcDescs,
                      const std::vector<MemoryDescPtr>& dstDescs,
                      const dnnl::primitive_attr &attr) = 0;
    // Executes the deconvolution on the given source/destination memory.
    virtual void exec(const std::vector<MemoryCPtr>& src,
                      const std::vector<MemoryPtr>& dst,
                      const void *post_ops_data_) = 0;
    virtual ~DeconvExecutor() = default;
    virtual impl_desc_type getImplType() const = 0;

protected:
    DeconvAttrs deconvAttrs;
    ExecutorContext::CPtr context;
};

using DeconvExecutorPtr = std::shared_ptr<DeconvExecutor>;
using DeconvExecutorCPtr = std::shared_ptr<const DeconvExecutor>;
// Abstract factory interface: reports whether a backend supports a given
// deconvolution configuration and creates the corresponding executor.
class DeconvExecutorBuilder {
public:
    // This polymorphic base is held via DeconvExecutorBuilderPtr/CPtr; the destructor
    // must be virtual so deleting through a base pointer is well-defined
    // (C++ Core Guidelines C.35). The original non-virtual '= default' was a defect.
    virtual ~DeconvExecutorBuilder() = default;
    // Returns true when this backend can execute the given configuration.
    virtual bool isSupported(const DeconvAttrs& convAttrs, const std::vector<MemoryDescPtr>& srcDescs, const std::vector<MemoryDescPtr>& dstDescs) const = 0;
    // Creates an uninitialized executor; DeconvExecutor::init() is invoked separately.
    virtual DeconvExecutorPtr makeExecutor(const ExecutorContext::CPtr context) const = 0;
};

using DeconvExecutorBuilderPtr = std::shared_ptr<DeconvExecutorBuilder>;
using DeconvExecutorBuilderCPtr = std::shared_ptr<const DeconvExecutorBuilder>;
} // namespace intel_cpu
} // namespace ov

View File

@ -0,0 +1,19 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "deconv_list.hpp"
namespace ov {
namespace intel_cpu {
// Returns the static registry of available deconvolution executor implementations.
// NOTE: the OV_CPU_INSTANCE_ACL macro presumably expands to the entry only in
// ACL-enabled builds — see the macro definition.
const std::vector<DeconvExecutorDesc>& getDeconvExecutorsList() {
    static const std::vector<DeconvExecutorDesc> registry = {
            OV_CPU_INSTANCE_ACL(ExecutorType::Acl, std::make_shared<AclDeconvExecutorBuilder>())
    };

    return registry;
}
} // namespace intel_cpu
} // namespace ov

View File

@ -0,0 +1,79 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "executor.hpp"
#include "deconv.hpp"
#if defined(OV_CPU_WITH_ACL)
#include "acl/acl_deconv.hpp"
#endif
#include "onednn/iml_type_mapper.h"
#include "common/primitive_cache.hpp"
namespace ov {
namespace intel_cpu {
// Pairs an executor type tag with the builder that creates executors of that type.
struct DeconvExecutorDesc {
    ExecutorType executorType;
    DeconvExecutorBuilderCPtr builder;
};

// Returns the static registry of available deconvolution executor implementations.
const std::vector<DeconvExecutorDesc>& getDeconvExecutorsList();
// Selects and instantiates a deconvolution executor from the registry returned by
// getDeconvExecutorsList(). The first descriptor whose executor initializes
// successfully is cached so subsequent calls try it first.
class DeconvExecutorFactory : public ExecutorFactory {
public:
    DeconvExecutorFactory(const DeconvAttrs& deconvAttrs,
                          const std::vector<MemoryDescPtr>& srcDescs,
                          const std::vector<MemoryDescPtr>& dstDescs,
                          const ExecutorContext::CPtr context) : ExecutorFactory(context) {
        // Pre-filter the registry down to the builders that claim support
        // for this particular configuration.
        for (auto& desc : getDeconvExecutorsList()) {
            if (desc.builder->isSupported(deconvAttrs, srcDescs, dstDescs)) {
                supportedDescs.push_back(desc);
            }
        }
    }

    ~DeconvExecutorFactory() = default;

    // Creates an initialized executor, preferring the previously successful
    // descriptor. Throws when no supported executor initializes successfully.
    virtual DeconvExecutorPtr makeExecutor(const DeconvAttrs& deconvAttrs,
                                           const std::vector<MemoryDescPtr>& srcDescs,
                                           const std::vector<MemoryDescPtr>& dstDescs,
                                           const dnnl::primitive_attr &attr) {
        // Instantiate a candidate's executor and try to initialize it;
        // yields nullptr when init() rejects the configuration.
        auto tryBuild = [&](const DeconvExecutorDesc* candidate) -> DeconvExecutorPtr {
            auto executor = candidate->builder->makeExecutor(context);
            if (executor->init(deconvAttrs, srcDescs, dstDescs, attr)) {
                return executor;
            }
            return nullptr;
        };

        // Fast path: reuse the descriptor that worked last time.
        if (chosenDesc) {
            if (auto executor = tryBuild(chosenDesc)) {
                return executor;
            }
        }
        // Slow path: probe every supported descriptor until one initializes.
        for (const auto& candidate : supportedDescs) {
            if (auto executor = tryBuild(&candidate)) {
                chosenDesc = &candidate;
                return executor;
            }
        }

        IE_THROW() << "DeconvExecutorFactory: Supported executor is not found";
    }

private:
    std::vector<DeconvExecutorDesc> supportedDescs;
    const DeconvExecutorDesc* chosenDesc = nullptr;
};

using DeconvExecutorFactoryPtr = std::shared_ptr<DeconvExecutorFactory>;
using DeconvExecutorFactoryCPtr = std::shared_ptr<const DeconvExecutorFactory>;
} // namespace intel_cpu
} // namespace ov