[ARM CPU] Add ACL deconvolution operation (#18655)
This commit is contained in:
parent
d51fc7adad
commit
f3bafef128
@ -23,6 +23,11 @@
|
||||
#include <common/primitive_desc_iface.hpp>
|
||||
#include <utils/shape_inference/shape_inference_ngraph.hpp>
|
||||
|
||||
#if defined(OV_CPU_WITH_ACL)
|
||||
#include "executors/acl/acl_utils.hpp"
|
||||
#include "utils/debug_capabilities.h"
|
||||
#endif
|
||||
|
||||
#include <oneapi/dnnl/dnnl.hpp>
|
||||
|
||||
#include <string>
|
||||
@ -174,15 +179,15 @@ Deconvolution::Deconvolution(const std::shared_ptr<ngraph::Node>& op,
|
||||
withGroups = false;
|
||||
|
||||
for (size_t i = 0; i < convBackprop->get_strides().size(); i++) {
|
||||
stride.push_back(static_cast<ptrdiff_t>(convBackprop->get_strides()[i]));
|
||||
deconvAttrs.stride.push_back(static_cast<ptrdiff_t>(convBackprop->get_strides()[i]));
|
||||
}
|
||||
for (size_t i = 0; i < convBackprop->get_dilations().size(); i++) {
|
||||
dilation.push_back(static_cast<ptrdiff_t>(convBackprop->get_dilations()[i]) - 1);
|
||||
deconvAttrs.dilation.push_back(static_cast<ptrdiff_t>(convBackprop->get_dilations()[i]) - 1);
|
||||
}
|
||||
paddingL = convBackprop->get_pads_begin();
|
||||
paddingR = convBackprop->get_pads_end();
|
||||
deconvAttrs.paddingL = convBackprop->get_pads_begin();
|
||||
deconvAttrs.paddingR = convBackprop->get_pads_end();
|
||||
|
||||
outputPadding = convBackprop->get_output_padding();
|
||||
deconvAttrs.outputPadding = convBackprop->get_output_padding();
|
||||
|
||||
autoPad = one_of(convBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER);
|
||||
} else if (auto groupConvBackprop = std::dynamic_pointer_cast<const ngraph::opset1::GroupConvolutionBackpropData>(op)) {
|
||||
@ -196,20 +201,20 @@ Deconvolution::Deconvolution(const std::shared_ptr<ngraph::Node>& op,
|
||||
isDW = withGroups && groupNum == OC && groupNum == IC;
|
||||
|
||||
for (size_t i = 0; i < groupConvBackprop->get_strides().size(); i++) {
|
||||
stride.push_back(static_cast<ptrdiff_t>(groupConvBackprop->get_strides()[i]));
|
||||
deconvAttrs.stride.push_back(static_cast<ptrdiff_t>(groupConvBackprop->get_strides()[i]));
|
||||
}
|
||||
for (size_t i = 0; i < groupConvBackprop->get_dilations().size(); i++) {
|
||||
dilation.push_back(static_cast<ptrdiff_t>(groupConvBackprop->get_dilations()[i]) - 1);
|
||||
deconvAttrs.dilation.push_back(static_cast<ptrdiff_t>(groupConvBackprop->get_dilations()[i]) - 1);
|
||||
}
|
||||
paddingL = groupConvBackprop->get_pads_begin();
|
||||
paddingR = groupConvBackprop->get_pads_end();
|
||||
deconvAttrs.paddingL = groupConvBackprop->get_pads_begin();
|
||||
deconvAttrs.paddingR = groupConvBackprop->get_pads_end();
|
||||
|
||||
outputPadding = groupConvBackprop->get_output_padding();
|
||||
deconvAttrs.outputPadding = groupConvBackprop->get_output_padding();
|
||||
|
||||
autoPad = one_of(groupConvBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER);
|
||||
}
|
||||
for (size_t i = 0; i < dilation.size(); i++) {
|
||||
kernel.push_back(weightDims[withGroups + 2 + i]);
|
||||
for (size_t i = 0; i < deconvAttrs.dilation.size(); i++) {
|
||||
deconvAttrs.kernel.push_back(weightDims[withGroups + 2 + i]);
|
||||
}
|
||||
|
||||
externOutShape = inputShapes.size() == 3;
|
||||
@ -274,7 +279,7 @@ bool Deconvolution::canBeExecutedInInt8() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!withGroups && stride.back() > 3)
|
||||
if (!withGroups && deconvAttrs.stride.back() > 3)
|
||||
return false;
|
||||
if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core)) {
|
||||
const auto& inMaxDims = getOutputShapeAtPort(0).getMaxDims();
|
||||
@ -291,8 +296,8 @@ bool Deconvolution::canBeExecutedInInt8() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < kernel.size(); i++) {
|
||||
if (kernel[i] < stride[i])
|
||||
for (size_t i = 0; i < deconvAttrs.kernel.size(); i++) {
|
||||
if (deconvAttrs.kernel[i] < deconvAttrs.stride[i])
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -301,7 +306,7 @@ bool Deconvolution::canBeExecutedInInt8() const {
|
||||
: impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) ? 8 : 4;
|
||||
if (withGroups && !isDW && (IC % channelBlock != 0 || OC % channelBlock != 0))
|
||||
return false;
|
||||
if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core) && stride.back() > 3)
|
||||
if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core) && deconvAttrs.stride.back() > 3)
|
||||
return false;
|
||||
|
||||
InferenceEngine::Precision inPrecision = getOriginalInputPrecisionAtPort(0);
|
||||
@ -310,7 +315,7 @@ bool Deconvolution::canBeExecutedInInt8() const {
|
||||
InferenceEngine::Precision weiPrecision = getOriginalInputPrecisionAtPort(1);
|
||||
auto weightsDataType = DnnlExtensionUtils::IEPrecisionToDataType(weiPrecision);
|
||||
|
||||
if (isDW && (inputDataType == dnnl_s8 || dilation.size() == 3))
|
||||
if (isDW && (inputDataType == dnnl_s8 || deconvAttrs.dilation.size() == 3))
|
||||
return false;
|
||||
|
||||
return (inputDataType == dnnl_s8 || inputDataType == dnnl_u8) && weightsDataType == dnnl_s8;
|
||||
@ -351,10 +356,10 @@ std::pair<VectorDims, VectorDims> Deconvolution::makeDummyInOutShape() {
|
||||
const auto& weightDims = getWeightDims();
|
||||
const size_t wghOffset = getAlgorithm() == Algorithm::DeconvolutionGrouped ? 1 : 0;
|
||||
|
||||
VectorDims paddings(paddingL.size());
|
||||
VectorDims paddings(deconvAttrs.paddingL.size());
|
||||
if (!autoPad) {
|
||||
for (size_t i = 0; i < paddings.size(); ++i) {
|
||||
paddings[i] = paddingL[i] + paddingR[i];
|
||||
paddings[i] = deconvAttrs.paddingL[i] + deconvAttrs.paddingR[i];
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < origInDims.size() - 2; i++) {
|
||||
@ -363,17 +368,17 @@ std::pair<VectorDims, VectorDims> Deconvolution::makeDummyInOutShape() {
|
||||
// if input shape is dynamic and bounded, paddings should be computed basing on the following limitations:
|
||||
// 1. paddings must not be negative
|
||||
// 2. the result padding must have such a value to keep the dummy dimensions inside the predefined interval
|
||||
auto c1 = lastOutputSpatialDims[i] - outputPadding[i] - 1 -
|
||||
(dilation[i] + 1) * static_cast<int32_t>(weightDims[wghOffset + 2 + i] - 1);
|
||||
auto c1 = lastOutputSpatialDims[i] - deconvAttrs.outputPadding[i] - 1 -
|
||||
(deconvAttrs.dilation[i] + 1) * static_cast<int32_t>(weightDims[wghOffset + 2 + i] - 1);
|
||||
|
||||
if (origInMaxDims[i + 2] != Shape::UNDEFINED_DIM) {
|
||||
auto upper_bound = stride[i] * static_cast<int32_t>(origInMaxDims[i + 2] - 1) - c1;
|
||||
auto upper_bound = deconvAttrs.stride[i] * static_cast<int32_t>(origInMaxDims[i + 2] - 1) - c1;
|
||||
if (upper_bound < 0) {
|
||||
IE_THROW() << errorPrefix << ": paddings for dummy shapes can't be computed";
|
||||
}
|
||||
}
|
||||
|
||||
auto lower_bound = stride[i] * static_cast<int32_t>(origInMinDims[i + 2] - 1) - c1;
|
||||
auto lower_bound = deconvAttrs.stride[i] * static_cast<int32_t>(origInMinDims[i + 2] - 1) - c1;
|
||||
if (lower_bound > 0) {
|
||||
paddings[i] = lower_bound;
|
||||
}
|
||||
@ -383,16 +388,16 @@ std::pair<VectorDims, VectorDims> Deconvolution::makeDummyInOutShape() {
|
||||
|
||||
for (size_t i = 0; i < inputDims.size() - 2; i++) {
|
||||
if (origInDims[2 + i] == Shape::UNDEFINED_DIM) {
|
||||
inputDims[2 + i] = (lastOutputSpatialDims[i] - (dilation[i] + 1) *
|
||||
(weightDims[wghOffset + 2 + i] - 1) - 1 + paddings[i] - outputPadding[i]) /
|
||||
stride[i] + 1;
|
||||
inputDims[2 + i] = (lastOutputSpatialDims[i] - (deconvAttrs.dilation[i] + 1) *
|
||||
(weightDims[wghOffset + 2 + i] - 1) - 1 + paddings[i] - deconvAttrs.outputPadding[i]) /
|
||||
deconvAttrs.stride[i] + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
inShape = Shape(inputDims);
|
||||
outShape = Shape(shapeInferInternal(inShape.getStaticDims(), lastOutputSpatialDims));
|
||||
paddingL = shapeInference->get_pads_begin();
|
||||
paddingR = shapeInference->get_pads_end();
|
||||
deconvAttrs.paddingL = shapeInference->get_pads_begin();
|
||||
deconvAttrs.paddingR = shapeInference->get_pads_end();
|
||||
}
|
||||
return {inShape.getStaticDims(), outShape.getStaticDims()};
|
||||
}
|
||||
@ -420,7 +425,7 @@ void Deconvolution::getSupportedDescriptors() {
|
||||
if (!descs.empty())
|
||||
return;
|
||||
isInt8 = canBeExecutedInInt8();
|
||||
withBiases = externOutShape ? getOriginalInputsNumber() == 4 : getOriginalInputsNumber() == 3;
|
||||
deconvAttrs.withBiasesParam = withBiases = externOutShape ? getOriginalInputsNumber() == 4 : getOriginalInputsNumber() == 3;
|
||||
//ONEDNN deconvolution_fwd_t primitive can support bias fusing.
|
||||
//ONEDNN convolution_data_bwd_t can't support bias fusing.
|
||||
//Current only int8 precision choose deconvolution_fwd_t.
|
||||
@ -463,6 +468,41 @@ void Deconvolution::getSupportedDescriptors() {
|
||||
Shape outShape(outDims);
|
||||
initPaddingR(inShape, outShape);
|
||||
|
||||
#if defined(OV_CPU_WITH_ACL)
|
||||
NodeConfig config;
|
||||
config.inConfs.resize(getParentEdges().size());
|
||||
config.outConfs.resize(getOriginalOutputsNumber());
|
||||
|
||||
auto& creatorsMap = BlockedDescCreator::getCommonCreators();
|
||||
for (size_t i = 0; i < getParentEdges().size(); ++i) {
|
||||
auto checkDesc = [&](LayoutType format) -> bool {
|
||||
NodeConfig config;
|
||||
config.inConfs.resize(getParentEdges().size());
|
||||
config.outConfs.resize(getOriginalOutputsNumber());
|
||||
|
||||
for (size_t i = 0; i < getParentEdges().size(); ++i) {
|
||||
config.inConfs[i].setMemDesc(
|
||||
creatorsMap.at(format)->createSharedDesc(getOriginalInputPrecisionAtPort(i), getInputShapeAtPort(i)));
|
||||
}
|
||||
config.outConfs[0].setMemDesc(
|
||||
creatorsMap.at(format)->createSharedDesc(getOriginalOutputPrecisionAtPort(0), getOutputShapeAtPort(0)));
|
||||
|
||||
std::vector<MemoryDescPtr> srcMemoryDescs;
|
||||
for (size_t i = 0; i < config.inConfs.size(); i++) {
|
||||
srcMemoryDescs.push_back(config.inConfs[i].getMemDesc());
|
||||
}
|
||||
std::vector<MemoryDescPtr> dstMemoryDescs;
|
||||
for (size_t i = 0; i < config.outConfs.size(); i++) {
|
||||
dstMemoryDescs.push_back(config.outConfs[i].getMemDesc());
|
||||
}
|
||||
|
||||
return AclDeconvExecutorBuilder::customIsSupported(deconvAttrs, srcMemoryDescs, dstMemoryDescs);
|
||||
};
|
||||
useACL = checkDesc(LayoutType::nspc) || checkDesc(LayoutType::ncsp);
|
||||
}
|
||||
if (useACL) return;
|
||||
#endif
|
||||
|
||||
setPostOps(*attr, outShape.getStaticDims());
|
||||
|
||||
if (isInt8) {
|
||||
@ -484,15 +524,14 @@ void Deconvolution::getSupportedDescriptors() {
|
||||
}
|
||||
|
||||
void Deconvolution::initPaddingR(const Shape &inShape, const Shape &outShape) {
|
||||
for (size_t i = 0; i < paddingR.size(); i++) {
|
||||
for (size_t i = 0; i < deconvAttrs.paddingR.size(); i++) {
|
||||
int with_group = getAlgorithm() == Algorithm::DeconvolutionGrouped ? 1 : 0;
|
||||
const auto& weightDims = getWeightDims();
|
||||
int krn = weightDims[with_group + 2 + i];
|
||||
int src = outShape.getStaticDims()[2 + i];
|
||||
int dst = inShape.getStaticDims()[2 + i];
|
||||
|
||||
krn = (krn - 1)*(dilation[i] + 1) + 1;
|
||||
paddingR[i] = (dst - 1) * stride[i] - (src - krn + paddingL[i]);
|
||||
krn = (krn - 1)*(deconvAttrs.dilation[i] + 1) + 1;
|
||||
deconvAttrs.paddingR[i] = (dst - 1) * deconvAttrs.stride[i] - (src - krn + deconvAttrs.paddingL[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -594,6 +633,20 @@ VectorDims Deconvolution::shapeInferInternal(const VectorDims &inDims, std::vect
|
||||
}
|
||||
|
||||
void Deconvolution::execute(dnnl::stream strm) {
|
||||
if (useACL) {
|
||||
std::vector<MemoryCPtr> srcMemory;
|
||||
for (size_t i = 0; i < getOriginalInputsNumber(); i++) {
|
||||
srcMemory.push_back(getParentEdgeAt(i)->getMemoryPtr());
|
||||
}
|
||||
std::vector<MemoryPtr> dstMemory;
|
||||
for (size_t i = 0; i < getOriginalOutputsNumber(); i++) {
|
||||
dstMemory.push_back(getChildEdgeAt(i)->getMemoryPtr());
|
||||
}
|
||||
//TODO: need to pass post ops data
|
||||
execPtrDeconv->exec(srcMemory, dstMemory, nullptr);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!execPtr) {
|
||||
IE_THROW() << "Can't execute Deconvolution node with name: " << getName() << ", because executor is not compiled";
|
||||
}
|
||||
@ -774,7 +827,7 @@ void Deconvolution::createPrimitive() {
|
||||
|
||||
const AttrPtr pAttr = makePrimitiveAttr(outDims);
|
||||
auto prim_desc = createInt8MkldnnDeconvDesc(inDesc->getDnnlDesc(), wgh_candidate, dnnlBiasDesc, outDesc->getDnnlDesc(), withBiases,
|
||||
stride, dilation, paddingL, paddingR, *pAttr, getEngine());
|
||||
deconvAttrs.stride, deconvAttrs.dilation, deconvAttrs.paddingL, deconvAttrs.paddingR, *pAttr, getEngine());
|
||||
|
||||
const bool found = DnnlExtensionUtils::find_implementation(prim_desc, selectedImpl);
|
||||
|
||||
@ -803,10 +856,26 @@ void Deconvolution::prepareParams() {
|
||||
IE_THROW() << "Input memory has not been allocated.";
|
||||
if (!wghMemPtr || !wghMemPtr->isAllocated())
|
||||
IE_THROW() << "Weight memory has not been allocated.";
|
||||
const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
|
||||
auto selected_pd = getSelectedPrimitiveDescriptor();
|
||||
if (selected_pd == nullptr)
|
||||
IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << ".";
|
||||
|
||||
if (useACL) {
|
||||
std::vector<MemoryDescPtr> srcMemoryDescs;
|
||||
for (size_t i = 0; i < getOriginalInputsNumber(); i++) {
|
||||
srcMemoryDescs.push_back(getParentEdgesAtPort(i).front()->getMemory().getDescWithType<DnnlMemoryDesc>());
|
||||
}
|
||||
std::vector<MemoryDescPtr> dstMemoryDescs;
|
||||
for (size_t i = 0; i < getOriginalOutputsNumber(); i++) {
|
||||
dstMemoryDescs.push_back(getChildEdgesAtPort(i).front()->getMemory().getDescWithType<DnnlMemoryDesc>());
|
||||
}
|
||||
|
||||
execPtrDeconv = selected_pd->getExecutorFactoryAs<DeconvExecutorFactory>()->makeExecutor(deconvAttrs, srcMemoryDescs,
|
||||
dstMemoryDescs, *attr);
|
||||
selected_pd->setImplementationType(execPtrDeconv->getImplType());
|
||||
return;
|
||||
}
|
||||
|
||||
auto inMemoryDesc = getParentEdgesAtPort(0).front()->getMemory().getDescWithType<DnnlMemoryDesc>();
|
||||
auto outMemoryDesc = getChildEdgesAtPort(0).front()->getMemory().getDescWithType<DnnlMemoryDesc>();
|
||||
|
||||
@ -817,8 +886,8 @@ void Deconvolution::prepareParams() {
|
||||
}
|
||||
pAttrLocal = pAttr;
|
||||
if (autoPad || externOutShape) {
|
||||
paddingL = shapeInference->get_pads_begin();
|
||||
paddingR = shapeInference->get_pads_end();
|
||||
deconvAttrs.paddingL = shapeInference->get_pads_begin();
|
||||
deconvAttrs.paddingR = shapeInference->get_pads_end();
|
||||
}
|
||||
initPaddingR(inMemoryDesc->getShape(), outMemoryDesc->getShape());
|
||||
} else {
|
||||
@ -846,10 +915,10 @@ void Deconvolution::prepareParams() {
|
||||
wghDesc,
|
||||
biasDesc,
|
||||
outMemoryDesc,
|
||||
stride,
|
||||
dilation,
|
||||
paddingL,
|
||||
paddingR,
|
||||
deconvAttrs.stride,
|
||||
deconvAttrs.dilation,
|
||||
deconvAttrs.paddingL,
|
||||
deconvAttrs.paddingR,
|
||||
isInt8,
|
||||
*pAttrLocal,
|
||||
selected_pd->getImplementationType()};
|
||||
@ -1006,14 +1075,16 @@ void Deconvolution::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc
|
||||
}
|
||||
dnnl::memory::desc wgh_candidate(DnnlExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any);
|
||||
descs.emplace_back(createDescriptorInternalInt8(in_candidate, wgh_candidate, bias_candidate,
|
||||
out_candidate, withBiases, stride, dilation, paddingL, paddingR, *attr, getEngine()));
|
||||
out_candidate, withBiases, deconvAttrs.stride, deconvAttrs.dilation,
|
||||
deconvAttrs.paddingL, deconvAttrs.paddingR, *attr, getEngine()));
|
||||
} else {
|
||||
dnnl::memory::desc wgh_candidate(DnnlExtensionUtils::convertToDnnlDims(getWeightDims()),
|
||||
dnnlInDesc.getDataType(), memory::format_tag::any);
|
||||
convolution_backward_data::primitive_desc deconv_desc;
|
||||
convolution_forward::primitive_desc fwd_conv_pd;
|
||||
std::tie(deconv_desc, fwd_conv_pd) = createDescriptorInternalDefault(in_candidate, wgh_candidate, out_candidate, dnnl::algorithm::convolution_direct,
|
||||
stride, dilation, paddingL, paddingR, *attr, getEngine());
|
||||
deconvAttrs.stride, deconvAttrs.dilation, deconvAttrs.paddingL,
|
||||
deconvAttrs.paddingR, *attr, getEngine());
|
||||
IE_ASSERT(fwd_conv_pd && deconv_desc && deconv_desc.get(true) != nullptr)
|
||||
<< "Failed to create convolution_backward_data::primitive_desc: " << "Node: ##" << getName();
|
||||
fwdConvPD.push_back(fwd_conv_pd); // oneDNN requires forward pd to exists until primitive is created
|
||||
@ -1120,6 +1191,44 @@ bool Deconvolution::canFuseBias() const {
|
||||
(externOutShape ? getParentEdges().size() == 3 : getParentEdges().size() == 2));
|
||||
}
|
||||
|
||||
// Registers the supported primitive descriptors for this node.
// Non-ACL builds/paths use the default oneDNN-based descriptor enumeration; when the ACL
// executor was selected (useACL), a single config is built manually and bound to a
// DeconvExecutorFactory so prepareParams() can later materialize an AclDeconvExecutor.
void Deconvolution::initSupportedPrimitiveDescriptors() {
    if (!useACL) {
        // Default path: descriptors were already created in getSupportedDescriptors().
        Node::initSupportedPrimitiveDescriptors();
        return;
    }

    auto& creatorsMap = BlockedDescCreator::getCommonCreators();
    // Builds one NodeConfig for the given layout and registers it with an ACL executor factory.
    auto pushDesc = [&](LayoutType format) {
        NodeConfig config;
        config.inConfs.resize(getParentEdges().size());
        config.outConfs.resize(getOriginalOutputsNumber());

        for (size_t i = 0; i < getParentEdges().size(); ++i) {
            config.inConfs[i].setMemDesc(
                // ACL expected equal precision
                // (hence port 0's precision is used for every input port)
                creatorsMap.at(format)->createSharedDesc(getOriginalInputPrecisionAtPort(0), getInputShapeAtPort(i)));
        }
        config.outConfs[0].setMemDesc(
            // ACL expected equal precision
            creatorsMap.at(format)->createSharedDesc(getOriginalInputPrecisionAtPort(0), getOutputShapeAtPort(0)));

        // Collect the per-port memory descriptors for the executor factory.
        std::vector<MemoryDescPtr> srcMemoryDescs;
        for (size_t i = 0; i < config.inConfs.size(); i++) {
            srcMemoryDescs.push_back(config.inConfs[i].getMemDesc());
        }
        std::vector<MemoryDescPtr> dstMemoryDescs;
        for (size_t i = 0; i < config.outConfs.size(); i++) {
            dstMemoryDescs.push_back(config.outConfs[i].getMemDesc());
        }

        auto factory = std::make_shared<DeconvExecutorFactory>(deconvAttrs, srcMemoryDescs, dstMemoryDescs,
                                                               std::make_shared<ExecutorContext>(context, getImplPriority()));

        supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::acl, factory);
    };
    // NOTE(review): getSupportedDescriptors() probes both nspc and ncsp when setting useACL,
    // but only ncsp is registered here — confirm whether an nspc descriptor should be pushed too.
    pushDesc(LayoutType::ncsp);
}
|
||||
|
||||
|
||||
} // namespace node
|
||||
} // namespace intel_cpu
|
||||
|
@ -11,6 +11,8 @@
|
||||
#include <vector>
|
||||
#include "common/dnnl_executor.h"
|
||||
|
||||
#include "executors/deconv_list.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
namespace node {
|
||||
@ -20,6 +22,7 @@ public:
|
||||
Deconvolution(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
|
||||
|
||||
void getSupportedDescriptors() override;
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
|
||||
const std::vector<MemoryDescPtr>& outputDesc) override;
|
||||
void createPrimitive() override;
|
||||
@ -41,7 +44,7 @@ public:
|
||||
bool canFuse(const NodePtr& node) const override;
|
||||
|
||||
const VectorDims& getWeightDims() const { return getInputShapeAtPort(1).getStaticDims(); }
|
||||
const std::vector<ptrdiff_t>& getStride() const { return stride; }
|
||||
const std::vector<ptrdiff_t>& getStride() const { return deconvAttrs.stride; }
|
||||
|
||||
void prepareParams() override;
|
||||
void execute(dnnl::stream strm) override;
|
||||
@ -55,6 +58,7 @@ protected:
|
||||
AttrPtr initPrimitiveAttr() override;
|
||||
AttrPtr makePrimitiveAttr(const VectorDims& dims);
|
||||
std::vector<dnnl::memory::format_tag> getAvailableFormatsForDims(const Shape& dims) const override;
|
||||
std::shared_ptr<DeconvExecutor> execPtrDeconv = nullptr;
|
||||
|
||||
private:
|
||||
using executorPtr = std::shared_ptr<DnnlExecutor>;
|
||||
@ -89,16 +93,13 @@ private:
|
||||
size_t groupNum = 1;
|
||||
size_t IC = 0;
|
||||
size_t OC = 0;
|
||||
std::vector<ptrdiff_t> kernel;
|
||||
std::vector<ptrdiff_t> stride;
|
||||
std::vector<ptrdiff_t> dilation;
|
||||
ov::CoordinateDiff paddingL;
|
||||
ov::CoordinateDiff paddingR;
|
||||
ov::CoordinateDiff outputPadding;
|
||||
std::vector<int32_t> lastOutputSpatialDims;
|
||||
VectorDims int8WeightDims;
|
||||
VectorDims expectedBiasDims {};
|
||||
|
||||
bool useACL = false;
|
||||
DeconvAttrs deconvAttrs;
|
||||
|
||||
Shape inShape;
|
||||
|
||||
AttrPtr pAttr;
|
||||
|
248
src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp
Normal file
248
src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp
Normal file
@ -0,0 +1,248 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "acl_deconv.hpp"
|
||||
#include "ie_parallel.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
|
||||
using namespace arm_compute;
|
||||
|
||||
// Builds the ACL tensor descriptors (src/weights/bias/dst TensorInfo) and the PadStrideInfo
// for a deconvolution described by deconvAttrs over the given memory descriptors
// (srcDescs[0] = activations, srcDescs[1] = weights, srcDescs[2] = optional bias).
ACLDeconvTensorInfo getACLDeconvTensorInfo(const DeconvAttrs& deconvAttrs,
                                           const std::vector<MemoryDescPtr>& srcDescs,
                                           const std::vector<MemoryDescPtr>& dstDescs) {
    auto srcDims = srcDescs[0]->getShape().getDims();
    auto weiDims = srcDescs[1]->getShape().getDims();
    // swap input and output channels dimensions to be align with ACL
    // weights tensor shape is changed because ACL expects [O, I, H, W] tensor while OV uses [I, O, H, W] tensor
    // (the weight DATA itself is transposed separately in the executor, see transpose in exec()).
    std::swap(weiDims[0], weiDims[1]);
    auto dstDims = dstDescs[0]->getShape().getDims();

    VectorDims biasDims;
    TensorInfo biasTensorInfo;

    if (deconvAttrs.withBiasesParam) {
        biasDims = srcDescs[2]->getShape().getStaticDims();
        biasTensorInfo = TensorInfo(shapeCast(biasDims), 1,
                                    precisionToAclDataType(srcDescs[2]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[2]));
    }

    TensorInfo srcTensorInfo = TensorInfo(shapeCast(srcDims), 1,
                                          precisionToAclDataType(srcDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[0]));
    TensorInfo weiTensorInfo = TensorInfo(shapeCast(weiDims), 1,
                                          precisionToAclDataType(srcDescs[1]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[1]));
    TensorInfo dstTensorInfo = TensorInfo(shapeCast(dstDims), 1,
                                          precisionToAclDataType(dstDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(dstDescs[0]));

    // Paddings/strides are stored as [y, x] for 2D; with a single element (1D case) the same
    // value is reused for both axes — TODO confirm the intended 1D mapping.
    unsigned int pad_l =
        (deconvAttrs.paddingL.size() > 1) ? static_cast<unsigned int>(deconvAttrs.paddingL.at(1)) : static_cast<unsigned int>(deconvAttrs.paddingL.at(0));
    unsigned int pad_r =
        (deconvAttrs.paddingR.size() > 1) ? static_cast<unsigned int>(deconvAttrs.paddingR.at(1)) : static_cast<unsigned int>(deconvAttrs.paddingR.at(0));
    unsigned int pad_t = static_cast<unsigned int>(deconvAttrs.paddingL.at(0));
    unsigned int pad_b = static_cast<unsigned int>(deconvAttrs.paddingR.at(0));
    // Fix: strides are ptrdiff_t — cast them explicitly, consistently with the paddings above,
    // instead of relying on an implicit narrowing conversion to unsigned int.
    unsigned int stride_x =
        static_cast<unsigned int>((deconvAttrs.stride.size() > 1) ? deconvAttrs.stride.at(1) : deconvAttrs.stride.at(0));
    unsigned int stride_y = static_cast<unsigned int>(deconvAttrs.stride.at(0));
    PadStrideInfo deconv_info(stride_x, stride_y, pad_l, pad_r, pad_t, pad_b, DimensionRoundingType::FLOOR);

    return ACLDeconvTensorInfo{srcTensorInfo, weiTensorInfo, biasTensorInfo, dstTensorInfo, deconv_info};
}
|
||||
|
||||
// Delegates context bookkeeping to the DeconvExecutor base; all ACL state is created lazily in init().
AclDeconvExecutor::AclDeconvExecutor(const ExecutorContext::CPtr context) : DeconvExecutor(context) {}
|
||||
|
||||
// Prepares the ACL deconvolution: validates the configuration with NEDeconvolutionLayer::validate,
// initializes the tensor allocators, and configures the kernel. Returns false (executor unusable)
// when ACL rejects the configuration. The dnnl attr parameter is currently unused (no post-ops
// support yet — see the TODO at the call site in Deconvolution::execute).
bool AclDeconvExecutor::init(const DeconvAttrs& deconvAttrs,
                             const std::vector<MemoryDescPtr>& srcDescs,
                             const std::vector<MemoryDescPtr>& dstDescs,
                             const dnnl::primitive_attr &attr) {
    this->deconvAttrs = deconvAttrs;
    ACLDeconvTensorInfo aclDeconvTensorInfo = getACLDeconvTensorInfo(deconvAttrs, srcDescs, dstDescs);
    TensorInfo srcTensorInfo = aclDeconvTensorInfo.srcTensorInfo;
    TensorInfo weiTensorInfo = aclDeconvTensorInfo.weiTensorInfo;
    TensorInfo biasTensorInfo = aclDeconvTensorInfo.biasTensorInfo;
    TensorInfo dstTensorInfo = aclDeconvTensorInfo.dstTensorInfo;
    PadStrideInfo deconv_info = aclDeconvTensorInfo.deconv_info;

    // Validate BEFORE configure: NEDeconvolutionLayer::configure asserts on invalid setups.
    arm_compute::Status status = arm_compute::NEDeconvolutionLayer::validate(&srcTensorInfo,
                                                                             &weiTensorInfo,
                                                                             deconvAttrs.withBiasesParam ? &biasTensorInfo : nullptr,
                                                                             &dstTensorInfo,
                                                                             deconv_info);
    if (!status) {
        DEBUG_LOG("NEDeconvolutionLayer validation failed: ", status.error_description());
        return false;
    }

    // Describe the tensors; actual memory is bound later in exec() via import_memory().
    srcTensor.allocator()->init(srcTensorInfo);
    weiTensor.allocator()->init(weiTensorInfo);
    dstTensor.allocator()->init(dstTensorInfo);
    if (deconvAttrs.withBiasesParam)
        biasTensor.allocator()->init(biasTensorInfo);

    deconv = std::make_unique<arm_compute::NEDeconvolutionLayer>();
    deconv->configure(&srcTensor, &weiTensor, deconvAttrs.withBiasesParam ? &biasTensor : nullptr, &dstTensor, deconv_info);

    // weights tensor shape is changed because ACL expects [O, I, H, W] tensor while OV uses [I, O, H, W] tensor
    // Scratch buffer for the transposed weights; sized as the product of the 4 weight dims.
    // NOTE(review): element type is hard-wired to float — assumes FP32-only weights
    // (matches the precision gate in customIsSupported); revisit when FP16 is enabled.
    weiBuffer = std::vector<float>(srcDescs[1]->getShape().getStaticDims()[0] *
                                   srcDescs[1]->getShape().getStaticDims()[1] *
                                   srcDescs[1]->getShape().getStaticDims()[2] *
                                   srcDescs[1]->getShape().getStaticDims()[3]);
    return true;
}
|
||||
|
||||
static void transpose_to_1023(const MemoryCPtr& srcMemPtr, std::vector<float>& dst_data) {
|
||||
const auto src_data = reinterpret_cast<float*>(srcMemPtr->getData());
|
||||
|
||||
const int DIM0 = srcMemPtr->getStaticDims()[0];
|
||||
const int DIM1 = srcMemPtr->getStaticDims()[1];
|
||||
const int DIM2 = srcMemPtr->getStaticDims()[2];
|
||||
const int DIM3 = srcMemPtr->getStaticDims()[3];
|
||||
|
||||
parallel_for3d(DIM0, DIM1, DIM2, [&](const int dim0, const int dim1, const int dim2) {
|
||||
for (int dim3 = 0; dim3 < DIM3; ++dim3) {
|
||||
const int src_off = dim0 * DIM1 * DIM2 * DIM3 +
|
||||
dim1 * DIM2 * DIM3 +
|
||||
dim2 * DIM3 +
|
||||
dim3;
|
||||
const int dst_off = dim1 * DIM0 * DIM2 * DIM3 +
|
||||
dim0 * DIM2 * DIM3 +
|
||||
dim2 * DIM3 +
|
||||
dim3;
|
||||
|
||||
dst_data[dst_off] = src_data[src_off];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Runs the configured ACL deconvolution on the given memories.
// src[0] = activations, src[1] = weights (transposed into weiBuffer each call),
// src[2] = optional bias; dst[0] = output. post_ops_data_ is currently unused
// (no post-op support yet — see the TODO at the call site).
void AclDeconvExecutor::exec(const std::vector<MemoryCPtr>& src, const std::vector<MemoryPtr>& dst, const void *post_ops_data_) {
    // TODO: Remove transpose from exec
    transpose_to_1023(src[1], weiBuffer);

    // import_memory() binds external buffers to the pre-initialized tensors (no copy, no alloc).
    srcTensor.allocator()->import_memory(src[0]->getData());
    dstTensor.allocator()->import_memory(dst[0]->getData());
    weiTensor.allocator()->import_memory(weiBuffer.data());
    if (deconvAttrs.withBiasesParam)
        biasTensor.allocator()->import_memory(src[2]->getData());
    deconv->run();

    // free() only releases the imported bindings here; the underlying buffers are owned elsewhere.
    srcTensor.allocator()->free();
    dstTensor.allocator()->free();
    weiTensor.allocator()->free();
    if (deconvAttrs.withBiasesParam)
        biasTensor.allocator()->free();
}
|
||||
|
||||
bool AclDeconvExecutorBuilder::customIsSupported(const DeconvAttrs &deconvAttrs,
|
||||
const std::vector<MemoryDescPtr> &srcDescs,
|
||||
const std::vector<MemoryDescPtr> &dstDescs) {
|
||||
if ((srcDescs[0]->getShape().getDims().size() != 3 && srcDescs[0]->getShape().getDims().size() != 4) ||
|
||||
dstDescs[0]->getShape().getDims().size() != srcDescs[0]->getShape().getDims().size() ||
|
||||
srcDescs[1]->getShape().getDims().size() != 4) {
|
||||
DEBUG_LOG("AclDeconvExecutor does not support dimension:",
|
||||
" src[0]=", srcDescs[0]->getShape().getDims().size(),
|
||||
" src[1]=", srcDescs[1]->getShape().getDims().size(),
|
||||
" dst[0]=", dstDescs[0]->getShape().getDims().size());
|
||||
return false;
|
||||
}
|
||||
|
||||
// TODO: Ticket CVS-114087 - enable FP16 when check FP16 scoup
|
||||
if (!(one_of(srcDescs[0]->getPrecision(), /*InferenceEngine::Precision::FP16, */InferenceEngine::Precision::FP32) &&
|
||||
srcDescs[0]->getPrecision() == srcDescs[1]->getPrecision() &&
|
||||
srcDescs[1]->getPrecision() == dstDescs[0]->getPrecision())) {
|
||||
DEBUG_LOG("AclDeconvExecutor does not support precisions:",
|
||||
" src[0]=", srcDescs[0]->getPrecision(),
|
||||
" src[1]=", srcDescs[1]->getPrecision(),
|
||||
" dst[0]=", dstDescs[0]->getPrecision());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (deconvAttrs.withBiasesParam && srcDescs[2]->getPrecision() != srcDescs[0]->getPrecision()) {
|
||||
DEBUG_LOG("AclDeconvExecutor does not support precisions:",
|
||||
" src[2]=", srcDescs[2]->getPrecision());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!(srcDescs[0]->hasLayoutType(LayoutType::ncsp) &&
|
||||
srcDescs[1]->hasLayoutType(LayoutType::ncsp) &&
|
||||
dstDescs[0]->hasLayoutType(LayoutType::ncsp)) &&
|
||||
!(srcDescs[0]->hasLayoutType(LayoutType::nspc) &&
|
||||
srcDescs[1]->hasLayoutType(LayoutType::nspc) &&
|
||||
dstDescs[0]->hasLayoutType(LayoutType::nspc))) {
|
||||
DEBUG_LOG("AclDeconvExecutor does not support layouts:",
|
||||
" src[0]=", srcDescs[0]->serializeFormat(),
|
||||
" src[1]=", srcDescs[1]->serializeFormat(),
|
||||
" dst=", dstDescs[0]->serializeFormat());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (deconvAttrs.withBiasesParam &&
|
||||
!(srcDescs[2]->hasLayoutType(LayoutType::ncsp)) &&
|
||||
!(srcDescs[2]->hasLayoutType(LayoutType::nspc))) {
|
||||
DEBUG_LOG("AclDeconvExecutor does not support layouts:",
|
||||
" src[0]=", srcDescs[0]->serializeFormat(),
|
||||
" src[1]=", srcDescs[1]->serializeFormat(),
|
||||
" src[2]=", srcDescs[2]->serializeFormat(),
|
||||
" dst=", dstDescs[0]->serializeFormat());
|
||||
return false;
|
||||
}
|
||||
|
||||
ACLDeconvTensorInfo aclDeconvTensorInfo = getACLDeconvTensorInfo(deconvAttrs, srcDescs, dstDescs);
|
||||
TensorInfo srcTensorInfo = aclDeconvTensorInfo.srcTensorInfo;
|
||||
TensorInfo weiTensorInfo = aclDeconvTensorInfo.weiTensorInfo;
|
||||
TensorInfo biasTensorInfo = aclDeconvTensorInfo.biasTensorInfo;
|
||||
TensorInfo dstTensorInfo = aclDeconvTensorInfo.dstTensorInfo;
|
||||
PadStrideInfo deconv_info = aclDeconvTensorInfo.deconv_info;
|
||||
|
||||
unsigned int kernel_x = (deconvAttrs.kernel.size() > 1) ? deconvAttrs.kernel.at(1) : deconvAttrs.kernel.at(0);
|
||||
unsigned int kernel_y = deconvAttrs.kernel.at(0);
|
||||
|
||||
// After stride=8 up-sampling in ACL Deconvolution layer slower than reference
|
||||
if (deconv_info.stride().first >= 8 || deconv_info.stride().second >= 8) return false;
|
||||
|
||||
unsigned int dilation_x = (deconvAttrs.dilation.size() > 1) ? deconvAttrs.dilation.at(1) : deconvAttrs.dilation.at(0);
|
||||
unsigned int dilation_y = deconvAttrs.dilation.at(0);
|
||||
if (!one_of(dilation_x, static_cast<unsigned int >(0), static_cast<unsigned int >(1)) ||
|
||||
!one_of(dilation_y, static_cast<unsigned int >(0), static_cast<unsigned int >(1))) return false;
|
||||
|
||||
size_t in_h = srcDescs[0]->hasLayoutType(LayoutType::ncsp) ? srcDescs[0]->getShape().getDims()[2] : srcDescs[0]->getShape().getDims()[1];
|
||||
size_t in_w = srcDescs[0]->hasLayoutType(LayoutType::ncsp) ? srcDescs[0]->getShape().getDims()[3] : srcDescs[0]->getShape().getDims()[2];
|
||||
|
||||
// Validate function has bug (https://github.com/ARM-software/ComputeLibrary/issues/1061) with error exception.
|
||||
// We copy deconvolution_output_dimensions function for get correct validation
|
||||
// TODO: remove after fix
|
||||
if (validate_deconvolution_output_dimensions(in_w, in_h, kernel_x, kernel_y, deconv_info)) {
|
||||
DEBUG_LOG("NEDeconvolutionLayer arm_compute::deconvolution_output_dimensions failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
arm_compute::Status status = arm_compute::NEDeconvolutionLayer::validate(&srcTensorInfo,
|
||||
&weiTensorInfo,
|
||||
deconvAttrs.withBiasesParam ? &biasTensorInfo : nullptr,
|
||||
&dstTensorInfo,
|
||||
deconv_info);
|
||||
if (!status) {
|
||||
DEBUG_LOG("NEDeconvolutionLayer validation failed: ", status.error_description());
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AclDeconvExecutorBuilder::validate_deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height,
|
||||
unsigned int kernel_width,
|
||||
unsigned int kernel_height,
|
||||
const PadStrideInfo &pad_stride_info) {
|
||||
const unsigned int pad_left = pad_stride_info.pad_left();
|
||||
const unsigned int pad_top = pad_stride_info.pad_top();
|
||||
const unsigned int pad_right = pad_stride_info.pad_right();
|
||||
const unsigned int pad_bottom = pad_stride_info.pad_bottom();
|
||||
const unsigned int stride_x = pad_stride_info.stride().first;
|
||||
const unsigned int stride_y = pad_stride_info.stride().second;
|
||||
|
||||
if (!((in_width < 1 || in_height < 1) ||
|
||||
(((in_width - 1) * stride_x + kernel_width) < (pad_left + pad_right)) ||
|
||||
(((in_height - 1) * stride_y + kernel_height) < (pad_top + pad_bottom)))) { return false; }
|
||||
return true;
|
||||
}
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
78
src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.hpp
Normal file
78
src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.hpp
Normal file
@ -0,0 +1,78 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "nodes/executors/deconv.hpp"
|
||||
#include "arm_compute/runtime/NEON/NEFunctions.h"
|
||||
#include "utils/debug_capabilities.h"
|
||||
#include "acl_utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
|
||||
// Aggregates the ACL tensor descriptions and the padding/stride configuration
// needed to validate and configure an arm_compute::NEDeconvolutionLayer.
// Filled by getACLDeconvTensorInfo() from the node's memory descriptors.
struct ACLDeconvTensorInfo {
    arm_compute::TensorInfo srcTensorInfo;   // input (src[0]) description
    arm_compute::TensorInfo weiTensorInfo;   // weights (src[1]) description
    arm_compute::TensorInfo biasTensorInfo;  // bias (src[2]) description; used only when biases are present
    arm_compute::TensorInfo dstTensorInfo;   // output (dst[0]) description
    arm_compute::PadStrideInfo deconv_info;  // strides and paddings of the deconvolution
};
|
||||
|
||||
ACLDeconvTensorInfo getACLDeconvTensorInfo(const DeconvAttrs& deconvAttrs,
|
||||
const std::vector<MemoryDescPtr>& srcDescs,
|
||||
const std::vector<MemoryDescPtr>& dstDescs);
|
||||
|
||||
// Deconvolution executor backed by Arm Compute Library's NEDeconvolutionLayer.
class AclDeconvExecutor : public DeconvExecutor {
public:
    explicit AclDeconvExecutor(const ExecutorContext::CPtr context);
    // Prepares the underlying NEDeconvolutionLayer for the given attributes and
    // memory descriptors; returns false when the configuration is not supported.
    bool init(const DeconvAttrs& deconvAttrs,
              const std::vector<MemoryDescPtr>& srcDescs,
              const std::vector<MemoryDescPtr>& dstDescs,
              const dnnl::primitive_attr &attr) override;
    // Runs the configured layer on the given memory objects.
    // NOTE(review): src is presumably {input, weights[, bias]} and dst[0] the
    // output, matching customIsSupported() - confirm in acl_deconv.cpp.
    void exec(const std::vector<MemoryCPtr>& src,
              const std::vector<MemoryPtr>& dst,
              const void *post_ops_data_) override;

    impl_desc_type getImplType() const override {
        return implType;
    }

private:
    DeconvAttrs deconvAttrs;
    impl_desc_type implType = impl_desc_type::acl;

    // ACL tensor handles for the input, weights, bias and output buffers.
    arm_compute::Tensor srcTensor;
    arm_compute::Tensor weiTensor;
    arm_compute::Tensor biasTensor;
    arm_compute::Tensor dstTensor;
    std::unique_ptr<arm_compute::NEDeconvolutionLayer> deconv = nullptr;

    // Scratch storage for weights; presumably holds them re-laid-out for ACL -
    // TODO(review): confirm against the init() implementation.
    std::vector<float> weiBuffer;
};
|
||||
|
||||
// Builder that reports whether the ACL backend can run a given deconvolution
// configuration and constructs AclDeconvExecutor instances.
class AclDeconvExecutorBuilder : public DeconvExecutorBuilder {
public:
    // Static support check, so callers can query support without a builder instance.
    static bool customIsSupported(const DeconvAttrs& deconvAttrs,
                                  const std::vector<MemoryDescPtr>& srcDescs,
                                  const std::vector<MemoryDescPtr>& dstDescs);

    bool isSupported(const DeconvAttrs& deconvAttrs,
                     const std::vector<MemoryDescPtr>& srcDescs,
                     const std::vector<MemoryDescPtr>& dstDescs) const override {
        return customIsSupported(deconvAttrs, srcDescs, dstDescs);
    }

    DeconvExecutorPtr makeExecutor(const ExecutorContext::CPtr context) const override {
        return std::make_shared<AclDeconvExecutor>(context);
    }

private:
    // Copy of arm_compute::deconvolution_output_dimensions() input checks,
    // used to reject invalid shapes up front because ACL's validate throws an
    // exception instead of returning an error status
    // (https://github.com/ARM-software/ComputeLibrary/issues/1061).
    static bool validate_deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height,
                                                         unsigned int kernel_width, unsigned int kernel_height,
                                                         const arm_compute::PadStrideInfo &pad_stride_info);
};
|
||||
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
13
src/plugins/intel_cpu/src/nodes/executors/deconv.cpp
Normal file
13
src/plugins/intel_cpu/src/nodes/executors/deconv.cpp
Normal file
@ -0,0 +1,13 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "deconv.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
|
||||
using namespace InferenceEngine;
|
||||
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
60
src/plugins/intel_cpu/src/nodes/executors/deconv.hpp
Normal file
60
src/plugins/intel_cpu/src/nodes/executors/deconv.hpp
Normal file
@ -0,0 +1,60 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cpu_memory.h"
|
||||
#include "onednn/iml_type_mapper.h"
|
||||
#include "executor.hpp"
|
||||
#include <common/primitive_hashing_utils.hpp>
|
||||
#include "nodes/common/dnnl_executor.h"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
|
||||
// Deconvolution parameters shared by all executor backends.
struct DeconvAttrs {
    std::vector<ptrdiff_t> kernel;     // kernel spatial dimensions
    std::vector<ptrdiff_t> stride;     // stride per spatial axis
    std::vector<ptrdiff_t> dilation;   // per-axis dilation, stored as (op dilation - 1)
    std::vector<ptrdiff_t> paddingL;   // pads_begin of the operation
    std::vector<ptrdiff_t> paddingR;   // pads_end of the operation
    ov::CoordinateDiff outputPadding;  // output_padding attribute of the backprop op
    bool withBiasesParam = false;      // true when a bias input is present
};
|
||||
|
||||
// Abstract base class for backend-specific deconvolution executors.
class DeconvExecutor {
public:
    explicit DeconvExecutor(const ExecutorContext::CPtr context) : context(context) {}

    // Prepares the executor for the given attributes and memory descriptors;
    // returns false when this backend cannot execute the configuration
    // (the factory then tries the next builder).
    virtual bool init(const DeconvAttrs& deconvAttrs,
                      const std::vector<MemoryDescPtr>& srcDescs,
                      const std::vector<MemoryDescPtr>& dstDescs,
                      const dnnl::primitive_attr &attr) = 0;

    // Executes the deconvolution on the given source/destination memory.
    virtual void exec(const std::vector<MemoryCPtr>& src,
                      const std::vector<MemoryPtr>& dst,
                      const void *post_ops_data_) = 0;
    virtual ~DeconvExecutor() = default;
    virtual impl_desc_type getImplType() const = 0;

protected:
    DeconvAttrs deconvAttrs;
    ExecutorContext::CPtr context;  // execution context supplied at construction
};
|
||||
|
||||
using DeconvExecutorPtr = std::shared_ptr<DeconvExecutor>;
|
||||
using DeconvExecutorCPtr = std::shared_ptr<const DeconvExecutor>;
|
||||
|
||||
class DeconvExecutorBuilder {
|
||||
public:
|
||||
~DeconvExecutorBuilder() = default;
|
||||
virtual bool isSupported(const DeconvAttrs& convAttrs, const std::vector<MemoryDescPtr>& srcDescs, const std::vector<MemoryDescPtr>& dstDescs) const = 0;
|
||||
virtual DeconvExecutorPtr makeExecutor(const ExecutorContext::CPtr context) const = 0;
|
||||
};
|
||||
|
||||
using DeconvExecutorBuilderPtr = std::shared_ptr<DeconvExecutorBuilder>;
|
||||
using DeconvExecutorBuilderCPtr = std::shared_ptr<const DeconvExecutorBuilder>;
|
||||
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
19
src/plugins/intel_cpu/src/nodes/executors/deconv_list.cpp
Normal file
19
src/plugins/intel_cpu/src/nodes/executors/deconv_list.cpp
Normal file
@ -0,0 +1,19 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "deconv_list.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
|
||||
// Returns the process-wide registry of deconvolution executor builders.
// The function-local static is built once (thread-safe since C++11) and
// shared by every DeconvExecutorFactory.
const std::vector<DeconvExecutorDesc>& getDeconvExecutorsList() {
    static std::vector<DeconvExecutorDesc> descs = {
        // OV_CPU_INSTANCE_ACL registers the ACL builder; presumably it expands
        // to nothing in builds without ACL support (matching the
        // OV_CPU_WITH_ACL guard in deconv_list.hpp) - confirm macro definition.
        OV_CPU_INSTANCE_ACL(ExecutorType::Acl, std::make_shared<AclDeconvExecutorBuilder>())
    };

    return descs;
}
|
||||
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
79
src/plugins/intel_cpu/src/nodes/executors/deconv_list.hpp
Normal file
79
src/plugins/intel_cpu/src/nodes/executors/deconv_list.hpp
Normal file
@ -0,0 +1,79 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "executor.hpp"
|
||||
|
||||
#include "deconv.hpp"
|
||||
#if defined(OV_CPU_WITH_ACL)
|
||||
#include "acl/acl_deconv.hpp"
|
||||
#endif
|
||||
|
||||
#include "onednn/iml_type_mapper.h"
|
||||
#include "common/primitive_cache.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
|
||||
// Pairs a backend identifier with the builder that creates executors for it.
struct DeconvExecutorDesc {
    ExecutorType executorType;          // backend tag (e.g. ExecutorType::Acl)
    DeconvExecutorBuilderCPtr builder;  // builder instance for this backend
};
|
||||
|
||||
const std::vector<DeconvExecutorDesc>& getDeconvExecutorsList();
|
||||
|
||||
// Factory holding the builders applicable to one deconvolution configuration;
// creates an initialized executor on demand and caches the builder that worked.
class DeconvExecutorFactory : public ExecutorFactory {
public:
    // Filters the global builder registry down to the builders whose
    // isSupported() accepts the given attributes and memory descriptors.
    DeconvExecutorFactory(const DeconvAttrs& deconvAttrs,
                          const std::vector<MemoryDescPtr>& srcDescs,
                          const std::vector<MemoryDescPtr>& dstDescs,
                          const ExecutorContext::CPtr context) : ExecutorFactory(context) {
        for (auto& desc : getDeconvExecutorsList()) {
            if (desc.builder->isSupported(deconvAttrs, srcDescs, dstDescs)) {
                supportedDescs.push_back(desc);
            }
        }
    }

    ~DeconvExecutorFactory() = default;
    // Returns an executor initialized for the given configuration: tries the
    // previously successful builder first, then the remaining supported
    // builders; throws when no builder's executor can be initialized.
    virtual DeconvExecutorPtr makeExecutor(const DeconvAttrs& deconvAttrs,
                                           const std::vector<MemoryDescPtr>& srcDescs,
                                           const std::vector<MemoryDescPtr>& dstDescs,
                                           const dnnl::primitive_attr &attr) {
        // Builds an executor from one descriptor; nullptr when init() fails.
        auto build = [&](const DeconvExecutorDesc* desc) {
            auto executor = desc->builder->makeExecutor(context);
            if (executor->init(deconvAttrs, srcDescs, dstDescs, attr)) {
                return executor;
            }
            DeconvExecutorPtr ptr = nullptr;
            return ptr;
        };

        // Fast path: reuse the builder that produced a working executor before.
        if (chosenDesc) {
            if (auto executor = build(chosenDesc)) {
                return executor;
            }
        }

        // Scan the supported builders and remember the first one that succeeds.
        for (const auto& sd : supportedDescs) {
            if (auto executor = build(&sd)) {
                chosenDesc = &sd;
                return executor;
            }
        }

        IE_THROW() << "DeconvExecutorFactory: Supported executor is not found";
    }

private:
    std::vector<DeconvExecutorDesc> supportedDescs;  // builders that passed isSupported()
    // Points into supportedDescs at the last builder that produced a working
    // executor; nullptr until the first successful makeExecutor() call.
    const DeconvExecutorDesc* chosenDesc = nullptr;
};
|
||||
|
||||
using DeconvExecutorFactoryPtr = std::shared_ptr<DeconvExecutorFactory>;
|
||||
using DeconvExecutorFactoryCPtr = std::shared_ptr<const DeconvExecutorFactory>;
|
||||
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
Loading…
Reference in New Issue
Block a user