[CPU] Enable brgconv AVX512 by default (#12406)

commit 2c06c36366
parent 3e648f2788
Author: Luo Cheng (committed by GitHub)
Date:   2022-08-11 13:19:16 +08:00
2 changed files with 42 additions and 22 deletions

File 1 of 2: Convolution node implementation (.cpp)

@@ -279,11 +279,6 @@ Convolution::Convolution(const std::shared_ptr<ngraph::Node>& op, const dnnl::en
         paddingR = groupConvolutionOp->get_pads_end();
         autoPadding = one_of(groupConvolutionOp->get_auto_pad(), ov::op::PadType::SAME_UPPER, ov::op::PadType::SAME_LOWER);
     }
-    // Due to performance issue, brgconv will only be enabled by default:
-    // 1, support amx
-    // 2, static shape(dynamic shape may change weights layout if the input shape changes and cause performance issue: 86948)
-    shouldTryBrgconv = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx) && !isDynamicNode();
-
 }
 
 bool Convolution::canBeExecutedInInt8() const {
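
The gate removed above (AMX support plus a static shape) moves into the new initTryBrgconvFlag() helper later in this commit. For reference, the same AMX capability check can be made through oneDNN's public API instead of the internal mayiuse() helper the plugin calls. A minimal sketch, assuming a oneDNN 2.x build that exposes cpu_isa::avx512_core_amx; it relies on cpu_isa values being hierarchical bitmasks, where each ISA contains the bits of the ISAs it extends:

// Sketch: detect AMX via oneDNN's public API (the plugin itself uses the
// internal dnnl::impl::cpu::x64::mayiuse() shown in the diff).
#include <iostream>
#include "dnnl.hpp"

int main() {
    const auto isa = static_cast<unsigned>(dnnl::get_effective_cpu_isa());
    const auto amx = static_cast<unsigned>(dnnl::cpu_isa::avx512_core_amx);
    // The mask test is true exactly when the effective ISA covers AMX.
    std::cout << "AMX available: " << std::boolalpha
              << ((isa & amx) == amx) << "\n";
    return 0;
}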
@@ -375,6 +370,8 @@ void Convolution::getSupportedDescriptors() {
     withBiases = getOriginalInputsNumber() == 3;
 
+    initTryBrgconvFlag();
+
     if (!implPriorities.empty()) {
         isPrimitivesPriorityDefined = true;
         // winograd support only constant weights and bias
@@ -383,7 +380,7 @@ void Convolution::getSupportedDescriptors() {
                 getParentEdgeAt(1)->getParent()->isConstant() && getParentEdgeAt(1)->getParent()->getType() == Type::Input &&
                 (withBiases ? (getParentEdgeAt(2)->getParent()->isConstant() && getParentEdgeAt(2)->getParent()->getType() == Type::Input) : true);
-        // AVX512 brconv is disabled by default due to performance issues. User can force it via Primitives priority mechanism.
+        // AVX512 brgconv may be disabled by the heuristics due to performance issues. The user can force it via the primitives priority mechanism.
         if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) {
             std::for_each(implPriorities.begin(), implPriorities.end(), [&](const impl_desc_type& desc_type) {
                 if (desc_type & impl_desc_type::brgconv_avx512) {
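
The check above treats impl_desc_type values as bit-or compositions of capability flags: any priority entry that overlaps the brgconv_avx512 bits is taken as a brgconv request. A simplified, hypothetical mirror of that matching; the flag names and bit values here are illustrative, not the plugin's actual ones:

#include <cstdint>
#include <iostream>

// Hypothetical capability bits, loosely modeled on impl_desc_type.
enum impl_bits : uint32_t {
    jit            = 1u << 0,
    brgconv        = 1u << 1,
    avx512         = 1u << 2,
    amx            = 1u << 3,
    brgconv_avx512 = brgconv | avx512,
};

int main() {
    // A priority entry parsed from the user's primitives priority string:
    // an AMX brgconv entry carries the brgconv and avx512 bits as well.
    const uint32_t desc_type = brgconv | avx512 | amx;
    // Same shape as the plugin's check above: any bit overlap with
    // brgconv_avx512 marks the entry as a brgconv request.
    if (desc_type & brgconv_avx512)
        std::cout << "brgconv requested -> force shouldTryBrgconv\n";
    return 0;
}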
@@ -715,13 +712,12 @@ void Convolution::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
-    // attr[0] - depthwise, quantize
-    // attr[1] - binary
-    dnnl::primitive_attr attrs[2];
+    pInitAttrs[0] = std::make_shared<dnnl::primitive_attr>();
     auto attrsNum = shouldTryBrgconv ? 2 : 1;
-    setPostOps(attrs[0], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), true);
-    if (shouldTryBrgconv) {
-        setPostOps(attrs[1], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), false);
+    setPostOps(*pInitAttrs[0], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), true);
+    if (shouldTryBrgconv && !pInitAttrs[1]) {
+        pInitAttrs[1] = std::make_shared<dnnl::primitive_attr>();
+        setPostOps(*pInitAttrs[1], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), false);
     }
 
     bool containJitImpl = false;
@@ -730,7 +726,7 @@ void Convolution::initSupportedPrimitiveDescriptors() {
         if (containJitImpl && isPossibleToSkipInitConfig(desc))
             continue;
         for (int i = 0; i < attrsNum; i++) {
-            auto &attr = attrs[i];
+            auto &attr = *pInitAttrs[i];
             addZeroPoints(attr);
             auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
             while (static_cast<bool>(itpd)) {
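
createPrimitiveDescriptorIterator() above wraps oneDNN's implementation iteration: for one descriptor and one attr, the loop walks every candidate implementation. The public-API equivalent looks roughly like this. A self-contained sketch assuming oneDNN 2.x (the desc-based API used here was removed in oneDNN 3.0); the shapes are arbitrary:

#include <iostream>
#include "dnnl.hpp"

int main() {
    using namespace dnnl;
    engine eng(engine::kind::cpu, 0);

    // Minimal f32 3x3 convolution, just to have implementations to iterate.
    memory::desc src({1, 16, 14, 14}, memory::data_type::f32, memory::format_tag::any);
    memory::desc wei({16, 16, 3, 3}, memory::data_type::f32, memory::format_tag::any);
    memory::desc dst({1, 16, 12, 12}, memory::data_type::f32, memory::format_tag::any);

    convolution_forward::desc desc(prop_kind::forward_inference,
            algorithm::convolution_direct, src, wei, dst,
            {1, 1}, {0, 0}, {0, 0});

    primitive_attr attr;  // post ops would be set here, as in the plugin code
    convolution_forward::primitive_desc pd(desc, attr, eng);
    do {
        // Implementation name, e.g. a brgconv or jit variant.
        std::cout << pd.impl_info_str() << "\n";
    } while (pd.next_impl());
    return 0;
}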
@@ -942,14 +938,7 @@ void Convolution::initDescriptor(const NodeConfig& config) {
     if (isStridedBlobsSupported) {
         createDescriptor({config.inConfs[0].getMemDesc()}, {config.outConfs[0].getMemDesc()});
     }
-    // attr[0] - depthwise, quantize
-    // attr[1] - binary
-    dnnl::primitive_attr attrs[2];
     auto attrsNum = shouldTryBrgconv ? 2 : 1;
-    setPostOps(attrs[0], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), true);
-    if (shouldTryBrgconv) {
-        setPostOps(attrs[1], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), false);
-    }
 
     auto rightConfig = selectedPD->getConfig();
     size_t selected_count = 0;
@@ -961,7 +950,7 @@ void Convolution::initDescriptor(const NodeConfig& config) {
         if (containJitImpl && isPossibleToSkipInitConfig(desc))
             continue;
         for (int n = 0; n < attrsNum; n++) {
-            auto &attr = attrs[n];
+            auto &attr = *pInitAttrs[n];
             addZeroPoints(attr);
             auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
             while (static_cast<bool>(itpd)) {
@@ -1554,6 +1543,35 @@ void Convolution::appendZeroPointsArgs() {
     }
 }
 
+void Convolution::initTryBrgconvFlag() {
+    // Due to performance issues, brgconv is only enabled by default for:
+    // 1. static shapes (a dynamic shape may change the weights layout whenever the input shape changes, causing performance issue 86948)
+    // 2. platforms with AMX support
+    // 3. on AVX512, int8 models without binary post ops
+    if (!isDynamicNode()) {
+        if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) {
+            shouldTryBrgconv = true;
+        } else if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) {
+            // should be removed once the binary post ops performance issue is resolved
+            // heuristic: if the model is int8 and has binary post ops, do not use brgconv
+            if (canBeExecutedInInt8()) {
+                shouldTryBrgconv = true;
+                dnnl::primitive_attr attrs;
+                setPostOps(attrs, MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), false);
+                const auto& ops = attrs.get_post_ops();
+                for (int i = 0; i < ops.len(); i++) {
+                    if (ops.kind(i) == dnnl::primitive::kind::binary) {
+                        shouldTryBrgconv = false;
+                        break;
+                    }
+                }
+                if (shouldTryBrgconv)
+                    pInitAttrs[1] = std::make_shared<dnnl::primitive_attr>(std::move(attrs));
+            }
+        }
+    }
+}
+
 } // namespace node
 } // namespace intel_cpu
 } // namespace ov
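
The post-op scan in initTryBrgconvFlag() is plain oneDNN attr introspection: build the attr, then walk its post-op chain looking for kind::binary. A standalone sketch of the same detection, assuming oneDNN 2.x; the post ops appended here are arbitrary stand-ins for whatever setPostOps() would produce:

#include <iostream>
#include "dnnl.hpp"

int main() {
    using namespace dnnl;
    // Build an attr with one eltwise and one binary post op.
    post_ops ops;
    ops.append_eltwise(1.f, algorithm::eltwise_relu, 0.f, 0.f);
    memory::desc per_channel({1, 16, 1, 1}, memory::data_type::f32, memory::format_tag::nchw);
    ops.append_binary(algorithm::binary_add, per_channel);

    primitive_attr attr;
    attr.set_post_ops(ops);

    // Same scan as initTryBrgconvFlag(): reject brgconv if any post op is binary.
    bool has_binary = false;
    const auto post = attr.get_post_ops();
    for (int i = 0; i < post.len(); i++) {
        if (post.kind(i) == primitive::kind::binary) {
            has_binary = true;
            break;
        }
    }
    std::cout << "binary post op present: " << std::boolalpha << has_binary << "\n";
    return 0;
}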

File 2 of 2: Convolution node header (.h)

@@ -102,6 +102,7 @@ private:
     MemoryPtr getOutputMemory() const;
     void appendZeroPointsArgs();
+    void initTryBrgconvFlag();
 
     bool withBiases;
     bool withSum;
@@ -136,8 +137,9 @@ private:
     const size_t Y_AXIS = 1;
 
     bool isWino = false;
-    // if we have amx support and shape is static or user specified we will try brgconv
     bool shouldTryBrgconv = false;
+    // cached attrs for later reuse: [0] - legacy depthwise/quantize post ops, [1] - binary post ops
+    AttrPtr pInitAttrs[2];
     AttrPtr pAttr;
     bool autoPadding = false;
     FusedSubgraphPtr subgraph;
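
A note on the new pInitAttrs cache declared above: building post-op attrs is not free, so the .cpp changes construct each attr at most once and reuse it across initSupportedPrimitiveDescriptors() and initDescriptor(). A minimal sketch of that lazy-init pattern, assuming AttrPtr is std::shared_ptr<dnnl::primitive_attr> (as the std::make_shared calls in the diff imply); buildPostOps is a hypothetical stand-in for the plugin's setPostOps():

#include <memory>
#include "dnnl.hpp"

using AttrPtr = std::shared_ptr<dnnl::primitive_attr>;  // assumed alias

struct AttrCache {
    // [0] - legacy depthwise/quantize post ops, [1] - binary post ops
    AttrPtr slots[2];

    // Hypothetical stand-in for the plugin's setPostOps().
    static void buildPostOps(dnnl::primitive_attr& attr, bool useLegacyPostOps) {
        dnnl::post_ops ops;
        ops.append_eltwise(1.f, dnnl::algorithm::eltwise_relu, 0.f, 0.f);
        (void)useLegacyPostOps;  // the real code branches on this flag
        attr.set_post_ops(ops);
    }

    // Build lazily, reuse afterwards -- the same pattern as the diff's
    // `if (shouldTryBrgconv && !pInitAttrs[1])` guard.
    const dnnl::primitive_attr& get(size_t i) {
        if (!slots[i]) {
            slots[i] = std::make_shared<dnnl::primitive_attr>();
            buildPostOps(*slots[i], /*useLegacyPostOps=*/i == 0);
        }
        return *slots[i];
    }
};

int main() {
    AttrCache cache;
    const auto& first = cache.get(0);   // built on first use
    const auto& again = cache.get(0);   // reused on second use
    return &first == &again ? 0 : 1;
}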