[CPU] Default enable avx512 f32 brgconv (#12620)
This commit is contained in:
@@ -279,11 +279,6 @@ Convolution::Convolution(const std::shared_ptr<ngraph::Node>& op, const dnnl::en
|
||||
paddingR = groupConvolutionOp->get_pads_end();
|
||||
autoPadding = one_of(groupConvolutionOp->get_auto_pad(), ov::op::PadType::SAME_UPPER, ov::op::PadType::SAME_LOWER);
|
||||
}
|
||||
|
||||
// Due to performance issue, brgconv will only be enabled by default:
|
||||
// 1, support amx
|
||||
// 2, static shape(dynamic shape may change weights layout if the input shape changes and cause performance issue: 86948)
|
||||
shouldTryBrgconv = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx) && !isDynamicNode();
|
||||
}
|
||||
|
||||
bool Convolution::canBeExecutedInInt8() const {
|
||||
@@ -375,6 +370,8 @@ void Convolution::getSupportedDescriptors() {
|
||||
|
||||
withBiases = getOriginalInputsNumber() == 3;
|
||||
|
||||
initTryBrgconvFlag();
|
||||
|
||||
if (!implPriorities.empty()) {
|
||||
isPrimitivesPriorityDefined = true;
|
||||
// winograd support only constant weights and bias
|
||||
@@ -383,7 +380,7 @@ void Convolution::getSupportedDescriptors() {
|
||||
getParentEdgeAt(1)->getParent()->isConstant() && getParentEdgeAt(1)->getParent()->getType() == Type::Input &&
|
||||
(withBiases ? (getParentEdgeAt(2)->getParent()->isConstant() && getParentEdgeAt(2)->getParent()->getType() == Type::Input) : true);
|
||||
|
||||
// AVX512 brconv is disabled by default due to performance issues. User can force it via Primitives priority mechanism.
|
||||
// AVX512 brconv may be disabled by heuristics due to performance issues. User can force it via Primitives priority mechanism.
|
||||
if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) {
|
||||
std::for_each(implPriorities.begin(), implPriorities.end(), [&](const impl_desc_type& desc_type) {
|
||||
if (desc_type & impl_desc_type::brgconv_avx512) {
|
||||
@@ -715,13 +712,12 @@ void Convolution::initSupportedPrimitiveDescriptors() {
|
||||
if (!supportedPrimitiveDescriptors.empty())
|
||||
return;
|
||||
|
||||
// attr[0] - depthwise, quantize
|
||||
// attr[1] - binary
|
||||
dnnl::primitive_attr attrs[2];
|
||||
pInitAttrs[0] = std::make_shared<dnnl::primitive_attr>();
|
||||
auto attrsNum = shouldTryBrgconv ? 2 : 1;
|
||||
setPostOps(attrs[0], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), true);
|
||||
if (shouldTryBrgconv) {
|
||||
setPostOps(attrs[1], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), false);
|
||||
setPostOps(*pInitAttrs[0], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), true);
|
||||
if (shouldTryBrgconv && !pInitAttrs[1]) {
|
||||
pInitAttrs[1] = std::make_shared<dnnl::primitive_attr>();
|
||||
setPostOps(*pInitAttrs[1], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), false);
|
||||
}
|
||||
|
||||
bool containJitImpl = false;
|
||||
@@ -730,7 +726,7 @@ void Convolution::initSupportedPrimitiveDescriptors() {
|
||||
if (containJitImpl && isPossibleToSkipInitConfig(desc))
|
||||
continue;
|
||||
for (int i = 0; i < attrsNum; i++) {
|
||||
auto &attr = attrs[i];
|
||||
auto &attr = *pInitAttrs[i];
|
||||
addZeroPoints(attr);
|
||||
auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
|
||||
while (static_cast<bool>(itpd)) {
|
||||
@@ -942,14 +938,7 @@ void Convolution::initDescriptor(const NodeConfig& config) {
|
||||
if (isStridedBlobsSupported) {
|
||||
createDescriptor({config.inConfs[0].getMemDesc()}, {config.outConfs[0].getMemDesc()});
|
||||
}
|
||||
// attr[0] - depthwise, quantize
|
||||
// attr[1] - binary
|
||||
dnnl::primitive_attr attrs[2];
|
||||
auto attrsNum = shouldTryBrgconv ? 2 : 1;
|
||||
setPostOps(attrs[0], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), true);
|
||||
if (shouldTryBrgconv) {
|
||||
setPostOps(attrs[1], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), false);
|
||||
}
|
||||
|
||||
auto rightConfig = selectedPD->getConfig();
|
||||
size_t selected_count = 0;
|
||||
@@ -961,7 +950,7 @@ void Convolution::initDescriptor(const NodeConfig& config) {
|
||||
if (containJitImpl && isPossibleToSkipInitConfig(desc))
|
||||
continue;
|
||||
for (int n = 0; n < attrsNum; n++) {
|
||||
auto &attr = attrs[n];
|
||||
auto &attr = *pInitAttrs[n];
|
||||
addZeroPoints(attr);
|
||||
auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
|
||||
while (static_cast<bool>(itpd)) {
|
||||
@@ -1554,6 +1543,33 @@ void Convolution::appendZeroPointsArgs() {
|
||||
}
|
||||
}
|
||||
|
||||
// Decide whether the brgemm-based convolution (brgconv) implementations should be
// tried when building the primitive descriptor list. Sets the `shouldTryBrgconv`
// member; as a side effect it may also cache the binary-post-ops attr in
// pInitAttrs[1] so initSupportedPrimitiveDescriptors() does not have to rebuild it.
//
// Due to performance issue, brgconv will only be enabled by default:
// 1, static shape(dynamic shape may change weights layout if the input shape changes and cause performance issue: 86948)
// 2, support amx
// 3, support avx512 except int8 with binary postops
void Convolution::initTryBrgconvFlag() {
    // Dynamic shapes are excluded entirely (see issue 86948 above): a changing input
    // shape can trigger a weights-relayout and hurt performance.
    if (!isDynamicNode()) {
        if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) {
            // AMX-capable machines: always worth trying brgconv.
            shouldTryBrgconv = true;
        } else if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) {
            shouldTryBrgconv = true;
            // should remove after binary postops performance issue resolved
            // heuristics: if model has binary post ops we will not use brgconv
            // Build the "binary" flavour of the post-ops attr (third setPostOps arg = false)
            // on a dummy static shape, then scan it for any binary post-op.
            dnnl::primitive_attr attrs;
            setPostOps(attrs, MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), false);
            const auto& ops = attrs.get_post_ops();
            for (int i = 0; i < ops.len(); i++) {
                if (ops.kind(i) == dnnl::primitive::kind::binary) {
                    shouldTryBrgconv = false;
                    break;
                }
            }
            // Reuse the attr we just built rather than discarding it: cache it as the
            // binary-post-ops slot ([1]) for later descriptor creation.
            if (shouldTryBrgconv)
                pInitAttrs[1] = std::make_shared<dnnl::primitive_attr>(std::move(attrs));
        }
        // NOTE(review): on sub-avx512 ISAs shouldTryBrgconv keeps its default (false,
        // per the header's in-class initializer) — presumably brgconv kernels do not
        // exist below avx512_core; confirm against oneDNN brgemm requirements.
    }
}
|
||||
|
||||
} // namespace node
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
||||
|
||||
@@ -102,6 +102,7 @@ private:
|
||||
MemoryPtr getOutputMemory() const;
|
||||
|
||||
void appendZeroPointsArgs();
|
||||
void initTryBrgconvFlag();
|
||||
|
||||
bool withBiases;
|
||||
bool withSum;
|
||||
@@ -136,8 +137,9 @@ private:
|
||||
const size_t Y_AXIS = 1;
|
||||
|
||||
bool isWino = false;
|
||||
// if we have amx support and shape is static or user specified we will try brgconv
|
||||
bool shouldTryBrgconv = false;
|
||||
// cache attr for later usage. [0] - depthwise, quantize, [1] - binary
|
||||
AttrPtr pInitAttrs[2];
|
||||
AttrPtr pAttr;
|
||||
bool autoPadding = false;
|
||||
FusedSubgraphPtr subgraph;
|
||||
|
||||
Reference in New Issue
Block a user