[CPU] Get rid of shouldTryBrgconv flag (#17539)

No code logic changed overall
This commit is contained in:
Egor Duplenskii
2023-05-17 08:17:36 +02:00
committed by GitHub
parent 2cc3a45959
commit 84b9262aea
2 changed files with 14 additions and 44 deletions

View File

@@ -363,16 +363,6 @@ const std::vector<impl_desc_type>& Convolution::getPrimitivesPriority() {
impl_desc_type::ref,
};
if (!shouldTryBrgconv) {
// remove brgconv_avx512_amx_1x1/brgconv_avx512_amx/brgconv_avx512/brgconv_avx512_1x1
for (auto it = priorities.begin(); it != priorities.end(); ) {
if (((*it) & brgconv_avx512) == brgconv_avx512)
it = priorities.erase(it);
else
++it;
}
}
for (const auto& impl : priorities) {
if (std::find(implPriorities.begin(), implPriorities.end(), impl) == implPriorities.end())
implPriorities.push_back(impl);
@@ -380,12 +370,14 @@ const std::vector<impl_desc_type>& Convolution::getPrimitivesPriority() {
return implPriorities;
}
// Class-wide constant, initialized from the mayiuse() CPU capability query:
// true when the CPU reports avx512_core support.
const bool Convolution::isBrgConvAvailable = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core);
void Convolution::getSupportedDescriptors() {
if (!descs.empty())
return;
if (!attrs.empty())
IE_THROW() << "attrs vector is not empty '" << getName() << "'";
bool enforceBrgconv = false;
attrs.reserve(2);
withBiases = getOriginalInputsNumber() == 3;
@@ -396,15 +388,6 @@ void Convolution::getSupportedDescriptors() {
dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) && !canBeExecutedInInt8() &&
getParentEdgeAt(1)->getParent()->isConstant() && getParentEdgeAt(1)->getParent()->getType() == Type::Input &&
(withBiases ? (getParentEdgeAt(2)->getParent()->isConstant() && getParentEdgeAt(2)->getParent()->getType() == Type::Input) : true);
// AVX512 brgconv may be disabled by heuristics due to performance issues. The user can force it via the primitives priority mechanism.
if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) &&
std::any_of(implPriorities.begin(), implPriorities.end(), [](const impl_desc_type& desc_type) {
return static_cast<bool>(desc_type & impl_desc_type::brgconv_avx512);
})) {
shouldTryBrgconv = true;
enforceBrgconv = true;
}
}
int expectedInputEdgesNum = static_cast<int>(getOriginalInputsNumber());
@@ -509,9 +492,7 @@ void Convolution::getSupportedDescriptors() {
if (canBeExecutedInInt8()) {
DEBUG_LOG(getName(), "Creating I8 descriptor");
// initTryBrgconvFlag depends on outputDataType, should be after outputDataType computed
if (!enforceBrgconv)
initTryBrgconvFlag();
SetPostOpsAndZeroPoints(attrs);
in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getInputShapeAtPort(0), inputDataType, nspc);
@@ -548,9 +529,7 @@ void Convolution::getSupportedDescriptors() {
outputDataType = memory::data_type::f32;
eltwisePrecision = Precision::FP32;
}
// initTryBrgconvFlag depends on outputDataType and eltwisePrecision.
if (!enforceBrgconv)
initTryBrgconvFlag();
SetPostOpsAndZeroPoints(attrs);
if (!one_of(ndims, 3, 4, 5))
@@ -560,10 +539,10 @@ void Convolution::getSupportedDescriptors() {
auto outputShape = getOutputShapeAtPort(0);
#if defined(OPENVINO_ARCH_X86_64)
bool acceptedFormat = inputDataType == memory::data_type::bf16;
// nspc shows better performance only with brgconv implementation
bool nspcFirst = isBrgConvAvailable && one_of(inputDataType, memory::data_type::bf16, memory::data_type::f32);
bool nspcAdded = false;
acceptedFormat |= (shouldTryBrgconv && inputDataType == memory::data_type::f32);
if (acceptedFormat && impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core)) {
if (nspcFirst) {
in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(inputShape, inputDataType, nspc);
out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(outputShape, outputDataType, nspc);
createDescriptor({ in_candidate }, { out_candidate });
@@ -986,7 +965,8 @@ void Convolution::SetPostOpsAndZeroPoints(std::vector<dnnl::primitive_attr> &att
if (attrContainsPostOp(attrs[0], dnnl::impl::primitive_kind::convolution)) {
return;
}
//no matter whether shouldTryBrgconv is true, 1 attribute is enough. Avoid duplicated attribute
// no matter if brgconv is available, 1 attribute is enough. Avoid duplicated attribute
if (inputZeroPointType == zpType::None &&
!attrContainsPostOp(attrs[0], dnnl::impl::primitive_kind::depthwise) &&
!attrContainsPostOp(attrs[0], dnnl::impl::primitive_kind::quantization)) {
@@ -997,11 +977,11 @@ void Convolution::SetPostOpsAndZeroPoints(std::vector<dnnl::primitive_attr> &att
DEBUG_LOG(getName(), ": Per channel zero point can only supported on attr[0].Avoid extra useless attribute.");
return;
}
if (!shouldTryBrgconv) {
DEBUG_LOG(getName(), ": shouldTryBrgconv = false. Skip extra attribute");
if (!isBrgConvAvailable) {
DEBUG_LOG(getName(), ": brgconv is not available. Skip extra attribute");
return;
}
// Try 2 attributes. Note that shouldTryBrgconv could be set via RTinfo to enforce brgconv.
// Try 2 attributes.
attrs.resize(2);
if (inputZeroPointType == zpType::PerTensor && dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) {
//WR to ONEDNN limitation. attr[1] - legacy post ops + stock zero point.
@@ -1651,15 +1631,6 @@ void Convolution::appendZeroPointsArgs() {
}
}
// brgconv will be enabled by default when HW supports avx512+
void Convolution::initTryBrgconvFlag() {
if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) {
shouldTryBrgconv = true;
}
DEBUG_LOG(getName(), ": shouldTryBrgconv = ", shouldTryBrgconv);
}
void Convolution::initializeInputZeroPoints(const uint8_t* inputZpData, const size_t inputZpSize) {
if (!inputZeroPoints.empty() || !legacyInputZeroPoints.empty())
IE_THROW() << "input zero point is not empty '" << getName() << "'";

View File

@@ -116,7 +116,6 @@ private:
VectorDims outputStaticShape() const;
void appendLegacyZeroPointsArgs();
void appendZeroPointsArgs();
void initTryBrgconvFlag();
bool withBiases;
bool withSum;
@@ -156,7 +155,7 @@ private:
const size_t Y_AXIS = 1;
bool isWino = false;
bool shouldTryBrgconv = false;
static const bool isBrgConvAvailable;
std::vector<dnnl::primitive_attr> attrs;
AttrPtr pAttr;
bool autoPadding = false;