[CPU] returned old behavior for fp32 avx2 1x1 conv with dw conv fusing (#4484)

Anton Voronov 2021-03-29 13:55:13 +03:00 committed by GitHub
parent 1a6b823e5a
commit 4d112b1f90
3 changed files with 54 additions and 29 deletions


@@ -846,7 +846,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) {
bool isSupportedParams = layer->_group == 1 &&
is1x1Convolution(layer) && // TODO [oneDNN] : fusing is permitted only with 1x1 convolutions
everyone_is(1, layer->_stride[X_AXIS], layer->_stride[Y_AXIS]) &&
- one_of(layer->outData[0].get()->getPrecision(), Precision::FP32, Precision::U8) &&
+ one_of(layer->outData[0].get()->getPrecision(), Precision::FP32) &&
node->getChildEdgeAt(0)->getDims().ndims() == 4;
if (!isSupportedParams) return false;
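
The checks above lean on small variadic helpers from the plugin's utilities. A minimal sketch of how one_of and everyone_is can be written (assuming C++17 fold expressions; the plugin ships its own definitions, so this is illustrative only):

// one_of(x, a, b, ...): true if x compares equal to any of the listed values.
template <typename T, typename... Args>
bool one_of(T value, Args... candidates) {
    return ((value == candidates) || ...);
}

// everyone_is(x, a, b, ...): true if every listed value compares equal to x.
template <typename T, typename... Args>
bool everyone_is(T value, Args... items) {
    return ((items == value) && ...);
}

With these, one_of(prec, Precision::FP32) simply pins the parent 1x1 convolution's output precision to FP32, which is the scope this commit narrows the fusing back to.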
@@ -886,6 +886,9 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) {
if (!childConvolutionNode->inputZeroPoints.empty() || !childConvolutionNode->weightsZeroPoints.empty())
return false;
+ bool withBias = (childLayer->_biases != nullptr && childLayer->_biases->size() != 0) ||
+ childConvolutionNode->getBaseIntputsNumber() == 3;
auto allPads = getPaddings(*childLayer);
bool isSupportedParams = childLayer->_out_depth == childLayer->_group &&
@@ -895,13 +898,36 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) {
everyone_is(1, allPads.end[X_AXIS], allPads.end[Y_AXIS]) &&
everyone_is(1, childLayer->_dilation[X_AXIS], childLayer->_dilation[Y_AXIS]) &&
childLayer->_stride[X_AXIS] == childLayer->_stride[Y_AXIS] &&
- false && // TODO [oneDNN]: disabled while not ported
- one_of(childLayer->_stride[X_AXIS], 1 /*, 2*/) && // TODO [oneDNN]: stride 2 should also be supported
+ withBias &&
+ one_of(childLayer->_stride[X_AXIS], 1, 2) &&
childNode->getChildEdgeAt(0)->getDims().ndims() == 4;
return isSupportedParams;
};
+ auto isFusingWorthwhile = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
+ auto layer = std::dynamic_pointer_cast<ConvolutionLayer>(childNode->getCnnLayer());
+ if (layer == nullptr)
+ IE_THROW() << "Cannot get convolution layer " << childNode->getName();
+ auto inDims = childNode->inDims[0];
+ auto outDims = childNode->outDims[0];
+ int elemSize = layer->precision.size();
+ int L3_cache_size = utils::get_cache_size(3, false);
+ int dw_conv_input_size = inDims[0] * inDims[1] * inDims[2] * inDims[3] * elemSize;
+ int dw_conv_output_size = outDims[0] * outDims[1]* outDims[2] * outDims[3] * elemSize;
+ auto parentConvolutionNode = std::dynamic_pointer_cast<MKLDNNConvolutionNode>(parentNode);
+ if (parentConvolutionNode == nullptr)
+ IE_THROW() << "Cannot get convolution node " << parentNode->getName();
+ if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) || impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common))
+ return false;
+ return (dw_conv_input_size + dw_conv_output_size > L3_cache_size / 2);
+ };
for (int i = 0; i < graphNodes.size(); i++) {
if (!isConvolutionNode(graphNodes[i])) continue;
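
The new isFusingWorthwhile lambda gates the fusion twice: by ISA (only AVX2 machines that do not also report avx512_common, matching the fp32 avx2 scope in the commit title) and by a memory-footprint heuristic that fuses only when the depthwise convolution's input and output tensors together exceed half of the L3 cache, i.e. when the intermediate tensor would spill out of cache anyway and fusing saves a round trip to memory. A standalone sketch of that heuristic, with a hard-coded stand-in for the cache query (the real code calls oneDNN's internal utils::get_cache_size):

#include <cstddef>

// Hypothetical stand-in for the cache query; hard-coded to a 4 MiB shared L3.
inline size_t l3_cache_size_bytes() { return size_t(4) * 1024 * 1024; }

// in_dims / out_dims are the NCHW shapes of the depthwise convolution.
bool is_dw_fusing_worthwhile(const size_t in_dims[4], const size_t out_dims[4], size_t elem_size) {
    const size_t dw_in  = in_dims[0]  * in_dims[1]  * in_dims[2]  * in_dims[3]  * elem_size;
    const size_t dw_out = out_dims[0] * out_dims[1] * out_dims[2] * out_dims[3] * elem_size;
    // Fuse only when the tensors would not fit in half of L3 anyway.
    return dw_in + dw_out > l3_cache_size_bytes() / 2;
}

For example, with hypothetical fp32 shapes 1x128x56x56 in and 1x128x28x28 out (stride 2), the footprint is roughly 1.5 MiB + 0.4 MiB, about 1.9 MiB: below half of a 4 MiB L3 the fusion is skipped, while on a CPU with a 2 MiB L3 it would be applied.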
@@ -911,6 +937,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) {
auto childConvNode = parentConvNode->getChildEdgeAt(0)->getChild();
if (!isSutableChildConvolution(parentConvNode, childConvNode)) continue;
+ if (!isFusingWorthwhile(parentConvNode, childConvNode)) continue;
parentConvNode->fuseWith(childConvNode);
for (auto node : childConvNode->getFusedWith())


@@ -420,31 +420,31 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
PostOpsIntBlobMemory[blob_idx + 1]->FillZero();
PostOpsIntBlobMemory[blob_idx + 1]->SetData(biasPrc, memory::format_tag::x, biases->buffer(),
dwBiasesDims.size() * MKLDNNExtensionUtils::sizeOfDataType(biasPrc));
- // rewrite onto append_dw_k3s2p1
- // ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
- // dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
- // mkldnn::memory::convert_to_c(dw_conv_in_dt),
- // (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
- // (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());
+ // todo: rewrite onto append_dw_k3s2p1
+ ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
+ dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
+ mkldnn::memory::convert_to_c(dw_conv_in_dt),
+ static_cast<const float *>(PostOpsIntBlobMemory[blob_idx]->GetData()),
+ static_cast<const float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData()));
blob_idx += 2;
} else {
- // rewrite onto append_dw_k3s2p1
- // ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
- // dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
- // mkldnn::memory::convert_to_c(dw_conv_in_dt),
- // static_cast<float *>(getParentEdgeAt(
- // baseInputsNumber + 0)->getMemory().GetData()),
- // static_cast<float *>(getParentEdgeAt(
- // baseInputsNumber + 1)->getMemory().GetData()));
+ // todo: rewrite onto append_dw_k3s2p1
+ ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
+ dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
+ mkldnn::memory::convert_to_c(dw_conv_in_dt),
+ static_cast<const float *>(getParentEdgeAt(
+ baseInputsNumber + 0)->getMemory().GetData()),
+ static_cast<const float *>(getParentEdgeAt(
+ baseInputsNumber + 1)->getMemory().GetData()));
}
} else {
- // rewrite onto append_dw_k3s2p1
- // ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
- // dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
- // mkldnn::memory::convert_to_c(dw_conv_in_dt),
- // nullptr,
- // nullptr);
+ // todo: rewrite onto append_dw_k3s2p1
+ ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
+ dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
+ mkldnn::memory::convert_to_c(dw_conv_in_dt),
+ nullptr,
+ nullptr);
}
if (convolutionNode->wScale != nullptr) {
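
The re-enabled append_dw_conv post-op attaches the following 3x3 depthwise convolution (pad 1, stride 1 or 2) to the 1x1 convolution's primitive, passing the depthwise weights and biases either from preprocessed internal blobs or straight from the extra input edges; the in-code TODO notes it should eventually be rewritten onto oneDNN's append_dw_k3s2p1. As a plain, unoptimized reference of the operation being fused, here is a sketch of a 3x3 depthwise convolution over an NCHW tensor (illustrative only; the fused kernel itself is generated by oneDNN):

#include <cstddef>
#include <vector>

// src: N x C x H x W, wei: C x 3 x 3, bias: C, pad = 1 on each side.
void dw_conv3x3(const std::vector<float>& src, std::vector<float>& dst,
                const std::vector<float>& wei, const std::vector<float>& bias,
                size_t N, size_t C, size_t H, size_t W, size_t stride) {
    const size_t OH = (H + 2 - 3) / stride + 1;
    const size_t OW = (W + 2 - 3) / stride + 1;
    dst.assign(N * C * OH * OW, 0.f);
    for (size_t n = 0; n < N; ++n)
        for (size_t c = 0; c < C; ++c)
            for (size_t oh = 0; oh < OH; ++oh)
                for (size_t ow = 0; ow < OW; ++ow) {
                    float acc = bias[c];
                    for (size_t kh = 0; kh < 3; ++kh)
                        for (size_t kw = 0; kw < 3; ++kw) {
                            const ptrdiff_t ih = static_cast<ptrdiff_t>(oh * stride + kh) - 1;
                            const ptrdiff_t iw = static_cast<ptrdiff_t>(ow * stride + kw) - 1;
                            if (ih < 0 || iw < 0 ||
                                ih >= static_cast<ptrdiff_t>(H) || iw >= static_cast<ptrdiff_t>(W))
                                continue;
                            acc += src[((n * C + c) * H + ih) * W + iw] * wei[(c * 3 + kh) * 3 + kw];
                        }
                    dst[((n * C + c) * OH + oh) * OW + ow] = acc;
                }
}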
@@ -482,14 +482,11 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
oShiftDataVector.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift,
- (const float *)PostOpsIntBlobMemory[blob_idx]->GetData(),
- (const float *)PostOpsIntBlobMemory[blob_idx + 1]->GetData());
+ static_cast<const float *>(PostOpsIntBlobMemory[blob_idx]->GetData()),
+ static_cast<const float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData()));
blob_idx += 2;
}
- IE_THROW() << "append_dw_conv is not ported";
- continue;
}
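
This second hunk removes the temporary IE_THROW()/continue guard that had been skipping the path while append_dw_conv was unported, and converts the depthwise_scale_shift arguments to static_cast. That post-op applies a per-output-channel affine transform to the convolution result; a plain reference of the same operation on an NCHW tensor (illustrative only):

#include <cstddef>
#include <vector>

// data: N x C x (H*W); y = scale[c] * x + shift[c] for every element of channel c.
void scale_shift_nchw(std::vector<float>& data, const std::vector<float>& scale,
                      const std::vector<float>& shift, size_t N, size_t C, size_t spatial) {
    for (size_t n = 0; n < N; ++n)
        for (size_t c = 0; c < C; ++c) {
            float* plane = data.data() + (n * C + c) * spatial;
            for (size_t i = 0; i < spatial; ++i)
                plane[i] = plane[i] * scale[c] + shift[c];
        }
}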

@@ -1 +1 @@
- Subproject commit b5497010eb48fed033d91d4499c50d797452be74
+ Subproject commit fdf537051e8d30adcf56f0a56afa3cc3abddc7a4