[CPU] returned old behavior for fp32 avx2 1x1 conv with dw conv fusing (#4484)
parent 1a6b823e5a
commit 4d112b1f90
@@ -846,7 +846,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) {
         bool isSupportedParams = layer->_group == 1 &&
                 is1x1Convolution(layer) && // TODO [oneDNN] : fusing is permitted only with 1x1 convolutions
                 everyone_is(1, layer->_stride[X_AXIS], layer->_stride[Y_AXIS]) &&
-                one_of(layer->outData[0].get()->getPrecision(), Precision::FP32, Precision::U8) &&
+                one_of(layer->outData[0].get()->getPrecision(), Precision::FP32) &&
                 node->getChildEdgeAt(0)->getDims().ndims() == 4;
         if (!isSupportedParams) return false;
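With this change the parent 1x1 convolution is fused only when its output precision is FP32; the U8 path no longer takes this fusion. For context, `one_of` and `everyone_is` are small variadic helpers used throughout the graph optimizer; they are not part of this diff, and the following is only an illustrative sketch of the semantics the check above relies on (names match the call sites, implementation is assumed):

    #include <initializer_list>

    // Sketch only: true if `val` equals any of the listed candidates.
    template <typename T, typename... Args>
    bool one_of(T val, Args... candidates) {
        for (auto c : {static_cast<T>(candidates)...})
            if (val == c) return true;
        return false;
    }

    // Sketch only: true if every listed value equals `expected`.
    template <typename T, typename... Args>
    bool everyone_is(T expected, Args... values) {
        for (auto v : {static_cast<T>(values)...})
            if (v != expected) return false;
        return true;
    }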
@@ -886,6 +886,9 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) {
         if (!childConvolutionNode->inputZeroPoints.empty() || !childConvolutionNode->weightsZeroPoints.empty())
             return false;
 
+        bool withBias = (childLayer->_biases != nullptr && childLayer->_biases->size() != 0) ||
+                childConvolutionNode->getBaseIntputsNumber() == 3;
+
         auto allPads = getPaddings(*childLayer);
 
         bool isSupportedParams = childLayer->_out_depth == childLayer->_group &&
@@ -895,13 +898,36 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) {
                 everyone_is(1, allPads.end[X_AXIS], allPads.end[Y_AXIS]) &&
                 everyone_is(1, childLayer->_dilation[X_AXIS], childLayer->_dilation[Y_AXIS]) &&
                 childLayer->_stride[X_AXIS] == childLayer->_stride[Y_AXIS] &&
-                false && // TODO [oneDNN]: disabled while not ported
-                one_of(childLayer->_stride[X_AXIS], 1 /*, 2*/) && // TODO [oneDNN]: stride 2 should also be supported
+                withBias &&
+                one_of(childLayer->_stride[X_AXIS], 1, 2) &&
                 childNode->getChildEdgeAt(0)->getDims().ndims() == 4;
 
         return isSupportedParams;
     };
 
+    auto isFusingWorthwhile = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
+        auto layer = std::dynamic_pointer_cast<ConvolutionLayer>(childNode->getCnnLayer());
+        if (layer == nullptr)
+            IE_THROW() << "Cannot get convolution layer " << childNode->getName();
+
+        auto inDims = childNode->inDims[0];
+        auto outDims = childNode->outDims[0];
+        int elemSize = layer->precision.size();
+
+        int L3_cache_size = utils::get_cache_size(3, false);
+        int dw_conv_input_size = inDims[0] * inDims[1] * inDims[2] * inDims[3] * elemSize;
+        int dw_conv_output_size = outDims[0] * outDims[1] * outDims[2] * outDims[3] * elemSize;
+
+        auto parentConvolutionNode = std::dynamic_pointer_cast<MKLDNNConvolutionNode>(parentNode);
+        if (parentConvolutionNode == nullptr)
+            IE_THROW() << "Cannot get convolution node " << parentNode->getName();
+
+        if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) || impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common))
+            return false;
+
+        return (dw_conv_input_size + dw_conv_output_size > L3_cache_size / 2);
+    };
+
     for (int i = 0; i < graphNodes.size(); i++) {
         if (!isConvolutionNode(graphNodes[i])) continue;
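The new `isFusingWorthwhile` lambda restores the pre-oneDNN heuristic: the fusion is applied only on AVX2 machines (and explicitly not on AVX-512 ones), and only when the depthwise convolution's input plus output tensors exceed half of the L3 cache, i.e. when the intermediate tensor cannot stay cache-resident and fusing saves a round trip to memory. A self-contained sketch of the same arithmetic with plain integers (hypothetical signature; the real code reads the dims from MKLDNN node objects):

    #include <cstdint>

    // Fusion pays off only when the dw-conv tensors spill out of half of L3.
    bool isFusingWorthwhile(const int64_t inDims[4], const int64_t outDims[4],
                            int elemSize, int64_t l3CacheSize) {
        int64_t dwInput  = inDims[0] * inDims[1] * inDims[2] * inDims[3] * elemSize;
        int64_t dwOutput = outDims[0] * outDims[1] * outDims[2] * outDims[3] * elemSize;
        return dwInput + dwOutput > l3CacheSize / 2;
    }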
@@ -911,6 +937,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) {
         auto childConvNode = parentConvNode->getChildEdgeAt(0)->getChild();
         if (!isSutableChildConvolution(parentConvNode, childConvNode)) continue;
 
+        if (!isFusingWorthwhile(parentConvNode, childConvNode)) continue;
+
         parentConvNode->fuseWith(childConvNode);
 
         for (auto node : childConvNode->getFusedWith())
@@ -420,31 +420,31 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
                 PostOpsIntBlobMemory[blob_idx + 1]->FillZero();
                 PostOpsIntBlobMemory[blob_idx + 1]->SetData(biasPrc, memory::format_tag::x, biases->buffer(),
                                                             dwBiasesDims.size() * MKLDNNExtensionUtils::sizeOfDataType(biasPrc));
-                // rewrite onto append_dw_k3s2p1
-                // ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
-                //                    dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
-                //                    mkldnn::memory::convert_to_c(dw_conv_in_dt),
-                //                    (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
-                //                    (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());
+                // todo: rewrite onto append_dw_k3s2p1
+                ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
+                                   dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
+                                   mkldnn::memory::convert_to_c(dw_conv_in_dt),
+                                   static_cast<const float *>(PostOpsIntBlobMemory[blob_idx]->GetData()),
+                                   static_cast<const float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData()));
 
                 blob_idx += 2;
             } else {
-                // rewrite onto append_dw_k3s2p1
-                // ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
-                //                    dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
-                //                    mkldnn::memory::convert_to_c(dw_conv_in_dt),
-                //                    static_cast<float *>(getParentEdgeAt(
-                //                            baseInputsNumber + 0)->getMemory().GetData()),
-                //                    static_cast<float *>(getParentEdgeAt(
-                //                            baseInputsNumber + 1)->getMemory().GetData()));
+                // todo: rewrite onto append_dw_k3s2p1
+                ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
+                                   dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
+                                   mkldnn::memory::convert_to_c(dw_conv_in_dt),
+                                   static_cast<const float *>(getParentEdgeAt(
+                                           baseInputsNumber + 0)->getMemory().GetData()),
+                                   static_cast<const float *>(getParentEdgeAt(
+                                           baseInputsNumber + 1)->getMemory().GetData()));
             }
         } else {
-            // rewrite onto append_dw_k3s2p1
-            // ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
-            //                    dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
-            //                    mkldnn::memory::convert_to_c(dw_conv_in_dt),
-            //                    nullptr,
-            //                    nullptr);
+            // todo: rewrite onto append_dw_k3s2p1
+            ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
+                               dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
+                               mkldnn::memory::convert_to_c(dw_conv_in_dt),
+                               nullptr,
+                               nullptr);
         }
 
         if (convolutionNode->wScale != nullptr) {
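Here the previously commented-out `ops.append_dw_conv(...)` calls are re-enabled; `ops` is an `mkldnn::post_ops` chain, and the `append_dw_conv` overload exists only in the vendored mkl-dnn fork (upstream oneDNN offers `append_dw_k3s1p1`/`append_dw_k3s2p1` instead, hence the todo). Once populated, the chain is attached to the convolution through its `primitive_attr`, roughly as in this hedged sketch using stock oneDNN API:

    #include "mkldnn.hpp"

    // Sketch: post-ops appended to `ops` (eltwise, sum, the fork-only
    // dw-conv, ...) execute inside the convolution kernel's epilogue once
    // the attr is passed to the primitive descriptor.
    mkldnn::primitive_attr makeAttrWithPostOps(const mkldnn::post_ops &ops) {
        mkldnn::primitive_attr attr;
        attr.set_post_ops(ops);
        return attr;
    }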
@@ -482,14 +482,11 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
                                                  oShiftDataVector.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
 
             ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift,
-                                 (const float *)PostOpsIntBlobMemory[blob_idx]->GetData(),
-                                 (const float *)PostOpsIntBlobMemory[blob_idx + 1]->GetData());
+                                 static_cast<const float *>(PostOpsIntBlobMemory[blob_idx]->GetData()),
+                                 static_cast<const float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData()));
 
             blob_idx += 2;
         }
 
-        IE_THROW() << "append_dw_conv is not ported";
-
-        continue;
     }
inference-engine/thirdparty/mkl-dnn (vendored, 2 changes)
@@ -1 +1 @@
-Subproject commit b5497010eb48fed033d91d4499c50d797452be74
+Subproject commit fdf537051e8d30adcf56f0a56afa3cc3abddc7a4