[CPU] Winograd convolution support (#5699)

This commit is contained in:
Maxim Andronov 2021-05-26 09:19:11 +03:00 committed by GitHub
parent 97a9a76ff9
commit 06fb16d799
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 154 additions and 14 deletions

View File

@ -66,6 +66,8 @@ public:
int getInputNum();
int getOutputNum();
void setChildPort(const size_t port) { child_port = port; }
void sharedMemFrom(const MKLDNNEdgePtr& edge);
MKLDNNEdgePtr getSharedEdge() const;
MKLDNNEdgePtr getSharedEdge(std::nothrow_t) const;

View File

@ -16,6 +16,8 @@
#include <mkldnn_extension_utils.h>
#include <utils/general_utils.h>
#include <ngraph/ops.hpp>
#include <cpu/x64/jit_generator.hpp>
#include "common/cpu_convert.h"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -48,8 +50,6 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node>
IE_THROW(NotImplemented) << errorMessage;
}
isPrimitivesPriorityDefined = op->get_rt_info().count("PrimitivesPriority") != 0;
auto convolutionOp = ngraph::as_type_ptr<ngraph::op::v1::Convolution>(op);
auto groupConvolutionOp = ngraph::as_type_ptr<ngraph::op::v1::GroupConvolution>(op);
@ -133,6 +133,26 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
withBiases = getOriginalInputsNumber() == 3;
if (!implPriorities.empty()) {
isPrimitivesPriorityDefined = true;
// Winograd supports only constant weights and biases
isWino = std::find(implPriorities.begin(), implPriorities.end(), impl_desc_type::jit_avx512_winograd) != implPriorities.end() &&
mkldnn::impl::cpu::x64::mayiuse(mkldnn::impl::cpu::x64::avx512_common) && !canBeExecutedInInt8() &&
getParentEdgeAt(1)->getParent()->isConstant() && getParentEdgeAt(1)->getParent()->getType() == Input &&
(withBiases ? (getParentEdgeAt(2)->getParent()->isConstant() && getParentEdgeAt(2)->getParent()->getType() == Input) : true);
}
if (isWinograd()) {
internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(0));
});
internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
if (!withBiases)
return MKLDNNMemoryDesc();
return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(1));
});
}
withSum = false;
int expectedInputEdgesNum = static_cast<int>(getOriginalInputsNumber());
for (int i = 0; i < fusedWith.size(); i++) {
@ -149,6 +169,36 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
}
}
// We can't convert the winograd memory descriptor to TensorDesc, so we remove the weight and bias edges and put their data into internalBlobs
if (isWinograd()) {
std::vector<MKLDNNEdgePtr> edgesToRemove;
internalBlobs.push_back(createInternalBlob(weightDims, 1, isGrouped));
edgesToRemove.push_back(getParentEdgeAt(1));
if (withBiases) {
internalBlobs.push_back(createInternalBlob(biasesDims, 2));
edgesToRemove.push_back(getParentEdgeAt(2));
}
if (expectedInputEdgesNum - getOriginalInputsNumber() > 0) {
size_t reconnectPort = 1;
for (size_t startPort = 2 + (withBiases ? 1 : 0); startPort < expectedInputEdgesNum; startPort++) {
getParentEdgeAt(startPort)->setChildPort(reconnectPort);
reconnectPort++;
}
}
for (size_t i = 0; i < edgesToRemove.size(); i++) {
removeEdge(edgesToRemove[i]);
}
expectedInputEdgesNum -= getOriginalInputsNumber() - 1;
if (withBiases) {
inDims.erase(inDims.begin() + 2);
}
inDims.erase(inDims.begin() + 1);
}
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0));
if (!inputZeroPoints.empty())
inputDataType = memory::data_type::u8;
@ -440,14 +490,11 @@ void MKLDNNConvolutionNode::createPrimitive() {
prim.reset(new convolution_forward(prim_desc));
auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
auto wei = getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPrimitive();
auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
if (withBiases) {
auto bias = getParentEdgesAtPort(2)[0]->getMemoryPtr()->GetPrimitive();
primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, wei}, {DNNL_ARG_BIAS, bias}, {DNNL_ARG_DST, dst}};
} else {
primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, wei}, {DNNL_ARG_DST, dst}};
}
if (withBiases)
primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getWeights()}, {DNNL_ARG_BIAS, getBias()}, {DNNL_ARG_DST, dst}};
else
primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getWeights()}, {DNNL_ARG_DST, dst}};
}
bool MKLDNNConvolutionNode::created() const {
@ -474,8 +521,8 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector<InferenceEngine::
std::vector<mkldnn::algorithm> algorithms;
// TODO [NM]: We cannot map wino_format on tensor descriptor for now
// algorithms.push_back(algorithm::convolution_winograd);
if (isWinograd())
algorithms.push_back(mkldnn::algorithm::convolution_winograd);
algorithms.push_back(mkldnn::algorithm::convolution_direct);
for (auto alg : algorithms) {
@ -722,6 +769,14 @@ bool MKLDNNConvolutionNode::canFuse(const MKLDNNNodePtr& node) const {
return canFuseSimpleOperation(node);
}
// Returns the weights memory primitive to bind to DNNL_ARG_WEIGHTS.
// For Winograd the weight data was moved into internalBlobs (the winograd
// memory descriptor cannot be expressed as a TensorDesc), so the primitive
// comes from internalBlobMemory[0]; otherwise it comes from input edge 1.
const mkldnn::memory& MKLDNNConvolutionNode::getWeights() const {
    if (isWinograd())
        return internalBlobMemory[0]->GetPrimitive();
    return getParentEdgeAt(1)->getMemory().GetPrimitive();
}
// Returns the bias memory primitive to bind to DNNL_ARG_BIAS.
// For Winograd the bias data lives in internalBlobMemory[1]; otherwise it is
// taken from input edge 2.
// NOTE(review): internalBlobMemory[1] / edge 2 only exist when a bias input was
// provided — this assumes callers guard on withBiases; verify at call sites.
const mkldnn::memory& MKLDNNConvolutionNode::getBias() const {
    return isWinograd() ? internalBlobMemory[1]->GetPrimitive() : getParentEdgeAt(2)->getMemory().GetPrimitive();
}
InferenceEngine::Precision MKLDNNConvolutionNode::getRuntimePrecision() const {
std::vector<InferenceEngine::Precision> inputPrecisions;
// Don't take bias precision into account
@ -812,4 +867,28 @@ bool MKLDNNConvolutionNode::isNspcAvailable() const {
return true;
}
// Copies constant weight/bias data from the parent Input node attached at
// 'edgeNum' into a freshly allocated FP32 blob. Used on the Winograd path,
// where weights/biases must live in internalBlobs because the winograd memory
// descriptor cannot be mapped onto a TensorDesc.
//
// dims      - expected dimensions of the produced blob
// edgeNum   - input port whose parent must be a constant MKLDNNInputNode
// isGrouped - selects the grouped weights layout in getWeightsLayoutByDims
// Throws IE exceptions when the parent is not an Input node, the const blob is
// missing, or the sizes disagree.
InferenceEngine::Blob::Ptr MKLDNNConvolutionNode::createInternalBlob(InferenceEngine::SizeVector dims, size_t edgeNum, bool isGrouped) {
    const auto constNode = std::dynamic_pointer_cast<MKLDNNInputNode>(getParentEdgeAt(edgeNum)->getParent());
    if (!constNode) {
        IE_THROW() << "Cannot cast " << edgeNum << " input to Input node for " << getName() << ".";
    }
    InferenceEngine::Blob::CPtr blb = constNode->getConstBlob();
    if (blb == nullptr)
        IE_THROW() << "Cannot get const blob for node " << getName() << ".";

    InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, dims, getWeightsLayoutByDims(dims, isGrouped));

    Blob::Ptr internalBlob = InferenceEngine::make_shared_blob<float>(desc);
    internalBlob->allocate();

    // Guard against a dims/const-blob mismatch before the bulk copy below.
    if (internalBlob->size() != blb->size()) {
        IE_THROW() << "Created internal blob and const blob have different sizes for node: " << getName() << ".";
    }

    // cpu_convert copies and, when the source precision differs, converts the
    // data to the FP32 precision of the internal blob.
    cpu_convert(blb->cbuffer(), internalBlob->buffer(), blb->getTensorDesc().getPrecision(), internalBlob->getTensorDesc().getPrecision(),
                internalBlob->size());

    return internalBlob;
}
REG_MKLDNN_PRIM_FOR(MKLDNNConvolutionNode, Convolution);

View File

@ -32,8 +32,12 @@ public:
}
InferenceEngine::Precision getRuntimePrecision() const override;
MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
const mkldnn::memory& getWeights() const;
const mkldnn::memory& getBias() const;
size_t descInputNumbers(MKLDNNDescriptor desc) override {
return static_cast<size_t>(getOriginalInputsNumber());
return static_cast<size_t>(isWinograd() ? 1 : getOriginalInputsNumber());
}
bool canBeExecutedInInt8() const;
@ -54,6 +58,8 @@ public:
return isGrouped && 1 == groupOC && 1 == groupIC;
}
bool isWinograd() const { return isWino; }
protected:
InferenceEngine::Precision fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const;
@ -63,12 +69,13 @@ private:
void filterSupportedDescriptors();
bool isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) const;
bool isNspcAvailable() const;
InferenceEngine::Blob::Ptr createInternalBlob(InferenceEngine::SizeVector dims, size_t edgeNum, bool isGrouped = false);
bool withBiases;
bool withSum;
bool withDWConv;
bool isGrouped;
bool isPrimitivesPriorityDefined;
bool isPrimitivesPriorityDefined = false;
std::vector<ptrdiff_t> stride;
std::vector<ptrdiff_t> dilation;
std::vector<ptrdiff_t> paddingL;
@ -92,6 +99,8 @@ private:
const size_t X_AXIS = 0;
const size_t Y_AXIS = 1;
bool isWino = false;
};
} // namespace MKLDNNPlugin

View File

@ -90,7 +90,7 @@ protected:
std::tie(postOpMgrPtr, fusedOps) = fusingParams;
if (postOpMgrPtr)
isBias = postOpMgrPtr->getFusedOpsNames() == "Add(PerChannel)";
isBias = (postOpMgrPtr->getFusedOpsNames() == "Add(PerChannel)" && selectedType != "jit_avx512_winograd");
convSpecificParams convParams;
std::vector<size_t> inputShape;
@ -722,4 +722,52 @@ INSTANTIATE_TEST_CASE_P(smoke_Conv_Jit_Planar_3D_FP32, ConvolutionLayerCPUTest,
/* ============= */
} // namespace
/* ============= Winograd ============= */
namespace winograd {

// Post-ops (fusing) combinations exercised together with the winograd kernel.
const std::vector<fusingSpecificParams> fusingParamsSet{
emptyFusingSpec,
fusingRelu,
fusingSum,
fusingAddPerChannel // bias
};

// Convolution geometry restricted to the shapes the winograd implementation
// targets here: 3x3 kernel, stride 1, dilation 1, no padding.
const SizeVector numOutChannels = { 32 };

const std::vector<SizeVector> kernels2d = { {3, 3} };
const std::vector<SizeVector> strides2d = { {1, 1} };
const std::vector<std::vector<ptrdiff_t>> padBegins2d = { {0, 0} };
const std::vector<std::vector<ptrdiff_t>> padEnds2d = { {0, 0} };
const std::vector<SizeVector> dilations2d = { {1, 1} };

const auto convParams_2D = ::testing::Combine(
::testing::ValuesIn(kernels2d),
::testing::ValuesIn(strides2d),
::testing::ValuesIn(padBegins2d),
::testing::ValuesIn(padEnds2d),
::testing::ValuesIn(dilations2d),
::testing::ValuesIn(numOutChannels),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);

// FP32 2D convolution forced onto the jit_avx512_winograd implementation
// (filtered by filterCPUInfoForDevice, so it is skipped on unsupported HW).
INSTANTIATE_TEST_CASE_P(smoke_Conv_winograd, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::FP32),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 1, 16, 10, 10 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(std::vector<CPUSpecificParams>{conv_winograd})),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);

} // namespace winograd
} // namespace CPULayerTestsDefinitions

View File

@ -70,4 +70,6 @@ namespace CPUTestUtils {
const auto conv_sse42_2D_1x1_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_sse42_1x1"}, "jit_sse42_1x1"};
const auto conv_avx2_2D_1x1_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx2_1x1"}, "jit_avx2_1x1"};
const auto conv_avx512_2D_1x1_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx512_1x1"}, "jit_avx512_1x1"};
// Selects the AVX-512 Winograd convolution implementation; in/out use the blocked nChw16c layout.
const auto conv_winograd = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_avx512_winograd"}, "jit_avx512_winograd"};
} // namespace CPUTestUtils