[CPU] Winograd convolution support (#5699)
Commit 06fb16d799 (parent 97a9a76ff9)
@@ -66,6 +66,8 @@ public:
     int getInputNum();
     int getOutputNum();

+    void setChildPort(const size_t port) { child_port = port; }

     void sharedMemFrom(const MKLDNNEdgePtr& edge);
     MKLDNNEdgePtr getSharedEdge() const;
+    MKLDNNEdgePtr getSharedEdge(std::nothrow_t) const;
@@ -16,6 +16,8 @@
 #include <mkldnn_extension_utils.h>
 #include <utils/general_utils.h>
 #include <ngraph/ops.hpp>
+#include <cpu/x64/jit_generator.hpp>
+#include "common/cpu_convert.h"

 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@@ -48,8 +50,6 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node>
         IE_THROW(NotImplemented) << errorMessage;
     }

-    isPrimitivesPriorityDefined = op->get_rt_info().count("PrimitivesPriority") != 0;
-
     auto convolutionOp = ngraph::as_type_ptr<ngraph::op::v1::Convolution>(op);
     auto groupConvolutionOp = ngraph::as_type_ptr<ngraph::op::v1::GroupConvolution>(op);
@@ -133,6 +133,26 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {

     withBiases = getOriginalInputsNumber() == 3;

+    if (!implPriorities.empty()) {
+        isPrimitivesPriorityDefined = true;
+        // Winograd supports only constant weights and bias
+        isWino = std::find(implPriorities.begin(), implPriorities.end(), impl_desc_type::jit_avx512_winograd) != implPriorities.end() &&
+                 mkldnn::impl::cpu::x64::mayiuse(mkldnn::impl::cpu::x64::avx512_common) && !canBeExecutedInInt8() &&
+                 getParentEdgeAt(1)->getParent()->isConstant() && getParentEdgeAt(1)->getParent()->getType() == Input &&
+                 (withBiases ? (getParentEdgeAt(2)->getParent()->isConstant() && getParentEdgeAt(2)->getParent()->getType() == Input) : true);
+    }
+
+    if (isWinograd()) {
+        internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
+            return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(0));
+        });
+        internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
+            if (!withBiases)
+                return MKLDNNMemoryDesc();
+            return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(1));
+        });
+    }
+
     withSum = false;
     int expectedInputEdgesNum = static_cast<int>(getOriginalInputsNumber());
     for (int i = 0; i < fusedWith.size(); i++) {
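For reference, the isWino condition added above reduces to a small predicate: Winograd must be requested through the primitives priority list, the CPU must support AVX-512, execution must stay in FP32 (not int8), and the weights (and bias, when present) must come from constant Input nodes. A minimal standalone sketch of the same logic; the enum and the boolean probes are illustrative stand-ins for impl_desc_type and mayiuse(), not the plugin's real API:

    #include <algorithm>
    #include <vector>

    enum class Impl { jit_avx512_winograd, jit_avx512, jit_avx2, ref };

    // Mirrors the isWino expression above: every condition must hold.
    bool winogradEligible(const std::vector<Impl>& priorities, bool hasAvx512,
                          bool int8, bool weightsConst, bool withBias, bool biasConst) {
        const bool requested = std::find(priorities.begin(), priorities.end(),
                                         Impl::jit_avx512_winograd) != priorities.end();
        return requested && hasAvx512 && !int8 && weightsConst && (!withBias || biasConst);
    }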
@@ -149,6 +169,36 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
         }
     }

+    // We can't convert a Winograd memory descriptor to a TensorDesc, so the weight and bias edges are removed and their data is moved into internalBlobs
+    if (isWinograd()) {
+        std::vector<MKLDNNEdgePtr> edgesToRemove;
+        internalBlobs.push_back(createInternalBlob(weightDims, 1, isGrouped));
+        edgesToRemove.push_back(getParentEdgeAt(1));
+
+        if (withBiases) {
+            internalBlobs.push_back(createInternalBlob(biasesDims, 2));
+            edgesToRemove.push_back(getParentEdgeAt(2));
+        }
+
+        if (expectedInputEdgesNum - getOriginalInputsNumber() > 0) {
+            size_t reconnectPort = 1;
+            for (size_t startPort = 2 + (withBiases ? 1 : 0); startPort < expectedInputEdgesNum; startPort++) {
+                getParentEdgeAt(startPort)->setChildPort(reconnectPort);
+                reconnectPort++;
+            }
+        }
+
+        for (size_t i = 0; i < edgesToRemove.size(); i++) {
+            removeEdge(edgesToRemove[i]);
+        }
+
+        expectedInputEdgesNum -= getOriginalInputsNumber() - 1;
+        if (withBiases) {
+            inDims.erase(inDims.begin() + 2);
+        }
+        inDims.erase(inDims.begin() + 1);
+    }
+
     auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0));
     if (!inputZeroPoints.empty())
         inputDataType = memory::data_type::u8;
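The comment at the top of this block is the crux of the change: when oneDNN picks a Winograd kernel, the weights live in an opaque wino layout that cannot be described by an InferenceEngine TensorDesc, so weights and bias must travel as internal blobs whose layout is queried back from the primitive descriptor. A hedged sketch of that query against the oneDNN 2.x C++ API; shapes are illustrative, and primitive-descriptor creation will throw on CPUs without Winograd support:

    #include <dnnl.hpp>

    // Ask the library which (opaque) weights layout its Winograd kernel wants.
    dnnl::memory::desc queryWinoWeightsDesc() {
        using namespace dnnl;
        engine eng(engine::kind::cpu, 0);
        // format_tag::any lets the implementation choose the layout.
        memory::desc src({1, 16, 10, 10}, memory::data_type::f32, memory::format_tag::any);
        memory::desc wei({32, 16, 3, 3},  memory::data_type::f32, memory::format_tag::any);
        memory::desc dst({1, 32, 8, 8},   memory::data_type::f32, memory::format_tag::any);
        convolution_forward::desc d(prop_kind::forward_inference,
                                    algorithm::convolution_winograd,
                                    src, wei, dst,
                                    {1, 1} /*strides*/, {0, 0} /*pad_l*/, {0, 0} /*pad_r*/);
        convolution_forward::primitive_desc pd(d, eng);
        return pd.weights_desc();  // wino-transformed layout, not a plain blocked format
    }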
@@ -440,14 +490,11 @@ void MKLDNNConvolutionNode::createPrimitive() {
     prim.reset(new convolution_forward(prim_desc));

     auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
-    auto wei = getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPrimitive();
     auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
-    if (withBiases) {
-        auto bias = getParentEdgesAtPort(2)[0]->getMemoryPtr()->GetPrimitive();
-        primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, wei}, {DNNL_ARG_BIAS, bias}, {DNNL_ARG_DST, dst}};
-    } else {
-        primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, wei}, {DNNL_ARG_DST, dst}};
-    }
+    if (withBiases)
+        primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getWeights()}, {DNNL_ARG_BIAS, getBias()}, {DNNL_ARG_DST, dst}};
+    else
+        primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getWeights()}, {DNNL_ARG_DST, dst}};
 }

 bool MKLDNNConvolutionNode::created() const {
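The rewritten branch binds the Winograd-repacked internal blobs through getWeights()/getBias() instead of the (now removed) weight and bias parent edges. For context, primArgs is the standard oneDNN execution-argument map; a sketch of how such a map is consumed, assuming the oneDNN 2.x API, with the primitive, stream, and memories taken as given:

    #include <unordered_map>
    #include <dnnl.hpp>

    // Run a prepared convolution; the DNNL_ARG_* ids say which memory plays which role.
    void runConv(const dnnl::convolution_forward& conv, dnnl::stream& strm,
                 const dnnl::memory& src, const dnnl::memory& wei,
                 const dnnl::memory& bia, const dnnl::memory& dst) {
        std::unordered_map<int, dnnl::memory> args{
                {DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, wei},
                {DNNL_ARG_BIAS, bia}, {DNNL_ARG_DST, dst}};
        conv.execute(strm, args);
        strm.wait();  // block until the computation finishes
    }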
@@ -474,8 +521,8 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector<InferenceEngine::

     std::vector<mkldnn::algorithm> algorithms;

-    // TODO [NM]: We cannot map wino_format on tensor descriptor for now
-    // algorithms.push_back(algorithm::convolution_winograd);
+    if (isWinograd())
+        algorithms.push_back(mkldnn::algorithm::convolution_winograd);
     algorithms.push_back(mkldnn::algorithm::convolution_direct);

     for (auto alg : algorithms) {
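Pushing convolution_winograd ahead of convolution_direct means the Winograd descriptor is tried first and direct convolution remains as a fallback. A hedged sketch of that try-in-order pattern with the oneDNN 2.x API, simplified to two prebuilt descriptors; the plugin iterates its own descriptor list rather than using this exact helper:

    #include <dnnl.hpp>

    // Prefer the Winograd implementation, fall back to direct if rejected.
    dnnl::convolution_forward::primitive_desc selectImpl(
            const dnnl::engine& eng,
            const dnnl::convolution_forward::desc& winoDesc,
            const dnnl::convolution_forward::desc& directDesc) {
        try {
            return dnnl::convolution_forward::primitive_desc(winoDesc, eng);
        } catch (const dnnl::error&) {  // no Winograd kernel for this shape/ISA
            return dnnl::convolution_forward::primitive_desc(directDesc, eng);
        }
    }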
@@ -722,6 +769,14 @@ bool MKLDNNConvolutionNode::canFuse(const MKLDNNNodePtr& node) const {
     return canFuseSimpleOperation(node);
 }

+const mkldnn::memory& MKLDNNConvolutionNode::getWeights() const {
+    return isWinograd() ? internalBlobMemory[0]->GetPrimitive() : getParentEdgeAt(1)->getMemory().GetPrimitive();
+}
+
+const mkldnn::memory& MKLDNNConvolutionNode::getBias() const {
+    return isWinograd() ? internalBlobMemory[1]->GetPrimitive() : getParentEdgeAt(2)->getMemory().GetPrimitive();
+}
+
 InferenceEngine::Precision MKLDNNConvolutionNode::getRuntimePrecision() const {
     std::vector<InferenceEngine::Precision> inputPrecisions;
     // Don't take bias precision into account
@@ -812,4 +867,28 @@ bool MKLDNNConvolutionNode::isNspcAvailable() const {
     return true;
 }

+InferenceEngine::Blob::Ptr MKLDNNConvolutionNode::createInternalBlob(InferenceEngine::SizeVector dims, size_t edgeNum, bool isGrouped) {
+    const auto constNode = std::dynamic_pointer_cast<MKLDNNInputNode>(getParentEdgeAt(edgeNum)->getParent());
+    if (!constNode) {
+        IE_THROW() << "Cannot cast " << edgeNum << " input to Input node for " << getName() << ".";
+    }
+    InferenceEngine::Blob::CPtr blb = constNode->getConstBlob();
+    if (blb == nullptr)
+        IE_THROW() << "Cannot get const blob for node " << getName() << ".";
+
+    InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, dims, getWeightsLayoutByDims(dims, isGrouped));
+
+    Blob::Ptr internalBlob = InferenceEngine::make_shared_blob<float>(desc);
+    internalBlob->allocate();
+
+    if (internalBlob->size() != blb->size()) {
+        IE_THROW() << "Created internal blob and const blob have different sizes for node: " << getName() << ".";
+    }
+
+    cpu_convert(blb->cbuffer(), internalBlob->buffer(), blb->getTensorDesc().getPrecision(), internalBlob->getTensorDesc().getPrecision(),
+                internalBlob->size());
+
+    return internalBlob;
+}
+
 REG_MKLDNN_PRIM_FOR(MKLDNNConvolutionNode, Convolution);
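createInternalBlob validates that the constant input really is an Input node, allocates an FP32 blob with the weights layout for the given dims, checks the element counts, and converts the constant data into it via cpu_convert. A simplified sketch of that flow with plain standard-library types; std::vector stands in for the IE blob, and only the FP32-to-FP32 conversion case is shown:

    #include <algorithm>
    #include <cstddef>
    #include <stdexcept>
    #include <vector>

    // Copy constant data into a freshly allocated internal buffer,
    // refusing mismatched element counts (as the IE_THROW above does).
    std::vector<float> makeInternalBlob(const float* constData, size_t constSize,
                                        size_t expectedSize) {
        std::vector<float> blob(expectedSize);  // allocate() equivalent
        if (blob.size() != constSize)
            throw std::runtime_error("internal blob and const blob have different sizes");
        std::copy(constData, constData + constSize, blob.begin());  // cpu_convert, FP32 case
        return blob;
    }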
@@ -32,8 +32,12 @@ public:
     }
     InferenceEngine::Precision getRuntimePrecision() const override;
     MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;

+    const mkldnn::memory& getWeights() const;
+    const mkldnn::memory& getBias() const;
+
     size_t descInputNumbers(MKLDNNDescriptor desc) override {
-        return static_cast<size_t>(getOriginalInputsNumber());
+        return static_cast<size_t>(isWinograd() ? 1 : getOriginalInputsNumber());
     }

     bool canBeExecutedInInt8() const;
@@ -54,6 +58,8 @@ public:
         return isGrouped && 1 == groupOC && 1 == groupIC;
     }

+    bool isWinograd() const { return isWino; }
+
 protected:
     InferenceEngine::Precision fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const;
@@ -63,12 +69,13 @@ private:
     void filterSupportedDescriptors();
     bool isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) const;
     bool isNspcAvailable() const;
+    InferenceEngine::Blob::Ptr createInternalBlob(InferenceEngine::SizeVector dims, size_t edgeNum, bool isGrouped = false);

     bool withBiases;
     bool withSum;
     bool withDWConv;
     bool isGrouped;
-    bool isPrimitivesPriorityDefined;
+    bool isPrimitivesPriorityDefined = false;
     std::vector<ptrdiff_t> stride;
     std::vector<ptrdiff_t> dilation;
     std::vector<ptrdiff_t> paddingL;
@@ -92,6 +99,8 @@ private:

     const size_t X_AXIS = 0;
     const size_t Y_AXIS = 1;
+
+    bool isWino = false;
 };

 } // namespace MKLDNNPlugin
@@ -90,7 +90,7 @@ protected:
     std::tie(postOpMgrPtr, fusedOps) = fusingParams;

     if (postOpMgrPtr)
-        isBias = postOpMgrPtr->getFusedOpsNames() == "Add(PerChannel)";
+        isBias = (postOpMgrPtr->getFusedOpsNames() == "Add(PerChannel)" && selectedType != "jit_avx512_winograd");

     convSpecificParams convParams;
     std::vector<size_t> inputShape;
@@ -722,4 +722,52 @@ INSTANTIATE_TEST_CASE_P(smoke_Conv_Jit_Planar_3D_FP32, ConvolutionLayerCPUTest,
 /* ============= */

 } // namespace

+
+/* ============= Winograd ============= */
+namespace winograd {
+
+const std::vector<fusingSpecificParams> fusingParamsSet{
+        emptyFusingSpec,
+        fusingRelu,
+        fusingSum,
+        fusingAddPerChannel // bias
+};
+
+const SizeVector numOutChannels = { 32 };
+
+const std::vector<SizeVector> kernels2d = { {3, 3} };
+const std::vector<SizeVector> strides2d = { {1, 1} };
+const std::vector<std::vector<ptrdiff_t>> padBegins2d = { {0, 0} };
+const std::vector<std::vector<ptrdiff_t>> padEnds2d = { {0, 0} };
+const std::vector<SizeVector> dilations2d = { {1, 1} };
+
+const auto convParams_2D = ::testing::Combine(
+        ::testing::ValuesIn(kernels2d),
+        ::testing::ValuesIn(strides2d),
+        ::testing::ValuesIn(padBegins2d),
+        ::testing::ValuesIn(padEnds2d),
+        ::testing::ValuesIn(dilations2d),
+        ::testing::ValuesIn(numOutChannels),
+        ::testing::Values(ngraph::op::PadType::EXPLICIT)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_Conv_winograd, ConvolutionLayerCPUTest,
+        ::testing::Combine(
+                ::testing::Combine(
+                        convParams_2D,
+                        ::testing::Values(Precision::FP32),
+                        ::testing::Values(Precision::FP32),
+                        ::testing::Values(Precision::UNSPECIFIED),
+                        ::testing::Values(Layout::ANY),
+                        ::testing::Values(Layout::ANY),
+                        ::testing::Values(std::vector<size_t>({ 1, 16, 10, 10 })),
+                        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                ::testing::ValuesIn(filterCPUInfoForDevice(std::vector<CPUSpecificParams>{conv_winograd})),
+                ::testing::ValuesIn(fusingParamsSet),
+                ::testing::Values(cpuEmptyPluginConfig)),
+        ConvolutionLayerCPUTest::getTestCaseName);
+
+} // namespace winograd
+
 } // namespace CPULayerTestsDefinitions
@@ -70,4 +70,6 @@ namespace CPUTestUtils {
     const auto conv_sse42_2D_1x1_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_sse42_1x1"}, "jit_sse42_1x1"};
     const auto conv_avx2_2D_1x1_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx2_1x1"}, "jit_avx2_1x1"};
     const auto conv_avx512_2D_1x1_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx512_1x1"}, "jit_avx512_1x1"};
+
+    const auto conv_winograd = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_avx512_winograd"}, "jit_avx512_winograd"};
 } // namespace CPUTestUtils