[CPU] Convolution plus sum fusing in the case of dynamic shapes (#10235)

This commit is contained in:
Maksim Kutakov
2022-02-15 13:12:07 +03:00
committed by GitHub
parent ccc38d22a8
commit 788a5bb9f2
14 changed files with 610 additions and 61 deletions

View File

@@ -80,6 +80,38 @@ void MKLDNNGraph::CreateGraph(NET &net, const MKLDNNExtensionManager::Ptr& extMg
CPU_DEBUG_CAP_ENABLE(serialize(*this));
}
void MKLDNNGraph::CreateGraph(const std::vector<MKLDNNNodePtr> &graphNodes,
const std::vector<MKLDNNEdgePtr> &graphEdges,
MKLDNNWeightsSharing::Ptr &w_cache,
std::string name) {
if (IsReady())
ForgetGraphData();
// disable weights caching if graph was created only once
weightsCache = config.streamExecutorConfig._streams != 1 ? w_cache : nullptr;
rtParamsCache = std::make_shared<MultiCache>(config.rtCacheCapacity);
this->_name = std::move(name);
this->reuse_io_tensors = false;
this->graphNodes = graphNodes;
this->graphEdges = graphEdges;
for (auto node : graphNodes) {
if ("Parameter" == node->getTypeStr()) {
inputNodesMap[node->getName()] = node;
} else if ("Result" == node->getTypeStr()) {
outputNodesMap[node->getName()] = node;
}
}
InitGraph();
status = Ready;
CPU_DEBUG_CAP_ENABLE(serialize(*this));
}
template void MKLDNNGraph::CreateGraph(const std::shared_ptr<const ngraph::Function>&,
const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&);
template void MKLDNNGraph::CreateGraph(const CNNNetwork&,
@@ -1073,6 +1105,7 @@ Config MKLDNNGraph::getProperty() const {
void MKLDNNGraph::RemoveEdge(MKLDNNEdgePtr& edge) {
for (auto it = graphEdges.begin(); it != graphEdges.end(); it++) {
if ((*it) == edge) {
edge->drop();
graphEdges.erase(it);
return;
}

View File

@@ -50,6 +50,11 @@ public:
const MKLDNNExtensionManager::Ptr& extMgr,
MKLDNNWeightsSharing::Ptr &w_cache);
void CreateGraph(const std::vector<MKLDNNNodePtr> &graphNodes,
const std::vector<MKLDNNEdgePtr> &graphEdges,
MKLDNNWeightsSharing::Ptr &w_cache,
std::string name);
bool hasMeanImageFor(const std::string& name) {
return _normalizePreprocMap.find(name) != _normalizePreprocMap.end();
}

View File

@@ -239,14 +239,12 @@ void MKLDNNGraphOptimizer::FuseConvolutionMatMulAndBias(MKLDNNGraph &graph) {
int inNum = 0;
if (remEdge) {
inNum = remEdge->getInputNum();
remEdge->drop();
graph.RemoveEdge(remEdge);
}
remEdge = childs[j].lock();
int outNum = 0;
if (remEdge) {
outNum = remEdge->getOutputNum();
remEdge->drop();
graph.RemoveEdge(remEdge);
}
MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum));
@@ -259,7 +257,6 @@ void MKLDNNGraphOptimizer::FuseConvolutionMatMulAndBias(MKLDNNGraph &graph) {
int inNum = 0;
if (remEdge) {
inNum = remEdge->getInputNum();
remEdge->drop();
graph.RemoveEdge(remEdge);
}
@@ -1074,8 +1071,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
};
for (auto &graphNode : graphNodes) {
// TODO [DS]: at this moment this transformation prohibit for dynamic case
if (graphNode->getType() != Eltwise || graphNode->getAlgorithm() != EltwiseAdd || graphNode->isDynamicNode() ||
if (graphNode->getType() != Eltwise || graphNode->getAlgorithm() != EltwiseAdd ||
std::dynamic_pointer_cast<MKLDNNEltwiseNode>(graphNode)->isWithBroadcast())
continue;
@@ -1227,9 +1223,9 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
if (mergedConv->fusedWith.size() > 0 &&
(mergedConv->fusedWith[0]->getType() == Convolution || mergedConv->fusedWith[0]->getType() == BinaryConvolution)) {
// Merged with DW_conv. Shape may change
mergedConv->inputShapes.push_back(mergedConv->fusedWith[0]->outputShapes[0]);
mergedConv->inputShapes.push_back(mergedConv->fusedWith[0]->getOutputShapeAtPort(0));
} else {
mergedConv->inputShapes.push_back(mergedConv->outputShapes[0]);
mergedConv->inputShapes.push_back(sum->getInputShapeAtPort(1));
}
size_t childIdx = 0lu;
@@ -1536,14 +1532,12 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) {
int inNum = 0;
if (remEdge) {
inNum = remEdge->getInputNum();
remEdge->drop();
graph.RemoveEdge(remEdge);
}
remEdge = children[j].lock();
int outNum = 0;
if (remEdge) {
outNum = remEdge->getOutputNum();
remEdge->drop();
graph.RemoveEdge(remEdge);
}
MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum));
@@ -1563,7 +1557,6 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) {
if (childNode->getAlgorithm() == EltwiseMulAdd) {
outNum = initialParentInNum + remEdge->getOutputNum() - 1;
}
remEdge->drop();
graph.RemoveEdge(remEdge);
}
@@ -2068,7 +2061,6 @@ void MKLDNNGraphOptimizer::reshapeRnnSeq(MKLDNNGraph &graph) {
graphEdges.push_back(newEdge);
graphNodes.push_back(cpuConstant);
edge->drop();
graph.RemoveEdge(edge);
}
}

View File

@@ -1546,3 +1546,7 @@ bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const {
}
return false;
}
void MKLDNNNode::addFusedNode(const MKLDNNNodePtr &fusingNode) {
fusedWith.push_back(fusingNode);
}

View File

@@ -186,9 +186,7 @@ public:
bool isFusedWith(Type type) const;
void addFusedNode(const MKLDNNNodePtr &fusingNode) {
fusedWith.push_back(fusingNode);
}
virtual void addFusedNode(const MKLDNNNodePtr &fusingNode);
virtual void fuseInto(MKLDNNNodePtr& parentNode) {
// The graph supports fusing only of consecutive nodes and some graph logic requires to know through which input port a node was fused into parent one.
@@ -332,7 +330,7 @@ public:
virtual void execute(mkldnn::stream strm);
void executeDynamic(mkldnn::stream strm);
void redefineOutputMemory(const std::vector<VectorDims> &newShapes);
virtual void redefineOutputMemory(const std::vector<VectorDims> &newShapes);
virtual void initSupportedPrimitiveDescriptors();

View File

@@ -9,6 +9,7 @@
#include "mkldnn_fake_quantize_node.h"
#include "mkldnn_pooling_node.h"
#include "mkldnn_concat_node.h"
#include "mkldnn_graph.h"
#include "cpu/x64/cpu_isa_traits.hpp"
#include <string>
#include <vector>
@@ -95,6 +96,101 @@ bool ConvKey::operator==(const ConvKey &rhs) const {
} // namespace
class MKLDNNConvolutionNode::FusedSubgraph {
public:
FusedSubgraph(const std::vector<MKLDNNNodePtr> &opList, const MKLDNNConvolutionNode &conv, MKLDNNWeightsSharing::Ptr weightCache) {
_graph = std::unique_ptr<MKLDNNGraph>(new MKLDNNGraph());
std::unordered_set<MKLDNNNodePtr> nodesSet;
std::vector<MKLDNNEdgePtr> edges;
auto addEdge = [&](const MKLDNNNodePtr& parent, const MKLDNNNodePtr& child, size_t parentPort, size_t childPort) -> void {
auto edge = std::make_shared<MKLDNNEdge>(parent, child, parentPort, childPort);
child->addEdge(edge);
edges.push_back(edge);
nodesSet.insert(parent);
nodesSet.insert(child);
};
//Make inputs
const auto &inpMemDesc1 = conv.getBaseMemDescAtOutputPort(0);
auto inp0 = std::make_shared<MKLDNNInputNode>(inpMemDesc1, "inp0", "Parameter", conv.getEngine(), weightCache);
inputs.push_back(inp0);
const size_t sumPortNum = conv.getParentEdges().size() - 1;
const auto &inpMemDesc2 = conv.getBaseMemDescAtInputPort(sumPortNum);
auto inp1 = std::make_shared<MKLDNNInputNode>(inpMemDesc2, "inp1", "Parameter", conv.getEngine(), weightCache);
inputs.push_back(inp1);
auto itr = std::find_if(opList.begin(), opList.end(), [](const MKLDNNNodePtr &node) {
if (auto eltwise = std::dynamic_pointer_cast<MKLDNNEltwiseNode>(node)) {
return eltwise->isSpecialConvolutionAddFusing();
}
return false;
});
auto sumNode = *itr;
addEdge(inp0, sumNode, 0, 0);
addEdge(inp1, sumNode, 0, 1);
//Replicate the rest of the subgraph
auto parentItr = itr;
while (++itr != opList.end()) {
auto parentNode = *parentItr;
auto currentNode = *itr;
if (FakeQuantize == currentNode->getType()) {
parentNode->addFusedNode(currentNode);
} else {
addEdge(parentNode, currentNode, 0, 0);
auto constantsItr = conv.fusedConstNodes.find(currentNode);
if (constantsItr != conv.fusedConstNodes.end()) {
size_t inpPort = 1lu;
for (const auto& item : constantsItr->second) {
addEdge(item, currentNode, 0, inpPort++);
}
}
parentItr = itr;
}
}
//Make output
const auto &outMemDesc = conv.getBaseMemDescAtOutputPort(0);
auto out = std::make_shared<MKLDNNInputNode>(outMemDesc, "out", "Result", conv.getEngine(), weightCache);
addEdge(*parentItr, out, 0, 0);
outputs.push_back(out);
std::vector<MKLDNNNodePtr> nodes(nodesSet.begin(), nodesSet.end());
_graph->CreateGraph(nodes, edges, weightCache, "fused_subgraph");
}
std::shared_ptr<MKLDNNInputNode> getInput(size_t idx) const {
if (idx < inputs.size()) {
return inputs[idx];
} else {
IE_THROW(OutOfBounds) << "Unexpected input index in MKLDNNConvolutionNode::fusedSubgraph::getInput idx=" << idx
<< " inputs.size()=" << inputs.size();
}
}
std::shared_ptr<MKLDNNInputNode> getOutput(size_t idx) const {
if (idx < outputs.size()) {
return outputs[idx];
} else {
IE_THROW(OutOfBounds) << "Unexpected output index in MKLDNNConvolutionNode::fusedSubgraph::getInput idx=" << idx
<< " inputs.size()=" << outputs.size();
}
}
void infer() {
_graph->ResetInferCount();
_graph->Infer();
}
private:
std::unique_ptr<MKLDNNGraph> _graph;
std::vector<std::shared_ptr<MKLDNNInputNode>> inputs;
std::vector<std::shared_ptr<MKLDNNInputNode>> outputs;
};
bool MKLDNNConvolutionNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (!ngraph::is_type<ngraph::op::v1::Convolution>(op) && !ngraph::is_type<ngraph::op::v1::GroupConvolution>(op)) {
@@ -220,7 +316,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
(withBiases ? (getParentEdgeAt(2)->getParent()->isConstant() && getParentEdgeAt(2)->getParent()->getType() == Input) : true);
}
withSum = false;
int expectedInputEdgesNum = static_cast<int>(getOriginalInputsNumber());
for (int i = 0; i < fusedWith.size(); i++) {
if (fusedWith[i]->getType() == Convolution) {
@@ -230,7 +325,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
if (fusedWith[i]->getAlgorithm() == EltwiseAdd) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) {
withSum = true;
expectedInputEdgesNum++;
}
}
@@ -418,6 +512,9 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vecto
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
if (eltwiseNode->isSpecialConvolutionAddFusing()) {
if (withSumBroadcast) {
break;
}
ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision));
} else {
if (useLegacyPostOps || eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
@@ -536,7 +633,7 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
if (withSum) {
dataConfig.inPlace(-1);
dataConfig.setMemDesc(dataConfig.getMemDesc()->cloneWithNewPrecision(dataConfig.getMemDesc()->getPrecision()));
dataConfig.setMemDesc(getSumMemDesc(itpd)->cloneWithNewPrecision(dataConfig.getMemDesc()->getPrecision()));
config.inConfs.push_back(dataConfig);
}
}
@@ -993,7 +1090,7 @@ InferenceEngine::Blob::Ptr MKLDNNConvolutionNode::createInternalBlob(InferenceEn
void MKLDNNConvolutionNode::prepareParams() {
auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
auto wghMemPtr = getParentEdgesAtPort(1)[0]->getMemoryPtr();
auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr();
auto dstMemPtr = getOutputMemory();
if (!dstMemPtr || !dstMemPtr->isAllocated())
IE_THROW() << "Destination memory was not allocated.";
if (!srcMemPtr || !srcMemPtr->isAllocated())
@@ -1030,7 +1127,7 @@ void MKLDNNConvolutionNode::prepareParams() {
AttrPtr pAttrLocal;
if (isDynamicNode()) {
if (!pAttr) {
if (!pAttr || withSum) {
pAttr = initPrimitiveAttr();
}
pAttrLocal = pAttr;
@@ -1197,6 +1294,23 @@ void MKLDNNConvolutionNode::execute(mkldnn::stream strm) {
void MKLDNNConvolutionNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
if (withSumBroadcast) {
if (!subgraph) {
IE_THROW(Unexpected) << "Fused ops subgraph has not been created in " << getTypeStr() << " with name " << getName();
}
const size_t sumPortNum = getParentEdges().size() - 1;
const auto& sumInpMem = getParentEdgesAtPort(sumPortNum).front()->getMemory();
auto inp1 = subgraph->getInput(1);
inp1->getChildEdgesAtPort(0).front()->getMemoryPtr()->setDataHandle(sumInpMem.GetData());
subgraph->infer();
auto out = subgraph->getOutput(0);
const auto& outMem = out->getParentEdgesAtPort(0).front()->getMemory();
auto convOutMem = getChildEdgesAtPort(0).front()->getMemoryPtr();
convOutMem->redefineDesc(getBaseMemDescAtOutputPort(0)->cloneWithNewDims(outMem.getStaticDims()));
convOutMem->SetData(outMem);
}
}
void MKLDNNConvolutionNode::updatePadding() {
@@ -1207,6 +1321,69 @@ void MKLDNNConvolutionNode::updatePadding() {
}
}
void MKLDNNConvolutionNode::redefineOutputMemory(const std::vector<VectorDims> &newOutputShapes) {
if (withSum) {
const size_t sumPortNum = getParentEdges().size() - 1;
const auto& sumInpMem = getParentEdgesAtPort(sumPortNum).front()->getMemory();
if (newOutputShapes.front() != sumInpMem.getStaticDims()) {
withSumBroadcast = true;
if (!subgraph) {
subgraph = std::make_shared<FusedSubgraph>(fusedWith, *this, weightCache);
}
auto inp0 = subgraph->getInput(0);
inp0->redefineOutputMemory(newOutputShapes);
auto inp1 = subgraph->getInput(1);
inp1->redefineOutputMemory({sumInpMem.getStaticDims()});
// here we postpone output memory reallocation due to the fact that it is the same memory with the sum second input
return;
} else {
withSumBroadcast = false;
}
}
MKLDNNNode::redefineOutputMemory(newOutputShapes);
}
MemoryDescPtr MKLDNNConvolutionNode::getSumMemDesc(primitive_desc_iterator &primitive_desc_it) {
if (getOutputShapeAtPort(0).isDynamic()) {
return MKLDNNExtensionUtils::makeUndefinedDesc(primitive_desc_it.dst_desc(0), getInputShapeAtPort(getParentEdges().size() - 1));
}
return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.dst_desc(0));
}
MKLDNNMemoryPtr MKLDNNConvolutionNode::getOutputMemory() const {
if (withSumBroadcast) {
if (!subgraph) {
IE_THROW(Unexpected) << "Fused ops subgraph has not been created in " << getTypeStr() << " with name " << getName();
}
auto inp0 = subgraph->getInput(0);
return inp0->getChildEdgesAtPort(0).front()->getMemoryPtr();
} else {
return getChildEdgesAtPort(0).front()->getMemoryPtr();
}
}
void MKLDNNPlugin::MKLDNNConvolutionNode::addFusedNode(const MKLDNNNodePtr &fusingNode) {
if (Eltwise == fusingNode->getType()) {
if (fusingNode->getAlgorithm() == EltwiseAdd) {
auto eltwiseNode = std::dynamic_pointer_cast<MKLDNNEltwiseNode>(fusingNode);
if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) {
withSum = true;
}
}
if (withSum && isDynamicNode()) {
for (size_t i = 0; i < fusingNode->getParentEdges().size(); ++i) {
auto edge = fusingNode->getParentEdgesAtPort(i).front();
auto parent = edge->getParent();
if ("Constant" == parent->getTypeStr()) {
fusedConstNodes[fusingNode].push_back(parent);
}
}
}
}
MKLDNNNode::addFusedNode(fusingNode);
}
void MKLDNNConvolutionNode::appendZeroPointsArgs() {
if (inputZeroPointsMemPtr != nullptr) {
primArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC] = inputZeroPointsMemPtr->GetPrimitive();
@@ -1218,5 +1395,4 @@ void MKLDNNConvolutionNode::appendZeroPointsArgs() {
primArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_DST] = outputCompensationMemPtr->GetPrimitive();
}
}
REG_MKLDNN_PRIM_FOR(MKLDNNConvolutionNode, Convolution);

View File

@@ -65,8 +65,12 @@ public:
protected:
InferenceEngine::Precision fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const;
void redefineOutputMemory(const std::vector<VectorDims> &newOutputShapes) override;
void addFusedNode(const MKLDNNNodePtr &fusingNode) override;
private:
class FusedSubgraph;
using FusedSubgraphPtr = std::shared_ptr<FusedSubgraph>;
using executorPtr = std::shared_ptr<DnnlExecutor>;
executorPtr execPtr = nullptr;
@@ -91,6 +95,8 @@ private:
InferenceEngine::Blob::Ptr createInternalBlob(InferenceEngine::SizeVector dims, size_t edgeNum, bool isGrouped = false);
void updatePadding();
MemoryDescPtr getSumMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it);
MKLDNNMemoryPtr getOutputMemory() const;
void appendZeroPointsArgs();
@@ -99,6 +105,7 @@ private:
bool withDWConv;
bool isGrouped;
bool isPrimitivesPriorityDefined = false;
bool withSumBroadcast = false;
std::vector<size_t> stride;
std::vector<ptrdiff_t> dilation;
std::vector<ptrdiff_t> paddingL;
@@ -126,6 +133,8 @@ private:
bool isWino = false;
AttrPtr pAttr;
bool autoPadding = false;
FusedSubgraphPtr subgraph;
std::unordered_map<MKLDNNNodePtr, std::vector<MKLDNNNodePtr>> fusedConstNodes;
MKLDNNMemoryPtr inputZeroPointsMemPtr;
MKLDNNMemoryPtr weightsZeroPointsMemPtr;

View File

@@ -1590,14 +1590,13 @@ size_t MKLDNNEltwiseNode::getOpInputsNum() const {
}
}
// TODO [DS]: used only in FuseConvolutionSumAndConvolutionSumActivation
// fix when reimplement this transformation for dynamic shapes
bool MKLDNNEltwiseNode::isWithBroadcast() {
auto oDims = getOutputShapeAtPort(0).getStaticDims();
const auto& oDims = getOutputShapeAtPort(0).getDims();
for (size_t i = 0; i < inputShapes.size(); i++) {
auto iDims = getInputShapeAtPort(i).getStaticDims();
if (iDims != oDims)
const auto& iDims = getInputShapeAtPort(i).getDims();
if (!dimsEqualWeak(iDims, oDims)) {
return true;
}
}
return false;
@@ -2014,9 +2013,8 @@ bool MKLDNNEltwiseNode::canBeInPlace() const {
void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) {
// Handling Convolution custom Add node fusing case which is processed via dnnl append_sum() API.
// TODO [DS]: at this moment this transformation prohibit for dynamic case
specialConvolutionAddFusing = (parentNode->getType() == Convolution || parentNode->getType() == BinaryConvolution) && getAlgorithm() == EltwiseAdd &&
getInputShapeAtPort(0) == getInputShapeAtPort(1);
dimsEqualWeak(getInputShapeAtPort(0).getDims(), getInputShapeAtPort(1).getDims());
if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) {
std::tie(scales, shifts) = getScalesAndShifts(parentNode.get());
if ((parentNode->getType() == FullyConnected || parentNode->getType() == MatMul) && one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract,

View File

@@ -363,6 +363,12 @@ MKLDNNInputNode::MKLDNNInputNode(const Shape& shape, const InferenceEngine::Prec
}
}
MKLDNNInputNode::MKLDNNInputNode(MemoryDescPtr memDesc, const std::string &name, const std::string &type,
const mkldnn::engine &eng, MKLDNNWeightsSharing::Ptr &cache) :
MKLDNNInputNode(memDesc->getShape(), memDesc->getPrecision(), name, type, eng, cache) {
extMemDesc = memDesc;
}
void MKLDNNInputNode::withMeanImage() {
isMeanImage = true;
}
@@ -389,29 +395,11 @@ void MKLDNNInputNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
std::vector<PortConfigurator> inPortConfs;
std::vector<PortConfigurator> outPortConfs;
if (getType() == Input || getType() == MemoryInput) {
auto precision = getOriginalOutputPrecisionAtPort(0);
if (precision == Precision::U16 || isMeanImage) {
precision = Precision::FP32;
}
outPortConfs.push_back({LayoutType::ncsp, precision});
if (!getParentEdges().empty()) {
inPortConfs.push_back({LayoutType::ncsp, precision, true});
}
} else if (getType() == Output) {
auto precision = getOriginalInputPrecisionAtPort(0);
if (precision == Precision::U16) precision = Precision::FP32;
inPortConfs.push_back({LayoutType::ncsp, precision});
if (extMemDesc) {
initSupportedPdFromMemDesc();
} else {
initSupportedPdDefault();
}
addSupportedPrimDesc(inPortConfs,
outPortConfs,
impl_desc_type::unknown);
}
void MKLDNNInputNode::createPrimitive() {
@@ -437,5 +425,45 @@ bool MKLDNNInputNode::created() const {
return getType() == Input || getType() == Output;
}
void MKLDNNInputNode::initSupportedPdDefault() {
std::vector<PortConfigurator> inPortConfs;
std::vector<PortConfigurator> outPortConfs;
if (getType() == Input || getType() == MemoryInput) {
auto precision = getOriginalOutputPrecisionAtPort(0);
if (precision == Precision::U16 || isMeanImage) {
precision = Precision::FP32;
}
outPortConfs.push_back({LayoutType::ncsp, precision});
if (!getParentEdges().empty()) {
inPortConfs.push_back({LayoutType::ncsp, precision, true});
}
} else if (getType() == Output) {
auto precision = getOriginalInputPrecisionAtPort(0);
if (precision == Precision::U16) precision = Precision::FP32;
inPortConfs.push_back({LayoutType::ncsp, precision});
}
addSupportedPrimDesc(inPortConfs,
outPortConfs,
impl_desc_type::unknown);
}
void MKLDNNInputNode::initSupportedPdFromMemDesc() {
NodeConfig config;
PortConfig portConfig;
portConfig.inPlace(-1);
portConfig.constant(false);
portConfig.setMemDesc(extMemDesc);
if (getType() == Input || getType() == MemoryInput) {
config.outConfs.push_back(portConfig);
} else if (getType() == Output) {
config.inConfs.push_back(portConfig);
}
supportedPrimitiveDescriptors.emplace_back(std::move(config), impl_desc_type::unknown);
}
REG_MKLDNN_PRIM_FOR(MKLDNNInputNode, Input);
REG_MKLDNN_PRIM_FOR(MKLDNNInputNode, Output);

View File

@@ -16,6 +16,8 @@ public:
MKLDNNInputNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
MKLDNNInputNode(const Shape& shape, const InferenceEngine::Precision &prc, const std::string &name,
const std::string &type, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
MKLDNNInputNode(MemoryDescPtr memDesc, const std::string &name, const std::string &type, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache);
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
@@ -35,10 +37,13 @@ public:
private:
void cloneBlobIfRequired();
void initSupportedPdDefault();
void initSupportedPdFromMemDesc();
private:
std::shared_ptr<ngraph::op::Constant> constOp;
MKLDNNMemoryCPtr memoryPtr;
MemoryDescPtr extMemDesc = nullptr;
bool isMeanImage = false;
};

View File

@@ -144,9 +144,6 @@ std::vector<std::string> disabledTestPatterns() {
*IS=_TS=\(\(4\.5\.6\.7\)\)_RS=\(\(1\.1\.6\.1\)\)_\(\(1\.5\.6\.1\)\)_\(\(1\.1\.1\.1\)\)_\(\(1\.1\.6\.1\)\).*)",
// Issue: 69222
R"(.*smoke_PriorBoxClustered.*PriorBoxClusteredLayerCPUTest.*_netPRC=f16_.*)",
// TODO : CVS-69533
R"(.*ConvolutionLayerCPUTest.*IS=\{.+\}.*_Fused=.*Add\(Parameters\).*)",
R"(.*GroupConvolutionLayerCPUTest.*IS=\{.+\}.*_Fused=.*Add\(Parameters\).*)",
// Issue: 74817
// Sporadic failings with NAN on Dynamic shape cases with jit implementation
R"(.*DefConvLayoutTest7.*)",

View File

@@ -116,10 +116,35 @@ protected:
ngraph::ParameterVector &params,
const std::shared_ptr<ngraph::Node> &lastNode) override {
auto retNode = CpuTestWithFusing::modifyGraph(ngPrc, params, lastNode);
for (size_t i = targetStaticShapes.front().size(); i < params.size(); ++i) {
const auto& shape = params[i]->get_output_partial_shape(0);
if (shape.is_static()) {
targetStaticShapes.front().push_back(shape.get_shape());
std::shared_ptr<ngraph::Node> opToShapeInfer = nullptr;
for (auto& targetShapes : targetStaticShapes) {
for (size_t i = targetShapes.size(); i < params.size(); ++i) {
const auto &shape = params[i]->get_output_partial_shape(0);
if (shape.is_static()) {
targetShapes.push_back(shape.get_shape());
} else {
// It is assumed that in such tests we have second parameter only if sum fusion is tested.
// Considering this fact, we need to set the appropriate static shape for the second term of the sum operation, and
// it has to match the convolution output shape. So the most suitable solution here is to perform shape inference on the
// convolution node
if (!opToShapeInfer) {
ngraph::OutputVector inputsForShapeInfer;
for (size_t j = 0; j < lastNode->get_input_size(); j++) {
if (ngraph::is_type<ngraph::opset1::Constant>(lastNode->get_input_node_ptr(j))) {
inputsForShapeInfer.push_back(lastNode->get_input_node_shared_ptr(j));
} else {
inputsForShapeInfer.push_back(std::make_shared<ngraph::opset1::Parameter>(lastNode->get_input_element_type(j),
lastNode->get_input_partial_shape(j)));
}
}
opToShapeInfer = lastNode->clone_with_new_inputs(inputsForShapeInfer);
}
std::vector<ov::Shape> secondParameterShapes;
opToShapeInfer->get_input_tensor(0).set_partial_shape(targetShapes.front());
opToShapeInfer->validate_and_infer_types();
targetShapes.push_back(opToShapeInfer->get_output_shape(0));
}
}
}
return retNode;

View File

@@ -117,10 +117,35 @@ protected:
ngraph::ParameterVector &params,
const std::shared_ptr<ngraph::Node> &lastNode) override {
auto retNode = CpuTestWithFusing::modifyGraph(ngPrc, params, lastNode);
for (size_t i = targetStaticShapes.front().size(); i < params.size(); ++i) {
const auto& shape = params[i]->get_output_partial_shape(0);
if (shape.is_static()) {
targetStaticShapes.front().push_back(shape.get_shape());
std::shared_ptr<ngraph::Node> opToShapeInfer = nullptr;
for (auto& targetShapes : targetStaticShapes) {
for (size_t i = targetShapes.size(); i < params.size(); ++i) {
const auto &shape = params[i]->get_output_partial_shape(0);
if (shape.is_static()) {
targetShapes.push_back(shape.get_shape());
} else {
// It is assumed that in such tests we have second parameter only if sum fusion is tested.
// Considering this fact, we need to set the appropriate static shape for the second term of the sum operation, and
// it has to match the convolution output shape. So the most suitable solution here is to perform shape inference on the
// convolution node
if (!opToShapeInfer) {
ngraph::OutputVector inputsForShapeInfer;
for (size_t j = 0; j < lastNode->get_input_size(); j++) {
if (ngraph::is_type<ngraph::opset1::Constant>(lastNode->get_input_node_ptr(j))) {
inputsForShapeInfer.push_back(lastNode->get_input_node_shared_ptr(j));
} else {
inputsForShapeInfer.push_back(std::make_shared<ngraph::opset1::Parameter>(lastNode->get_input_element_type(j),
lastNode->get_input_partial_shape(j)));
}
}
opToShapeInfer = lastNode->clone_with_new_inputs(inputsForShapeInfer);
}
std::vector<ov::Shape> secondParameterShapes;
opToShapeInfer->get_input_tensor(0).set_partial_shape(targetShapes.front());
opToShapeInfer->validate_and_infer_types();
targetShapes.push_back(opToShapeInfer->get_output_shape(0));
}
}
}
return retNode;

View File

@@ -0,0 +1,254 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils/cpu_test_utils.hpp"
#include "test_utils/fusing_test_utils.hpp"
#include "test_utils/convolution_params.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
using namespace CPUTestUtils;
using namespace InferenceEngine;
using namespace ov::test;
namespace SubgraphTestsDefinitions {
typedef std::tuple<
InputShape, //convShape
InputShape, //second term shape
bool, // bias flag
fusingSpecificParams,
std::map<std::string, std::string> // config
> convSumBroadcastParamSet;
class ConcatConvSumInPlaceTest : public testing::WithParamInterface<convSumBroadcastParamSet>,
virtual public SubgraphBaseTest, public CpuTestWithFusing {
public:
static std::string getTestCaseName(const testing::TestParamInfo<convSumBroadcastParamSet>& obj) {
InputShape convShape;
InputShape secondShape;
bool bias;
fusingSpecificParams fusingParams;
std::map<std::string, std::string> additionalConfig;
std::tie(convShape, secondShape, bias, fusingParams, additionalConfig) = obj.param;
std::ostringstream result;
result << "IS=";
result << CommonTestUtils::partialShape2str({convShape.first, secondShape.first}) << "_";
result << "TS=";
for (const auto& shape : {convShape, secondShape}) {
result << "(";
if (!shape.second.empty()) {
for (const auto& itr : shape.second) {
result << CommonTestUtils::vec2str(itr);
}
}
result << ")_";
}
result << "bias=" << (bias ? "True" : "False");
result << CpuTestWithFusing::getTestCaseName(fusingParams);
if (!additionalConfig.empty()) {
result << "_PluginConf";
for (auto& item : additionalConfig) {
result << "_" << item.first << "=" << item.second;
}
}
return result.str();
}
void SetUp() override {
InputShape convShape;
InputShape secondShape;
bool bias;
CPUSpecificParams cpuParams;
fusingSpecificParams fusingParams;
std::map<std::string, std::string> additionalConfig;
std::tie(convShape, secondShape, bias, fusingParams, additionalConfig) = this->GetParam();
std::tie(postOpMgrPtr, fusedOps) = fusingParams;
configuration.insert(additionalConfig.begin(), additionalConfig.end());
init_input_shapes({convShape, secondShape});
const InferenceEngine::SizeVector kernel = {3, 3};
const InferenceEngine::SizeVector stride = {1, 1};
const InferenceEngine::SizeVector dilation = {1, 1};
const std::vector<ptrdiff_t> padBegin = {0, 0};
const std::vector<ptrdiff_t> padEnd = {0, 0};
const size_t convOutChannels = 64;
auto netType = ngraph::element::f32;
auto inputParams = ngraph::builder::makeDynamicParams(netType, inputDynamicShapes);
auto conv = ngraph::builder::makeConvolution(inputParams[0], ngraph::element::f32, kernel, stride, padBegin,
padEnd, dilation, ngraph::op::PadType::EXPLICIT, convOutChannels);
if (bias) {
auto biasNode = ngraph::builder::makeConstant<float>(ngraph::element::Type_t::f32, ngraph::Shape({1, convOutChannels, 1, 1}), {}, true);
conv = std::make_shared<ngraph::opset3::Add>(conv, biasNode);
}
auto sum = std::make_shared<ngraph::opset3::Add>(conv, inputParams[1]);
fusedOps.insert(fusedOps.begin(), "Add"); // as we always fuse the sum first
auto runtimeType = netType;
if (configuration.count(PluginConfigParams::KEY_ENFORCE_BF16) &&
PluginConfigParams::YES == configuration[PluginConfigParams::KEY_ENFORCE_BF16].as<std::string>()) {
runtimeType = ngraph::element::Type_t::bf16;
}
selectedType = makeSelectedTypeStr(getPrimitiveType(), runtimeType);
function = makeNgraphFunction(netType, inputParams, sum, "ConvolutionSumBroadcast");
targetDevice = CommonTestUtils::DEVICE_CPU;
}
};
TEST_P(ConcatConvSumInPlaceTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
run();
CheckPluginRelatedResults(executableNetwork, "Convolution");
}
namespace {
const auto fusingMulAddFQMullAdd = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) {
ngraph::Shape newShape = generatePerChannelShape(inpNode);
auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Multiply>(inpNode, constNode);
}, "Multiply(PerChannel)"},
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) {
ngraph::Shape newShape = generatePerChannelShape(inpNode);
auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Add>(inpNode, constNode);
}, "Add(PerChannel)"},
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
auto localPrc = inpNode->get_element_type();
ngraph::Shape newShape = generatePerChannelShape(inpNode);
return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape);
}, "FakeQuantize(PerChannel)"},
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) {
ngraph::Shape newShape = generatePerChannelShape(inpNode);
auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Multiply>(inpNode, constNode);
}, "Multiply(PerChannel)"},
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) {
ngraph::Shape newShape = generatePerChannelShape(inpNode);
auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Add>(inpNode, constNode);
}, "Add(PerChannel)"}}), {"Add"} };
// Post-ops chain appended after the node under test:
// per-channel Divide -> per-channel Subtract -> per-channel FakeQuantize.
// The fused-ops check list contains "FakeQuantize" only.
const auto fusingDivSubFQ = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
        {[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
            const auto perChannelShape = generatePerChannelShape(inpNode);
            // Empty data vector + 'true' flag: constant values are generated by the builder.
            const auto divisor = ngraph::builder::makeConstant(ngPrc, perChannelShape, std::vector<float>{}, true);
            return std::make_shared<ngraph::opset1::Divide>(inpNode, divisor);
        }, "Divide(PerChannel)"},
        {[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
            const auto perChannelShape = generatePerChannelShape(inpNode);
            const auto subtrahend = ngraph::builder::makeConstant(ngPrc, perChannelShape, std::vector<float>{}, true);
            return std::make_shared<ngraph::opset1::Subtract>(inpNode, subtrahend);
        }, "Subtract(PerChannel)"},
        {[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
            // FQ keeps the precision of its input node rather than the test precision.
            const auto fqPrc = inpNode->get_element_type();
            const auto fqShape = generatePerChannelShape(inpNode);
            return ngraph::builder::makeFakeQuantize(inpNode, fqPrc, 256, fqShape);
        }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"} };
// Post-ops chain: Sigmoid activation followed by two per-channel FakeQuantize nodes.
// All three nodes are expected in the fused-ops check list.
const auto fusingSigmoidFQFQ = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
        {[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
            return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Sigmoid);
        }, "Sigmoid"},
        {[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
            // FQ keeps the precision of its input node rather than the test precision.
            const auto fqPrc = inpNode->get_element_type();
            const auto fqShape = generatePerChannelShape(inpNode);
            return ngraph::builder::makeFakeQuantize(inpNode, fqPrc, 256, fqShape);
        }, "FakeQuantize(PerChannel)"},
        {[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
            const auto fqPrc = inpNode->get_element_type();
            const auto fqShape = generatePerChannelShape(inpNode);
            return ngraph::builder::makeFakeQuantize(inpNode, fqPrc, 256, fqShape);
        }, "FakeQuantize(PerChannel)"}}), {"Sigmoid", "FakeQuantize", "FakeQuantize"} };
// Post-ops chain: Clamp to [3.0, 6.0] followed by a per-channel FakeQuantize.
// The fused-ops check list contains "FakeQuantize" only.
const auto fusingClampFQ = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
        {[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
            // Activation constants: {min = 3.0, max = 6.0}.
            return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Clamp, {}, {3.0f, 6.0f});
        }, "Clamp"},
        {[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
            // FQ keeps the precision of its input node rather than the test precision.
            const auto fqPrc = inpNode->get_element_type();
            const auto fqShape = generatePerChannelShape(inpNode);
            return ngraph::builder::makeFakeQuantize(inpNode, fqPrc, 256, fqShape);
        }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"} };
// Fusing patterns exercised with the FP32 (default) plugin configuration.
const std::vector<fusingSpecificParams> fusingParamsSet{
        emptyFusingSpec,
        fusingSigmoid,
        fusingFakeQuantizePerTensorRelu,
        fusingFakeQuantizePerChannelRelu,
        fusingFQPerChannelSigmoidFQPerChannel,
        fusingReluScaleShift,
        fusingMulAddFQMullAdd,
        fusingSigmoidFQFQ,
        // fusingClampFQ, // TODO: we need investigation, this particular pattern does not work even in static case
        fusingDivSubFQ
};
// Reduced fusing set used for the BF16 plugin configuration runs.
const std::vector<fusingSpecificParams> fusingParamsSetBF16{
        emptyFusingSpec,
        fusingSigmoid,
        fusingReluScaleShift
};
// Convolution input shape: channel count is static (32), batch and spatial
// dimensions are dynamic (-1). The target static shapes cover several spatial
// sizes to exercise shape changes between inferences.
// Declared const: only read (copied into ::testing::Values) by the
// instantiations below, so it should not be a mutable global.
const InputShape convInpShape = {
        //dynamic shapes
        {-1, 32, -1, -1},
        { //target static shapes
            {1, 32, 10, 10},
            {1, 32, 10, 10},
            {1, 32, 10, 10},
            {1, 32, 3, 3},
            {1, 32, 3, 10}
        }
};
// Second (sum) input shape: fully dynamic rank-4 shape. The target static
// shapes use 64 channels with varying unit/non-unit spatial dims — presumably
// to exercise broadcasting against the conv branch output (TODO: confirm
// against the test body, which is outside this file section).
// Declared const: only read (copied into ::testing::Values) by the
// instantiations below, so it should not be a mutable global.
const InputShape secondInp = {
        //dynamic shapes
        {-1, -1, -1, -1},
        { //target static shapes
            {1, 64, 1, 8},
            {1, 64, 1, 8},
            {1, 64, 8, 8},
            {1, 64, 8, 8},
            {1, 64, 8, 1}
        }
};
// FP32 run: full fusing set, empty plugin configuration.
INSTANTIATE_TEST_SUITE_P(smoke_Conv_Sum_Broadcast_FP32,
                         ConcatConvSumInPlaceTest,
                         ::testing::Combine(::testing::Values(convInpShape),
                                            ::testing::Values(secondInp),
                                            ::testing::Values(true, false),
                                            ::testing::ValuesIn(fusingParamsSet),
                                            ::testing::Values(cpuEmptyPluginConfig)),
                         ConcatConvSumInPlaceTest::getTestCaseName);
// BF16 run: reduced fusing set, BF16 plugin configuration.
INSTANTIATE_TEST_SUITE_P(smoke_Conv_Sum_Broadcast_BF16,
                         ConcatConvSumInPlaceTest,
                         ::testing::Combine(::testing::Values(convInpShape),
                                            ::testing::Values(secondInp),
                                            ::testing::Values(true, false),
                                            ::testing::ValuesIn(fusingParamsSetBF16),
                                            ::testing::Values(cpuBF16PluginConfig)),
                         ConcatConvSumInPlaceTest::getTestCaseName);
} // namespace
} // namespace SubgraphTestsDefinitions