[CPU] Convolution plus sum fusing in the case of dynamic shapes (#10235)
This commit is contained in:
@@ -80,6 +80,38 @@ void MKLDNNGraph::CreateGraph(NET &net, const MKLDNNExtensionManager::Ptr& extMg
|
||||
CPU_DEBUG_CAP_ENABLE(serialize(*this));
|
||||
}
|
||||
|
||||
void MKLDNNGraph::CreateGraph(const std::vector<MKLDNNNodePtr> &graphNodes,
|
||||
const std::vector<MKLDNNEdgePtr> &graphEdges,
|
||||
MKLDNNWeightsSharing::Ptr &w_cache,
|
||||
std::string name) {
|
||||
if (IsReady())
|
||||
ForgetGraphData();
|
||||
// disable weights caching if graph was created only once
|
||||
weightsCache = config.streamExecutorConfig._streams != 1 ? w_cache : nullptr;
|
||||
|
||||
rtParamsCache = std::make_shared<MultiCache>(config.rtCacheCapacity);
|
||||
|
||||
this->_name = std::move(name);
|
||||
this->reuse_io_tensors = false;
|
||||
|
||||
this->graphNodes = graphNodes;
|
||||
this->graphEdges = graphEdges;
|
||||
|
||||
for (auto node : graphNodes) {
|
||||
if ("Parameter" == node->getTypeStr()) {
|
||||
inputNodesMap[node->getName()] = node;
|
||||
} else if ("Result" == node->getTypeStr()) {
|
||||
outputNodesMap[node->getName()] = node;
|
||||
}
|
||||
}
|
||||
|
||||
InitGraph();
|
||||
|
||||
status = Ready;
|
||||
|
||||
CPU_DEBUG_CAP_ENABLE(serialize(*this));
|
||||
}
|
||||
|
||||
template void MKLDNNGraph::CreateGraph(const std::shared_ptr<const ngraph::Function>&,
|
||||
const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&);
|
||||
template void MKLDNNGraph::CreateGraph(const CNNNetwork&,
|
||||
@@ -1073,6 +1105,7 @@ Config MKLDNNGraph::getProperty() const {
|
||||
void MKLDNNGraph::RemoveEdge(MKLDNNEdgePtr& edge) {
|
||||
for (auto it = graphEdges.begin(); it != graphEdges.end(); it++) {
|
||||
if ((*it) == edge) {
|
||||
edge->drop();
|
||||
graphEdges.erase(it);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -50,6 +50,11 @@ public:
|
||||
const MKLDNNExtensionManager::Ptr& extMgr,
|
||||
MKLDNNWeightsSharing::Ptr &w_cache);
|
||||
|
||||
void CreateGraph(const std::vector<MKLDNNNodePtr> &graphNodes,
|
||||
const std::vector<MKLDNNEdgePtr> &graphEdges,
|
||||
MKLDNNWeightsSharing::Ptr &w_cache,
|
||||
std::string name);
|
||||
|
||||
bool hasMeanImageFor(const std::string& name) {
|
||||
return _normalizePreprocMap.find(name) != _normalizePreprocMap.end();
|
||||
}
|
||||
|
||||
@@ -239,14 +239,12 @@ void MKLDNNGraphOptimizer::FuseConvolutionMatMulAndBias(MKLDNNGraph &graph) {
|
||||
int inNum = 0;
|
||||
if (remEdge) {
|
||||
inNum = remEdge->getInputNum();
|
||||
remEdge->drop();
|
||||
graph.RemoveEdge(remEdge);
|
||||
}
|
||||
remEdge = childs[j].lock();
|
||||
int outNum = 0;
|
||||
if (remEdge) {
|
||||
outNum = remEdge->getOutputNum();
|
||||
remEdge->drop();
|
||||
graph.RemoveEdge(remEdge);
|
||||
}
|
||||
MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum));
|
||||
@@ -259,7 +257,6 @@ void MKLDNNGraphOptimizer::FuseConvolutionMatMulAndBias(MKLDNNGraph &graph) {
|
||||
int inNum = 0;
|
||||
if (remEdge) {
|
||||
inNum = remEdge->getInputNum();
|
||||
remEdge->drop();
|
||||
graph.RemoveEdge(remEdge);
|
||||
}
|
||||
|
||||
@@ -1074,8 +1071,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
|
||||
};
|
||||
|
||||
for (auto &graphNode : graphNodes) {
|
||||
// TODO [DS]: at this moment this transformation prohibit for dynamic case
|
||||
if (graphNode->getType() != Eltwise || graphNode->getAlgorithm() != EltwiseAdd || graphNode->isDynamicNode() ||
|
||||
if (graphNode->getType() != Eltwise || graphNode->getAlgorithm() != EltwiseAdd ||
|
||||
std::dynamic_pointer_cast<MKLDNNEltwiseNode>(graphNode)->isWithBroadcast())
|
||||
continue;
|
||||
|
||||
@@ -1227,9 +1223,9 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
|
||||
if (mergedConv->fusedWith.size() > 0 &&
|
||||
(mergedConv->fusedWith[0]->getType() == Convolution || mergedConv->fusedWith[0]->getType() == BinaryConvolution)) {
|
||||
// Merged with DW_conv. Shape may change
|
||||
mergedConv->inputShapes.push_back(mergedConv->fusedWith[0]->outputShapes[0]);
|
||||
mergedConv->inputShapes.push_back(mergedConv->fusedWith[0]->getOutputShapeAtPort(0));
|
||||
} else {
|
||||
mergedConv->inputShapes.push_back(mergedConv->outputShapes[0]);
|
||||
mergedConv->inputShapes.push_back(sum->getInputShapeAtPort(1));
|
||||
}
|
||||
|
||||
size_t childIdx = 0lu;
|
||||
@@ -1536,14 +1532,12 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) {
|
||||
int inNum = 0;
|
||||
if (remEdge) {
|
||||
inNum = remEdge->getInputNum();
|
||||
remEdge->drop();
|
||||
graph.RemoveEdge(remEdge);
|
||||
}
|
||||
remEdge = children[j].lock();
|
||||
int outNum = 0;
|
||||
if (remEdge) {
|
||||
outNum = remEdge->getOutputNum();
|
||||
remEdge->drop();
|
||||
graph.RemoveEdge(remEdge);
|
||||
}
|
||||
MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum));
|
||||
@@ -1563,7 +1557,6 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) {
|
||||
if (childNode->getAlgorithm() == EltwiseMulAdd) {
|
||||
outNum = initialParentInNum + remEdge->getOutputNum() - 1;
|
||||
}
|
||||
remEdge->drop();
|
||||
graph.RemoveEdge(remEdge);
|
||||
}
|
||||
|
||||
@@ -2068,7 +2061,6 @@ void MKLDNNGraphOptimizer::reshapeRnnSeq(MKLDNNGraph &graph) {
|
||||
graphEdges.push_back(newEdge);
|
||||
graphNodes.push_back(cpuConstant);
|
||||
|
||||
edge->drop();
|
||||
graph.RemoveEdge(edge);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1546,3 +1546,7 @@ bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const {
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void MKLDNNNode::addFusedNode(const MKLDNNNodePtr &fusingNode) {
|
||||
fusedWith.push_back(fusingNode);
|
||||
}
|
||||
|
||||
@@ -186,9 +186,7 @@ public:
|
||||
|
||||
bool isFusedWith(Type type) const;
|
||||
|
||||
void addFusedNode(const MKLDNNNodePtr &fusingNode) {
|
||||
fusedWith.push_back(fusingNode);
|
||||
}
|
||||
virtual void addFusedNode(const MKLDNNNodePtr &fusingNode);
|
||||
|
||||
virtual void fuseInto(MKLDNNNodePtr& parentNode) {
|
||||
// The graph supports fusing only of consecutive nodes and some graph logic requires to know through which input port a node was fused into parent one.
|
||||
@@ -332,7 +330,7 @@ public:
|
||||
|
||||
virtual void execute(mkldnn::stream strm);
|
||||
void executeDynamic(mkldnn::stream strm);
|
||||
void redefineOutputMemory(const std::vector<VectorDims> &newShapes);
|
||||
virtual void redefineOutputMemory(const std::vector<VectorDims> &newShapes);
|
||||
|
||||
virtual void initSupportedPrimitiveDescriptors();
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "mkldnn_fake_quantize_node.h"
|
||||
#include "mkldnn_pooling_node.h"
|
||||
#include "mkldnn_concat_node.h"
|
||||
#include "mkldnn_graph.h"
|
||||
#include "cpu/x64/cpu_isa_traits.hpp"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
@@ -95,6 +96,101 @@ bool ConvKey::operator==(const ConvKey &rhs) const {
|
||||
|
||||
} // namespace
|
||||
|
||||
class MKLDNNConvolutionNode::FusedSubgraph {
|
||||
public:
|
||||
FusedSubgraph(const std::vector<MKLDNNNodePtr> &opList, const MKLDNNConvolutionNode &conv, MKLDNNWeightsSharing::Ptr weightCache) {
|
||||
_graph = std::unique_ptr<MKLDNNGraph>(new MKLDNNGraph());
|
||||
|
||||
std::unordered_set<MKLDNNNodePtr> nodesSet;
|
||||
std::vector<MKLDNNEdgePtr> edges;
|
||||
|
||||
auto addEdge = [&](const MKLDNNNodePtr& parent, const MKLDNNNodePtr& child, size_t parentPort, size_t childPort) -> void {
|
||||
auto edge = std::make_shared<MKLDNNEdge>(parent, child, parentPort, childPort);
|
||||
child->addEdge(edge);
|
||||
edges.push_back(edge);
|
||||
nodesSet.insert(parent);
|
||||
nodesSet.insert(child);
|
||||
};
|
||||
|
||||
//Make inputs
|
||||
const auto &inpMemDesc1 = conv.getBaseMemDescAtOutputPort(0);
|
||||
auto inp0 = std::make_shared<MKLDNNInputNode>(inpMemDesc1, "inp0", "Parameter", conv.getEngine(), weightCache);
|
||||
inputs.push_back(inp0);
|
||||
const size_t sumPortNum = conv.getParentEdges().size() - 1;
|
||||
const auto &inpMemDesc2 = conv.getBaseMemDescAtInputPort(sumPortNum);
|
||||
auto inp1 = std::make_shared<MKLDNNInputNode>(inpMemDesc2, "inp1", "Parameter", conv.getEngine(), weightCache);
|
||||
inputs.push_back(inp1);
|
||||
|
||||
auto itr = std::find_if(opList.begin(), opList.end(), [](const MKLDNNNodePtr &node) {
|
||||
if (auto eltwise = std::dynamic_pointer_cast<MKLDNNEltwiseNode>(node)) {
|
||||
return eltwise->isSpecialConvolutionAddFusing();
|
||||
}
|
||||
return false;
|
||||
});
|
||||
auto sumNode = *itr;
|
||||
addEdge(inp0, sumNode, 0, 0);
|
||||
addEdge(inp1, sumNode, 0, 1);
|
||||
|
||||
//Replicate the rest of the subgraph
|
||||
auto parentItr = itr;
|
||||
while (++itr != opList.end()) {
|
||||
auto parentNode = *parentItr;
|
||||
auto currentNode = *itr;
|
||||
if (FakeQuantize == currentNode->getType()) {
|
||||
parentNode->addFusedNode(currentNode);
|
||||
} else {
|
||||
addEdge(parentNode, currentNode, 0, 0);
|
||||
auto constantsItr = conv.fusedConstNodes.find(currentNode);
|
||||
if (constantsItr != conv.fusedConstNodes.end()) {
|
||||
size_t inpPort = 1lu;
|
||||
for (const auto& item : constantsItr->second) {
|
||||
addEdge(item, currentNode, 0, inpPort++);
|
||||
}
|
||||
}
|
||||
parentItr = itr;
|
||||
}
|
||||
}
|
||||
|
||||
//Make output
|
||||
const auto &outMemDesc = conv.getBaseMemDescAtOutputPort(0);
|
||||
auto out = std::make_shared<MKLDNNInputNode>(outMemDesc, "out", "Result", conv.getEngine(), weightCache);
|
||||
addEdge(*parentItr, out, 0, 0);
|
||||
outputs.push_back(out);
|
||||
|
||||
std::vector<MKLDNNNodePtr> nodes(nodesSet.begin(), nodesSet.end());
|
||||
|
||||
_graph->CreateGraph(nodes, edges, weightCache, "fused_subgraph");
|
||||
}
|
||||
|
||||
std::shared_ptr<MKLDNNInputNode> getInput(size_t idx) const {
|
||||
if (idx < inputs.size()) {
|
||||
return inputs[idx];
|
||||
} else {
|
||||
IE_THROW(OutOfBounds) << "Unexpected input index in MKLDNNConvolutionNode::fusedSubgraph::getInput idx=" << idx
|
||||
<< " inputs.size()=" << inputs.size();
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<MKLDNNInputNode> getOutput(size_t idx) const {
|
||||
if (idx < outputs.size()) {
|
||||
return outputs[idx];
|
||||
} else {
|
||||
IE_THROW(OutOfBounds) << "Unexpected output index in MKLDNNConvolutionNode::fusedSubgraph::getInput idx=" << idx
|
||||
<< " inputs.size()=" << outputs.size();
|
||||
}
|
||||
}
|
||||
|
||||
void infer() {
|
||||
_graph->ResetInferCount();
|
||||
_graph->Infer();
|
||||
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<MKLDNNGraph> _graph;
|
||||
std::vector<std::shared_ptr<MKLDNNInputNode>> inputs;
|
||||
std::vector<std::shared_ptr<MKLDNNInputNode>> outputs;
|
||||
};
|
||||
|
||||
bool MKLDNNConvolutionNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
|
||||
try {
|
||||
if (!ngraph::is_type<ngraph::op::v1::Convolution>(op) && !ngraph::is_type<ngraph::op::v1::GroupConvolution>(op)) {
|
||||
@@ -220,7 +316,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
|
||||
(withBiases ? (getParentEdgeAt(2)->getParent()->isConstant() && getParentEdgeAt(2)->getParent()->getType() == Input) : true);
|
||||
}
|
||||
|
||||
withSum = false;
|
||||
int expectedInputEdgesNum = static_cast<int>(getOriginalInputsNumber());
|
||||
for (int i = 0; i < fusedWith.size(); i++) {
|
||||
if (fusedWith[i]->getType() == Convolution) {
|
||||
@@ -230,7 +325,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
|
||||
if (fusedWith[i]->getAlgorithm() == EltwiseAdd) {
|
||||
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
|
||||
if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) {
|
||||
withSum = true;
|
||||
expectedInputEdgesNum++;
|
||||
}
|
||||
}
|
||||
@@ -418,6 +512,9 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vecto
|
||||
|
||||
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
|
||||
if (eltwiseNode->isSpecialConvolutionAddFusing()) {
|
||||
if (withSumBroadcast) {
|
||||
break;
|
||||
}
|
||||
ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision));
|
||||
} else {
|
||||
if (useLegacyPostOps || eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
|
||||
@@ -536,7 +633,7 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
|
||||
|
||||
if (withSum) {
|
||||
dataConfig.inPlace(-1);
|
||||
dataConfig.setMemDesc(dataConfig.getMemDesc()->cloneWithNewPrecision(dataConfig.getMemDesc()->getPrecision()));
|
||||
dataConfig.setMemDesc(getSumMemDesc(itpd)->cloneWithNewPrecision(dataConfig.getMemDesc()->getPrecision()));
|
||||
config.inConfs.push_back(dataConfig);
|
||||
}
|
||||
}
|
||||
@@ -993,7 +1090,7 @@ InferenceEngine::Blob::Ptr MKLDNNConvolutionNode::createInternalBlob(InferenceEn
|
||||
void MKLDNNConvolutionNode::prepareParams() {
|
||||
auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
|
||||
auto wghMemPtr = getParentEdgesAtPort(1)[0]->getMemoryPtr();
|
||||
auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr();
|
||||
auto dstMemPtr = getOutputMemory();
|
||||
if (!dstMemPtr || !dstMemPtr->isAllocated())
|
||||
IE_THROW() << "Destination memory was not allocated.";
|
||||
if (!srcMemPtr || !srcMemPtr->isAllocated())
|
||||
@@ -1030,7 +1127,7 @@ void MKLDNNConvolutionNode::prepareParams() {
|
||||
AttrPtr pAttrLocal;
|
||||
|
||||
if (isDynamicNode()) {
|
||||
if (!pAttr) {
|
||||
if (!pAttr || withSum) {
|
||||
pAttr = initPrimitiveAttr();
|
||||
}
|
||||
pAttrLocal = pAttr;
|
||||
@@ -1197,6 +1294,23 @@ void MKLDNNConvolutionNode::execute(mkldnn::stream strm) {
|
||||
|
||||
void MKLDNNConvolutionNode::executeDynamicImpl(mkldnn::stream strm) {
|
||||
execute(strm);
|
||||
if (withSumBroadcast) {
|
||||
if (!subgraph) {
|
||||
IE_THROW(Unexpected) << "Fused ops subgraph has not been created in " << getTypeStr() << " with name " << getName();
|
||||
}
|
||||
const size_t sumPortNum = getParentEdges().size() - 1;
|
||||
const auto& sumInpMem = getParentEdgesAtPort(sumPortNum).front()->getMemory();
|
||||
auto inp1 = subgraph->getInput(1);
|
||||
inp1->getChildEdgesAtPort(0).front()->getMemoryPtr()->setDataHandle(sumInpMem.GetData());
|
||||
|
||||
subgraph->infer();
|
||||
|
||||
auto out = subgraph->getOutput(0);
|
||||
const auto& outMem = out->getParentEdgesAtPort(0).front()->getMemory();
|
||||
auto convOutMem = getChildEdgesAtPort(0).front()->getMemoryPtr();
|
||||
convOutMem->redefineDesc(getBaseMemDescAtOutputPort(0)->cloneWithNewDims(outMem.getStaticDims()));
|
||||
convOutMem->SetData(outMem);
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNConvolutionNode::updatePadding() {
|
||||
@@ -1207,6 +1321,69 @@ void MKLDNNConvolutionNode::updatePadding() {
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNConvolutionNode::redefineOutputMemory(const std::vector<VectorDims> &newOutputShapes) {
|
||||
if (withSum) {
|
||||
const size_t sumPortNum = getParentEdges().size() - 1;
|
||||
const auto& sumInpMem = getParentEdgesAtPort(sumPortNum).front()->getMemory();
|
||||
if (newOutputShapes.front() != sumInpMem.getStaticDims()) {
|
||||
withSumBroadcast = true;
|
||||
if (!subgraph) {
|
||||
subgraph = std::make_shared<FusedSubgraph>(fusedWith, *this, weightCache);
|
||||
}
|
||||
auto inp0 = subgraph->getInput(0);
|
||||
inp0->redefineOutputMemory(newOutputShapes);
|
||||
|
||||
auto inp1 = subgraph->getInput(1);
|
||||
inp1->redefineOutputMemory({sumInpMem.getStaticDims()});
|
||||
// here we postpone output memory reallocation due to the fact that it is the same memory with the sum second input
|
||||
return;
|
||||
} else {
|
||||
withSumBroadcast = false;
|
||||
}
|
||||
}
|
||||
MKLDNNNode::redefineOutputMemory(newOutputShapes);
|
||||
}
|
||||
|
||||
MemoryDescPtr MKLDNNConvolutionNode::getSumMemDesc(primitive_desc_iterator &primitive_desc_it) {
|
||||
if (getOutputShapeAtPort(0).isDynamic()) {
|
||||
return MKLDNNExtensionUtils::makeUndefinedDesc(primitive_desc_it.dst_desc(0), getInputShapeAtPort(getParentEdges().size() - 1));
|
||||
}
|
||||
return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.dst_desc(0));
|
||||
}
|
||||
|
||||
MKLDNNMemoryPtr MKLDNNConvolutionNode::getOutputMemory() const {
|
||||
if (withSumBroadcast) {
|
||||
if (!subgraph) {
|
||||
IE_THROW(Unexpected) << "Fused ops subgraph has not been created in " << getTypeStr() << " with name " << getName();
|
||||
}
|
||||
auto inp0 = subgraph->getInput(0);
|
||||
return inp0->getChildEdgesAtPort(0).front()->getMemoryPtr();
|
||||
} else {
|
||||
return getChildEdgesAtPort(0).front()->getMemoryPtr();
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNPlugin::MKLDNNConvolutionNode::addFusedNode(const MKLDNNNodePtr &fusingNode) {
|
||||
if (Eltwise == fusingNode->getType()) {
|
||||
if (fusingNode->getAlgorithm() == EltwiseAdd) {
|
||||
auto eltwiseNode = std::dynamic_pointer_cast<MKLDNNEltwiseNode>(fusingNode);
|
||||
if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) {
|
||||
withSum = true;
|
||||
}
|
||||
}
|
||||
if (withSum && isDynamicNode()) {
|
||||
for (size_t i = 0; i < fusingNode->getParentEdges().size(); ++i) {
|
||||
auto edge = fusingNode->getParentEdgesAtPort(i).front();
|
||||
auto parent = edge->getParent();
|
||||
if ("Constant" == parent->getTypeStr()) {
|
||||
fusedConstNodes[fusingNode].push_back(parent);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
MKLDNNNode::addFusedNode(fusingNode);
|
||||
}
|
||||
|
||||
void MKLDNNConvolutionNode::appendZeroPointsArgs() {
|
||||
if (inputZeroPointsMemPtr != nullptr) {
|
||||
primArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC] = inputZeroPointsMemPtr->GetPrimitive();
|
||||
@@ -1218,5 +1395,4 @@ void MKLDNNConvolutionNode::appendZeroPointsArgs() {
|
||||
primArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_DST] = outputCompensationMemPtr->GetPrimitive();
|
||||
}
|
||||
}
|
||||
|
||||
REG_MKLDNN_PRIM_FOR(MKLDNNConvolutionNode, Convolution);
|
||||
|
||||
@@ -65,8 +65,12 @@ public:
|
||||
|
||||
protected:
|
||||
InferenceEngine::Precision fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const;
|
||||
void redefineOutputMemory(const std::vector<VectorDims> &newOutputShapes) override;
|
||||
void addFusedNode(const MKLDNNNodePtr &fusingNode) override;
|
||||
|
||||
private:
|
||||
class FusedSubgraph;
|
||||
using FusedSubgraphPtr = std::shared_ptr<FusedSubgraph>;
|
||||
using executorPtr = std::shared_ptr<DnnlExecutor>;
|
||||
executorPtr execPtr = nullptr;
|
||||
|
||||
@@ -91,6 +95,8 @@ private:
|
||||
InferenceEngine::Blob::Ptr createInternalBlob(InferenceEngine::SizeVector dims, size_t edgeNum, bool isGrouped = false);
|
||||
|
||||
void updatePadding();
|
||||
MemoryDescPtr getSumMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it);
|
||||
MKLDNNMemoryPtr getOutputMemory() const;
|
||||
|
||||
void appendZeroPointsArgs();
|
||||
|
||||
@@ -99,6 +105,7 @@ private:
|
||||
bool withDWConv;
|
||||
bool isGrouped;
|
||||
bool isPrimitivesPriorityDefined = false;
|
||||
bool withSumBroadcast = false;
|
||||
std::vector<size_t> stride;
|
||||
std::vector<ptrdiff_t> dilation;
|
||||
std::vector<ptrdiff_t> paddingL;
|
||||
@@ -126,6 +133,8 @@ private:
|
||||
bool isWino = false;
|
||||
AttrPtr pAttr;
|
||||
bool autoPadding = false;
|
||||
FusedSubgraphPtr subgraph;
|
||||
std::unordered_map<MKLDNNNodePtr, std::vector<MKLDNNNodePtr>> fusedConstNodes;
|
||||
|
||||
MKLDNNMemoryPtr inputZeroPointsMemPtr;
|
||||
MKLDNNMemoryPtr weightsZeroPointsMemPtr;
|
||||
|
||||
@@ -1590,14 +1590,13 @@ size_t MKLDNNEltwiseNode::getOpInputsNum() const {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO [DS]: used only in FuseConvolutionSumAndConvolutionSumActivation
|
||||
// fix when reimplement this transformation for dynamic shapes
|
||||
bool MKLDNNEltwiseNode::isWithBroadcast() {
|
||||
auto oDims = getOutputShapeAtPort(0).getStaticDims();
|
||||
const auto& oDims = getOutputShapeAtPort(0).getDims();
|
||||
for (size_t i = 0; i < inputShapes.size(); i++) {
|
||||
auto iDims = getInputShapeAtPort(i).getStaticDims();
|
||||
if (iDims != oDims)
|
||||
const auto& iDims = getInputShapeAtPort(i).getDims();
|
||||
if (!dimsEqualWeak(iDims, oDims)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
@@ -2014,9 +2013,8 @@ bool MKLDNNEltwiseNode::canBeInPlace() const {
|
||||
|
||||
void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) {
|
||||
// Handling Convolution custom Add node fusing case which is processed via dnnl append_sum() API.
|
||||
// TODO [DS]: at this moment this transformation prohibit for dynamic case
|
||||
specialConvolutionAddFusing = (parentNode->getType() == Convolution || parentNode->getType() == BinaryConvolution) && getAlgorithm() == EltwiseAdd &&
|
||||
getInputShapeAtPort(0) == getInputShapeAtPort(1);
|
||||
dimsEqualWeak(getInputShapeAtPort(0).getDims(), getInputShapeAtPort(1).getDims());
|
||||
if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) {
|
||||
std::tie(scales, shifts) = getScalesAndShifts(parentNode.get());
|
||||
if ((parentNode->getType() == FullyConnected || parentNode->getType() == MatMul) && one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract,
|
||||
|
||||
@@ -363,6 +363,12 @@ MKLDNNInputNode::MKLDNNInputNode(const Shape& shape, const InferenceEngine::Prec
|
||||
}
|
||||
}
|
||||
|
||||
MKLDNNInputNode::MKLDNNInputNode(MemoryDescPtr memDesc, const std::string &name, const std::string &type,
|
||||
const mkldnn::engine &eng, MKLDNNWeightsSharing::Ptr &cache) :
|
||||
MKLDNNInputNode(memDesc->getShape(), memDesc->getPrecision(), name, type, eng, cache) {
|
||||
extMemDesc = memDesc;
|
||||
}
|
||||
|
||||
void MKLDNNInputNode::withMeanImage() {
|
||||
isMeanImage = true;
|
||||
}
|
||||
@@ -389,29 +395,11 @@ void MKLDNNInputNode::initSupportedPrimitiveDescriptors() {
|
||||
if (!supportedPrimitiveDescriptors.empty())
|
||||
return;
|
||||
|
||||
std::vector<PortConfigurator> inPortConfs;
|
||||
std::vector<PortConfigurator> outPortConfs;
|
||||
|
||||
if (getType() == Input || getType() == MemoryInput) {
|
||||
auto precision = getOriginalOutputPrecisionAtPort(0);
|
||||
if (precision == Precision::U16 || isMeanImage) {
|
||||
precision = Precision::FP32;
|
||||
}
|
||||
|
||||
outPortConfs.push_back({LayoutType::ncsp, precision});
|
||||
if (!getParentEdges().empty()) {
|
||||
inPortConfs.push_back({LayoutType::ncsp, precision, true});
|
||||
}
|
||||
} else if (getType() == Output) {
|
||||
auto precision = getOriginalInputPrecisionAtPort(0);
|
||||
if (precision == Precision::U16) precision = Precision::FP32;
|
||||
|
||||
inPortConfs.push_back({LayoutType::ncsp, precision});
|
||||
if (extMemDesc) {
|
||||
initSupportedPdFromMemDesc();
|
||||
} else {
|
||||
initSupportedPdDefault();
|
||||
}
|
||||
|
||||
addSupportedPrimDesc(inPortConfs,
|
||||
outPortConfs,
|
||||
impl_desc_type::unknown);
|
||||
}
|
||||
|
||||
void MKLDNNInputNode::createPrimitive() {
|
||||
@@ -437,5 +425,45 @@ bool MKLDNNInputNode::created() const {
|
||||
return getType() == Input || getType() == Output;
|
||||
}
|
||||
|
||||
void MKLDNNInputNode::initSupportedPdDefault() {
|
||||
std::vector<PortConfigurator> inPortConfs;
|
||||
std::vector<PortConfigurator> outPortConfs;
|
||||
|
||||
if (getType() == Input || getType() == MemoryInput) {
|
||||
auto precision = getOriginalOutputPrecisionAtPort(0);
|
||||
if (precision == Precision::U16 || isMeanImage) {
|
||||
precision = Precision::FP32;
|
||||
}
|
||||
|
||||
outPortConfs.push_back({LayoutType::ncsp, precision});
|
||||
if (!getParentEdges().empty()) {
|
||||
inPortConfs.push_back({LayoutType::ncsp, precision, true});
|
||||
}
|
||||
} else if (getType() == Output) {
|
||||
auto precision = getOriginalInputPrecisionAtPort(0);
|
||||
if (precision == Precision::U16) precision = Precision::FP32;
|
||||
|
||||
inPortConfs.push_back({LayoutType::ncsp, precision});
|
||||
}
|
||||
|
||||
addSupportedPrimDesc(inPortConfs,
|
||||
outPortConfs,
|
||||
impl_desc_type::unknown);
|
||||
}
|
||||
|
||||
void MKLDNNInputNode::initSupportedPdFromMemDesc() {
|
||||
NodeConfig config;
|
||||
PortConfig portConfig;
|
||||
portConfig.inPlace(-1);
|
||||
portConfig.constant(false);
|
||||
portConfig.setMemDesc(extMemDesc);
|
||||
if (getType() == Input || getType() == MemoryInput) {
|
||||
config.outConfs.push_back(portConfig);
|
||||
} else if (getType() == Output) {
|
||||
config.inConfs.push_back(portConfig);
|
||||
}
|
||||
supportedPrimitiveDescriptors.emplace_back(std::move(config), impl_desc_type::unknown);
|
||||
}
|
||||
|
||||
REG_MKLDNN_PRIM_FOR(MKLDNNInputNode, Input);
|
||||
REG_MKLDNN_PRIM_FOR(MKLDNNInputNode, Output);
|
||||
|
||||
@@ -16,6 +16,8 @@ public:
|
||||
MKLDNNInputNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
|
||||
MKLDNNInputNode(const Shape& shape, const InferenceEngine::Precision &prc, const std::string &name,
|
||||
const std::string &type, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
|
||||
MKLDNNInputNode(MemoryDescPtr memDesc, const std::string &name, const std::string &type, const mkldnn::engine& eng,
|
||||
MKLDNNWeightsSharing::Ptr &cache);
|
||||
|
||||
void getSupportedDescriptors() override;
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
@@ -35,10 +37,13 @@ public:
|
||||
|
||||
private:
|
||||
void cloneBlobIfRequired();
|
||||
void initSupportedPdDefault();
|
||||
void initSupportedPdFromMemDesc();
|
||||
|
||||
private:
|
||||
std::shared_ptr<ngraph::op::Constant> constOp;
|
||||
MKLDNNMemoryCPtr memoryPtr;
|
||||
MemoryDescPtr extMemDesc = nullptr;
|
||||
bool isMeanImage = false;
|
||||
};
|
||||
|
||||
|
||||
@@ -144,9 +144,6 @@ std::vector<std::string> disabledTestPatterns() {
|
||||
*IS=_TS=\(\(4\.5\.6\.7\)\)_RS=\(\(1\.1\.6\.1\)\)_\(\(1\.5\.6\.1\)\)_\(\(1\.1\.1\.1\)\)_\(\(1\.1\.6\.1\)\).*)",
|
||||
// Issue: 69222
|
||||
R"(.*smoke_PriorBoxClustered.*PriorBoxClusteredLayerCPUTest.*_netPRC=f16_.*)",
|
||||
// TODO : CVS-69533
|
||||
R"(.*ConvolutionLayerCPUTest.*IS=\{.+\}.*_Fused=.*Add\(Parameters\).*)",
|
||||
R"(.*GroupConvolutionLayerCPUTest.*IS=\{.+\}.*_Fused=.*Add\(Parameters\).*)",
|
||||
// Issue: 74817
|
||||
// Sporadic failings with NAN on Dynamic shape cases with jit implementation
|
||||
R"(.*DefConvLayoutTest7.*)",
|
||||
|
||||
@@ -116,10 +116,35 @@ protected:
|
||||
ngraph::ParameterVector ¶ms,
|
||||
const std::shared_ptr<ngraph::Node> &lastNode) override {
|
||||
auto retNode = CpuTestWithFusing::modifyGraph(ngPrc, params, lastNode);
|
||||
for (size_t i = targetStaticShapes.front().size(); i < params.size(); ++i) {
|
||||
const auto& shape = params[i]->get_output_partial_shape(0);
|
||||
if (shape.is_static()) {
|
||||
targetStaticShapes.front().push_back(shape.get_shape());
|
||||
std::shared_ptr<ngraph::Node> opToShapeInfer = nullptr;
|
||||
for (auto& targetShapes : targetStaticShapes) {
|
||||
for (size_t i = targetShapes.size(); i < params.size(); ++i) {
|
||||
const auto &shape = params[i]->get_output_partial_shape(0);
|
||||
if (shape.is_static()) {
|
||||
targetShapes.push_back(shape.get_shape());
|
||||
} else {
|
||||
// It is assumed that in such tests we have second parameter only if sum fusion is tested.
|
||||
// Considering this fact, we need to set the appropriate static shape for the second term of the sum operation, and
|
||||
// it has to match the convolution output shape. So the most suitable solution here is to perform shape inference on the
|
||||
// convolution node
|
||||
if (!opToShapeInfer) {
|
||||
ngraph::OutputVector inputsForShapeInfer;
|
||||
for (size_t j = 0; j < lastNode->get_input_size(); j++) {
|
||||
if (ngraph::is_type<ngraph::opset1::Constant>(lastNode->get_input_node_ptr(j))) {
|
||||
inputsForShapeInfer.push_back(lastNode->get_input_node_shared_ptr(j));
|
||||
} else {
|
||||
inputsForShapeInfer.push_back(std::make_shared<ngraph::opset1::Parameter>(lastNode->get_input_element_type(j),
|
||||
lastNode->get_input_partial_shape(j)));
|
||||
}
|
||||
}
|
||||
opToShapeInfer = lastNode->clone_with_new_inputs(inputsForShapeInfer);
|
||||
}
|
||||
|
||||
std::vector<ov::Shape> secondParameterShapes;
|
||||
opToShapeInfer->get_input_tensor(0).set_partial_shape(targetShapes.front());
|
||||
opToShapeInfer->validate_and_infer_types();
|
||||
targetShapes.push_back(opToShapeInfer->get_output_shape(0));
|
||||
}
|
||||
}
|
||||
}
|
||||
return retNode;
|
||||
|
||||
@@ -117,10 +117,35 @@ protected:
|
||||
ngraph::ParameterVector ¶ms,
|
||||
const std::shared_ptr<ngraph::Node> &lastNode) override {
|
||||
auto retNode = CpuTestWithFusing::modifyGraph(ngPrc, params, lastNode);
|
||||
for (size_t i = targetStaticShapes.front().size(); i < params.size(); ++i) {
|
||||
const auto& shape = params[i]->get_output_partial_shape(0);
|
||||
if (shape.is_static()) {
|
||||
targetStaticShapes.front().push_back(shape.get_shape());
|
||||
std::shared_ptr<ngraph::Node> opToShapeInfer = nullptr;
|
||||
for (auto& targetShapes : targetStaticShapes) {
|
||||
for (size_t i = targetShapes.size(); i < params.size(); ++i) {
|
||||
const auto &shape = params[i]->get_output_partial_shape(0);
|
||||
if (shape.is_static()) {
|
||||
targetShapes.push_back(shape.get_shape());
|
||||
} else {
|
||||
// It is assumed that in such tests we have second parameter only if sum fusion is tested.
|
||||
// Considering this fact, we need to set the appropriate static shape for the second term of the sum operation, and
|
||||
// it has to match the convolution output shape. So the most suitable solution here is to perform shape inference on the
|
||||
// convolution node
|
||||
if (!opToShapeInfer) {
|
||||
ngraph::OutputVector inputsForShapeInfer;
|
||||
for (size_t j = 0; j < lastNode->get_input_size(); j++) {
|
||||
if (ngraph::is_type<ngraph::opset1::Constant>(lastNode->get_input_node_ptr(j))) {
|
||||
inputsForShapeInfer.push_back(lastNode->get_input_node_shared_ptr(j));
|
||||
} else {
|
||||
inputsForShapeInfer.push_back(std::make_shared<ngraph::opset1::Parameter>(lastNode->get_input_element_type(j),
|
||||
lastNode->get_input_partial_shape(j)));
|
||||
}
|
||||
}
|
||||
opToShapeInfer = lastNode->clone_with_new_inputs(inputsForShapeInfer);
|
||||
}
|
||||
|
||||
std::vector<ov::Shape> secondParameterShapes;
|
||||
opToShapeInfer->get_input_tensor(0).set_partial_shape(targetShapes.front());
|
||||
opToShapeInfer->validate_and_infer_types();
|
||||
targetShapes.push_back(opToShapeInfer->get_output_shape(0));
|
||||
}
|
||||
}
|
||||
}
|
||||
return retNode;
|
||||
|
||||
@@ -0,0 +1,254 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "test_utils/cpu_test_utils.hpp"
|
||||
#include "test_utils/fusing_test_utils.hpp"
|
||||
#include "test_utils/convolution_params.hpp"
|
||||
#include "shared_test_classes/base/ov_subgraph.hpp"
|
||||
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
using namespace CPUTestUtils;
|
||||
using namespace InferenceEngine;
|
||||
using namespace ov::test;
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
typedef std::tuple<
|
||||
InputShape, //convShape
|
||||
InputShape, //second term shape
|
||||
bool, // bias flag
|
||||
fusingSpecificParams,
|
||||
std::map<std::string, std::string> // config
|
||||
> convSumBroadcastParamSet;
|
||||
|
||||
|
||||
// Parameterized test fixture that builds a Convolution [+ bias] + Add(second input)
// subgraph with dynamic shapes and checks that the sum (plus any configured post-ops)
// is fused into the Convolution primitive by the CPU plugin.
class ConcatConvSumInPlaceTest : public testing::WithParamInterface<convSumBroadcastParamSet>,
                                 virtual public SubgraphBaseTest, public CpuTestWithFusing {
public:
    // Builds a human-readable test name from the parameter tuple so a failing
    // instance can be traced back to its exact shape/bias/fusing/config combination.
    static std::string getTestCaseName(const testing::TestParamInfo<convSumBroadcastParamSet>& obj) {
        InputShape convShape;
        InputShape secondShape;
        bool bias;
        fusingSpecificParams fusingParams;
        std::map<std::string, std::string> additionalConfig;
        std::tie(convShape, secondShape, bias, fusingParams, additionalConfig) = obj.param;

        std::ostringstream result;
        result << "IS=";
        result << CommonTestUtils::partialShape2str({convShape.first, secondShape.first}) << "_";
        result << "TS=";
        // Print the target static shapes for both inputs.
        for (const auto& shape : {convShape, secondShape}) {
            result << "(";
            if (!shape.second.empty()) {
                for (const auto& itr : shape.second) {
                    result << CommonTestUtils::vec2str(itr);
                }
            }
            result << ")_";
        }
        result << "bias=" << (bias ? "True" : "False");
        result << CpuTestWithFusing::getTestCaseName(fusingParams);

        if (!additionalConfig.empty()) {
            result << "_PluginConf";
            for (auto& item : additionalConfig) {
                result << "_" << item.first << "=" << item.second;
            }
        }

        return result.str();
    }

    // Constructs the Convolution + Sum subgraph under test and records the
    // expected fused-ops list and selected primitive type.
    void SetUp() override {
        InputShape convShape;
        InputShape secondShape;
        bool bias;
        fusingSpecificParams fusingParams;
        std::map<std::string, std::string> additionalConfig;
        // NOTE(review): removed an unused local `CPUSpecificParams cpuParams;` that was
        // never extracted from the param tuple nor referenced.
        std::tie(convShape, secondShape, bias, fusingParams, additionalConfig) = this->GetParam();

        std::tie(postOpMgrPtr, fusedOps) = fusingParams;

        configuration.insert(additionalConfig.begin(), additionalConfig.end());

        init_input_shapes({convShape, secondShape});

        // Fixed 3x3 convolution with unit stride/dilation and no padding.
        const InferenceEngine::SizeVector kernel = {3, 3};
        const InferenceEngine::SizeVector stride = {1, 1};
        const InferenceEngine::SizeVector dilation = {1, 1};
        const std::vector<ptrdiff_t> padBegin = {0, 0};
        const std::vector<ptrdiff_t> padEnd = {0, 0};
        const size_t convOutChannels = 64;

        auto netType = ngraph::element::f32;
        auto inputParams = ngraph::builder::makeDynamicParams(netType, inputDynamicShapes);

        auto conv = ngraph::builder::makeConvolution(inputParams[0], ngraph::element::f32, kernel, stride, padBegin,
                                                     padEnd, dilation, ngraph::op::PadType::EXPLICIT, convOutChannels);
        if (bias) {
            // Per-channel bias as a broadcastable constant of shape {1, C, 1, 1}.
            auto biasNode = ngraph::builder::makeConstant<float>(ngraph::element::Type_t::f32, ngraph::Shape({1, convOutChannels, 1, 1}), {}, true);
            conv = std::make_shared<ngraph::opset3::Add>(conv, biasNode);
        }

        // The Add of the second network input is the "sum" the plugin is expected to fuse.
        auto sum = std::make_shared<ngraph::opset3::Add>(conv, inputParams[1]);

        fusedOps.insert(fusedOps.begin(), "Add"); // as we always fuse the sum first

        // When BF16 is enforced via config, the executed primitive precision differs
        // from the f32 network precision.
        auto runtimeType = netType;
        if (configuration.count(PluginConfigParams::KEY_ENFORCE_BF16) &&
            PluginConfigParams::YES == configuration[PluginConfigParams::KEY_ENFORCE_BF16].as<std::string>()) {
            runtimeType = ngraph::element::Type_t::bf16;
        }

        selectedType = makeSelectedTypeStr(getPrimitiveType(), runtimeType);

        function = makeNgraphFunction(netType, inputParams, sum, "ConvolutionSumBroadcast");
        targetDevice = CommonTestUtils::DEVICE_CPU;
    }
};
|
||||
|
||||
// Runs inference over all target static shapes and verifies that the Add (and any
// configured post-ops) were fused into the "Convolution" node with the expected
// primitive type.
TEST_P(ConcatConvSumInPlaceTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    run();

    CheckPluginRelatedResults(executableNetwork, "Convolution");
}
|
||||
|
||||
namespace {
|
||||
// Post-ops chain: Multiply -> Add -> FakeQuantize -> Multiply -> Add, all with
// per-channel second inputs; only the trailing "Add" is expected in the fused-ops list.
const auto fusingMulAddFQMullAdd = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
        {[](std::shared_ptr<ngraph::Node> node, const ngraph::element::Type& prc, ngraph::ParameterVector& p) {
            const auto shape = generatePerChannelShape(node);
            const auto scale = ngraph::builder::makeConstant(prc, shape, std::vector<float>{}, true);
            return std::make_shared<ngraph::opset1::Multiply>(node, scale);
        }, "Multiply(PerChannel)"},
        {[](std::shared_ptr<ngraph::Node> node, const ngraph::element::Type& prc, ngraph::ParameterVector& p) {
            const auto shape = generatePerChannelShape(node);
            const auto shift = ngraph::builder::makeConstant(prc, shape, std::vector<float>{}, true);
            return std::make_shared<ngraph::opset1::Add>(node, shift);
        }, "Add(PerChannel)"},
        {[](std::shared_ptr<ngraph::Node> node, const ngraph::element::Type& prc, ngraph::ParameterVector& p){
            // FQ uses the precision of the incoming node rather than the net precision.
            return ngraph::builder::makeFakeQuantize(node, node->get_element_type(), 256, generatePerChannelShape(node));
        }, "FakeQuantize(PerChannel)"},
        {[](std::shared_ptr<ngraph::Node> node, const ngraph::element::Type& prc, ngraph::ParameterVector& p) {
            const auto shape = generatePerChannelShape(node);
            const auto scale = ngraph::builder::makeConstant(prc, shape, std::vector<float>{}, true);
            return std::make_shared<ngraph::opset1::Multiply>(node, scale);
        }, "Multiply(PerChannel)"},
        {[](std::shared_ptr<ngraph::Node> node, const ngraph::element::Type& prc, ngraph::ParameterVector& p) {
            const auto shape = generatePerChannelShape(node);
            const auto shift = ngraph::builder::makeConstant(prc, shape, std::vector<float>{}, true);
            return std::make_shared<ngraph::opset1::Add>(node, shift);
        }, "Add(PerChannel)"}}), {"Add"} };
|
||||
|
||||
// Post-ops chain: Divide -> Subtract -> FakeQuantize, all with per-channel second inputs.
const auto fusingDivSubFQ = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
        {[](std::shared_ptr<ngraph::Node> node, const ngraph::element::Type& prc, ngraph::ParameterVector& p){
            const auto divisorShape = generatePerChannelShape(node);
            const auto divisor = ngraph::builder::makeConstant(prc, divisorShape, std::vector<float>{}, true);
            return std::make_shared<ngraph::opset1::Divide>(node, divisor);
        }, "Divide(PerChannel)"},
        {[](std::shared_ptr<ngraph::Node> node, const ngraph::element::Type& prc, ngraph::ParameterVector& p){
            const auto subtrahendShape = generatePerChannelShape(node);
            const auto subtrahend = ngraph::builder::makeConstant(prc, subtrahendShape, std::vector<float>{}, true);
            return std::make_shared<ngraph::opset1::Subtract>(node, subtrahend);
        }, "Subtract(PerChannel)"},
        {[](std::shared_ptr<ngraph::Node> node, const ngraph::element::Type& prc, ngraph::ParameterVector& p){
            // FQ uses the precision of the incoming node rather than the net precision.
            return ngraph::builder::makeFakeQuantize(node, node->get_element_type(), 256, generatePerChannelShape(node));
        }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"} };
|
||||
|
||||
// Post-ops chain: Sigmoid followed by two per-channel FakeQuantize nodes.
const auto fusingSigmoidFQFQ = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
        {[](std::shared_ptr<ngraph::Node> node, const ngraph::element::Type& prc, ngraph::ParameterVector& p){
            return ngraph::builder::makeActivation(node, prc, ngraph::helpers::Sigmoid);
        }, "Sigmoid"},
        {[](std::shared_ptr<ngraph::Node> node, const ngraph::element::Type& prc, ngraph::ParameterVector& p){
            // FQ uses the precision of the incoming node rather than the net precision.
            return ngraph::builder::makeFakeQuantize(node, node->get_element_type(), 256, generatePerChannelShape(node));
        }, "FakeQuantize(PerChannel)"},
        {[](std::shared_ptr<ngraph::Node> node, const ngraph::element::Type& prc, ngraph::ParameterVector& p){
            return ngraph::builder::makeFakeQuantize(node, node->get_element_type(), 256, generatePerChannelShape(node));
        }, "FakeQuantize(PerChannel)"}}), {"Sigmoid", "FakeQuantize", "FakeQuantize"} };
|
||||
|
||||
// Post-ops chain: Clamp to [3.0, 6.0] followed by a per-channel FakeQuantize.
// Currently excluded from the FP32 param set (see TODO there).
const auto fusingClampFQ = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
        {[](std::shared_ptr<ngraph::Node> node, const ngraph::element::Type& prc, ngraph::ParameterVector& p){
            return ngraph::builder::makeActivation(node, prc, ngraph::helpers::Clamp, {}, {3.0f, 6.0f});
        }, "Clamp"},
        {[](std::shared_ptr<ngraph::Node> node, const ngraph::element::Type& prc, ngraph::ParameterVector& p){
            // FQ uses the precision of the incoming node rather than the net precision.
            return ngraph::builder::makeFakeQuantize(node, node->get_element_type(), 256, generatePerChannelShape(node));
        }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"} };
|
||||
|
||||
|
||||
|
||||
// Fusing patterns exercised in the FP32 runs.
const std::vector<fusingSpecificParams> fusingParamsSet{
        emptyFusingSpec,
        fusingSigmoid,
        fusingFakeQuantizePerTensorRelu,
        fusingFakeQuantizePerChannelRelu,
        fusingFQPerChannelSigmoidFQPerChannel,
        fusingReluScaleShift,
        fusingMulAddFQMullAdd,
        fusingSigmoidFQFQ,
        // fusingClampFQ // TODO: we need investigation, this particular pattern does not work even in static case
        fusingDivSubFQ
};
|
||||
|
||||
// Reduced fusing set used for the BF16-enforced runs.
const std::vector<fusingSpecificParams> fusingParamsSetBF16{
        emptyFusingSpec,
        fusingSigmoid,
        fusingReluScaleShift
};
|
||||
|
||||
// Convolution input: channel count fixed at 32, batch and spatial dims dynamic.
InputShape convInpShape = {
        //dynamic shapes
        {-1, 32, -1, -1},
        { //target static shapes
            {1, 32, 10, 10},
            {1, 32, 10, 10},
            {1, 32, 10, 10},
            {1, 32, 3, 3},
            {1, 32, 3, 10}
        }
};
|
||||
|
||||
// Second Add input: fully dynamic rank-4 shape; the static shapes broadcast
// against the 64-channel convolution output.
InputShape secondInp = {
        //dynamic shapes
        {-1, -1, -1, -1},
        { //target static shapes
            {1, 64, 1, 8},
            {1, 64, 1, 8},
            {1, 64, 8, 8},
            {1, 64, 8, 8},
            {1, 64, 8, 1}
        }
};
|
||||
|
||||
// FP32 coverage: all fusing patterns, with and without bias, default plugin config.
INSTANTIATE_TEST_SUITE_P(smoke_Conv_Sum_Broadcast_FP32, ConcatConvSumInPlaceTest,
                         ::testing::Combine(
                                 ::testing::Values(convInpShape),
                                 ::testing::Values(secondInp),
                                 ::testing::Values(true, false),
                                 ::testing::ValuesIn(fusingParamsSet),
                                 ::testing::Values(cpuEmptyPluginConfig)),
                         ConcatConvSumInPlaceTest::getTestCaseName);
|
||||
|
||||
// BF16 coverage: reduced fusing set under the BF16-enforcing plugin config.
INSTANTIATE_TEST_SUITE_P(smoke_Conv_Sum_Broadcast_BF16, ConcatConvSumInPlaceTest,
                         ::testing::Combine(
                                 ::testing::Values(convInpShape),
                                 ::testing::Values(secondInp),
                                 ::testing::Values(true, false),
                                 ::testing::ValuesIn(fusingParamsSetBF16),
                                 ::testing::Values(cpuBF16PluginConfig)),
                         ConcatConvSumInPlaceTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
} // namespace SubgraphTestsDefinitions
|
||||
Reference in New Issue
Block a user