809 lines
30 KiB
C++
809 lines
30 KiB
C++
// Copyright (C) 2018-2021 Intel Corporation
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
//
|
|
|
|
#pragma once
|
|
|
|
#include <ie_api.h>
|
|
#include <memory>
|
|
#include <vector>
|
|
#include <string>
|
|
#include <cassert>
|
|
#include <algorithm>
|
|
#include <caseless.hpp>
|
|
#include "mkldnn_memory.h"
|
|
#include "mkldnn_edge.h"
|
|
#include "mkldnn_descriptor.h"
|
|
#include "mkldnn_selective_build.h"
|
|
#include "mkldnn/iml_type_mapper.h"
|
|
#include "mkldnn_extension_mngr.h"
|
|
#include "mkldnn_primitive.h"
|
|
#include "mkldnn_weights_cache.hpp"
|
|
#include "mkldnn.hpp"
|
|
#include <openvino/itt.hpp>
|
|
#include "utils/ngraph_utils.hpp"
|
|
#include <ngraph/ops.hpp>
|
|
#include <ngraph/node.hpp>
|
|
#include <ie_precision.hpp>
|
|
#include <nodes/common/blocked_desc_creator.h>
|
|
#include "cpu_types.h"
|
|
#include "cpu_shape.h"
|
|
#include "memory_desc/cpu_memory_desc.h"
|
|
|
|
namespace MKLDNNPlugin {
|
|
|
|
using MKLDNNNodePtr = std::shared_ptr<MKLDNNNode>;
|
|
using MKLDNNNodeConstPtr = std::shared_ptr<const MKLDNNNode>;
|
|
using MKLDNNNodeWeakPtr = std::weak_ptr<MKLDNNNode>;
|
|
|
|
class PortConfigurator {
|
|
public:
|
|
PortConfigurator(MKLDNNPlugin::LayoutType blockedDescType, InferenceEngine::Precision prc, const Shape& shape,
|
|
bool constant = false, int inPlace = -1) :
|
|
blockedDescCreator(getBlockedDescCreator(blockedDescType)), prc(prc), shape(shape), constant(constant), inPlace(inPlace) {}
|
|
|
|
PortConfigurator(MKLDNNPlugin::LayoutType blockedDescType, InferenceEngine::Precision prc = InferenceEngine::Precision::UNSPECIFIED,
|
|
bool constant = false, int inPlace = -1) :
|
|
blockedDescCreator(getBlockedDescCreator(blockedDescType)), prc(prc), constant(constant), inPlace(inPlace) {}
|
|
|
|
MKLDNNPlugin::BlockedDescCreator::CreatorConstPtr blockedDescCreator;
|
|
const InferenceEngine::Precision prc;
|
|
const Shape shape;
|
|
bool constant = false;
|
|
int inPlace = -1;
|
|
|
|
private:
|
|
static MKLDNNPlugin::BlockedDescCreator::CreatorConstPtr getBlockedDescCreator(MKLDNNPlugin::LayoutType blockedDescType) {
|
|
auto& creators = MKLDNNPlugin::BlockedDescCreator::getCommonCreators();
|
|
if (creators.find(blockedDescType) == creators.end()) {
|
|
IE_THROW() << "Cannot find tensor descriptor creator";
|
|
}
|
|
return creators.at(blockedDescType);
|
|
}
|
|
};
|
|
|
|
/**
 * @brief Memory configuration of a single node port: descriptor, constness and
 * in-place index. Stored inside NodeConfig for each input/output port.
 */
struct PortConfig {
    PortConfig() = default;

    // Hand-written copy: copies the shared descriptor pointer only when rhs has one.
    // For construction this is equivalent to the default (desc starts out null).
    PortConfig(const PortConfig& rhs) {
        this->constant = rhs.constant;
        this->inPlace = rhs.inPlace;
        if (rhs.desc) {
            this->desc = rhs.desc;
        }
    }

    // NOTE(review): unlike a defaulted assignment, this keeps the existing `desc`
    // when rhs.desc is null instead of resetting it — confirm whether callers rely
    // on that asymmetry before simplifying.
    PortConfig& operator=(const PortConfig& rhs) {
        this->constant = rhs.constant;
        this->inPlace = rhs.inPlace;
        if (rhs.desc) {
            this->desc = rhs.desc;
        }
        return *this;
    }

    PortConfig(PortConfig&& rhs) = default;
    PortConfig& operator=(PortConfig&& rhs) = default;

    // TODO [DS]: better to make private and const
    bool constant = false;
    int inPlace = -1;      // index of the port this one is in-place with, -1 = none
    MemoryDescPtr desc;    // memory descriptor of the port (shared, may be null)
};
|
|
|
|
/**
 * @brief Full port configuration of a node for one primitive descriptor:
 * per-port configs for all inputs and outputs plus the dynamic-batch flag.
 */
struct NodeConfig {
    bool dynBatchSupport = false;       // whether this configuration supports dynamic batch
    std::vector<PortConfig> inConfs;    // one entry per input port
    std::vector<PortConfig> outConfs;   // one entry per output port
};
|
|
|
|
class NodeDesc {
|
|
public:
|
|
NodeDesc(const NodeConfig& conf, impl_desc_type type): config(conf) {
|
|
implementationType = type;
|
|
}
|
|
|
|
const NodeConfig& getConfig() const {
|
|
return config;
|
|
}
|
|
|
|
void setConfig(const NodeConfig& config) {
|
|
this->config = config;
|
|
}
|
|
|
|
impl_desc_type getImplementationType() const {
|
|
return implementationType;
|
|
}
|
|
|
|
void setImplementationType(impl_desc_type type) {
|
|
implementationType = type;
|
|
}
|
|
|
|
private:
|
|
NodeConfig config;
|
|
impl_desc_type implementationType;
|
|
};
|
|
|
|
class MKLDNNNode {
|
|
public:
|
|
template<typename T, int N>
|
|
struct Tag {};
|
|
|
|
struct PerfCounters {
|
|
PerfCounters(std::string const& name)
|
|
: execute(openvino::itt::handle(name))
|
|
, getSupportedDescriptors(openvino::itt::handle<Tag<MKLDNNNode, 0>>("MKLDNNNode::getSupportedDescriptors"))
|
|
, initSupportedPrimitiveDescriptors(openvino::itt::handle<Tag<MKLDNNNode, 1>>("MKLDNNNode::initSupportedPrimitiveDescriptors"))
|
|
, filterSupportedPrimitiveDescriptors(openvino::itt::handle<Tag<MKLDNNNode, 2>>("MKLDNNNode::filterSupportedPrimitiveDescriptors"))
|
|
, selectOptimalPrimitiveDescriptor(openvino::itt::handle<Tag<MKLDNNNode, 3>>("MKLDNNNode::selectOptimalPrimitiveDescriptor"))
|
|
, createPrimitive(openvino::itt::handle<Tag<MKLDNNNode, 4>>("MKLDNNNode::createPrimitive"))
|
|
, initOptimalPrimitiveDescriptor(openvino::itt::handle<Tag<MKLDNNNode, 5>>("MKLDNNNode::initOptimalPrimitiveDescriptor"))
|
|
{}
|
|
|
|
template<typename NodeType>
|
|
void buildClassCounters(const std::string& type_name) {
|
|
getSupportedDescriptors = openvino::itt::handle<Tag<NodeType, 0>>(type_name + "::getSupportedDescriptors");
|
|
initSupportedPrimitiveDescriptors = openvino::itt::handle<Tag<NodeType, 1>>(type_name + "::initSupportedPrimitiveDescriptors");
|
|
filterSupportedPrimitiveDescriptors = openvino::itt::handle<Tag<NodeType, 2>>(type_name + "::filterSupportedPrimitiveDescriptors");
|
|
selectOptimalPrimitiveDescriptor = openvino::itt::handle<Tag<NodeType, 3>>(type_name + "::selectOptimalPrimitiveDescriptor");
|
|
createPrimitive = openvino::itt::handle<Tag<NodeType, 4>>(type_name + "::createPrimitive");
|
|
initOptimalPrimitiveDescriptor = openvino::itt::handle<Tag<NodeType, 5>>(type_name + "::initOptimalPrimitiveDescriptor");
|
|
}
|
|
|
|
openvino::itt::handle_t execute;
|
|
openvino::itt::handle_t getSupportedDescriptors;
|
|
openvino::itt::handle_t initSupportedPrimitiveDescriptors;
|
|
openvino::itt::handle_t filterSupportedPrimitiveDescriptors;
|
|
openvino::itt::handle_t selectOptimalPrimitiveDescriptor;
|
|
openvino::itt::handle_t createPrimitive;
|
|
openvino::itt::handle_t initOptimalPrimitiveDescriptor;
|
|
};
|
|
|
|
class NodesFactory;
|
|
static NodesFactory & factory();
|
|
|
|
virtual ~MKLDNNNode() = default;
|
|
|
|
void addEdge(const MKLDNNEdgeWeakPtr& edge);
|
|
void removeEdge(const MKLDNNEdgeWeakPtr& edge);
|
|
|
|
virtual void cleanup();
|
|
void remove();
|
|
|
|
const std::vector<MKLDNNEdgeWeakPtr> &getParentEdges() const noexcept {
|
|
return parentEdges;
|
|
}
|
|
|
|
const std::vector<MKLDNNEdgeWeakPtr> &getChildEdges() const noexcept {
|
|
return childEdges;
|
|
}
|
|
|
|
const MKLDNNEdgePtr getParentEdgeAt(size_t idx) const;
|
|
virtual const MKLDNNEdgePtr getChildEdgeAt(size_t idx) const;
|
|
|
|
const std::vector<MKLDNNEdgePtr> getParentEdgesAtPort(size_t idx) const;
|
|
const std::vector<MKLDNNEdgePtr> getChildEdgesAtPort(size_t idx) const;
|
|
|
|
bool isDropped() {
|
|
return (isEdgesEmpty(childEdges) && isEdgesEmpty(parentEdges));
|
|
}
|
|
|
|
const mkldnn::engine& getEngine() const {
|
|
return engine;
|
|
}
|
|
|
|
// must be called only after MKLDNNGraph::InitEdges()
|
|
virtual bool isExecutable() const {
|
|
return true;
|
|
}
|
|
|
|
bool isConstant();
|
|
|
|
bool isInplace() const;
|
|
|
|
bool isFusedWith(Type type) const;
|
|
|
|
void addFusedNode(const MKLDNNNodePtr &fusingNode) {
|
|
fusedWith.push_back(fusingNode);
|
|
}
|
|
|
|
virtual void fuseInto(MKLDNNNodePtr& parentNode) {
|
|
// The graph supports fusing only of consecutive nodes and some graph logic requires to know through which input port a node was fused into parent one.
|
|
for (int i = 0; i < getParentEdges().size(); i++) {
|
|
if (getParentEdgesAtPort(i)[0]->getParent().get() == parentNode.get()) {
|
|
setFusingPort(i);
|
|
break;
|
|
}
|
|
}
|
|
|
|
auto parentFusedNodes = parentNode->getFusedWith();
|
|
if (getFusingPort() < 0 && !parentFusedNodes.empty()) {
|
|
for (int i = 0; i < getParentEdges().size(); i++) {
|
|
if (getParentEdgesAtPort(i)[0]->getParent().get() == parentFusedNodes[parentFusedNodes.size() - 1].get()) {
|
|
setFusingPort(i);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (getFusingPort() == -1) {
|
|
IE_THROW() << "Cannot determine fusing port between nodes: " << parentNode->getName() << " and " << getName();
|
|
}
|
|
|
|
parentNode->addFusedNode(getParentEdgesAtPort(getFusingPort())[0]->getChild());
|
|
parentNode->addOriginalLayer(getOriginalLayers());
|
|
}
|
|
|
|
void clearFusedWith() {
|
|
fusedWith.clear();
|
|
}
|
|
|
|
void mergeWith(const MKLDNNNodePtr &merge) {
|
|
mergedWith.push_back(merge);
|
|
}
|
|
|
|
const std::vector <MKLDNNNodePtr> &getMergeWith() {
|
|
return mergedWith;
|
|
}
|
|
|
|
const std::vector <MKLDNNNodePtr> &getFusedWith() {
|
|
return fusedWith;
|
|
}
|
|
|
|
int getFusingPort() const {
|
|
return fusingPort;
|
|
}
|
|
|
|
void setFusingPort(int fusingPort) {
|
|
this->fusingPort = fusingPort;
|
|
}
|
|
|
|
const std::string &getName() const {
|
|
return name;
|
|
}
|
|
|
|
void addOriginalLayer(const std::string& layerName);
|
|
|
|
const std::string &getOriginalLayers() const {
|
|
return originalLayers;
|
|
}
|
|
|
|
Type getType() const {
|
|
return type;
|
|
}
|
|
|
|
const std::vector<NodeDesc>& getSupportedPrimitiveDescriptors() const {
|
|
return supportedPrimitiveDescriptors;
|
|
}
|
|
|
|
inline const NodeDesc* getSelectedPrimitiveDescriptor() const {
|
|
if (selectedPrimitiveDescriptorIndex < 0 ||
|
|
selectedPrimitiveDescriptorIndex >= supportedPrimitiveDescriptors.size())
|
|
return nullptr;
|
|
return &supportedPrimitiveDescriptors[selectedPrimitiveDescriptorIndex];
|
|
}
|
|
|
|
inline NodeDesc* getSelectedPrimitiveDescriptor() {
|
|
if (selectedPrimitiveDescriptorIndex < 0 ||
|
|
selectedPrimitiveDescriptorIndex >= supportedPrimitiveDescriptors.size())
|
|
return nullptr;
|
|
return &supportedPrimitiveDescriptors[selectedPrimitiveDescriptorIndex];
|
|
}
|
|
|
|
/**
|
|
* @brief Returns input selected primitive descriptor on the specified port
|
|
* must be used after selectOptimalPrimitiveDescriptor stage
|
|
* @param portNum port number
|
|
* @return pointer to selected primitive descriptor with type MemoryDesc
|
|
*/
|
|
MemoryDescPtr getBaseMemDescAtInputPort(size_t portNum) const;
|
|
|
|
/**
|
|
* @brief Returns output selected primitive descriptor on the specified port
|
|
* must be used after selectOptimalPrimitiveDescriptor stage
|
|
* @param portNum port number
|
|
* @return pointer to selected primitive descriptor with type MemoryDesc
|
|
*/
|
|
MemoryDescPtr getBaseMemDescAtOutputPort(size_t portNum) const;
|
|
|
|
/**
|
|
* @brief Returns input selected primitive descriptor on the specified port
|
|
* must be used after selectOptimalPrimitiveDescriptor stage
|
|
* @param portNum port number
|
|
* @return pointer to selected primitive descriptor with type T
|
|
*/
|
|
template <typename T,
|
|
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
|
|
typename std::enable_if<std::is_base_of<MemoryDesc, T>::value, int>::type = 0>
|
|
std::shared_ptr<T> getInputMemDescAtPort(size_t portNum) const;
|
|
|
|
/**
|
|
* @brief Returns output selected primitive descriptor on the specified port
|
|
* must be used after selectOptimalPrimitiveDescriptor stage
|
|
* @param portNum port number
|
|
* @return pointer to selected primitive descriptor with type T
|
|
*/
|
|
template <typename T,
|
|
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
|
|
typename std::enable_if<std::is_base_of<MemoryDesc, T>::value, int>::type = 0>
|
|
std::shared_ptr<T> getOutputMemDescAtPort(size_t portNum) const;
|
|
|
|
void selectPrimitiveDescriptorByIndex(int index) {
|
|
if (index < 0 || index >= supportedPrimitiveDescriptors.size())
|
|
selectedPrimitiveDescriptorIndex = -1;
|
|
else
|
|
selectedPrimitiveDescriptorIndex = index;
|
|
}
|
|
|
|
std::string getPrimitiveDescriptorType();
|
|
|
|
PerfCount &PerfCounter() { return perfCounter; }
|
|
|
|
virtual void setDynamicBatchLim(int lim);
|
|
|
|
void resolveInPlaceEdges();
|
|
|
|
virtual void execute(mkldnn::stream strm);
|
|
void executeDynamic(mkldnn::stream strm);
|
|
void redefineOutputMemory(const std::vector<VectorDims> &newShapes);
|
|
|
|
virtual void initSupportedPrimitiveDescriptors();
|
|
|
|
/**
|
|
* @brief Filters supportedPrimitiveDescriptors according to the input layouts specified in inputMemoryFormatsFilter
|
|
* and output layouts specified in outputMemoryFormatsFilter
|
|
*/
|
|
virtual void filterSupportedPrimitiveDescriptors();
|
|
|
|
virtual void createPrimitive() = 0;
|
|
|
|
virtual void selectOptimalPrimitiveDescriptor();
|
|
virtual void initOptimalPrimitiveDescriptor();
|
|
|
|
virtual void getSupportedDescriptors() = 0;
|
|
// TODO [DS]: Should be moved into Node derivative class
|
|
virtual void createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
|
|
const std::vector<MemoryDescPtr>& outputDesc) {}
|
|
virtual void initDescriptor(const NodeConfig& config);
|
|
virtual bool created() const = 0;
|
|
virtual bool created(const MKLDNNExtensionManager::Ptr& extMgr) {
|
|
return created();
|
|
}
|
|
|
|
/**
|
|
* @brief Performs Node initialization based on graph context.
|
|
* This is an auxiliary method that allows to use information not available in Node constructor (e.g. connection information with other nodes)
|
|
*/
|
|
virtual void init() {}
|
|
|
|
template <class PD, class D, typename FPD = bool>
|
|
PD createPrimitiveDescriptor(const mkldnn::primitive_attr &attr = mkldnn::primitive_attr()) {
|
|
auto descsCompatible = [](const std::vector<MemoryDescPtr>& srcDescs,
|
|
const std::vector<PortConfig>& selectedDescs) {
|
|
if (srcDescs.empty() && selectedDescs.empty())
|
|
return true;
|
|
if (srcDescs.empty() || selectedDescs.empty())
|
|
return false;
|
|
for (size_t i = 0; i < srcDescs.size() && i < selectedDescs.size(); i++) {
|
|
return srcDescs[i]->isCompatible(*selectedDescs[i].desc);
|
|
}
|
|
return true;
|
|
};
|
|
|
|
const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
|
|
if (selected_pd == nullptr)
|
|
IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << ".";
|
|
|
|
for (const auto& desc : descs) {
|
|
auto itpd = desc.createPrimitiveDescriptorIterator(engine, attr);
|
|
|
|
while (static_cast<bool>(itpd)) {
|
|
std::vector<MemoryDescPtr> srcDescs;
|
|
for (size_t i = 0; i < descInputNumbers(desc); i++)
|
|
srcDescs.push_back(getSrcMemDesc(itpd, i));
|
|
|
|
std::vector<MemoryDescPtr> dstDescs;
|
|
for (size_t i = 0; i < descOutputNumbers(desc); i++)
|
|
dstDescs.push_back(getDstMemDesc(itpd, i));
|
|
|
|
impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str());
|
|
|
|
if (impl_type == selected_pd->getImplementationType() &&
|
|
descsCompatible(srcDescs, selected_pd->getConfig().inConfs) &&
|
|
descsCompatible(dstDescs, selected_pd->getConfig().outConfs)) {
|
|
prepareMemory(selected_pd, itpd);
|
|
PD prim_desc = createPd<PD, D, FPD>(desc);
|
|
return {itpd.get()};
|
|
}
|
|
if (!itpd.next_impl())
|
|
break;
|
|
}
|
|
}
|
|
|
|
IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
|
|
}
|
|
|
|
int getExecIndex() const {
|
|
return execIndex;
|
|
}
|
|
|
|
std::string getTypeStr() const {
|
|
return typeStr;
|
|
}
|
|
|
|
void setTypeStr(const std::string &typeStr) {
|
|
this->typeStr = typeStr;
|
|
}
|
|
|
|
virtual size_t descInputNumbers(MKLDNNDescriptor desc) {
|
|
return desc.inputNumbers();
|
|
}
|
|
|
|
virtual size_t descOutputNumbers(MKLDNNDescriptor desc) {
|
|
return desc.outputNumbers();
|
|
}
|
|
|
|
const PerfCounters & perfCounters() const {
|
|
return profiling;
|
|
}
|
|
|
|
PerfCounters & perfCounters() {
|
|
return profiling;
|
|
}
|
|
|
|
/**
|
|
* @brief Returns runtime node precision based on input/output data types or data type used for computations
|
|
* @return Runtime node precision
|
|
*/
|
|
virtual InferenceEngine::Precision getRuntimePrecision() const;
|
|
|
|
const std::vector<InferenceEngine::Precision>& getOriginalInputPrecisions() const {
|
|
return originalInputPrecisions;
|
|
}
|
|
const std::vector<InferenceEngine::Precision>& getOriginalOutputPrecisions() const {
|
|
return originalOutputPrecisions;
|
|
}
|
|
|
|
InferenceEngine::Precision getOriginalInputPrecisionAtPort(size_t port) const {
|
|
if (originalInputPrecisions.size() <= port) {
|
|
IE_THROW() << "Incorrect input port number for node " << getName();
|
|
}
|
|
return originalInputPrecisions[port];
|
|
}
|
|
InferenceEngine::Precision getOriginalOutputPrecisionAtPort(size_t port) const {
|
|
if (originalOutputPrecisions.size() <= port) {
|
|
IE_THROW() << "Incorrect output port number for node " << getName();
|
|
}
|
|
return originalOutputPrecisions[port];
|
|
}
|
|
|
|
void setOriginalInputPrecisionAtPort(size_t port, InferenceEngine::Precision precision) {
|
|
if (originalInputPrecisions.size() <= port) {
|
|
IE_THROW() << "Incorrect input port number for node " << getName();
|
|
}
|
|
originalInputPrecisions[port] = precision;
|
|
}
|
|
|
|
void setOriginalOutputPrecisionAtPort(size_t port, InferenceEngine::Precision precision) {
|
|
if (originalOutputPrecisions.size() <= port) {
|
|
IE_THROW() << "Incorrect output port number for node " << getName();
|
|
}
|
|
originalOutputPrecisions[port] = precision;
|
|
}
|
|
|
|
void addOriginalInputPrecision(InferenceEngine::Precision precision) {
|
|
originalInputPrecisions.push_back(precision);
|
|
}
|
|
|
|
void addOriginalOutputPrecision(InferenceEngine::Precision precision) {
|
|
originalOutputPrecisions.push_back(precision);
|
|
}
|
|
|
|
// TODO: alighn behaviour for original(Input/Output)Precisions and (input/output)Shapes
|
|
/**
|
|
* @brief Returns inputs number which have ngraph nodes.
|
|
* Inputs number compute as size of originalInputPrecisions vector
|
|
* IMPORTANT!!!
|
|
* FuseConvolutionAndBias and FuseMultiplyAndAdd change originalInputPrecisions vector
|
|
* @return original inputs number
|
|
*/
|
|
size_t getOriginalInputsNumber() const {
|
|
return originalInputPrecisions.size();
|
|
}
|
|
|
|
/**
|
|
* @brief Returns outputs number which have ngraph nodes.
|
|
* Outputs number compute as size of originalOutputPrecisions vector
|
|
* @return original outputs number
|
|
*/
|
|
size_t getOriginalOutputsNumber() const {
|
|
return originalOutputPrecisions.size();
|
|
}
|
|
|
|
Algorithm getAlgorithm() const {
|
|
return algorithm;
|
|
}
|
|
|
|
void setAlgorithm(Algorithm alg) {
|
|
algorithm = alg;
|
|
}
|
|
|
|
virtual bool canFuse(const MKLDNNNodePtr& node) const {
|
|
return false;
|
|
}
|
|
|
|
void setQuantizedGraphFlag(bool flag) {
|
|
isInQuantizedGraph = flag;
|
|
}
|
|
|
|
bool canBePerformedAsScaleShift(const MKLDNNNode *parentNode = nullptr) const;
|
|
|
|
bool isDynamicNode() const {
|
|
return isDynamic;
|
|
}
|
|
|
|
const Shape& getInputShapeAtPort(size_t port) const {
|
|
if (inputShapes.size() <= port) {
|
|
IE_THROW() << "Incorrect input port number for node " << getName();
|
|
}
|
|
return inputShapes[port];
|
|
}
|
|
|
|
const Shape& getOutputShapeAtPort(size_t port) const {
|
|
if (outputShapes.size() <= port) {
|
|
IE_THROW() << "Incorrect output port number for node " << getName();
|
|
}
|
|
return outputShapes[port];
|
|
}
|
|
|
|
/**
|
|
* @brief Return scales and shift if nodes can be executed as ScaleShift, else raise exception
|
|
* If node has only scale or shift value, fill missing value with default values
|
|
* i.e. EltwiseAdd: fill shifts from constant, fill scales with default values = 1.0f
|
|
* @param parentNode
|
|
* node from which data comes
|
|
* @return pair of scales and shifts
|
|
*/
|
|
std::pair<std::vector<float>, std::vector<float>> getScalesAndShifts(const MKLDNNNode *parentNode) const;
|
|
|
|
protected:
|
|
bool canFuseSimpleOperation(const MKLDNNNodePtr& node) const;
|
|
|
|
void setType(Type type) {
|
|
this->type = type;
|
|
}
|
|
|
|
virtual size_t getMaxBatch() const;
|
|
|
|
|
|
virtual MemoryDescPtr getDefinedInputDesc(const NodeConfig &config, size_t idx) const;
|
|
virtual MemoryDescPtr getDefinedOutputDesc(const NodeConfig &config, size_t idx) const;
|
|
virtual MemoryDescPtr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx);
|
|
virtual MemoryDescPtr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx);
|
|
|
|
/**
|
|
* @brief Appends new item into ops list with the information on how the node should be executed as post operation.
|
|
* Seed node should call this routine and pass its post operations list as parameter.
|
|
* @param ops List of fused post operations
|
|
*/
|
|
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false);
|
|
virtual std::shared_ptr<mkldnn::primitive_attr> initPrimitiveAttr() const { return nullptr; }
|
|
|
|
typedef std::function<DnnlMemoryDescPtr (mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx)>
|
|
GetPrimitiveMemoryFormatFunc;
|
|
std::vector<GetPrimitiveMemoryFormatFunc> internalBlobDesc;
|
|
|
|
std::vector<Shape> inputShapes;
|
|
std::vector<Shape> outputShapes;
|
|
|
|
std::vector <MKLDNNNodePtr> fusedWith;
|
|
std::vector <MKLDNNNodePtr> mergedWith;
|
|
std::vector <impl_desc_type> implPriorities;
|
|
std::vector <mkldnn::memory::format_tag> inputMemoryFormatsFilter;
|
|
std::vector <mkldnn::memory::format_tag> outputMemoryFormatsFilter;
|
|
bool enforceBF16evenForGraphTail = false;
|
|
|
|
std::string originalLayers; // contains names of the original layers separated by comma
|
|
|
|
MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache);
|
|
MKLDNNNode(const std::string& type, const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache);
|
|
|
|
int selectedPrimitiveDescriptorIndex = -1;
|
|
bool permanent = false;
|
|
bool temporary = false;
|
|
int dynBatchLim = 0;
|
|
enum class ConstantType {
|
|
Unknown,
|
|
Const,
|
|
NoConst
|
|
};
|
|
ConstantType constant = ConstantType::Unknown;
|
|
std::vector<InferenceEngine::Blob::Ptr> internalBlobs;
|
|
std::vector<MKLDNNMemoryPtr> internalBlobMemory;
|
|
std::vector<NodeDesc> supportedPrimitiveDescriptors;
|
|
std::unordered_map<int, mkldnn::memory> primArgs;
|
|
std::vector<mkldnn::memory> binaryPostOpsArgs;
|
|
MKLDNNPrimitive prim;
|
|
std::vector<MKLDNNDescriptor> descs;
|
|
|
|
MKLDNNWeightsSharing::Ptr weightCache;
|
|
|
|
Algorithm algorithm = Algorithm::Default;
|
|
|
|
bool isInQuantizedGraph = false;
|
|
|
|
friend class MKLDNNEdge;
|
|
friend class MKLDNNGraph;
|
|
friend class MKLDNNGraphOptimizer;
|
|
friend class NodeDumper;
|
|
|
|
void selectPreferPrimitiveDescriptor(const std::vector<impl_desc_type>& priority, bool ignoreConstInputs);
|
|
bool isConfigDefined(const NodeConfig &config) const;
|
|
virtual bool canBeInPlace() const;
|
|
|
|
virtual const std::vector<impl_desc_type>& getPrimitivesPriority();
|
|
|
|
virtual std::vector<mkldnn::memory::format_tag> getAvailableFormatsForDims(const Shape& dims) const;
|
|
int batchToProcess() const;
|
|
|
|
InferenceEngine::Layout getWeightsLayoutByDims(InferenceEngine::SizeVector dims, bool isGrouped);
|
|
|
|
/**
|
|
* @brief Auxiliary function to get node input precisions
|
|
* @return Vector of precisions based on information from node input edges. Return empty vector in case edges are not initialized yet.
|
|
*/
|
|
virtual std::vector<InferenceEngine::Precision> getInputPrecisions() const;
|
|
|
|
/**
|
|
* @brief Auxiliary function to get node output precisions
|
|
* @return Vector of precisions based on information from node output edges. Return empty vector in case edges are not initialized yet.
|
|
*/
|
|
virtual std::vector<InferenceEngine::Precision> getOutputPrecisions() const;
|
|
|
|
void addSupportedPrimDesc(const std::vector<PortConfigurator>& inPortConfigs,
|
|
const std::vector<PortConfigurator>& outPortConfigs,
|
|
impl_desc_type implType,
|
|
bool dynBatchSupport = false) {
|
|
auto fill_port = [] (const PortConfigurator& portConfigurator, const Shape& shape,
|
|
InferenceEngine::Precision prc, std::vector<PortConfig>& port) -> bool {
|
|
// In order to simplify particular node initialization logic we just don't add config in case target shape is not supported by blockedDescCreator.
|
|
// This should be suitable for major of scenarios since almost all nodes add `ncsp` blockedDescCreator which supports any shape rank.
|
|
if (shape.getRank() < portConfigurator.blockedDescCreator->getMinimalRank())
|
|
return false;
|
|
|
|
PortConfig portConfig;
|
|
portConfig.inPlace = portConfigurator.inPlace;
|
|
portConfig.constant = portConfigurator.constant;
|
|
portConfig.desc = portConfigurator.blockedDescCreator->createSharedDesc(prc, shape);
|
|
|
|
port.push_back(std::move(portConfig));
|
|
|
|
return true;
|
|
};
|
|
|
|
NodeConfig config;
|
|
for (size_t i = 0; i < inPortConfigs.size(); i++) {
|
|
auto shape = inPortConfigs[i].shape.getRank() == 0 ? getInputShapeAtPort(i) : inPortConfigs[i].shape;
|
|
auto prc = inPortConfigs[i].prc == InferenceEngine::Precision::UNSPECIFIED ? getOriginalInputPrecisionAtPort(i) : inPortConfigs[i].prc;
|
|
if (!fill_port(inPortConfigs[i], shape, prc, config.inConfs))
|
|
return;
|
|
}
|
|
|
|
for (size_t i = 0; i < outPortConfigs.size(); i++) {
|
|
auto dims = outPortConfigs[i].shape.getRank() == 0 ? getOutputShapeAtPort(i) : outPortConfigs[i].shape;
|
|
auto prc = outPortConfigs[i].prc == InferenceEngine::Precision::UNSPECIFIED ? getOriginalOutputPrecisionAtPort(i) : outPortConfigs[i].prc;
|
|
if (!fill_port(outPortConfigs[i], dims, prc, config.outConfs))
|
|
return;
|
|
}
|
|
|
|
config.dynBatchSupport = dynBatchSupport;
|
|
supportedPrimitiveDescriptors.push_back({config, implType});
|
|
}
|
|
|
|
bool isDynamic = false;
|
|
|
|
bool inputShapesDefined() const;
|
|
void updateLastInputDims();
|
|
|
|
bool inputShapesModified() const;
|
|
virtual bool needShapeInfer() const;
|
|
std::vector<VectorDims> shapeInferGeneric(const std::vector<Shape>& inputDims) const;
|
|
virtual std::vector<VectorDims> shapeInfer() const;
|
|
// TODO [DS] : make pure after all nodes will be support dynamic shapes
|
|
virtual void executeDynamicImpl(mkldnn::stream strm) {
|
|
IE_THROW(NotImplemented) << "[DS] executeDynamicImpl not implemented for node with type: " << getTypeStr();
|
|
}
|
|
|
|
virtual bool needPrepareParams() const;
|
|
// TODO [mandrono]: add description
|
|
// called after memory allocation/reallocation
|
|
virtual void prepareParams() {
|
|
IE_THROW(NotImplemented) << "[DS] prapareParams not implemented for node with type " << NameFromType(getType());
|
|
}
|
|
|
|
std::vector<VectorDims> lastInputDims = {};
|
|
|
|
std::shared_ptr<ngraph::Node> opToShapeInfer;
|
|
|
|
private:
|
|
std::vector<MKLDNNEdgeWeakPtr> parentEdges;
|
|
std::vector<MKLDNNEdgeWeakPtr> childEdges;
|
|
|
|
std::vector<InferenceEngine::Precision> originalInputPrecisions;
|
|
std::vector<InferenceEngine::Precision> originalOutputPrecisions;
|
|
|
|
int fusingPort;
|
|
|
|
mkldnn::engine engine;
|
|
|
|
std::string name;
|
|
std::string typeStr;
|
|
Type type;
|
|
int execIndex = -1;
|
|
|
|
std::string typeToStr(Type type);
|
|
|
|
PerfCount perfCounter;
|
|
PerfCounters profiling;
|
|
|
|
bool isEdgesEmpty(const std::vector<MKLDNNEdgeWeakPtr>& edges) const;
|
|
|
|
void createShapeInferSubgraph(const std::shared_ptr<ngraph::Node>& op);
|
|
|
|
template <class PD, class D, typename FPD>
|
|
typename std::enable_if<!std::is_same<FPD, bool>::value, PD>::type
|
|
createPd(MKLDNNDescriptor desc) {
|
|
std::shared_ptr<D> selected_desc_ptr = desc;
|
|
std::shared_ptr<FPD> backward_prim_desc_ptr = desc;
|
|
return PD(*selected_desc_ptr, engine, *backward_prim_desc_ptr);
|
|
}
|
|
|
|
template <class PD, class D, typename FPD>
|
|
typename std::enable_if<std::is_same<FPD, bool>::value, PD>::type
|
|
createPd(MKLDNNDescriptor desc) {
|
|
std::shared_ptr<D> selected_desc_ptr = desc;
|
|
return PD(*selected_desc_ptr, engine);
|
|
}
|
|
|
|
void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd);
|
|
enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 };
|
|
ConstantType checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes);
|
|
|
|
#ifdef CPU_DEBUG_CAPS
|
|
friend class Verbose;
|
|
#endif
|
|
};
|
|
|
|
/**
 * @brief Conditional-compilation factory that maps a node Type to a creator of the
 * corresponding MKLDNNNode implementation.
 */
class MKLDNNNode::NodesFactory : public openvino::cc::Factory<Type,
                                            MKLDNNNode*(const std::shared_ptr<ngraph::Node>& op,
                                                        const mkldnn::engine &,
                                                        MKLDNNWeightsSharing::Ptr &)> {
public:
    NodesFactory()
        : Factory("NodesFactory") {}

    // Creates a node for the given ngraph op (defined in the .cpp).
    MKLDNNNode* create(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
                       const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache);
};
|
|
|
|
/**
 * @brief Thin wrapper instantiated by the registration macro: constructs the concrete
 * node type and rebinds its ITT profiling counters to that type's name.
 */
template<typename MKLDNNNodeType>
struct MKLDNNNodeImpl : public MKLDNNNodeType {
    MKLDNNNodeImpl(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
        : MKLDNNNodeType(op, eng, cache) {
        MKLDNNNodeType::perfCounters().template buildClassCounters<MKLDNNNodeType>(NameFromType(MKLDNNNodeType::getType()));
    }
};
|
|
|
|
// Token-pasting helpers; the two-level indirection lets __LINE__ expand before pasting.
#define REG_MKLDNN_CONCAT3_(X, Y, Z) X ## Y ## Z
#define REG_MKLDNN_CONCAT3(X, Y, Z) REG_MKLDNN_CONCAT3_(X, Y, Z)

// Registers node implementation __prim under node Type __type via a static object
// whose constructor runs at program startup. __LINE__ keeps the registrar names
// unique when the macro is used multiple times in one translation unit.
#define REG_MKLDNN_PRIM_FOR(__prim, __type) \
static struct REG_MKLDNN_CONCAT3(Registrar4, __prim, __LINE__) { \
    REG_MKLDNN_CONCAT3(Registrar4, __prim, __LINE__)() { \
        MKLDNNNode::factory() \
            .registerNodeIfRequired(MKLDNNPlugin, __prim, __type, MKLDNNNodeImpl<__prim>); \
    } \
} REG_MKLDNN_CONCAT3(_reg_, __prim, __LINE__);
|
|
|
|
} // namespace MKLDNNPlugin
|