CPU Plugin refactoring: class names (#10639)

Vladislav Volkov 2022-03-16 17:16:29 +03:00 committed by GitHub
parent f7b2e3a8ca
commit ed8c9d6f9a
329 changed files with 5649 additions and 4849 deletions

View File

@ -38,7 +38,7 @@ The implementation `CompileNetwork` is fully device-specific.
The function accepts a const shared pointer to an `ngraph::Function` object and performs the following steps:
1. Applies ngraph passes using the `TransformNetwork` function, which defines the plugin-specific conversion pipeline. To support low precision inference, the pipeline can include Low Precision Transformations. These transformations are usually hardware specific. You can find out how to use and configure Low Precision Transformations in the [Low Precision Transformations](@ref openvino_docs_IE_DG_lpt) guide.
2. Maps the transformed graph to a backend specific graph representation (for example, to MKLDNN graph for Intel CPU).
2. Maps the transformed graph to a backend specific graph representation (for example, to CPU plugin internal graph representation).
3. Allocates and fills memory for graph weights, backend specific memory handles and so on.
@snippet src/template_executable_network.cpp executable_network:map_graph
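A minimal sketch of step 1 (the plugin-specific `TransformNetwork`), assuming the template plugin layout; the single ConstantFolding pass is only an illustration of where a plugin-specific pipeline, optionally including Low Precision Transformations, would be registered:

#include <ngraph/ngraph.hpp>
#include <ngraph/pass/manager.hpp>
#include <ngraph/pass/constant_folding.hpp>

// Illustrative TransformNetwork: clone the input function and run a pass pipeline on the copy.
std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const ngraph::Function>& function) {
    auto transformed = ngraph::clone_function(*function);    // the input is const, so transform a copy
    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::ConstantFolding>();  // placeholder for the real plugin pipeline
    manager.run_passes(transformed);
    return transformed;
}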

View File

@ -2,7 +2,7 @@
Inference Engine Plugin usually represents a wrapper around a backend. Backends can be:
- OpenCL-like backend (e.g. clDNN library) for GPU devices.
- MKLDNN backend for Intel CPU devices.
- oneDNN backend for Intel CPU devices.
- NVIDIA cuDNN for NVIDIA GPUs.
The responsibilities of an Inference Engine Plugin:

View File

@ -210,9 +210,9 @@ int main(int argc, char* argv[]) {
ov::Core core;
if (FLAGS_d.find("CPU") != std::string::npos && !FLAGS_l.empty()) {
// CPU (MKLDNN) extensions is loaded as a shared library
// CPU plugin extensions are loaded as a shared library
core.add_extension(FLAGS_l);
slog::info << "CPU (MKLDNN) extensions is loaded " << FLAGS_l << slog::endl;
slog::info << "CPU plugin extensions are loaded " << FLAGS_l << slog::endl;
}
// Load clDNN Extensions

View File

@ -202,7 +202,7 @@ ngraph::pass::ConvertStridedSliceToCropMatcher::ConvertStridedSliceToCropMatcher
}
auto data_node_shape = data_output.get_shape();
// MKLDNN: "Crop supports only 2d, 4d and 5d blobs."
// Crop supports only 2d, 4d and 5d blobs
if (data_node_shape.size() != 2 && data_node_shape.size() != 4 && data_node_shape.size() != 5) {
return false;
}

View File

@ -137,7 +137,7 @@ auto get_num_result_children(const std::shared_ptr<const Node> &node) -> size_t
}
return result;
}
// Need to update tensor name manually, since MKLDNNGraph::Replicate() looks at input.get_tensor().get_name();
// Need to update tensor name manually, since intel_cpu::Graph::Replicate() looks at input.get_tensor().get_name();
// If subgraph->get_output_size() == 1, then the name will be restored correctly from the node name
auto update_out_tensor_name(std::shared_ptr<ngraph::snippets::op::Subgraph> &subgraph) -> void {
bool not_set = true;

View File

@ -209,7 +209,7 @@ onnx_model_eye_like_dyn_rank
# Constant network
# MKLDNNGraph::CreateGraph: No inputs for the topology
# intel_cpu::Graph::CreateGraph: No inputs for the topology
onnx_size_op_single
onnx_size_op_graph_end
onnx_size_op_graph_middle
@ -496,7 +496,7 @@ relu_4Dbackprop
# data [<name>] doesn't exist
parameter_as_output
# MKLDNNGraph::CreateGraph: No inputs for the topology
# intel_cpu::Graph::CreateGraph: No inputs for the topology
range_v0_int32
range_v0_float32
range_v4_int32

View File

@ -25,7 +25,7 @@ namespace InferenceEngine {
* @brief Interface for the task execution manager.
* This is the global point for getting task executor objects by string id.
* It's necessary for multiple asynchronous requests to share unique executors and avoid oversubscription.
* E.g. There 2 task executors for CPU device: one - in FPGA, another - in MKLDNN. Parallel execution both of them leads
* E.g. there are 2 task executors for the CPU device: one in FPGA, another in OneDNN. Running both of them in parallel leads
* to suboptimal CPU usage. It is more efficient to run the corresponding tasks one by one via a single executor.
* @ingroup ie_dev_api_threading
*/
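A hedged usage sketch of the executor sharing described above; the factory call (`executorManager()`), the header paths, and the "CPU" id are assumptions about the Inference Engine dev API of this period, not code from this change:

#include <threading/ie_executor_manager.hpp>
#include <threading/ie_itask_executor.hpp>

void shareCpuExecutor() {
    // Both asynchronous requests reuse the executor registered for the "CPU" id,
    // instead of creating competing thread pools (oversubscription).
    InferenceEngine::ITaskExecutor::Ptr executor =
        InferenceEngine::executorManager()->getExecutor("CPU");  // assumed factory call
    executor->run([] { /* work of the first request */ });
    executor->run([] { /* work of the second request, serialized on the same executor */ });
}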

View File

@ -5,13 +5,13 @@
#include "async_infer_request.h"
#include <memory>
ov::intel_cpu::MKLDNNAsyncInferRequest::MKLDNNAsyncInferRequest(const InferenceEngine::IInferRequestInternal::Ptr& inferRequest,
const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor)
ov::intel_cpu::AsyncInferRequest::AsyncInferRequest(const InferenceEngine::IInferRequestInternal::Ptr& inferRequest,
const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor)
: InferenceEngine::AsyncInferRequestThreadSafeDefault(inferRequest, taskExecutor, callbackExecutor) {
static_cast<MKLDNNInferRequestBase*>(inferRequest.get())->SetAsyncRequest(this);
static_cast<InferRequestBase*>(inferRequest.get())->SetAsyncRequest(this);
}
ov::intel_cpu::MKLDNNAsyncInferRequest::~MKLDNNAsyncInferRequest() {
ov::intel_cpu::AsyncInferRequest::~AsyncInferRequest() {
StopAndWait();
}

View File

@ -12,12 +12,12 @@
namespace ov {
namespace intel_cpu {
class MKLDNNAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
class AsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
public:
MKLDNNAsyncInferRequest(const InferenceEngine::IInferRequestInternal::Ptr &inferRequest,
const InferenceEngine::ITaskExecutor::Ptr &taskExecutor,
const InferenceEngine::ITaskExecutor::Ptr &callbackExecutor);
~MKLDNNAsyncInferRequest();
AsyncInferRequest(const InferenceEngine::IInferRequestInternal::Ptr &inferRequest,
const InferenceEngine::ITaskExecutor::Ptr &taskExecutor,
const InferenceEngine::ITaskExecutor::Ptr &callbackExecutor);
~AsyncInferRequest();
};
} // namespace intel_cpu

View File

@ -4,6 +4,10 @@
#include "multi_cache.h"
using namespace ov::intel_cpu;
namespace ov {
namespace intel_cpu {
std::atomic_size_t MultiCache::_typeIdCounter{0};
std::atomic_size_t MultiCache::_typeIdCounter{0};
} // namespace intel_cpu
} // namespace ov

View File

@ -34,12 +34,12 @@ namespace {
}
} // namespace
MKLDNNMemory::MKLDNNMemory(const mkldnn::engine& eng) :
Memory::Memory(const mkldnn::engine& eng) :
eng(eng), mgrHandle(std::make_shared<DnnlMemoryMngr>(std::unique_ptr<MemoryMngrWithReuse>(new MemoryMngrWithReuse())), this) {}
MKLDNNMemory::MKLDNNMemory(const mkldnn::engine& eng, std::unique_ptr<IMemoryMngr> mngr) :
Memory::Memory(const mkldnn::engine& eng, std::unique_ptr<IMemoryMngr> mngr) :
eng(eng), mgrHandle(std::make_shared<DnnlMemoryMngr>(std::move(mngr)), this) {}
size_t MKLDNNMemory::GetSize() const {
size_t Memory::GetSize() const {
auto size = getDesc().getCurrentMemSize();
if (size == MemoryDesc::UNDEFINED_SIZE) {
IE_THROW() << "Can't get memory size for undefined shape";
@ -47,8 +47,8 @@ size_t MKLDNNMemory::GetSize() const {
return size;
}
void MKLDNNMemory::Create(const mkldnn::memory::desc& desc, const void *data, bool pads_zeroing) {
// MKLDNN accepts not a const data, probably need to remove some level of consteness in a call stack
void Memory::Create(const mkldnn::memory::desc& desc, const void *data, bool pads_zeroing) {
// OneDNN does not accept const data, so some level of constness probably needs to be removed in the call stack
// ========================
// Equivalent of constructor memory(const primitive_desc &desc, void *hdl)
@ -64,11 +64,11 @@ void MKLDNNMemory::Create(const mkldnn::memory::desc& desc, const void *data, bo
}
}
void MKLDNNMemory::Create(const MemoryDesc &desc, const void *data, bool pads_zeroing) {
void Memory::Create(const MemoryDesc &desc, const void *data, bool pads_zeroing) {
Create(desc.clone(), data, pads_zeroing);
}
void MKLDNNMemory::Create(MemoryDescPtr desc, const void* data, bool pads_zeroing) {
void Memory::Create(MemoryDescPtr desc, const void* data, bool pads_zeroing) {
pMemDesc = desc;
size_t memSize = MemoryDesc::UNDEFINED_SIZE;
@ -93,8 +93,8 @@ void MKLDNNMemory::Create(MemoryDescPtr desc, const void* data, bool pads_zeroin
}
}
void MKLDNNMemory::SetData(const MKLDNNMemory& src, bool ftz) const {
MKLDNNReorderNode::reorderData(src, *this);
void Memory::SetData(const Memory& src, bool ftz) const {
node::Reorder::reorderData(src, *this);
if (ftz
&& src.GetDataType() == memory::data_type::f32
@ -109,13 +109,13 @@ void MKLDNNMemory::SetData(const MKLDNNMemory& src, bool ftz) const {
}
}
void MKLDNNMemory::FillZero() {
void Memory::FillZero() {
void* dataPtr = GetData();
if (dataPtr != nullptr)
memset(dataPtr, 0, getDesc().getMaxMemSize());
}
void *MKLDNNMemory::GetPtr() const {
void *Memory::GetPtr() const {
auto ptr = static_cast<uint8_t*>(GetData());
const mkldnn_memory_desc_t md = prim->get_desc().data;
mkldnn::impl::memory_desc_wrapper wrapper(md);
@ -123,7 +123,7 @@ void *MKLDNNMemory::GetPtr() const {
return ptr;
}
void MKLDNNMemory::redefineDesc(MemoryDescPtr desc) {
void Memory::redefineDesc(MemoryDescPtr desc) {
if (!desc->hasDefinedMaxSize()) {
IE_THROW() << "Can not reset descriptor, memory upper bound is unknown.";
}
@ -132,27 +132,27 @@ void MKLDNNMemory::redefineDesc(MemoryDescPtr desc) {
}
template<>
DnnlMemoryDescPtr MKLDNNMemory::GetDescWithType<DnnlMemoryDesc, 0, 0>() const {
DnnlMemoryDescPtr Memory::GetDescWithType<DnnlMemoryDesc, 0, 0>() const {
return MemoryDescUtils::convertToDnnlMemoryDesc(pMemDesc);
}
void MKLDNNMemory::setDataHandle(void *data) {
void Memory::setDataHandle(void *data) {
size_t maxMemSize = pMemDesc->hasDefinedMaxSize() ? pMemDesc->getMaxMemSize() : 0;
mgrHandle->setExtBuff(data, maxMemSize);
prim->set_data_handle(mgrHandle->getRawPtr()); // for pads zeroing, to preserve mkldnn::memory::set_data_handle behaviour
}
void MKLDNNMemory::update() {
void Memory::update() {
if (isAllocated()) {
prim->set_data_handle_no_pads_proc(mgrHandle->getRawPtr());
}
}
void MKLDNNMemory::Create(const MemoryDesc &desc, DnnlMemoryMngrPtr memMgr) {
void Memory::Create(const MemoryDesc &desc, DnnlMemoryMngrPtr memMgr) {
Create(desc.clone(), memMgr);
}
void MKLDNNMemory::Create(MemoryDescPtr desc, DnnlMemoryMngrPtr memMgr) {
void Memory::Create(MemoryDescPtr desc, DnnlMemoryMngrPtr memMgr) {
mgrHandle = DnnlMemMngrHandle(memMgr, this);
bool memAllocated = mgrHandle->getRawPtr();
@ -160,7 +160,7 @@ void MKLDNNMemory::Create(MemoryDescPtr desc, DnnlMemoryMngrPtr memMgr) {
}
template<>
BlockedMemoryDescPtr MKLDNNMemory::GetDescWithType<BlockedMemoryDesc, 0, 0>() const {
BlockedMemoryDescPtr Memory::GetDescWithType<BlockedMemoryDesc, 0, 0>() const {
return MemoryDescUtils::convertToBlockedMemoryDesc(pMemDesc);
}
@ -221,13 +221,13 @@ bool DnnlMemoryMngr::hasExtBuffer() const noexcept {
return _pMemMngr->hasExtBuffer();
}
void DnnlMemoryMngr::registerMemory(MKLDNNMemory* memPtr) {
void DnnlMemoryMngr::registerMemory(Memory* memPtr) {
if (memPtr) {
_setMemPtrs.insert(memPtr);
}
}
void DnnlMemoryMngr::unregisterMemory(MKLDNNMemory* memPtr) {
void DnnlMemoryMngr::unregisterMemory(Memory* memPtr) {
if (memPtr) {
_setMemPtrs.erase(memPtr);
}

View File

@ -6,7 +6,7 @@
#include "ie_layouts.h"
#include "memory_desc/cpu_memory_desc.h"
#include "extension_utils.h"
#include "dnnl_extension_utils.h"
#include "memory_desc/cpu_memory_desc_utils.h"
#include <mkldnn.hpp>
#include <mkldnn_types.h>
@ -23,7 +23,7 @@
/**
* @file contains concept classes to work with memory/tensor/blob abstractions at the plugin level.
*
* MKLDNNMemory is an abstraction of some real tensor which contains some data. As in short it's a pair of
* Memory is an abstraction of a real tensor which contains some data. In short, it's a pair of a
* memory descriptor and a raw buffer handle that contains the data. In the case of system memory, the raw buffer is simply
* a "void*" to some system memory buffer.
*
@ -32,7 +32,7 @@
namespace ov {
namespace intel_cpu {
class MKLDNNMemory;
class Memory;
/**
* @interface IMemoryMngr
@ -100,14 +100,14 @@ public:
void setExtBuff(void* ptr, size_t size) override;
bool resize(size_t size) override;
bool hasExtBuffer() const noexcept override;
void registerMemory(MKLDNNMemory* memPtr);
void unregisterMemory(MKLDNNMemory* memPtr);
void registerMemory(Memory* memPtr);
void unregisterMemory(Memory* memPtr);
private:
void notifyUpdate();
private:
std::unordered_set<MKLDNNMemory*> _setMemPtrs;
std::unordered_set<Memory*> _setMemPtrs;
std::unique_ptr<IMemoryMngr> _pMemMngr;
};
@ -116,7 +116,7 @@ using DnnlMemoryMngrCPtr = std::shared_ptr<const DnnlMemoryMngr>;
class DnnlMemMngrHandle {
public:
DnnlMemMngrHandle(DnnlMemoryMngrPtr pMgr, MKLDNNMemory* pMem) : _pMgr(pMgr), _pMem(pMem) {
DnnlMemMngrHandle(DnnlMemoryMngrPtr pMgr, Memory* pMem) : _pMgr(pMgr), _pMem(pMem) {
if (_pMgr) {
_pMgr->registerMemory(_pMem);
}
@ -151,19 +151,19 @@ public:
private:
DnnlMemoryMngrPtr _pMgr = nullptr;
MKLDNNMemory* _pMem = nullptr;
Memory* _pMem = nullptr;
};
class MKLDNNMemory {
class Memory {
public:
explicit MKLDNNMemory(const mkldnn::engine& eng);
MKLDNNMemory(const mkldnn::engine& eng, std::unique_ptr<IMemoryMngr> mngr);
explicit Memory(const mkldnn::engine& eng);
Memory(const mkldnn::engine& eng, std::unique_ptr<IMemoryMngr> mngr);
MKLDNNMemory(const MKLDNNMemory&) = delete;
MKLDNNMemory& operator= (const MKLDNNMemory&) = delete;
Memory(const Memory&) = delete;
Memory& operator= (const Memory&) = delete;
MKLDNNMemory(MKLDNNMemory&&) = delete;
MKLDNNMemory& operator= (MKLDNNMemory&&) = delete;
Memory(Memory&&) = delete;
Memory& operator= (Memory&&) = delete;
mkldnn::memory GetPrimitive() const {
if (isAllocated()) {
@ -213,7 +213,7 @@ public:
void* GetPtr() const;
mkldnn::memory::data_type GetDataType() const {
return MKLDNNExtensionUtils::IEPrecisionToDataType(getDesc().getPrecision());
return DnnlExtensionUtils::IEPrecisionToDataType(getDesc().getPrecision());
}
size_t GetSize() const;
@ -233,7 +233,7 @@ public:
// Caution!!! This action invalidates the previous data layout. The old data may become unreachable.
void redefineDesc(MemoryDescPtr desc);
void SetData(const MKLDNNMemory& memory, bool ftz = true) const;
void SetData(const Memory& memory, bool ftz = true) const;
void FillZero();
const VectorDims& getStaticDims() const {
@ -266,8 +266,8 @@ private:
DnnlMemMngrHandle mgrHandle;
};
using MKLDNNMemoryPtr = std::shared_ptr<MKLDNNMemory>;
using MKLDNNMemoryCPtr = std::shared_ptr<const MKLDNNMemory>;
using MemoryPtr = std::shared_ptr<Memory>;
using MemoryCPtr = std::shared_ptr<const Memory>;
} // namespace intel_cpu
} // namespace ov
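A short usage sketch of the renamed `Memory` class based on the interface above; the header name and the engine construction are assumptions made only for illustration:

#include "cpu_memory.h"   // assumed header name for the class above

void memorySketch() {
    mkldnn::engine eng(mkldnn::engine::kind::cpu, 0);   // assumed CPU engine setup
    ov::intel_cpu::Memory mem(eng);                     // was ov::intel_cpu::MKLDNNMemory before this change

    mkldnn::memory::desc md({1, 3, 8, 8},
                            mkldnn::memory::data_type::f32,
                            mkldnn::memory::format_tag::nchw);
    mem.Create(md, /*data=*/nullptr, /*pads_zeroing=*/false);  // no external buffer is passed, so the
                                                               // internal memory manager is expected to allocate one
    mem.FillZero();                     // zero the allocation
    void* raw = mem.GetPtr();           // raw handle into the underlying oneDNN buffer
    (void)raw;
}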

View File

@ -6,7 +6,8 @@
#include "utils/general_utils.h"
#include "memory_desc/cpu_memory_desc_utils.h"
using namespace ov::intel_cpu;
namespace ov {
namespace intel_cpu {
bool Shape::isCompatible(const VectorDims &vecDims) const {
if (getRank() != vecDims.size()) {
@ -47,3 +48,6 @@ std::string Shape::toString() const {
output << "}";
return output.str();
}
} // namespace intel_cpu
} // namespace ov

View File

@ -13,184 +13,184 @@ using Dim = std::size_t;
using VectorDims = std::vector<Dim>;
const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl = {
{ "Constant", Input },
{ "Parameter", Input },
{ "Result", Output },
{ "Convolution", Convolution },
{ "GroupConvolution", Convolution },
{ "MatMul", MatMul },
{ "FullyConnected", FullyConnected },
{ "MaxPool", Pooling },
{ "AvgPool", Pooling },
{ "AdaptiveMaxPool", AdaptivePooling},
{ "AdaptiveAvgPool", AdaptivePooling},
{ "Add", Eltwise },
{ "Subtract", Eltwise },
{ "Multiply", Eltwise },
{ "Divide", Eltwise },
{ "SquaredDifference", Eltwise },
{ "Maximum", Eltwise },
{ "Minimum", Eltwise },
{ "Mod", Eltwise },
{ "FloorMod", Eltwise },
{ "Power", Eltwise },
{ "PowerStatic", Eltwise },
{ "Equal", Eltwise },
{ "NotEqual", Eltwise },
{ "Greater", Eltwise },
{ "GreaterEqual", Eltwise },
{ "Less", Eltwise },
{ "LessEqual", Eltwise },
{ "LogicalAnd", Eltwise },
{ "LogicalOr", Eltwise },
{ "LogicalXor", Eltwise },
{ "LogicalNot", Eltwise },
{ "Relu", Eltwise },
{ "LeakyRelu", Eltwise },
{ "Gelu", Eltwise },
{ "Elu", Eltwise },
{ "Tanh", Eltwise },
{ "Sigmoid", Eltwise },
{ "Abs", Eltwise },
{ "Sqrt", Eltwise },
{ "Clamp", Eltwise },
{ "Exp", Eltwise },
{ "SwishCPU", Eltwise },
{ "HSwish", Eltwise },
{ "Mish", Eltwise },
{ "HSigmoid", Eltwise },
{ "Round", Eltwise },
{ "PRelu", Eltwise },
{ "Erf", Eltwise },
{ "SoftPlus", Eltwise },
{ "Reshape", Reshape },
{ "Squeeze", Reshape },
{ "Unsqueeze", Reshape },
{ "ShapeOf", ShapeOf },
{ "NonZero", NonZero },
{ "Softmax", Softmax },
{ "Reorder", Reorder },
{ "BatchToSpace", BatchToSpace },
{ "SpaceToBatch", SpaceToBatch },
{ "DepthToSpace", DepthToSpace },
{ "SpaceToDepth", SpaceToDepth },
{ "Roll", Roll },
{ "LRN", Lrn },
{ "Split", Split },
{ "VariadicSplit", Split },
{ "Concat", Concatenation },
{ "ConvolutionBackpropData", Deconvolution },
{ "GroupConvolutionBackpropData", Deconvolution },
{ "StridedSlice", StridedSlice },
{ "Slice", StridedSlice },
{ "Tile", Tile },
{ "ROIAlign", ROIAlign },
{ "ROIPooling", ROIPooling },
{ "PSROIPooling", PSROIPooling },
{ "DeformablePSROIPooling", PSROIPooling },
{ "Pad", Pad },
{ "Transpose", Transpose },
{ "LSTMCell", RNNCell },
{ "GRUCell", RNNCell },
{ "RNNCell", RNNCell },
{ "LSTMSequence", RNNSeq },
{ "GRUSequence", RNNSeq },
{ "RNNSequence", RNNSeq },
{ "FakeQuantize", FakeQuantize },
{ "BinaryConvolution", BinaryConvolution },
{ "DeformableConvolution", DeformableConvolution },
{ "TensorIterator", TensorIterator },
{ "Loop", TensorIterator },
{ "ReadValue", MemoryInput}, // for construction from name ctor, arbitrary name is used
{ "Assign", MemoryOutput }, // for construction from layer ctor
{ "Convert", Convert },
{ "NV12toRGB", ColorConvert },
{ "NV12toBGR", ColorConvert },
{ "I420toRGB", ColorConvert },
{ "I420toBGR", ColorConvert },
{ "MVN", MVN},
{ "NormalizeL2", NormalizeL2},
{ "ScatterUpdate", ScatterUpdate},
{ "ScatterElementsUpdate", ScatterElementsUpdate},
{ "ScatterNDUpdate", ScatterNDUpdate},
{ "Interpolate", Interpolate},
{ "ReduceL1", Reduce},
{ "ReduceL2", Reduce},
{ "ReduceLogicalAnd", Reduce},
{ "ReduceLogicalOr", Reduce},
{ "ReduceMax", Reduce},
{ "ReduceMean", Reduce},
{ "ReduceMin", Reduce},
{ "ReduceProd", Reduce},
{ "ReduceSum", Reduce},
{ "ReduceLogSum", Reduce},
{ "ReduceLogSumExp", Reduce},
{ "ReduceSumSquare", Reduce},
{ "Broadcast", Broadcast},
{ "EmbeddingSegmentsSum", EmbeddingSegmentsSum},
{ "EmbeddingBagPackedSum", EmbeddingBagPackedSum},
{ "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum},
{ "Gather", Gather},
{ "GatherElements", GatherElements},
{ "GatherND", GatherND},
{ "OneHot", OneHot},
{ "RegionYolo", RegionYolo},
{ "Select", Select},
{ "ShuffleChannels", ShuffleChannels},
{ "DFT", DFT},
{ "IDFT", DFT},
{ "Abs", Math},
{ "Acos", Math},
{ "Acosh", Math},
{ "Asin", Math},
{ "Asinh", Math},
{ "Atan", Math},
{ "Atanh", Math},
{ "Ceil", Math},
{ "Ceiling", Math},
{ "Cos", Math},
{ "Cosh", Math},
{ "Floor", Math},
{ "HardSigmoid", Math},
{ "If", If},
{ "Log", Math},
{ "Neg", Math},
{ "Reciprocal", Math},
{ "Selu", Math},
{ "Sign", Math},
{ "Sin", Math},
{ "Sinh", Math},
{ "SoftPlus", Math},
{ "Softsign", Math},
{ "Tan", Math},
{ "CTCLoss", CTCLoss},
{ "Bucketize", Bucketize},
{ "CTCGreedyDecoder", CTCGreedyDecoder},
{ "CTCGreedyDecoderSeqLen", CTCGreedyDecoderSeqLen},
{ "CumSum", CumSum},
{ "DetectionOutput", DetectionOutput},
{ "ExperimentalDetectronDetectionOutput", ExperimentalDetectronDetectionOutput},
{ "LogSoftmax", LogSoftmax},
{ "TopK", TopK},
{ "GatherTree", GatherTree},
{ "GRN", GRN},
{ "Range", Range},
{ "Proposal", Proposal},
{ "ReorgYolo", ReorgYolo},
{ "ReverseSequence", ReverseSequence},
{ "ExperimentalDetectronTopKROIs", ExperimentalDetectronTopKROIs},
{ "ExperimentalDetectronROIFeatureExtractor", ExperimentalDetectronROIFeatureExtractor},
{ "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator},
{ "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage},
{ "ExtractImagePatches", ExtractImagePatches},
{ "NonMaxSuppression", NonMaxSuppression},
{ "NonMaxSuppressionIEInternal", NonMaxSuppression},
{ "MatrixNms", MatrixNms},
{ "MulticlassNms", MulticlassNms},
{ "Reference", Reference},
{ "Subgraph", Subgraph},
{ "PriorBox", PriorBox},
{ "PriorBoxClustered", PriorBoxClustered},
{ "Constant", Type::Input },
{ "Parameter", Type::Input },
{ "Result", Type::Output },
{ "Convolution", Type::Convolution },
{ "GroupConvolution", Type::Convolution },
{ "MatMul", Type::MatMul },
{ "FullyConnected", Type::FullyConnected },
{ "MaxPool", Type::Pooling },
{ "AvgPool", Type::Pooling },
{ "AdaptiveMaxPool", Type::AdaptivePooling},
{ "AdaptiveAvgPool", Type::AdaptivePooling},
{ "Add", Type::Eltwise },
{ "Subtract", Type::Eltwise },
{ "Multiply", Type::Eltwise },
{ "Divide", Type::Eltwise },
{ "SquaredDifference", Type::Eltwise },
{ "Maximum", Type::Eltwise },
{ "Minimum", Type::Eltwise },
{ "Mod", Type::Eltwise },
{ "FloorMod", Type::Eltwise },
{ "Power", Type::Eltwise },
{ "PowerStatic", Type::Eltwise },
{ "Equal", Type::Eltwise },
{ "NotEqual", Type::Eltwise },
{ "Greater", Type::Eltwise },
{ "GreaterEqual", Type::Eltwise },
{ "Less", Type::Eltwise },
{ "LessEqual", Type::Eltwise },
{ "LogicalAnd", Type::Eltwise },
{ "LogicalOr", Type::Eltwise },
{ "LogicalXor", Type::Eltwise },
{ "LogicalNot", Type::Eltwise },
{ "Relu", Type::Eltwise },
{ "LeakyRelu", Type::Eltwise },
{ "Gelu", Type::Eltwise },
{ "Elu", Type::Eltwise },
{ "Tanh", Type::Eltwise },
{ "Sigmoid", Type::Eltwise },
{ "Abs", Type::Eltwise },
{ "Sqrt", Type::Eltwise },
{ "Clamp", Type::Eltwise },
{ "Exp", Type::Eltwise },
{ "SwishCPU", Type::Eltwise },
{ "HSwish", Type::Eltwise },
{ "Mish", Type::Eltwise },
{ "HSigmoid", Type::Eltwise },
{ "Round", Type::Eltwise },
{ "PRelu", Type::Eltwise },
{ "Erf", Type::Eltwise },
{ "SoftPlus", Type::Eltwise },
{ "Reshape", Type::Reshape },
{ "Squeeze", Type::Reshape },
{ "Unsqueeze", Type::Reshape },
{ "ShapeOf", Type::ShapeOf },
{ "NonZero", Type::NonZero },
{ "Softmax", Type::Softmax },
{ "Reorder", Type::Reorder },
{ "BatchToSpace", Type::BatchToSpace },
{ "SpaceToBatch", Type::SpaceToBatch },
{ "DepthToSpace", Type::DepthToSpace },
{ "SpaceToDepth", Type::SpaceToDepth },
{ "Roll", Type::Roll },
{ "LRN", Type::Lrn },
{ "Split", Type::Split },
{ "VariadicSplit", Type::Split },
{ "Concat", Type::Concatenation },
{ "ConvolutionBackpropData", Type::Deconvolution },
{ "GroupConvolutionBackpropData", Type::Deconvolution },
{ "StridedSlice", Type::StridedSlice },
{ "Slice", Type::StridedSlice },
{ "Tile", Type::Tile },
{ "ROIAlign", Type::ROIAlign },
{ "ROIPooling", Type::ROIPooling },
{ "PSROIPooling", Type::PSROIPooling },
{ "DeformablePSROIPooling", Type::PSROIPooling },
{ "Pad", Type::Pad },
{ "Transpose", Type::Transpose },
{ "LSTMCell", Type::RNNCell },
{ "GRUCell", Type::RNNCell },
{ "RNNCell", Type::RNNCell },
{ "LSTMSequence", Type::RNNSeq },
{ "GRUSequence", Type::RNNSeq },
{ "RNNSequence", Type::RNNSeq },
{ "FakeQuantize", Type::FakeQuantize },
{ "BinaryConvolution", Type::BinaryConvolution },
{ "DeformableConvolution", Type::DeformableConvolution },
{ "TensorIterator", Type::TensorIterator },
{ "Loop", Type::TensorIterator },
{ "ReadValue", Type::MemoryInput}, // for construction from name ctor, arbitrary name is used
{ "Assign", Type::MemoryOutput }, // for construction from layer ctor
{ "Convert", Type::Convert },
{ "NV12toRGB", Type::ColorConvert },
{ "NV12toBGR", Type::ColorConvert },
{ "I420toRGB", Type::ColorConvert },
{ "I420toBGR", Type::ColorConvert },
{ "MVN", Type::MVN},
{ "NormalizeL2", Type::NormalizeL2},
{ "ScatterUpdate", Type::ScatterUpdate},
{ "ScatterElementsUpdate", Type::ScatterElementsUpdate},
{ "ScatterNDUpdate", Type::ScatterNDUpdate},
{ "Interpolate", Type::Interpolate},
{ "ReduceL1", Type::Reduce},
{ "ReduceL2", Type::Reduce},
{ "ReduceLogicalAnd", Type::Reduce},
{ "ReduceLogicalOr", Type::Reduce},
{ "ReduceMax", Type::Reduce},
{ "ReduceMean", Type::Reduce},
{ "ReduceMin", Type::Reduce},
{ "ReduceProd", Type::Reduce},
{ "ReduceSum", Type::Reduce},
{ "ReduceLogSum", Type::Reduce},
{ "ReduceLogSumExp", Type::Reduce},
{ "ReduceSumSquare", Type::Reduce},
{ "Broadcast", Type::Broadcast},
{ "EmbeddingSegmentsSum", Type::EmbeddingSegmentsSum},
{ "EmbeddingBagPackedSum", Type::EmbeddingBagPackedSum},
{ "EmbeddingBagOffsetsSum", Type::EmbeddingBagOffsetsSum},
{ "Gather", Type::Gather},
{ "GatherElements", Type::GatherElements},
{ "GatherND", Type::GatherND},
{ "OneHot", Type::OneHot},
{ "RegionYolo", Type::RegionYolo},
{ "Select", Type::Select},
{ "ShuffleChannels", Type::ShuffleChannels},
{ "DFT", Type::DFT},
{ "IDFT", Type::DFT},
{ "Abs", Type::Math},
{ "Acos", Type::Math},
{ "Acosh", Type::Math},
{ "Asin", Type::Math},
{ "Asinh", Type::Math},
{ "Atan", Type::Math},
{ "Atanh", Type::Math},
{ "Ceil", Type::Math},
{ "Ceiling", Type::Math},
{ "Cos", Type::Math},
{ "Cosh", Type::Math},
{ "Floor", Type::Math},
{ "HardSigmoid", Type::Math},
{ "If", Type::If},
{ "Log", Type::Math},
{ "Neg", Type::Math},
{ "Reciprocal", Type::Math},
{ "Selu", Type::Math},
{ "Sign", Type::Math},
{ "Sin", Type::Math},
{ "Sinh", Type::Math},
{ "SoftPlus", Type::Math},
{ "Softsign", Type::Math},
{ "Tan", Type::Math},
{ "CTCLoss", Type::CTCLoss},
{ "Bucketize", Type::Bucketize},
{ "CTCGreedyDecoder", Type::CTCGreedyDecoder},
{ "CTCGreedyDecoderSeqLen", Type::CTCGreedyDecoderSeqLen},
{ "CumSum", Type::CumSum},
{ "DetectionOutput", Type::DetectionOutput},
{ "ExperimentalDetectronDetectionOutput", Type::ExperimentalDetectronDetectionOutput},
{ "LogSoftmax", Type::LogSoftmax},
{ "TopK", Type::TopK},
{ "GatherTree", Type::GatherTree},
{ "GRN", Type::GRN},
{ "Range", Type::Range},
{ "Proposal", Type::Proposal},
{ "ReorgYolo", Type::ReorgYolo},
{ "ReverseSequence", Type::ReverseSequence},
{ "ExperimentalDetectronTopKROIs", Type::ExperimentalDetectronTopKROIs},
{ "ExperimentalDetectronROIFeatureExtractor", Type::ExperimentalDetectronROIFeatureExtractor},
{ "ExperimentalDetectronPriorGridGenerator", Type::ExperimentalDetectronPriorGridGenerator},
{ "ExperimentalDetectronGenerateProposalsSingleImage", Type::ExperimentalDetectronGenerateProposalsSingleImage},
{ "ExtractImagePatches", Type::ExtractImagePatches},
{ "NonMaxSuppression", Type::NonMaxSuppression},
{ "NonMaxSuppressionIEInternal", Type::NonMaxSuppression},
{ "MatrixNms", Type::MatrixNms},
{ "MulticlassNms", Type::MulticlassNms},
{ "Reference", Type::Reference},
{ "Subgraph", Type::Subgraph},
{ "PriorBox", Type::PriorBox},
{ "PriorBoxClustered", Type::PriorBoxClustered},
};
Type TypeFromName(const std::string& type) {
@ -198,183 +198,183 @@ Type TypeFromName(const std::string& type) {
if (type_to_name_tbl.end() != itType) {
return itType->second;
} else {
return Unknown;
return Type::Unknown;
}
}
std::string NameFromType(const Type type) {
switch (type) {
case Generic:
case Type::Generic:
return "Generic";
case Reorder:
case Type::Reorder:
return "Reorder";
case Input:
case Type::Input:
return "Input";
case Output:
case Type::Output:
return "Output";
case Convolution:
case Type::Convolution:
return "Convolution";
case Deconvolution:
case Type::Deconvolution:
return "Deconvolution";
case Lrn:
case Type::Lrn:
return "Lrn";
case Pooling:
case Type::Pooling:
return "Pooling";
case AdaptivePooling:
case Type::AdaptivePooling:
return "AdaptivePooling";
case FullyConnected:
case Type::FullyConnected:
return "FullyConnected";
case MatMul:
case Type::MatMul:
return "MatMul";
case Softmax:
case Type::Softmax:
return "Softmax";
case Split:
case Type::Split:
return "Split";
case Concatenation:
case Type::Concatenation:
return "Concatenation";
case StridedSlice:
case Type::StridedSlice:
return "StridedSlice";
case Reshape:
case Type::Reshape:
return "Reshape";
case ShapeOf:
case Type::ShapeOf:
return "ShapeOf";
case NonZero:
case Type::NonZero:
return "NonZero";
case Tile:
case Type::Tile:
return "Tile";
case ROIAlign:
case Type::ROIAlign:
return "ROIAlign";
case ROIPooling:
case Type::ROIPooling:
return "ROIPooling";
case PSROIPooling:
case Type::PSROIPooling:
return "PSROIPooling";
case DepthToSpace:
case Type::DepthToSpace:
return "DepthToSpace";
case BatchToSpace:
case Type::BatchToSpace:
return "BatchToSpace";
case Pad:
case Type::Pad:
return "Pad";
case Transpose:
case Type::Transpose:
return "Transpose";
case SpaceToDepth:
case Type::SpaceToDepth:
return "SpaceToDepth";
case SpaceToBatch:
case Type::SpaceToBatch:
return "SpaceToBatch";
case MemoryOutput:
case Type::MemoryOutput:
return "MemoryOutput";
case MemoryInput:
case Type::MemoryInput:
return "MemoryInput";
case RNNSeq:
case Type::RNNSeq:
return "RNNSeq";
case RNNCell:
case Type::RNNCell:
return "RNNCell";
case Eltwise:
case Type::Eltwise:
return "Eltwise";
case FakeQuantize:
case Type::FakeQuantize:
return "FakeQuantize";
case BinaryConvolution:
case Type::BinaryConvolution:
return "BinaryConvolution";
case DeformableConvolution:
case Type::DeformableConvolution:
return "DeformableConvolution";
case MVN:
case Type::MVN:
return "MVN";
case TensorIterator:
case Type::TensorIterator:
return "TensorIterator";
case Convert:
case Type::Convert:
return "Convert";
case ColorConvert:
case Type::ColorConvert:
return "ColorConvert";
case NormalizeL2:
case Type::NormalizeL2:
return "NormalizeL2";
case ScatterUpdate:
case Type::ScatterUpdate:
return "ScatterUpdate";
case ScatterElementsUpdate:
case Type::ScatterElementsUpdate:
return "ScatterElementsUpdate";
case ScatterNDUpdate:
case Type::ScatterNDUpdate:
return "ScatterNDUpdate";
case Interpolate:
case Type::Interpolate:
return "Interpolate";
case Reduce:
case Type::Reduce:
return "Reduce";
case Broadcast:
case Type::Broadcast:
return "Broadcast";
case EmbeddingSegmentsSum:
case Type::EmbeddingSegmentsSum:
return "EmbeddingSegmentsSum";
case EmbeddingBagPackedSum:
case Type::EmbeddingBagPackedSum:
return "EmbeddingBagPackedSum";
case EmbeddingBagOffsetsSum:
case Type::EmbeddingBagOffsetsSum:
return "EmbeddingBagOffsetsSum";
case Gather:
case Type::Gather:
return "Gather";
case GatherElements:
case Type::GatherElements:
return "GatherElements";
case GatherND:
case Type::GatherND:
return "GatherND";
case OneHot:
case Type::OneHot:
return "OneHot";
case RegionYolo:
case Type::RegionYolo:
return "RegionYolo";
case Select:
case Type::Select:
return "Select";
case Roll:
case Type::Roll:
return "Roll";
case ShuffleChannels:
case Type::ShuffleChannels:
return "ShuffleChannels";
case DFT:
case Type::DFT:
return "DFT";
case Math:
case Type::Math:
return "Math";
case CTCLoss:
case Type::CTCLoss:
return "CTCLoss";
case Bucketize:
case Type::Bucketize:
return "Bucketize";
case CTCGreedyDecoder:
case Type::CTCGreedyDecoder:
return "CTCGreedyDecoder";
case CTCGreedyDecoderSeqLen:
case Type::CTCGreedyDecoderSeqLen:
return "CTCGreedyDecoderSeqLen";
case CumSum:
case Type::CumSum:
return "CumSum";
case DetectionOutput:
case Type::DetectionOutput:
return "DetectionOutput";
case ExperimentalDetectronDetectionOutput:
case Type::ExperimentalDetectronDetectionOutput:
return "ExperimentalDetectronDetectionOutput";
case If:
case Type::If:
return "If";
case LogSoftmax:
case Type::LogSoftmax:
return "LogSoftmax";
case TopK:
case Type::TopK:
return "TopK";
case GatherTree:
case Type::GatherTree:
return "GatherTree";
case GRN:
case Type::GRN:
return "GRN";
case Range:
case Type::Range:
return "Range";
case Proposal:
case Type::Proposal:
return "Proposal";
case ReorgYolo:
case Type::ReorgYolo:
return "ReorgYolo";
case ReverseSequence:
case Type::ReverseSequence:
return "ReverseSequence";
case ExperimentalDetectronTopKROIs:
case Type::ExperimentalDetectronTopKROIs:
return "ExperimentalDetectronTopKROIs";
case ExperimentalDetectronROIFeatureExtractor:
case Type::ExperimentalDetectronROIFeatureExtractor:
return "ExperimentalDetectronROIFeatureExtractor";
case ExperimentalDetectronPriorGridGenerator:
case Type::ExperimentalDetectronPriorGridGenerator:
return "ExperimentalDetectronPriorGridGenerator";
case ExperimentalDetectronGenerateProposalsSingleImage:
case Type::ExperimentalDetectronGenerateProposalsSingleImage:
return "ExperimentalDetectronGenerateProposalsSingleImage";
case ExtractImagePatches:
case Type::ExtractImagePatches:
return "ExtractImagePatches";
case NonMaxSuppression:
case Type::NonMaxSuppression:
return "NonMaxSuppression";
case MatrixNms:
case Type::MatrixNms:
return "MatrixNms";
case MulticlassNms:
case Type::MulticlassNms:
return "MulticlassNms";
case Reference:
case Type::Reference:
return "Reference";
case Subgraph:
case Type::Subgraph:
return "Subgraph";
default:
return "Unknown";
@ -382,8 +382,8 @@ std::string NameFromType(const Type type) {
}
std::string algToString(const Algorithm alg) {
#define CASE(_alg) do { \
if (alg == _alg) return #_alg; \
#define CASE(_alg) do { \
if (alg == Algorithm::_alg) return #_alg; \
} while (0)
CASE(Default);
CASE(PoolingMax);

View File

@ -15,7 +15,7 @@ namespace intel_cpu {
using Dim = std::size_t;
using VectorDims = std::vector<Dim>;
enum Type {
enum class Type {
Unknown,
Generic,
If,
@ -107,7 +107,7 @@ enum Type {
PriorBoxClustered,
};
enum Algorithm {
enum class Algorithm {
Default,
// Pooling algorithms
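Since `Type` and `Algorithm` are now scoped enums, call sites must qualify the enumerators; a small sketch using the `TypeFromName`/`NameFromType` helpers from this change (header name assumed):

#include "cpu_types.h"   // assumed header name
#include <iostream>

void typeSketch() {
    using namespace ov::intel_cpu;
    Type t = TypeFromName("Convolution");
    if (t == Type::Convolution) {                    // a bare "Convolution" no longer compiles with enum class
        std::cout << NameFromType(t) << std::endl;   // prints "Convolution"
    }
}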

View File

@ -4,30 +4,33 @@
#include <ie_common.h>
#include "descriptor.h"
#include "dnnl_descriptor.h"
mkldnn::primitive_desc_iterator MKLDNNDescriptor::createPrimitiveDescriptorIterator(const mkldnn::engine &engine,
namespace ov {
namespace intel_cpu {
mkldnn::primitive_desc_iterator DnnlDesriptor::createPrimitiveDescriptorIterator(const mkldnn::engine &engine,
const mkldnn::primitive_attr &attr) const {
return desc->createPrimitiveDescriptorIterator(attr, engine);
}
MKLDNNDescriptor::operator bool() {
DnnlDesriptor::operator bool() {
return desc != nullptr;
}
size_t MKLDNNDescriptor::inputNumbers() const {
size_t DnnlDesriptor::inputNumbers() const {
return 1;
}
size_t MKLDNNDescriptor::outputNumbers() const {
size_t DnnlDesriptor::outputNumbers() const {
return 1;
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::convolution_forward::desc> desc) {
DnnlDesriptor::DnnlDesriptor(std::shared_ptr<mkldnn::convolution_forward::desc> desc) {
this->desc.reset(new DescFwdImpl<mkldnn::convolution_forward::desc>(desc));
}
MKLDNNDescriptor::operator std::shared_ptr<mkldnn::convolution_forward::desc>() {
DnnlDesriptor::operator std::shared_ptr<mkldnn::convolution_forward::desc>() {
auto typeDesc = std::dynamic_pointer_cast<DescFwdImpl<mkldnn::convolution_forward::desc>>(desc);
if (typeDesc == nullptr) {
IE_THROW() << "Cannot cast descriptor!";
@ -35,11 +38,11 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::convolution_forward::desc>()
return typeDesc->getPtr();
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::deconvolution_forward::desc> desc) {
DnnlDesriptor::DnnlDesriptor(std::shared_ptr<mkldnn::deconvolution_forward::desc> desc) {
this->desc.reset(new DescFwdImpl<mkldnn::deconvolution_forward::desc>(desc));
}
MKLDNNDescriptor::operator std::shared_ptr<mkldnn::deconvolution_forward::desc>() {
DnnlDesriptor::operator std::shared_ptr<mkldnn::deconvolution_forward::desc>() {
auto typeDesc = std::dynamic_pointer_cast<DescFwdImpl<mkldnn::deconvolution_forward::desc>>(desc);
if (typeDesc == nullptr) {
IE_THROW() << "Cannot cast descriptor!";
@ -47,14 +50,14 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::deconvolution_forward::desc>(
return typeDesc->getPtr();
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::convolution_backward_data::desc> desc,
DnnlDesriptor::DnnlDesriptor(std::shared_ptr<mkldnn::convolution_backward_data::desc> desc,
std::shared_ptr<mkldnn::convolution_forward::primitive_desc> prim) {
this->desc.reset(
new DescBwdImpl<mkldnn::convolution_backward_data::desc,
mkldnn::convolution_forward::primitive_desc>(desc, prim));
}
MKLDNNDescriptor::operator std::shared_ptr<mkldnn::convolution_backward_data::desc>() {
DnnlDesriptor::operator std::shared_ptr<mkldnn::convolution_backward_data::desc>() {
auto typeDesc = std::dynamic_pointer_cast<DescBwdImpl<mkldnn::convolution_backward_data::desc, mkldnn::convolution_forward::primitive_desc>>(desc);
if (typeDesc == nullptr) {
IE_THROW() << "Cannot cast descriptor!";
@ -62,7 +65,7 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::convolution_backward_data::de
return typeDesc->getPtr();
}
MKLDNNDescriptor::operator std::shared_ptr<mkldnn::convolution_forward::primitive_desc>() {
DnnlDesriptor::operator std::shared_ptr<mkldnn::convolution_forward::primitive_desc>() {
auto typeDesc = std::dynamic_pointer_cast<DescBwdImpl<mkldnn::convolution_backward_data::desc, mkldnn::convolution_forward::primitive_desc>>(desc);
if (typeDesc == nullptr) {
IE_THROW() << "Cannot cast descriptor!";
@ -70,11 +73,11 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::convolution_forward::primitiv
return typeDesc->getPrimPtr();
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::inner_product_forward::desc> desc) {
DnnlDesriptor::DnnlDesriptor(std::shared_ptr<mkldnn::inner_product_forward::desc> desc) {
this->desc.reset(new DescFwdImpl<mkldnn::inner_product_forward::desc>(desc));
}
MKLDNNDescriptor::operator std::shared_ptr<mkldnn::inner_product_forward::desc>() {
DnnlDesriptor::operator std::shared_ptr<mkldnn::inner_product_forward::desc>() {
auto typeDesc = std::dynamic_pointer_cast<DescFwdImpl<mkldnn::inner_product_forward::desc>>(desc);
if (typeDesc == nullptr) {
IE_THROW() << "Cannot cast descriptor!";
@ -82,11 +85,11 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::inner_product_forward::desc>(
return typeDesc->getPtr();
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::lrn_forward::desc> desc) {
DnnlDesriptor::DnnlDesriptor(std::shared_ptr<mkldnn::lrn_forward::desc> desc) {
this->desc.reset(new DescFwdImpl<mkldnn::lrn_forward::desc>(desc));
}
MKLDNNDescriptor::operator std::shared_ptr<mkldnn::lrn_forward::desc>() {
DnnlDesriptor::operator std::shared_ptr<mkldnn::lrn_forward::desc>() {
auto typeDesc = std::dynamic_pointer_cast<DescFwdImpl<mkldnn::lrn_forward::desc>>(desc);
if (typeDesc == nullptr) {
IE_THROW() << "Cannot cast descriptor!";
@ -94,11 +97,11 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::lrn_forward::desc>() {
return typeDesc->getPtr();
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::pooling_v2_forward::desc> desc) {
DnnlDesriptor::DnnlDesriptor(std::shared_ptr<mkldnn::pooling_v2_forward::desc> desc) {
this->desc.reset(new DescFwdImpl<mkldnn::pooling_v2_forward::desc>(desc));
}
MKLDNNDescriptor::operator std::shared_ptr<mkldnn::pooling_v2_forward::desc>() {
DnnlDesriptor::operator std::shared_ptr<mkldnn::pooling_v2_forward::desc>() {
auto typeDesc = std::dynamic_pointer_cast<DescFwdImpl<mkldnn::pooling_v2_forward::desc>>(desc);
if (typeDesc == nullptr) {
IE_THROW() << "Cannot cast descriptor!";
@ -106,11 +109,11 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::pooling_v2_forward::desc>() {
return typeDesc->getPtr();
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::softmax_forward::desc> desc) {
DnnlDesriptor::DnnlDesriptor(std::shared_ptr<mkldnn::softmax_forward::desc> desc) {
this->desc.reset(new DescFwdImpl<mkldnn::softmax_forward::desc>(desc));
}
MKLDNNDescriptor::operator std::shared_ptr<mkldnn::softmax_forward::desc>() {
DnnlDesriptor::operator std::shared_ptr<mkldnn::softmax_forward::desc>() {
auto typeDesc = std::dynamic_pointer_cast<DescFwdImpl<mkldnn::softmax_forward::desc>>(desc);
if (typeDesc == nullptr) {
IE_THROW() << "Cannot cast descriptor!";
@ -118,11 +121,11 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::softmax_forward::desc>() {
return typeDesc->getPtr();
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::vanilla_rnn_forward::desc> desc) {
DnnlDesriptor::DnnlDesriptor(std::shared_ptr<mkldnn::vanilla_rnn_forward::desc> desc) {
this->desc.reset(new DescFwdImpl<mkldnn::vanilla_rnn_forward::desc>(desc));
}
MKLDNNDescriptor::operator std::shared_ptr<mkldnn::vanilla_rnn_forward::desc>() {
DnnlDesriptor::operator std::shared_ptr<mkldnn::vanilla_rnn_forward::desc>() {
auto typeDesc = std::dynamic_pointer_cast<DescFwdImpl<mkldnn::vanilla_rnn_forward::desc>>(desc);
if (typeDesc == nullptr) {
IE_THROW() << "Cannot cast descriptor!";
@ -130,11 +133,11 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::vanilla_rnn_forward::desc>()
return typeDesc->getPtr();
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::lstm_forward::desc> desc) {
DnnlDesriptor::DnnlDesriptor(std::shared_ptr<mkldnn::lstm_forward::desc> desc) {
this->desc.reset(new DescFwdImpl<mkldnn::lstm_forward::desc>(desc));
}
MKLDNNDescriptor::operator std::shared_ptr<mkldnn::lstm_forward::desc>() {
DnnlDesriptor::operator std::shared_ptr<mkldnn::lstm_forward::desc>() {
auto typeDesc = std::dynamic_pointer_cast<DescFwdImpl<mkldnn::lstm_forward::desc>>(desc);
if (typeDesc == nullptr) {
IE_THROW() << "Cannot cast descriptor!";
@ -142,11 +145,11 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::lstm_forward::desc>() {
return typeDesc->getPtr();
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::gru_forward::desc> desc) {
DnnlDesriptor::DnnlDesriptor(std::shared_ptr<mkldnn::gru_forward::desc> desc) {
this->desc.reset(new DescFwdImpl<mkldnn::gru_forward::desc>(desc));
}
MKLDNNDescriptor::operator std::shared_ptr<mkldnn::gru_forward::desc>() {
DnnlDesriptor::operator std::shared_ptr<mkldnn::gru_forward::desc>() {
auto typeDesc = std::dynamic_pointer_cast<DescFwdImpl<mkldnn::gru_forward::desc>>(desc);
if (typeDesc == nullptr) {
IE_THROW() << "Cannot cast descriptor!";
@ -154,11 +157,11 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::gru_forward::desc>() {
return typeDesc->getPtr();
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::lbr_gru_forward::desc> desc) {
DnnlDesriptor::DnnlDesriptor(std::shared_ptr<mkldnn::lbr_gru_forward::desc> desc) {
this->desc.reset(new DescFwdImpl<mkldnn::lbr_gru_forward::desc>(desc));
}
MKLDNNDescriptor::operator std::shared_ptr<mkldnn::lbr_gru_forward::desc>() {
DnnlDesriptor::operator std::shared_ptr<mkldnn::lbr_gru_forward::desc>() {
auto typeDesc = std::dynamic_pointer_cast<DescFwdImpl<mkldnn::lbr_gru_forward::desc>>(desc);
if (typeDesc == nullptr) {
IE_THROW() << "Cannot cast descriptor!";
@ -166,11 +169,11 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::lbr_gru_forward::desc>() {
return typeDesc->getPtr();
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::eltwise_forward::desc> desc) {
DnnlDesriptor::DnnlDesriptor(std::shared_ptr<mkldnn::eltwise_forward::desc> desc) {
this->desc.reset(new DescFwdImpl<mkldnn::eltwise_forward::desc>(desc));
}
MKLDNNDescriptor::operator std::shared_ptr<mkldnn::eltwise_forward::desc>() {
DnnlDesriptor::operator std::shared_ptr<mkldnn::eltwise_forward::desc>() {
auto typeDesc = std::dynamic_pointer_cast<DescFwdImpl<mkldnn::eltwise_forward::desc>>(desc);
if (typeDesc == nullptr) {
IE_THROW() << "Cannot cast descriptor!";
@ -178,14 +181,17 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::eltwise_forward::desc>() {
return typeDesc->getPtr();
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::matmul::desc> desc) {
DnnlDesriptor::DnnlDesriptor(std::shared_ptr<mkldnn::matmul::desc> desc) {
this->desc.reset(new DescFwdImpl<mkldnn::matmul::desc>(desc));
}
MKLDNNDescriptor::operator std::shared_ptr<mkldnn::matmul::desc>() {
DnnlDesriptor::operator std::shared_ptr<mkldnn::matmul::desc>() {
auto typeDesc = std::dynamic_pointer_cast<DescFwdImpl<mkldnn::matmul::desc>>(desc);
if (typeDesc == nullptr) {
IE_THROW() << "Cannot cast descriptor!";
}
return typeDesc->getPtr();
}
} // namespace intel_cpu
} // namespace ov

View File

@ -8,48 +8,51 @@
#include <string>
#include "mkldnn/ie_mkldnn.h"
class MKLDNNDescriptor {
namespace ov {
namespace intel_cpu {
class DnnlDesriptor {
public:
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::convolution_forward::desc> desc);
explicit DnnlDesriptor(std::shared_ptr<mkldnn::convolution_forward::desc> desc);
operator std::shared_ptr<mkldnn::convolution_forward::desc>();
MKLDNNDescriptor(std::shared_ptr<mkldnn::convolution_backward_data::desc> desc,
std::shared_ptr<mkldnn::convolution_forward::primitive_desc> prim);
DnnlDesriptor(std::shared_ptr<mkldnn::convolution_backward_data::desc> desc,
std::shared_ptr<mkldnn::convolution_forward::primitive_desc> prim);
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::deconvolution_forward::desc> desc);
explicit DnnlDesriptor(std::shared_ptr<mkldnn::deconvolution_forward::desc> desc);
operator std::shared_ptr<mkldnn::deconvolution_forward::desc>();
operator std::shared_ptr<mkldnn::convolution_backward_data::desc>();
operator std::shared_ptr<mkldnn::convolution_forward::primitive_desc>();
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::inner_product_forward::desc> desc);
explicit DnnlDesriptor(std::shared_ptr<mkldnn::inner_product_forward::desc> desc);
operator std::shared_ptr<mkldnn::inner_product_forward::desc>();
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::lrn_forward::desc> desc);
explicit DnnlDesriptor(std::shared_ptr<mkldnn::lrn_forward::desc> desc);
operator std::shared_ptr<mkldnn::lrn_forward::desc>();
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::pooling_v2_forward::desc> desc);
explicit DnnlDesriptor(std::shared_ptr<mkldnn::pooling_v2_forward::desc> desc);
operator std::shared_ptr<mkldnn::pooling_v2_forward::desc>();
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::softmax_forward::desc> desc);
explicit DnnlDesriptor(std::shared_ptr<mkldnn::softmax_forward::desc> desc);
operator std::shared_ptr<mkldnn::softmax_forward::desc>();
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::vanilla_rnn_forward::desc> desc);
explicit DnnlDesriptor(std::shared_ptr<mkldnn::vanilla_rnn_forward::desc> desc);
operator std::shared_ptr<mkldnn::vanilla_rnn_forward::desc>();
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::lstm_forward::desc> desc);
explicit DnnlDesriptor(std::shared_ptr<mkldnn::lstm_forward::desc> desc);
operator std::shared_ptr<mkldnn::lstm_forward::desc>();
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::gru_forward::desc> desc);
explicit DnnlDesriptor(std::shared_ptr<mkldnn::gru_forward::desc> desc);
operator std::shared_ptr<mkldnn::gru_forward::desc>();
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::lbr_gru_forward::desc> desc);
explicit DnnlDesriptor(std::shared_ptr<mkldnn::lbr_gru_forward::desc> desc);
operator std::shared_ptr<mkldnn::lbr_gru_forward::desc>();
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::eltwise_forward::desc> desc);
explicit DnnlDesriptor(std::shared_ptr<mkldnn::eltwise_forward::desc> desc);
operator std::shared_ptr<mkldnn::eltwise_forward::desc>();
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::matmul::desc> desc);
explicit DnnlDesriptor(std::shared_ptr<mkldnn::matmul::desc> desc);
operator std::shared_ptr<mkldnn::matmul::desc>();
mkldnn::primitive_desc_iterator createPrimitiveDescriptorIterator(const mkldnn::engine &engine,
@ -110,3 +113,6 @@ private:
std::shared_ptr<IDesc> desc;
};
} // namespace intel_cpu
} // namespace ov
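A hedged sketch of wrapping a oneDNN convolution descriptor in the renamed `DnnlDesriptor`; the constructor and `createPrimitiveDescriptorIterator` come from this change, while the convolution shapes are arbitrary illustration values:

#include "dnnl_descriptor.h"
#include <memory>

void descriptorSketch() {
    using tag = mkldnn::memory::format_tag;
    using dt = mkldnn::memory::data_type;

    mkldnn::memory::desc src({1, 3, 8, 8}, dt::f32, tag::any);
    mkldnn::memory::desc wei({8, 3, 3, 3}, dt::f32, tag::any);
    mkldnn::memory::desc dst({1, 8, 6, 6}, dt::f32, tag::any);

    auto convDesc = std::make_shared<mkldnn::convolution_forward::desc>(
        mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct,
        src, wei, dst,
        mkldnn::memory::dims{1, 1},   // strides
        mkldnn::memory::dims{0, 0},   // padding left
        mkldnn::memory::dims{0, 0});  // padding right

    ov::intel_cpu::DnnlDesriptor descriptor(convDesc);   // was MKLDNNDescriptor before this change
    mkldnn::engine eng(mkldnn::engine::kind::cpu, 0);
    auto itpd = descriptor.createPrimitiveDescriptorIterator(eng, mkldnn::primitive_attr());
    (void)itpd;
}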

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "extension_utils.h"
#include "dnnl_extension_utils.h"
#include "utils/general_utils.h"
#include <vector>
#include "memory_desc/dnnl_blocked_memory_desc.h"
using namespace mkldnn;
using namespace ov::intel_cpu;
uint8_t MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type dataType) {
namespace ov {
namespace intel_cpu {
uint8_t DnnlExtensionUtils::sizeOfDataType(mkldnn::memory::data_type dataType) {
switch (dataType) {
case mkldnn::memory::data_type::f32:
return 4;
@ -31,7 +33,7 @@ uint8_t MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type dataType)
}
}
memory::data_type MKLDNNExtensionUtils::IEPrecisionToDataType(const InferenceEngine::Precision& prec) {
memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngine::Precision& prec) {
switch (prec) {
case InferenceEngine::Precision::FP32:
return memory::data_type::f32;
@ -54,7 +56,7 @@ memory::data_type MKLDNNExtensionUtils::IEPrecisionToDataType(const InferenceEng
}
}
InferenceEngine::Precision MKLDNNExtensionUtils::DataTypeToIEPrecision(memory::data_type dataType) {
InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::data_type dataType) {
switch (dataType) {
case memory::data_type::f32:
return InferenceEngine::Precision::FP32;
@ -76,14 +78,14 @@ InferenceEngine::Precision MKLDNNExtensionUtils::DataTypeToIEPrecision(memory::d
}
}
Dim MKLDNNExtensionUtils::convertToDim(const dnnl::memory::dim &dim) {
Dim DnnlExtensionUtils::convertToDim(const dnnl::memory::dim &dim) {
return dim == DNNL_RUNTIME_DIM_VAL ? Shape::UNDEFINED_DIM : static_cast<size_t>(dim);
}
dnnl::memory::dim MKLDNNExtensionUtils::convertToDnnlDim(const Dim &dim) {
dnnl::memory::dim DnnlExtensionUtils::convertToDnnlDim(const Dim &dim) {
return dim == Shape::UNDEFINED_DIM ? DNNL_RUNTIME_DIM_VAL : static_cast<mkldnn::memory::dim>(dim);
}
VectorDims MKLDNNExtensionUtils::convertToVectorDims(const memory::dims& dims) {
VectorDims DnnlExtensionUtils::convertToVectorDims(const memory::dims& dims) {
std::vector<size_t> vecResult;
vecResult.reserve(dims.size());
std::back_insert_iterator<std::vector<size_t>> itr(vecResult);
@ -91,7 +93,7 @@ VectorDims MKLDNNExtensionUtils::convertToVectorDims(const memory::dims& dims) {
return vecResult;
}
memory::dims MKLDNNExtensionUtils::convertToDnnlDims(const VectorDims& dims) {
memory::dims DnnlExtensionUtils::convertToDnnlDims(const VectorDims& dims) {
memory::dims vecResult;
vecResult.reserve(dims.size());
std::back_insert_iterator<memory::dims> itr(vecResult);
@ -99,7 +101,7 @@ memory::dims MKLDNNExtensionUtils::convertToDnnlDims(const VectorDims& dims) {
return vecResult;
}
memory::format_tag MKLDNNExtensionUtils::GetPlainFormatByRank(size_t rank) {
memory::format_tag DnnlExtensionUtils::GetPlainFormatByRank(size_t rank) {
switch (rank) {
case 0:
case 1:
@ -119,7 +121,7 @@ memory::format_tag MKLDNNExtensionUtils::GetPlainFormatByRank(size_t rank) {
}
}
DnnlMemoryDescPtr MKLDNNExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc) {
DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc) {
if (desc.data.format_kind == dnnl_blocked) {
return std::shared_ptr<DnnlBlockedMemoryDesc>(new DnnlBlockedMemoryDesc(desc));
} else {
@ -127,7 +129,7 @@ DnnlMemoryDescPtr MKLDNNExtensionUtils::makeDescriptor(const mkldnn::memory::des
}
}
size_t MKLDNNExtensionUtils::getMemSizeForDnnlDesc(const mkldnn::memory::desc& desc) {
size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const mkldnn::memory::desc& desc) {
auto tmpDesc = desc;
const auto offset0 = tmpDesc.data.offset0;
tmpDesc.data.offset0 = 0;
@ -138,10 +140,13 @@ size_t MKLDNNExtensionUtils::getMemSizeForDnnlDesc(const mkldnn::memory::desc& d
return size;
}
std::shared_ptr<DnnlBlockedMemoryDesc> MKLDNNExtensionUtils::makeUndefinedDesc(const memory::desc &desc, const Shape &shape) {
std::shared_ptr<DnnlBlockedMemoryDesc> DnnlExtensionUtils::makeUndefinedDesc(const memory::desc &desc, const Shape &shape) {
if (desc.data.format_kind == dnnl_blocked) {
return std::shared_ptr<DnnlBlockedMemoryDesc>(new DnnlBlockedMemoryDesc(desc, shape));
} else {
IE_THROW(Unexpected) << "Cannot make undefined descriptor. Only dnnl_blocked type is allowed.";
}
}
} // namespace intel_cpu
} // namespace ov
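A short sketch of the renamed `DnnlExtensionUtils` helpers implemented above:

#include "dnnl_extension_utils.h"

void extensionUtilsSketch() {
    using namespace ov::intel_cpu;
    // Precision <-> oneDNN data type round trip
    auto dt = DnnlExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);  // f32
    auto prec = DnnlExtensionUtils::DataTypeToIEPrecision(dt);                              // back to FP32
    // Dims conversion between the plugin and oneDNN representations
    mkldnn::memory::dims dnnlDims = DnnlExtensionUtils::convertToDnnlDims({1, 3, 224, 224});
    VectorDims dims = DnnlExtensionUtils::convertToVectorDims(dnnlDims);
    (void)prec; (void)dims;
}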

View File

@ -4,7 +4,7 @@
/**
* @brief Convenience wrapper class for handling MKL-DNN memory formats.
* @file extension_utils.h
* @file dnnl_extension_utils.h
*/
#pragma once
@ -18,7 +18,7 @@ namespace intel_cpu {
class DnnlMemoryDesc;
class MKLDNNExtensionUtils {
class DnnlExtensionUtils {
public:
static uint8_t sizeOfDataType(mkldnn::memory::data_type dataType);
static mkldnn::memory::data_type IEPrecisionToDataType(const InferenceEngine::Precision& prec);

View File

@ -4,7 +4,7 @@
#include "edge.h"
#include "node.h"
#include "extension_utils.h"
#include "dnnl_extension_utils.h"
#include <blob_factory.hpp>
#include "nodes/input.h"
@ -12,28 +12,28 @@ using namespace mkldnn;
namespace ov {
namespace intel_cpu {
MKLDNNEdge::MKLDNNEdge(const MKLDNNNodePtr &parent, const MKLDNNNodePtr &child, int pr_port, int ch_port) :
Edge::Edge(const NodePtr &parent, const NodePtr &child, int pr_port, int ch_port) :
parent(parent), child(child), parent_port(pr_port), child_port(ch_port) {}
const MKLDNNNodePtr MKLDNNEdge::getParent() const {
const NodePtr Edge::getParent() const {
auto parentPtr = parent.lock();
if (!parentPtr)
IE_THROW() << "Edge contains empty parent node";
return parentPtr;
}
const MKLDNNNodePtr MKLDNNEdge::getChild() const {
const NodePtr Edge::getChild() const {
auto childPtr = child.lock();
if (!childPtr)
IE_THROW() << "Edge contains empty child node";
return childPtr;
}
bool MKLDNNEdge::isUseExternalMemory() const {
bool Edge::isUseExternalMemory() const {
return useExternalMemory;
}
bool MKLDNNEdge::isDropped() const {
bool Edge::isDropped() const {
bool not_in_parent = true;
bool not_in_child = true;
@ -53,10 +53,10 @@ bool MKLDNNEdge::isDropped() const {
return not_in_parent && not_in_child;
}
void MKLDNNEdge::drop() {
auto _drop_from = [&] (std::vector<MKLDNNEdgeWeakPtr> &list) {
void Edge::drop() {
auto _drop_from = [&] (std::vector<EdgeWeakPtr> &list) {
auto myself = std::find_if(list.begin(), list.end(),
[&] (MKLDNNEdgeWeakPtr edge) { return edge.lock().get() == this; });
[&] (EdgeWeakPtr edge) { return edge.lock().get() == this; });
if (myself != list.end())
list.erase(myself);
@ -66,7 +66,7 @@ void MKLDNNEdge::drop() {
_drop_from(getChild()->parentEdges);
}
bool MKLDNNEdge::enforceReorder() {
bool Edge::enforceReorder() {
bool canBeInPlaceConflicts = false;
auto parentNode = getParent();
auto parentSPD = parentNode->getSelectedPrimitiveDescriptor();
@ -83,7 +83,7 @@ bool MKLDNNEdge::enforceReorder() {
childCanChangeMem = true;
}
const auto& detectInPlaceChildrenNum = [](const std::vector<MKLDNNEdgePtr>& edges) -> size_t {
const auto& detectInPlaceChildrenNum = [](const std::vector<EdgePtr>& edges) -> size_t {
size_t count = 0;
for (const auto& edge : edges) {
auto childSPD = edge->getChild()->getSelectedPrimitiveDescriptor();
@ -105,7 +105,7 @@ bool MKLDNNEdge::enforceReorder() {
for (auto &p_edge_peer : portChildEdges) {
if (p_edge_peer.get() == this)
continue;
if (p_edge_peer->getChild()->getType() != Reorder && p_edge_peer->inPlace(LOOK_DOWN))
if (p_edge_peer->getChild()->getType() != Type::Reorder && p_edge_peer->inPlace(LOOK_DOWN))
canBeInPlaceConflicts = true;
}
}
@ -126,7 +126,7 @@ bool MKLDNNEdge::enforceReorder() {
if ((childSPD->getImplementationType() & impl_desc_type::sse42) &&
Type::Input == parentNode->getType() &&
parentNode->isConstant()) {
if (auto pInputNode = std::dynamic_pointer_cast<MKLDNNInputNode>(parentNode)) {
if (auto pInputNode = std::dynamic_pointer_cast<node::Input>(parentNode)) {
auto rawMemPtr = pInputNode->getMemoryPtr()->GetData();
bool isAligned = (reinterpret_cast<uintptr_t>(rawMemPtr) & 15) == 0;
if (!isAligned) {
@ -217,7 +217,7 @@ static inline bool isPhycicalMemCompatible(const MemoryDesc& lhsMemDesc, const M
return true;
}
MKLDNNEdge::ReorderStatus MKLDNNEdge::needReorder() {
Edge::ReorderStatus Edge::needReorder() {
bool optimized = false;
auto inputPortDesc = getInputPortDesc();
auto outPortDesc = getOutputPortDesc();
@ -243,22 +243,22 @@ MKLDNNEdge::ReorderStatus MKLDNNEdge::needReorder() {
return ReorderStatus::No;
}
void MKLDNNEdge::reuse(MKLDNNMemoryPtr ptr) {
void Edge::reuse(MemoryPtr ptr) {
if (status != Status::NeedAllocation)
return;
memoryPtr = ptr;
status = Status::Allocated;
}
int MKLDNNEdge::getInputNum() const {
int Edge::getInputNum() const {
return parent_port;
}
int MKLDNNEdge::getOutputNum() const {
int Edge::getOutputNum() const {
return child_port;
}
void MKLDNNEdge::allocate(const void* mem_ptr) {
void Edge::allocate(const void* mem_ptr) {
if (status != Status::NeedAllocation)
return;
@ -271,13 +271,13 @@ void MKLDNNEdge::allocate(const void* mem_ptr) {
IE_THROW() << "Cannot allocate memory for incompatible descriptors.";
auto parentPtr = getParent();
memoryPtr.reset(new MKLDNNMemory(parentPtr->getEngine()));
memoryPtr.reset(new Memory(parentPtr->getEngine()));
memoryPtr->Create(inputDesc, mem_ptr, false); // no pads zeroing
status = Status::Allocated;
}
std::string MKLDNNEdge::name() const {
std::string Edge::name() const {
auto parentPtr = getParent();
auto childPtr = getChild();
@ -288,10 +288,8 @@ std::string MKLDNNEdge::name() const {
return result.str();
}
void MKLDNNEdge::externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache) {
auto isInPlace = [](const MKLDNNNodePtr node, int port) -> bool {
void Edge::externalAllocate(WeightsSharing::Ptr weightsCache) {
auto isInPlace = [](const NodePtr node, int port) -> bool {
const auto& selected_pd = node->getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
IE_THROW() << "Preferable primitive descriptor is not set.";
@ -333,7 +331,7 @@ void MKLDNNEdge::externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache) {
}
}
void MKLDNNEdge::changeStatus(MKLDNNEdge::Status state) {
void Edge::changeStatus(Edge::Status state) {
if (state == Status::NotAllocated) {
IE_THROW() << "Incorrect behaviour! Use method sharedMemFrom()";
}
@ -347,7 +345,7 @@ void MKLDNNEdge::changeStatus(MKLDNNEdge::Status state) {
status = state;
}
PortDescBaseCPtr MKLDNNEdge::getInputPortDesc() const {
PortDescBaseCPtr Edge::getInputPortDesc() const {
auto parentPtr = getParent();
if (parentPtr->getSelectedPrimitiveDescriptor() == nullptr)
IE_THROW() << "Primitive descriptor for node " << parentPtr->getName() << " is not selected.";
@ -371,7 +369,7 @@ PortDescBaseCPtr MKLDNNEdge::getInputPortDesc() const {
return inputPortDesc;
}
PortDescBaseCPtr MKLDNNEdge::getOutputPortDesc() const {
PortDescBaseCPtr Edge::getOutputPortDesc() const {
auto childPtr = getChild();
if (childPtr->getSelectedPrimitiveDescriptor() == nullptr)
@ -396,7 +394,7 @@ PortDescBaseCPtr MKLDNNEdge::getOutputPortDesc() const {
return outPortDesc;
}
const MemoryDesc& MKLDNNEdge::getInputDesc() const {
const MemoryDesc& Edge::getInputDesc() const {
auto memDescPtr = getInputPortDesc()->getMemDesc();
if (!memDescPtr) {
IE_THROW() << "Cannot get input memory descriptor for edge: " << getParent()->getName() << "->"
@ -405,7 +403,7 @@ const MemoryDesc& MKLDNNEdge::getInputDesc() const {
return *memDescPtr;
}
const MemoryDesc& MKLDNNEdge::getOutputDesc() const {
const MemoryDesc& Edge::getOutputDesc() const {
auto memDescPtr = getOutputPortDesc()->getMemDesc();
if (!memDescPtr) {
IE_THROW() << "Cannot get output memory descriptor for edge: " << getParent()->getName() << "->"
@ -414,7 +412,7 @@ const MemoryDesc& MKLDNNEdge::getOutputDesc() const {
return *memDescPtr;
}
const MemoryDesc& MKLDNNEdge::getDesc() const {
const MemoryDesc& Edge::getDesc() const {
if (!getInputDesc().isCompatible(getOutputDesc()))
IE_THROW() << "Cannot get descriptor for edge: " << getParent()->getName() << "->"
<< getChild()->getName();
@ -422,13 +420,13 @@ const MemoryDesc& MKLDNNEdge::getDesc() const {
return getInputDesc();
}
const MKLDNNMemory &MKLDNNEdge::getMemory() {
const Memory &Edge::getMemory() {
return *getMemoryPtr();
}
MKLDNNMemoryPtr &MKLDNNEdge::getMemoryPtr() {
MemoryPtr &Edge::getMemoryPtr() {
if (status == Status::NotAllocated) {
memoryPtr.reset(new MKLDNNMemory(getParent()->getEngine()));
memoryPtr.reset(new Memory(getParent()->getEngine()));
const auto &desc = getDesc();
auto sharedEdge = getSharedEdge();
auto sharedEdgeParent = sharedEdge->getParent();
@ -444,12 +442,12 @@ MKLDNNMemoryPtr &MKLDNNEdge::getMemoryPtr() {
return memoryPtr;
}
void MKLDNNEdge::sharedMemFrom(const MKLDNNEdgePtr &edge) {
void Edge::sharedMemFrom(const EdgePtr &edge) {
memoryFromEdge = edge;
status = Status::NotAllocated;
}
void MKLDNNEdge::validate() {
void Edge::validate() {
if (status == Status::Validated)
return;
getMemory();
@ -462,7 +460,7 @@ void MKLDNNEdge::validate() {
status = Status::Validated;
}
MKLDNNEdgePtr MKLDNNEdge::getSharedEdge() const {
EdgePtr Edge::getSharedEdge() const {
auto memoryFromEdgePtr = memoryFromEdge.lock();
if (!memoryFromEdgePtr) {
IE_THROW() << "Cannot get memory ptr for edge( " << name() << " ). The pointer on the edge with memory is empty!";
@ -470,14 +468,14 @@ MKLDNNEdgePtr MKLDNNEdge::getSharedEdge() const {
return memoryFromEdgePtr;
}
MKLDNNEdgePtr MKLDNNEdge::getSharedEdge(std::nothrow_t) const {
EdgePtr Edge::getSharedEdge(std::nothrow_t) const {
return memoryFromEdge.lock();
}
void MKLDNNEdge::init() {
void Edge::init() {
if (status != Status::NeedAllocation && status != Status::Uninitialized)
return;
MKLDNNEdgePtr edgePtr = getBaseEdge();
EdgePtr edgePtr = getBaseEdge();
if (edgePtr.get() == this) {
changeStatus(Status::NeedAllocation);
} else {
@ -511,7 +509,7 @@ void MKLDNNEdge::init() {
* @param type some magic enum values... description needed
* @return root of view-on-memory subgraph
*/
MKLDNNEdgePtr MKLDNNEdge::getBaseEdge(int look) {
EdgePtr Edge::getBaseEdge(int look) {
auto parentConfig = getParent()->getSelectedPrimitiveDescriptor()->getConfig();
auto childConfig = getChild()->getSelectedPrimitiveDescriptor()->getConfig();
int inputNum = getInputNum();
@ -562,7 +560,7 @@ MKLDNNEdgePtr MKLDNNEdge::getBaseEdge(int look) {
return edges_for_same_port[0];
}
bool MKLDNNEdge::inPlace(LOOK look) {
bool Edge::inPlace(LOOK look) {
auto parentSPD = getParent()->getSelectedPrimitiveDescriptor();
auto childSPD = getChild()->getSelectedPrimitiveDescriptor();
if (!parentSPD || !childSPD)

View File

@ -17,17 +17,17 @@
namespace ov {
namespace intel_cpu {
class MKLDNNNode;
class MKLDNNEdge;
class Node;
class Edge;
using MKLDNNEdgePtr = std::shared_ptr<MKLDNNEdge>;
using MKLDNNEdgeWeakPtr = std::weak_ptr<MKLDNNEdge>;
using EdgePtr = std::shared_ptr<Edge>;
using EdgeWeakPtr = std::weak_ptr<Edge>;
class MKLDNNEdge {
class Edge {
public:
MKLDNNEdge(const std::shared_ptr<MKLDNNNode>& parent,
const std::shared_ptr<MKLDNNNode>& child,
int pr_port = 0, int ch_port = 0);
Edge(const std::shared_ptr<Node>& parent,
const std::shared_ptr<Node>& child,
int pr_port = 0, int ch_port = 0);
enum class Status {
Uninitialized,
@ -51,16 +51,16 @@ public:
void init();
void allocate(const void* mem_ptr = nullptr);
void externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache);
void reuse(MKLDNNMemoryPtr ptr);
void externalAllocate(WeightsSharing::Ptr weightsCache);
void reuse(MemoryPtr ptr);
void validate();
void drop();
const std::shared_ptr<MKLDNNNode> getParent() const;
const std::shared_ptr<MKLDNNNode> getChild() const;
const std::shared_ptr<Node> getParent() const;
const std::shared_ptr<Node> getChild() const;
const MKLDNNMemory& getMemory();
MKLDNNMemoryPtr& getMemoryPtr();
const Memory& getMemory();
MemoryPtr& getMemoryPtr();
ReorderStatus needReorder();
bool isDropped() const;
@ -71,9 +71,9 @@ public:
void setChildPort(const size_t port) { child_port = port; }
void sharedMemFrom(const MKLDNNEdgePtr& edge);
MKLDNNEdgePtr getSharedEdge() const;
MKLDNNEdgePtr getSharedEdge(std::nothrow_t) const;
void sharedMemFrom(const EdgePtr& edge);
EdgePtr getSharedEdge() const;
EdgePtr getSharedEdge(std::nothrow_t) const;
bool hasDefinedMaxSize() const {
return getDesc().hasDefinedMaxSize();
@ -82,14 +82,14 @@ public:
private:
std::string name() const;
std::weak_ptr<MKLDNNNode> parent;
std::weak_ptr<MKLDNNNode> child;
std::weak_ptr<Node> parent;
std::weak_ptr<Node> child;
int parent_port;
int child_port;
bool useExternalMemory = false;
MKLDNNEdgeWeakPtr memoryFromEdge;
MKLDNNMemoryPtr memoryPtr;
EdgeWeakPtr memoryFromEdge;
MemoryPtr memoryPtr;
Status status = Status::Uninitialized;
const MemoryDesc& getInputDesc() const;
@ -102,9 +102,9 @@ private:
enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2, LOOK_BOTH = LOOK_UP | LOOK_DOWN, LOOK_NO_RECURRENT = 4 };
MKLDNNEdgePtr getBaseEdge(int look = LOOK_BOTH);
EdgePtr getBaseEdge(int look = LOOK_BOTH);
bool inPlace(LOOK look = LOOK_BOTH);
friend class MKLDNNGraph;
friend class Graph;
};
} // namespace intel_cpu
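For orientation, a minimal standalone sketch of the ownership pattern the renamed Edge/Node classes rely on: nodes refer to their edges only through weak pointers, so dropping an edge just erases the matching entries from the adjacent nodes' lists. The MiniNode/MiniEdge names are hypothetical and the example deliberately omits ports, memory and statuses; it is an illustration, not the plugin code.

#include <algorithm>
#include <memory>
#include <vector>

struct MiniEdge;

struct MiniNode {
    std::vector<std::weak_ptr<MiniEdge>> parentEdges;
    std::vector<std::weak_ptr<MiniEdge>> childEdges;
};

struct MiniEdge {
    std::weak_ptr<MiniNode> parent;
    std::weak_ptr<MiniNode> child;

    void drop() {
        // Erase the weak_ptr that points at this edge from a node's edge list.
        auto dropFrom = [this](std::vector<std::weak_ptr<MiniEdge>>& list) {
            auto it = std::find_if(list.begin(), list.end(),
                                   [this](const std::weak_ptr<MiniEdge>& e) { return e.lock().get() == this; });
            if (it != list.end())
                list.erase(it);
        };
        if (auto p = parent.lock())
            dropFrom(p->childEdges);   // detach from the producer side
        if (auto c = child.lock())
            dropFrom(c->parentEdges);  // detach from the consumer side
    }
};

int main() {
    // The graph owns the shared_ptrs; the nodes only observe the edge.
    auto producer = std::make_shared<MiniNode>();
    auto consumer = std::make_shared<MiniNode>();
    auto edge = std::make_shared<MiniEdge>();
    edge->parent = producer;
    edge->child = consumer;
    producer->childEdges.push_back(edge);
    consumer->parentEdges.push_back(edge);

    edge->drop();  // both adjacency lists become empty again
    return producer->childEdges.empty() && consumer->parentEdges.empty() ? 0 : 1;
}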

View File

@ -49,7 +49,7 @@ struct jit_snippets_compile_args {
/// \param in[0] The number of the node inputs
/// \param in[1] The number of the node outputs
///
// Todo: Scheduler dims and offsets are currently calculated in MKLDNN Subgraph node and passed to the KernelEmitter.
// Todo: Scheduler dims and offsets are currently calculated in Subgraph node and passed to the KernelEmitter.
// However, it seems more natural to calculate all the offsets right in the Kernel op, because the calculation is
// not device-specific. It is based only on input/output dims (which we already know) and harness num dims
// (which we should pass from the plugin). It seems also better to wrap the enclosed emitters in tiles in the Kernel op

View File

@ -33,25 +33,27 @@
#include <utility>
#include <cstring>
using namespace ov::intel_cpu;
using namespace InferenceEngine;
using namespace InferenceEngine::details;
namespace ov {
namespace intel_cpu {
InferenceEngine::IInferRequestInternal::Ptr
MKLDNNExecNetwork::CreateInferRequestImpl(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs) {
ExecNetwork::CreateInferRequestImpl(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs) {
if (!this->_plugin)
return nullptr;
const auto& core = _plugin->GetCore();
if (!core || !core->isNewAPI())
return nullptr;
return std::make_shared<MKLDNNInferRequest>(inputs, outputs, std::static_pointer_cast<MKLDNNExecNetwork>(shared_from_this()));
return std::make_shared<InferRequest>(inputs, outputs, std::static_pointer_cast<ExecNetwork>(shared_from_this()));
}
InferenceEngine::IInferRequestInternal::Ptr
MKLDNNExecNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) {
return std::make_shared<MKLDNNLegacyInferRequest>(networkInputs, networkOutputs, std::static_pointer_cast<MKLDNNExecNetwork>(shared_from_this()));
ExecNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) {
return std::make_shared<LegacyInferRequest>(networkInputs, networkOutputs, std::static_pointer_cast<ExecNetwork>(shared_from_this()));
}
struct ImmediateSerialExecutor : public ITaskExecutor {
@ -62,11 +64,11 @@ struct ImmediateSerialExecutor : public ITaskExecutor {
std::mutex _mutex;
};
MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
const Config &cfg,
const MKLDNNExtensionManager::Ptr& extMgr,
NumaNodesWeights &numaNodesWeights,
const std::shared_ptr<InferenceEngine::IInferencePlugin>& plugin) :
ExecNetwork::ExecNetwork(const InferenceEngine::CNNNetwork &network,
const Config &cfg,
const ExtensionManager::Ptr& extMgr,
NumaNodesWeights &numaNodesWeights,
const std::shared_ptr<InferenceEngine::IInferencePlugin>& plugin) :
InferenceEngine::ExecutableNetworkThreadSafeDefault{nullptr, nullptr},
extensionManager(extMgr),
_cfg{cfg},
@ -92,7 +94,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
} else if (_cfg.batchLimit > 1) {
// check topology for applicability
if (!CanProcessDynBatch(_network)) {
IE_THROW() << "MKLDNNGraph::CreateGraph: such topology cannot be compiled for dynamic batch!";
IE_THROW() << "Graph::CreateGraph: such topology cannot be compiled for dynamic batch!";
}
}
@ -126,12 +128,12 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
if (_cfg.streamExecutorConfig._streams != 0) {
for (auto&& task : tasks) {
task = [this] {
MKLDNNExecNetwork::GetGraph();
ExecNetwork::GetGraph();
};
}
_taskExecutor->runAndWait(tasks);
} else {
MKLDNNExecNetwork::GetGraph();
ExecNetwork::GetGraph();
}
// Save all MemoryLayer data tensors. Will use insight about mechanics
@ -139,10 +141,10 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
// producer as storage for tensor to keep it between infer calls.
if (_graphs.size() == 1) {
for (auto &node : GetGraph()._graph.GetNodes()) {
if (node->getType() == MemoryInput) {
auto memoryNode = dynamic_cast<MKLDNNMemoryInputNode*>(node.get());
if (node->getType() == Type::MemoryInput) {
auto memoryNode = dynamic_cast<node::MemoryInput*>(node.get());
if (!memoryNode) {
IE_THROW() << "Cannot cast " << node->getName() << " to MKLDNNMemoryInputNode";
IE_THROW() << "Cannot cast " << node->getName() << " to MemoryInput";
}
auto state_store = memoryNode->getStore();
auto state_name = memoryNode->getId();
@ -152,13 +154,13 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
if (suffix_idx != std::string::npos)
state_name = state_name.substr(0, suffix_idx);
memoryStates.emplace_back(new MKLDNNVariableState(state_name, state_store));
memoryStates.emplace_back(new VariableState(state_name, state_store));
}
}
}
}
MKLDNNExecNetwork::Graph::Lock MKLDNNExecNetwork::GetGraph() const {
ExecNetwork::GraphGuard::Lock ExecNetwork::GetGraph() const {
int streamId = 0;
int numaNodeId = 0;
auto streamsExecutor = dynamic_cast<InferenceEngine::IStreamsExecutor*>(_taskExecutor.get());
@ -166,7 +168,7 @@ MKLDNNExecNetwork::Graph::Lock MKLDNNExecNetwork::GetGraph() const {
streamId = streamsExecutor->GetStreamId();
numaNodeId = streamsExecutor->GetNumaNodeId();
}
auto graphLock = Graph::Lock(_graphs[streamId % _graphs.size()]);
auto graphLock = GraphGuard::Lock(_graphs[streamId % _graphs.size()]);
if (!graphLock._graph.IsReady()) {
std::exception_ptr exception;
auto makeGraph = [&] {
@ -192,31 +194,31 @@ MKLDNNExecNetwork::Graph::Lock MKLDNNExecNetwork::GetGraph() const {
return graphLock;
}
void MKLDNNExecNetwork::setProperty(const std::map<std::string, std::string> &properties) {
void ExecNetwork::setProperty(const std::map<std::string, std::string> &properties) {
{
std::lock_guard<std::mutex> lock{_cfgMutex};
_cfg.readProperties(properties);
}
for (auto& g : _graphs) {
auto graphLock = Graph::Lock(g);
auto graphLock = GraphGuard::Lock(g);
if (graphLock._graph.IsReady()) {
graphLock._graph.setProperty(properties);
}
}
}
InferenceEngine::IInferRequestInternal::Ptr MKLDNNExecNetwork::CreateInferRequest() {
return CreateAsyncInferRequestFromSync<MKLDNNAsyncInferRequest>();
InferenceEngine::IInferRequestInternal::Ptr ExecNetwork::CreateInferRequest() {
return CreateAsyncInferRequestFromSync<AsyncInferRequest>();
}
std::shared_ptr<ngraph::Function> MKLDNNExecNetwork::GetExecGraphInfo() {
std::shared_ptr<ngraph::Function> ExecNetwork::GetExecGraphInfo() {
if (_graphs.empty())
IE_THROW() << "No graph was found";
return GetGraph()._graph.dump();
}
bool MKLDNNExecNetwork::isLegacyAPI() const {
bool ExecNetwork::isLegacyAPI() const {
const auto& core = _plugin->GetCore();
if (!core)
IE_THROW() << "Unable to get API version. Core is unavailable";
@ -224,7 +226,7 @@ bool MKLDNNExecNetwork::isLegacyAPI() const {
return !core->isNewAPI();
}
Parameter MKLDNNExecNetwork::GetConfigLegacy(const std::string &name) const {
Parameter ExecNetwork::GetConfigLegacy(const std::string &name) const {
if (_graphs.empty())
IE_THROW() << "No graph was found";
/* legacy implementation returns all the parameters, which is actually not correct
@ -244,13 +246,13 @@ Parameter MKLDNNExecNetwork::GetConfigLegacy(const std::string &name) const {
* All the RO properties are covered with GetMetric() method and
* GetConfig() is not expected to be called by new API with params from new configuration API.
*/
Parameter MKLDNNExecNetwork::GetConfig(const std::string &name) const {
Parameter ExecNetwork::GetConfig(const std::string &name) const {
/* Internally legacy parameters are used with new API as part of migration procedure.
* This fallback can be removed as soon as migration completed */
return GetConfigLegacy(name);
}
InferenceEngine::Parameter MKLDNNExecNetwork::GetMetricLegacy(const std::string &name, const Graph& graph) const {
InferenceEngine::Parameter ExecNetwork::GetMetricLegacy(const std::string &name, const GraphGuard& graph) const {
if (name == METRIC_KEY(NETWORK_NAME)) {
IE_SET_METRIC_RETURN(NETWORK_NAME, graph.dump()->get_friendly_name());
} else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
@ -278,7 +280,7 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetricLegacy(const std::string
}
}
InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name) const {
InferenceEngine::Parameter ExecNetwork::GetMetric(const std::string &name) const {
if (_graphs.empty())
IE_THROW() << "No graph was found";
// @todo Can't we just use local copy (_cfg) instead?
@ -354,7 +356,7 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name)
return GetMetricLegacy(name, graph);
}
bool MKLDNNExecNetwork::canBeExecViaLegacyDynBatch(std::shared_ptr<const ov::Model> function, int64_t& maxBatchSize) const {
bool ExecNetwork::canBeExecViaLegacyDynBatch(std::shared_ptr<const ov::Model> function, int64_t& maxBatchSize) const {
maxBatchSize = -1;
auto isDynBatchWithUpperBound = [maxBatchSize](const ov::PartialShape& shape) -> bool {
if (shape.rank().is_dynamic()) {
@ -401,20 +403,20 @@ bool MKLDNNExecNetwork::canBeExecViaLegacyDynBatch(std::shared_ptr<const ov::Mod
}
auto type = TypeFromName(op->get_type_name());
if (!one_of(type, Input,
Output,
Convolution,
Deconvolution,
Lrn,
Pooling,
FullyConnected,
MatMul,
Softmax,
Split,
Concatenation,
Eltwise,
Reshape,
Tile)) {
if (!one_of(type, Type::Input,
Type::Output,
Type::Convolution,
Type::Deconvolution,
Type::Lrn,
Type::Pooling,
Type::FullyConnected,
Type::MatMul,
Type::Softmax,
Type::Split,
Type::Concatenation,
Type::Eltwise,
Type::Reshape,
Type::Tile)) {
return false;
}
@ -424,7 +426,7 @@ bool MKLDNNExecNetwork::canBeExecViaLegacyDynBatch(std::shared_ptr<const ov::Mod
}
}
if (type == Tile) {
if (type == Type::Tile) {
const auto repeatsNode = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(op->get_input_node_shared_ptr(1));
const auto tile = std::dynamic_pointer_cast<const ngraph::opset1::Tile>(op);
if (!(tile && repeatsNode && repeatsNode->cast_vector<int64_t>()[0] == 1)) {
@ -432,7 +434,7 @@ bool MKLDNNExecNetwork::canBeExecViaLegacyDynBatch(std::shared_ptr<const ov::Mod
}
}
if (type == Reshape) {
if (type == Type::Reshape) {
const auto inShape = op->get_input_partial_shape(0);
const auto outShape = op->get_output_partial_shape(0);
if (isDynBatchWithUpperBound(inShape) && isDynBatchWithUpperBound(outShape)) {
@ -452,34 +454,34 @@ bool MKLDNNExecNetwork::canBeExecViaLegacyDynBatch(std::shared_ptr<const ov::Mod
}
}
if (type == Split) {
if (type == Type::Split) {
const auto axis = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(op->get_input_node_shared_ptr(1));
if (!axis || axis->cast_vector<int64_t>()[0] == 0) {
return false;
}
}
if (type == Concatenation) {
if (type == Type::Concatenation) {
const auto concat = std::dynamic_pointer_cast<const ngraph::op::v0::Concat>(op);
if (!concat || concat->get_axis() == 0) {
return false;
}
}
if (type == Softmax) {
if (type == Type::Softmax) {
const auto softmax = std::dynamic_pointer_cast<const ngraph::opset1::Softmax>(op);
if (!softmax || softmax->get_axis() == 0) {
return false;
}
}
if ((type == MatMul || type == FullyConnected) &&
if ((type == Type::MatMul || type == Type::FullyConnected) &&
(op->get_input_node_ptr(1)->get_type_info() != ngraph::op::Constant::get_type_info_static() ||
op->get_input_partial_shape(0).rank().get_length() < 2)) {
return false;
}
if (type == Eltwise && std::dynamic_pointer_cast<ov::op::util::BinaryElementwiseArithmetic>(op) &&
if (type == Type::Eltwise && std::dynamic_pointer_cast<ov::op::util::BinaryElementwiseArithmetic>(op) &&
!(op->get_input_node_ptr(0)->get_type_info() == ngraph::op::Constant::get_type_info_static() ||
op->get_input_node_ptr(1)->get_type_info() == ngraph::op::Constant::get_type_info_static()) &&
op->get_input_partial_shape(0).rank().get_length() != op->get_input_partial_shape(1).rank().get_length()) {
@ -489,7 +491,7 @@ bool MKLDNNExecNetwork::canBeExecViaLegacyDynBatch(std::shared_ptr<const ov::Mod
return true;
}
bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::CNNNetwork &network) const {
bool ExecNetwork::CanProcessDynBatch(const InferenceEngine::CNNNetwork &network) const {
InputsDataMap inputs = network.getInputsInfo();
if (inputs.empty())
@ -503,7 +505,7 @@ bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::CNNNetwork &ne
auto ops = function->get_ordered_ops();
for (const auto& op : ops) {
auto type = TypeFromName(op->get_type_name());
if (type == Tile) {
if (type == Type::Tile) {
const auto repeatsNode = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(op->get_input_node_shared_ptr(1));
if (!repeatsNode)
return false;
@ -512,23 +514,23 @@ bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::CNNNetwork &ne
continue;
}
if (type == Reshape) {
if (type == Type::Reshape) {
if (op->get_input_shape(0)[0] == op->get_output_shape(0)[0])
continue;
}
if (type != Input &&
type != Output &&
type != Convolution &&
type != Deconvolution &&
type != Lrn &&
type != Pooling &&
type != FullyConnected &&
type != MatMul &&
type != Softmax &&
type != Split &&
type != Concatenation &&
type != Eltwise) {
if (type != Type::Input &&
type != Type::Output &&
type != Type::Convolution &&
type != Type::Deconvolution &&
type != Type::Lrn &&
type != Type::Pooling &&
type != Type::FullyConnected &&
type != Type::MatMul &&
type != Type::Softmax &&
type != Type::Split &&
type != Type::Concatenation &&
type != Type::Eltwise) {
return false;
}
}
@ -536,7 +538,10 @@ bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::CNNNetwork &ne
return true;
}
void MKLDNNExecNetwork::Export(std::ostream& modelStream) {
void ExecNetwork::Export(std::ostream& modelStream) {
CNNNetworkSerializer serializer(modelStream, extensionManager);
serializer <<_network;
}
} // namespace intel_cpu
} // namespace ov
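As a rough standalone illustration of the per-stream handling in GetGraph() above (the stream id picks a slot, and the slot's mutex stays held while the graph is lazily built and used), here is a sketch with hypothetical StreamGraphs/DummyGraph types; it mirrors the GraphGuard idea but is not the plugin code.

#include <cstddef>
#include <deque>
#include <mutex>
#include <string>

struct DummyGraph {
    bool ready = false;
    std::string name;
};

// One mutex-guarded graph per slot, mirroring the GraphGuard idea.
struct GuardedGraph {
    std::mutex mutex;
    DummyGraph graph;
};

class StreamGraphs {
public:
    explicit StreamGraphs(std::size_t count) {
        for (std::size_t i = 0; i < count; ++i)
            slots.emplace_back();  // deque keeps elements in place, like the plugin's _graphs
    }

    // The returned lock keeps the slot's mutex held while the caller touches the graph.
    struct Lock : std::unique_lock<std::mutex> {
        explicit Lock(GuardedGraph& g) : std::unique_lock<std::mutex>(g.mutex), graph(g.graph) {}
        DummyGraph& graph;
    };

    Lock get(std::size_t streamId) {
        auto& slot = slots[streamId % slots.size()];
        Lock lock(slot);
        if (!lock.graph.ready) {  // lazy, per-slot initialization under the lock
            lock.graph.name = "graph for slot " + std::to_string(streamId % slots.size());
            lock.graph.ready = true;
        }
        return lock;
    }

private:
    std::deque<GuardedGraph> slots;
};

int main() {
    StreamGraphs graphs(2);
    auto lock = graphs.get(5);  // stream 5 maps to slot 5 % 2 == 1
    return lock.graph.ready ? 0 : 1;
}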

View File

@ -20,9 +20,9 @@
namespace ov {
namespace intel_cpu {
class MKLDNNExecNetwork: public InferenceEngine::ExecutableNetworkThreadSafeDefault {
class ExecNetwork: public InferenceEngine::ExecutableNetworkThreadSafeDefault {
public:
typedef std::shared_ptr<MKLDNNExecNetwork> Ptr;
typedef std::shared_ptr<ExecNetwork> Ptr;
std::shared_ptr<InferenceEngine::IInferRequestInternal>
CreateInferRequestImpl(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
@ -34,9 +34,9 @@ public:
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override;
MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, const Config &cfg,
const MKLDNNExtensionManager::Ptr &extMgr, NumaNodesWeights &weightsSharing,
const std::shared_ptr<InferenceEngine::IInferencePlugin>& plugin);
ExecNetwork(const InferenceEngine::CNNNetwork &network, const Config &cfg,
const ExtensionManager::Ptr &extMgr, NumaNodesWeights &weightsSharing,
const std::shared_ptr<InferenceEngine::IInferencePlugin>& plugin);
void setProperty(const std::map<std::string, std::string> &properties);
@ -49,31 +49,31 @@ public:
void Export(std::ostream& modelStream) override;
protected:
friend class MKLDNNInferRequestBase;
MKLDNNExtensionManager::Ptr extensionManager;
friend class InferRequestBase;
ExtensionManager::Ptr extensionManager;
std::vector<InferenceEngine::IVariableStateInternal::Ptr> memoryStates;
const InferenceEngine::CNNNetwork _network;
mutable std::mutex _cfgMutex;
Config _cfg;
std::atomic_int _numRequests = {0};
std::string _name;
struct Graph : public MKLDNNGraph {
struct GraphGuard : public Graph {
std::mutex _mutex;
struct Lock : public std::unique_lock<std::mutex> {
explicit Lock(Graph& graph) : std::unique_lock<std::mutex>(graph._mutex), _graph(graph) {}
Graph& _graph;
explicit Lock(GraphGuard& graph) : std::unique_lock<std::mutex>(graph._mutex), _graph(graph) {}
GraphGuard& _graph;
};
};
// WARNING: Do not use _graphs directly.
mutable std::deque<Graph> _graphs;
mutable std::deque<GraphGuard> _graphs;
NumaNodesWeights& _numaNodesWeights;
/* WARNING: Use GetGraph() function to get access to graph in current stream.
* NOTE: Main thread is interpreted as master thread of external stream so use this function to get access to graphs
* even from main thread
*/
Graph::Lock GetGraph() const;
GraphGuard::Lock GetGraph() const;
bool canBeExecViaLegacyDynBatch(std::shared_ptr<const ov::Model> function, int64_t& maxBatchSize) const;
bool CanProcessDynBatch(const InferenceEngine::CNNNetwork &network) const;
@ -82,7 +82,7 @@ protected:
InferenceEngine::Parameter GetConfigLegacy(const std::string &name) const;
InferenceEngine::Parameter GetMetricLegacy(const std::string &name, const Graph& graph) const;
InferenceEngine::Parameter GetMetricLegacy(const std::string &name, const GraphGuard& graph) const;
};
} // namespace intel_cpu

View File

@ -18,19 +18,19 @@
namespace ov {
namespace intel_cpu {
void MKLDNNExtension::GetVersion(const InferenceEngine::Version*& versionInfo) const noexcept {
void Extension::GetVersion(const InferenceEngine::Version*& versionInfo) const noexcept {
static const InferenceEngine::Version version = {
{1, 0}, // extension API version
"1.0",
"MKLDNNExtension" // extension description message
"Extension" // extension description message
};
versionInfo = &version;
}
void MKLDNNExtension::Unload() noexcept {}
void Extension::Unload() noexcept {}
std::map<std::string, ngraph::OpSet> MKLDNNExtension::getOpSets() {
std::map<std::string, ngraph::OpSet> Extension::getOpSets() {
auto cpu_plugin_opset = []() {
ngraph::OpSet opset;
@ -119,11 +119,11 @@ std::map<std::string, ngraph::OpSet> MKLDNNExtension::getOpSets() {
return opsets;
}
std::vector<std::string> MKLDNNExtension::getImplTypes(const std::shared_ptr<ngraph::Node>&) {
std::vector<std::string> Extension::getImplTypes(const std::shared_ptr<ngraph::Node>&) {
return {};
}
InferenceEngine::ILayerImpl::Ptr MKLDNNExtension::getImplementation(const std::shared_ptr<ngraph::Node>& node, const std::string& implType) {
InferenceEngine::ILayerImpl::Ptr Extension::getImplementation(const std::shared_ptr<ngraph::Node>& node, const std::string& implType) {
return nullptr;
}
@ -131,4 +131,4 @@ InferenceEngine::ILayerImpl::Ptr MKLDNNExtension::getImplementation(const std::s
} // namespace ov
// Generate exported function
IE_DEFINE_EXTENSION_CREATE_FUNCTION(ov::intel_cpu::MKLDNNExtension)
IE_DEFINE_EXTENSION_CREATE_FUNCTION(ov::intel_cpu::Extension)

View File

@ -9,7 +9,7 @@
namespace ov {
namespace intel_cpu {
class MKLDNNExtension : public InferenceEngine::IExtension {
class Extension : public InferenceEngine::IExtension {
public:
void GetVersion(const InferenceEngine::Version*& versionInfo) const noexcept override;
void Unload() noexcept override;

View File

@ -8,14 +8,16 @@
#include "extension_mngr.h"
using namespace ov::intel_cpu;
using namespace InferenceEngine;
void MKLDNNExtensionManager::AddExtension(const IExtensionPtr& extension) {
namespace ov {
namespace intel_cpu {
void ExtensionManager::AddExtension(const IExtensionPtr& extension) {
_extensions.push_back(extension);
}
InferenceEngine::ILayerImpl::Ptr MKLDNNExtensionManager::CreateImplementation(const std::shared_ptr<ngraph::Node>& op) {
InferenceEngine::ILayerImpl::Ptr ExtensionManager::CreateImplementation(const std::shared_ptr<ngraph::Node>& op) {
if (!op)
IE_THROW() << "Cannot get nGraph operation!";
for (const auto& ext : _extensions) {
@ -31,27 +33,9 @@ InferenceEngine::ILayerImpl::Ptr MKLDNNExtensionManager::CreateImplementation(co
return nullptr;
}
std::shared_ptr<InferenceEngine::ILayerImplFactory> MKLDNNExtensionManager::CreateExtensionFactory(const std::shared_ptr<ngraph::Node>& op) {
std::shared_ptr<ILayerImplFactory> factory;
for (auto& ext : _extensions) {
ResponseDesc responseDesc;
StatusCode rc = GENERAL_ERROR;
ILayerImplFactory* factory_ptr = nullptr;
if (auto mkldnnExt = dynamic_cast<Extensions::Cpu::MKLDNNExtensions*>(ext.get()))
rc = mkldnnExt->getFactoryFor(factory_ptr, op, &responseDesc);
if (rc != OK) {
factory = nullptr;
continue;
} else {
factory.reset(factory_ptr);
}
if (factory) {
break;
}
}
return factory;
}
const std::vector<InferenceEngine::IExtensionPtr> & MKLDNNExtensionManager::Extensions() const {
const std::vector<InferenceEngine::IExtensionPtr> & ExtensionManager::Extensions() const {
return _extensions;
}
} // namespace intel_cpu
} // namespace ov
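The lookup pattern used by ExtensionManager above is "ask every registered extension, first non-null answer wins". A simplified standalone sketch of that pattern with hypothetical ToyExtension/ToyExtensionManager types (not the Inference Engine interfaces):

#include <functional>
#include <memory>
#include <string>
#include <vector>

using ToyImpl = std::function<void()>;  // stand-in for a layer implementation

struct ToyExtension {
    // Returns an implementation for the given op type, or nullptr if unsupported.
    virtual std::shared_ptr<ToyImpl> getImplementation(const std::string& opType) = 0;
    virtual ~ToyExtension() = default;
};

class ToyExtensionManager {
public:
    void AddExtension(std::shared_ptr<ToyExtension> ext) { _extensions.push_back(std::move(ext)); }

    std::shared_ptr<ToyImpl> CreateImplementation(const std::string& opType) {
        for (const auto& ext : _extensions) {
            if (auto impl = ext->getImplementation(opType))
                return impl;  // the first extension that can handle the op wins
        }
        return nullptr;  // no registered extension implements this op
    }

private:
    std::vector<std::shared_ptr<ToyExtension>> _extensions;
};

struct ReluToyExtension : ToyExtension {
    std::shared_ptr<ToyImpl> getImplementation(const std::string& opType) override {
        if (opType != "Relu")
            return nullptr;
        return std::make_shared<ToyImpl>([] { /* compute ReLU here */ });
    }
};

int main() {
    ToyExtensionManager manager;
    manager.AddExtension(std::make_shared<ReluToyExtension>());
    return manager.CreateImplementation("Relu") ? 0 : 1;
}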

View File

@ -8,17 +8,15 @@
#include <vector>
#include <memory>
#include <ie_iextension.h>
#include "nodes/list.hpp"
namespace ov {
namespace intel_cpu {
class MKLDNNExtensionManager {
class ExtensionManager {
public:
using Ptr = std::shared_ptr<MKLDNNExtensionManager>;
MKLDNNExtensionManager() = default;
using Ptr = std::shared_ptr<ExtensionManager>;
ExtensionManager() = default;
InferenceEngine::ILayerImpl::Ptr CreateImplementation(const std::shared_ptr<ngraph::Node>& op);
std::shared_ptr<InferenceEngine::ILayerImplFactory> CreateExtensionFactory(const std::shared_ptr<ngraph::Node>& op);
void AddExtension(const InferenceEngine::IExtensionPtr& extension);
const std::vector<InferenceEngine::IExtensionPtr> & Extensions() const;
@ -28,4 +26,3 @@ private:
} // namespace intel_cpu
} // namespace ov

View File

@ -17,7 +17,7 @@
#include "graph.h"
#include "graph_dumper.h"
#include "graph_optimizer.h"
#include "extension_utils.h"
#include "dnnl_extension_utils.h"
#include "extension_mngr.h"
#include "memory_solver.hpp"
#include "itt.h"
@ -51,19 +51,21 @@
#include "memory_desc/dnnl_blocked_memory_desc.h"
using namespace mkldnn;
using namespace ov::intel_cpu;
using namespace InferenceEngine;
using namespace InferenceEngine::details;
typedef std::unordered_set<MKLDNNEdgePtr> edge_cluster_t;
namespace ov {
namespace intel_cpu {
typedef std::unordered_set<EdgePtr> edge_cluster_t;
typedef std::vector<edge_cluster_t> edge_clusters_t;
mkldnn::engine MKLDNNGraph::eng(mkldnn::engine::kind::cpu, 0);
mkldnn::engine Graph::eng(mkldnn::engine::kind::cpu, 0);
template<typename NET>
void MKLDNNGraph::CreateGraph(NET &net, const MKLDNNExtensionManager::Ptr& extMgr,
MKLDNNWeightsSharing::Ptr &w_cache) {
OV_ITT_SCOPE(FIRST_INFERENCE, ov::intel_cpu::itt::domains::intel_cpu_LT, "CreateGraph");
void Graph::CreateGraph(NET &net, const ExtensionManager::Ptr& extMgr,
WeightsSharing::Ptr &w_cache) {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "CreateGraph");
if (IsReady())
ForgetGraphData();
@ -80,9 +82,9 @@ void MKLDNNGraph::CreateGraph(NET &net, const MKLDNNExtensionManager::Ptr& extMg
CPU_DEBUG_CAP_ENABLE(serialize(*this));
}
void MKLDNNGraph::CreateGraph(const std::vector<MKLDNNNodePtr> &graphNodes,
const std::vector<MKLDNNEdgePtr> &graphEdges,
MKLDNNWeightsSharing::Ptr &w_cache,
void Graph::CreateGraph(const std::vector<NodePtr> &graphNodes,
const std::vector<EdgePtr> &graphEdges,
WeightsSharing::Ptr &w_cache,
std::string name) {
if (IsReady())
ForgetGraphData();
@ -112,12 +114,12 @@ void MKLDNNGraph::CreateGraph(const std::vector<MKLDNNNodePtr> &graphNodes,
CPU_DEBUG_CAP_ENABLE(serialize(*this));
}
template void MKLDNNGraph::CreateGraph(const std::shared_ptr<const ngraph::Function>&,
const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&);
template void MKLDNNGraph::CreateGraph(const CNNNetwork&,
const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&);
template void Graph::CreateGraph(const std::shared_ptr<const ngraph::Function>&,
const ExtensionManager::Ptr&, WeightsSharing::Ptr&);
template void Graph::CreateGraph(const CNNNetwork&,
const ExtensionManager::Ptr&, WeightsSharing::Ptr&);
void MKLDNNGraph::Replicate(const std::shared_ptr<const ov::Model> &subgraph, const MKLDNNExtensionManager::Ptr& extMgr) {
void Graph::Replicate(const std::shared_ptr<const ov::Model> &subgraph, const ExtensionManager::Ptr& extMgr) {
this->_name = "subgraph";
this->reuse_io_tensors = false;
@ -125,7 +127,7 @@ void MKLDNNGraph::Replicate(const std::shared_ptr<const ov::Model> &subgraph, co
ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(subgraph);
// Map data object onto producer node
std::map<std::shared_ptr<ov::Node>, MKLDNNNodePtr> op2node;
std::map<std::shared_ptr<ov::Node>, NodePtr> op2node;
// nodes which have no consumers (output or just unused) but are not marked as graph outputs.
// Will be stored as fake output separately.
@ -143,7 +145,7 @@ void MKLDNNGraph::Replicate(const std::shared_ptr<const ov::Model> &subgraph, co
};
for (const auto op : subgraph->get_ordered_ops()) {
const MKLDNNNodePtr node {MKLDNNNode::factory().create(op, getEngine(), extMgr, weightsCache)};
const NodePtr node {Node::factory().create(op, getEngine(), extMgr, weightsCache)};
if (isQuantized()) {
node->setQuantizedGraphFlag(true);
}
@ -168,12 +170,12 @@ void MKLDNNGraph::Replicate(const std::shared_ptr<const ov::Model> &subgraph, co
auto parentOp = op->get_input_node_shared_ptr(port);
auto parentNode = op2node[parentOp];
MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, node, getParentOutputPort(op, parentOp, port), static_cast<int>(port)));
EdgePtr edge(new Edge(parentNode, node, getParentOutputPort(op, parentOp, port), static_cast<int>(port)));
node->addEdge(edge);
graphEdges.push_back(edge);
}
if (!ov::intel_cpu::one_of(op->get_type_info(),
if (!one_of(op->get_type_info(),
ngraph::op::v0::Result::get_type_info_static(),
ngraph::op::v3::Assign::get_type_info_static(),
ngraph::op::v6::Assign::get_type_info_static())) {
@ -190,18 +192,18 @@ void MKLDNNGraph::Replicate(const std::shared_ptr<const ov::Model> &subgraph, co
auto parentNode = op2node[unusedOutput.get_node_shared_ptr()];
const auto port = unusedOutput.get_index();
const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName();
const MKLDNNNodePtr outNode = std::make_shared<MKLDNNInputNode>(parentNode->outputShapes[port],
const NodePtr outNode = std::make_shared<node::Input>(parentNode->outputShapes[port],
parentNode->getOriginalOutputPrecisionAtPort(port),
nodeName, "Result", getEngine(), weightsCache);
MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0));
EdgePtr edge(new Edge(parentNode, outNode, port, 0));
outNode->addEdge(edge);
graphEdges.push_back(edge);
graphNodes.push_back(outNode);
}
}
void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) {
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::intel_cpu_LT, "MKLDNNGraph::Replicate", "CNNNetwork");
void Graph::Replicate(const CNNNetwork &network, const ExtensionManager::Ptr& extMgr) {
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::intel_cpu_LT, "Graph::Replicate", "CNNNetwork");
InputsDataMap inputsInfo = network.getInputsInfo();
OutputsDataMap outputsInfo = network.getOutputsInfo();
@ -237,7 +239,7 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
auto orderedOps = func->get_ordered_ops();
// TODO [NM]: unordered_map is preferred from a performance perspective. Needs a hash for ngraph::Node
std::map<std::shared_ptr<ngraph::Node>, MKLDNNNodePtr> op2node;
std::map<std::shared_ptr<ngraph::Node>, NodePtr> op2node;
std::deque<ngraph::Output<ngraph::Node>> unusedOutputs; // nodes which have no consumers (output or just unused)
auto getParentOutputPort = [](const std::shared_ptr<ngraph::Node> childOp, const std::shared_ptr<ngraph::Node> parentOp,
@ -255,7 +257,7 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
// Replicate All Nodes in topological order
for (const auto& op : orderedOps) {
const MKLDNNNodePtr node(MKLDNNNode::factory().create(op, getEngine(), extMgr, weightsCache));
const NodePtr node(Node::factory().create(op, getEngine(), extMgr, weightsCache));
if (isQuantized()) {
node->setQuantizedGraphFlag(true);
}
@ -287,12 +289,12 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
auto parentOp = op->get_input_node_shared_ptr(port);
auto parentNode = op2node[parentOp];
MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, node, getParentOutputPort(op, parentOp, port), static_cast<int>(port)));
EdgePtr edge(new Edge(parentNode, node, getParentOutputPort(op, parentOp, port), static_cast<int>(port)));
node->addEdge(edge);
graphEdges.push_back(edge);
}
if (!ov::intel_cpu::one_of(op->get_type_info(),
if (!one_of(op->get_type_info(),
ngraph::op::v0::Result::get_type_info_static(),
ngraph::op::v3::Assign::get_type_info_static(),
ngraph::op::v6::Assign::get_type_info_static())) {
@ -309,10 +311,10 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
auto parentNode = op2node[unusedOutput.get_node_shared_ptr()];
const auto port = unusedOutput.get_index();
const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName();
const MKLDNNNodePtr outNode = std::make_shared<MKLDNNInputNode>(parentNode->outputShapes[port],
const NodePtr outNode = std::make_shared<node::Input>(parentNode->outputShapes[port],
parentNode->getOriginalOutputPrecisionAtPort(port),
nodeName, "Result", getEngine(), weightsCache);
MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0));
EdgePtr edge(new Edge(parentNode, outNode, port, 0));
outNode->addEdge(edge);
graphEdges.push_back(edge);
graphNodes.push_back(outNode);
@ -321,10 +323,10 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
if (config.enforceBF16)
EnforceBF16();
auto hasSubgraphConsumers = [] (const MKLDNNNodePtr& node) -> bool {
auto hasSubgraphConsumers = [] (const NodePtr& node) -> bool {
const auto & childEdges = node->getChildEdges();
return std::any_of(childEdges.begin(), childEdges.end(),
[] (const MKLDNNEdgeWeakPtr& edge) -> bool {
[] (const EdgeWeakPtr& edge) -> bool {
auto edgePtr = edge.lock();
if (!edgePtr)
return false;
@ -372,8 +374,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
}
}
void MKLDNNGraph::InitGraph() {
MKLDNNGraphOptimizer optimizer;
void Graph::InitGraph() {
GraphOptimizer optimizer;
SortTopologically();
InitNodes();
@ -404,19 +406,19 @@ void MKLDNNGraph::InitGraph() {
ExecuteConstantNodesOnly();
}
void MKLDNNGraph::InitNodes() {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "MKLDNNGraph::InitNodes");
void Graph::InitNodes() {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::InitNodes");
for (auto &node : graphNodes) {
node->init();
}
}
void MKLDNNGraph::InitDescriptors() {
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, ov::intel_cpu::itt::domains::intel_cpu_LT, "InitDescriptors", "Prepare");
void Graph::InitDescriptors() {
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::intel_cpu_LT, "InitDescriptors", "Prepare");
for (auto &node : graphNodes) {
if (node->getType() == Input && _normalizePreprocMap.find(node->getName()) != _normalizePreprocMap.end()) {
auto *inputNode = dynamic_cast<MKLDNNInputNode *>(node.get());
if (node->getType() == Type::Input && _normalizePreprocMap.find(node->getName()) != _normalizePreprocMap.end()) {
auto *inputNode = dynamic_cast<node::Input *>(node.get());
if (inputNode)
inputNode->withMeanImage();
}
@ -436,16 +438,16 @@ void MKLDNNGraph::InitDescriptors() {
}
}
void MKLDNNGraph::InitOptimalPrimitiveDescriptors() {
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "MKLDNNGraph::InitOptimalPrimitiveDescriptors");
void Graph::InitOptimalPrimitiveDescriptors() {
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Graph::InitOptimalPrimitiveDescriptors");
for (auto &node : graphNodes) {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, node->profiling.initOptimalPrimitiveDescriptor);
node->initOptimalPrimitiveDescriptor();
}
}
void MKLDNNGraph::ExtractConstantAndExecutableNodes() {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "MKLDNNGraph::ExtractConstantAndExecutableNodes");
void Graph::ExtractConstantAndExecutableNodes() {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::ExtractConstantAndExecutableNodes");
for (const auto& graphNode : graphNodes) {
if (graphNode->isConstant()) {
constantGraphNodes.emplace_back(graphNode);
@ -460,13 +462,13 @@ void MKLDNNGraph::ExtractConstantAndExecutableNodes() {
}
}
void MKLDNNGraph::ExecuteConstantNodesOnly() const {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "MKLDNNGraph::ExecuteConstantNodesOnly");
void Graph::ExecuteConstantNodesOnly() const {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::ExecuteConstantNodesOnly");
mkldnn::stream stream(eng);
using shared_memory_ptr = MKLDNNWeightsSharing::MKLDNNSharedMemory::Ptr;
using shared_memory_ptr = WeightsSharing::SharedMemory::Ptr;
auto acquireSharedOutputs = [this](const MKLDNNNodePtr & node) {
auto acquireSharedOutputs = [this](const NodePtr & node) {
std::vector<shared_memory_ptr> outputs;
bool hasLocalAllocatedEdges = false;
bool hasExternalInvalidEdges = false;
@ -523,8 +525,8 @@ static bool isReorderAvailable(const MemoryDescPtr& parentDesc, const MemoryDesc
return mkldnn_success == status;
}
void MKLDNNGraph::InitEdges() {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "MKLDNNGraph::InitEdges");
void Graph::InitEdges() {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::InitEdges");
size_t numberOfEdges = graphEdges.size();
@ -533,9 +535,9 @@ void MKLDNNGraph::InitEdges() {
uniqueLayerNames.insert(node->getName());
}
auto insertReorder = [&](MKLDNNEdgePtr& edge, bool isOptimized) {
auto insertReorder = [&](EdgePtr& edge, bool isOptimized) {
std::string basicLayerName = edge->getParent()->getName() + "_" +
MKLDNNReorderNode::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" +
node::Reorder::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" +
edge->getChild()->getName();
std::string layerName = basicLayerName;
int idx = 0;
@ -558,8 +560,8 @@ void MKLDNNGraph::InitEdges() {
for (auto i = 0; i < numberOfEdges; i++) {
auto edge = graphEdges[i];
auto reorderStatus = graphEdges[i]->needReorder();
if (reorderStatus == MKLDNNEdge::ReorderStatus::Regular) {
MKLDNNEdge::ReorderStatus reorderStatusInternal = MKLDNNEdge::ReorderStatus::Regular;
if (reorderStatus == Edge::ReorderStatus::Regular) {
Edge::ReorderStatus reorderStatusInternal = Edge::ReorderStatus::Regular;
// Check if there is a reorder that needs the precision conversion
if (edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() &&
!isReorderAvailable(edge->getInputPortDesc()->getMemDesc(),
@ -572,33 +574,33 @@ void MKLDNNGraph::InitEdges() {
std::string convertName = edge->getParent()->getName() + "_" +
inDesc.getPrecision().name() + "_" + outDesc.getPrecision().name();
auto convertNode = std::make_shared<MKLDNNConvertNode>(inDesc.getShape(), inDesc.getPrecision(), outDesc.getPrecision(),
auto convertNode = std::make_shared<node::Convert>(inDesc.getShape(), inDesc.getPrecision(), outDesc.getPrecision(),
convertName, this->getEngine(), this->weightsCache);
convertNode->setDescs(inDesc, outDesc);
InsertNode(edge, convertNode, true);
//Check if reorder is still needed
reorderStatusInternal = convertNode->getChildEdgeAt(0)->needReorder();
if (reorderStatusInternal != MKLDNNEdge::ReorderStatus::No)
if (reorderStatusInternal != Edge::ReorderStatus::No)
edge = convertNode->getChildEdgeAt(0);
}
if (reorderStatusInternal != MKLDNNEdge::ReorderStatus::No) {
insertReorder(edge, reorderStatusInternal == MKLDNNEdge::ReorderStatus::Optimized);
if (reorderStatusInternal != Edge::ReorderStatus::No) {
insertReorder(edge, reorderStatusInternal == Edge::ReorderStatus::Optimized);
}
updateEdge(i);
} else if (reorderStatus == MKLDNNEdge::ReorderStatus::Optimized) {
} else if (reorderStatus == Edge::ReorderStatus::Optimized) {
insertReorder(edge, true);
updateEdge(i);
}
}
}
static inline bool isConstOutput(MKLDNNEdgePtr edge) {
static inline bool isConstOutput(EdgePtr edge) {
return edge->getParent()->isConstant() && !edge->getChild()->isConstant();
}
static edge_clusters_t findEdgeClusters(const std::vector<MKLDNNEdgePtr> & graphEdges) {
typedef std::unordered_map<MKLDNNEdgePtr, size_t> edge_cluster_idx_map_t;
static edge_clusters_t findEdgeClusters(const std::vector<EdgePtr> & graphEdges) {
typedef std::unordered_map<EdgePtr, size_t> edge_cluster_idx_map_t;
edge_clusters_t edge_clusters;
edge_cluster_idx_map_t edge_cluster_indices;
@ -613,7 +615,7 @@ static edge_clusters_t findEdgeClusters(const std::vector<MKLDNNEdgePtr> & graph
continue; // edge is visited
size_t cluster_idx = edge_clusters.size();
MKLDNNEdgePtr last_shared_edge = nullptr;
EdgePtr last_shared_edge = nullptr;
// has_defined_max_path means that all the edges on the path from the current edge to the actual shared edge
// have a defined max memory size, so they can be added to the clusters and resolved by the memory solver
bool has_defined_max_path = true;
@ -654,7 +656,7 @@ static edge_clusters_t findEdgeClusters(const std::vector<MKLDNNEdgePtr> & graph
return edge_clusters;
}
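To make the clustering step easier to follow, here is a simplified standalone sketch of the idea (hypothetical ToyEdge type, and none of the status or max-size handling of the real findEdgeClusters): edges that are views on the same base edge land in one cluster, so the allocation stage only has to place one memory region per cluster.

#include <memory>
#include <unordered_map>
#include <vector>

struct ToyEdge {
    std::shared_ptr<ToyEdge> sharedWith;  // empty if this edge owns its memory (the "base" edge)
};

using ToyEdgePtr = std::shared_ptr<ToyEdge>;
using ToyCluster = std::vector<ToyEdgePtr>;

std::vector<ToyCluster> findToyEdgeClusters(const std::vector<ToyEdgePtr>& edges) {
    std::unordered_map<ToyEdge*, size_t> clusterOf;  // base edge -> cluster index
    std::vector<ToyCluster> clusters;
    for (const auto& edge : edges) {
        // Walk up the sharing chain to the base edge that actually owns the memory.
        ToyEdgePtr base = edge;
        while (base->sharedWith)
            base = base->sharedWith;
        auto it = clusterOf.find(base.get());
        size_t idx;
        if (it == clusterOf.end()) {
            idx = clusters.size();
            clusters.emplace_back();
            clusterOf[base.get()] = idx;
        } else {
            idx = it->second;
        }
        clusters[idx].push_back(edge);
    }
    return clusters;  // a memory solver can then assign a single offset per cluster
}

int main() {
    auto base = std::make_shared<ToyEdge>();
    auto view = std::make_shared<ToyEdge>();
    view->sharedWith = base;
    auto independent = std::make_shared<ToyEdge>();
    auto clusters = findToyEdgeClusters({base, view, independent});
    return clusters.size() == 2 ? 0 : 1;  // {base, view} share one cluster, independent gets its own
}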
void MKLDNNGraph::AllocateWithReuse() {
void Graph::AllocateWithReuse() {
edge_clusters_t edge_clusters = findEdgeClusters(graphEdges);
size_t edge_clusters_count = edge_clusters.size();
@ -663,11 +665,11 @@ void MKLDNNGraph::AllocateWithReuse() {
auto &cluster = edge_clusters[i];
bool erase = false;
for (auto &edge : cluster) {
if (edge->getStatus() == MKLDNNEdge::Status::NeedAllocation
if (edge->getStatus() == Edge::Status::NeedAllocation
&& edge->getParent()->isConstant()) {
if (edge->getParent()->getType() == Input) {
auto constNode = std::static_pointer_cast<MKLDNNInputNode>(edge->getParent());
edge->reuse(std::const_pointer_cast<MKLDNNMemory>(constNode->getMemoryPtr()));
if (edge->getParent()->getType() == Type::Input) {
auto constNode = std::static_pointer_cast<node::Input>(edge->getParent());
edge->reuse(std::const_pointer_cast<Memory>(constNode->getMemoryPtr()));
} else {
edge->externalAllocate(weightsCache);
}
@ -711,8 +713,8 @@ void MKLDNNGraph::AllocateWithReuse() {
bool isConst = false, isOutput = false, isInput = false;
for (auto &edge : edge_clusters[i]) {
isConst |= isConstOutput(edge);
isOutput |= edge->getChild()->getType() == Output;
isInput |= edge->getParent()->getType() == Input;
isOutput |= edge->getChild()->getType() == Type::Output;
isInput |= edge->getParent()->getType() == Type::Input;
}
if (reuse_io_tensors) {
@ -731,7 +733,7 @@ void MKLDNNGraph::AllocateWithReuse() {
MemorySolver memSolver(boxes);
size_t total_size = static_cast<size_t>(memSolver.solve()) * alignment;
memWorkspace = std::make_shared<MKLDNNMemory>(eng);
memWorkspace = std::make_shared<Memory>(eng);
memWorkspace->Create(DnnlBlockedMemoryDesc(InferenceEngine::Precision::I8, Shape(InferenceEngine::SizeVector{total_size})));
if (edge_clusters.empty())
@ -742,7 +744,7 @@ void MKLDNNGraph::AllocateWithReuse() {
for (int i = 0; i < edge_clusters.size(); i++) {
int count = 0;
for (auto &edge : edge_clusters[i]) {
if (edge->getStatus() == MKLDNNEdge::Status::NeedAllocation) {
if (edge->getStatus() == Edge::Status::NeedAllocation) {
int64_t offset = memSolver.getOffset(i);
// !! Fallback to individual memory allocation !!
// if you would like to check inference without reuse, just call this function without arguments.
@ -751,7 +753,7 @@ void MKLDNNGraph::AllocateWithReuse() {
// TODO: WA for some tests (like strided_slice_test) which use tensors with
// shape {0} that is implicitly converted into a {1} tensor.
// Zeroing the input data allows those tests to pass.
if (edge->getParent()->type == Input && edge->hasDefinedMaxSize())
if (edge->getParent()->type == Type::Input && edge->hasDefinedMaxSize())
edge->getMemoryPtr()->FillZero();
count++;
@ -761,8 +763,8 @@ void MKLDNNGraph::AllocateWithReuse() {
}
}
void MKLDNNGraph::Allocate() {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "MKLDNNGraph::Allocate");
void Graph::Allocate() {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::Allocate");
// resolve edges. Define which will be a view on others
// NeedAllocation - real blob
@ -782,15 +784,15 @@ void MKLDNNGraph::Allocate() {
for (auto& edge : graphEdges) edge->validate();
}
void MKLDNNGraph::CreatePrimitives() {
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "MKLDNNGraph::CreatePrimitives");
void Graph::CreatePrimitives() {
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Graph::CreatePrimitives");
for (auto& node : graphNodes) {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, node->profiling.createPrimitive);
node->createPrimitive();
}
}
void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in) {
void Graph::PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in) {
if (!IsReady()) IE_THROW()<< "Wrong state. Topology not ready.";
auto input = inputNodesMap.find(name);
@ -806,7 +808,7 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::
if (ext_data_ptr != inter_data_ptr) {
auto ext_tdesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(in->getTensorDesc());
MKLDNNMemory ext_mem(eng);
Memory ext_mem(eng);
ext_mem.Create(ext_tdesc, ext_data_ptr, false);
// branch for handling dynamic batch feature in new API
@ -814,7 +816,7 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::
auto newDims = childEdge->getMemory().getStaticDims();
newDims[0] = ext_mem.getStaticDims()[0];
MKLDNNMemory tmpMem(eng);
Memory tmpMem(eng);
auto newDesc = childEdge->getMemory().getDesc().cloneWithNewDims(newDims, true);
tmpMem.Create(newDesc, childEdge->getMemory().GetData(), false);
@ -838,7 +840,7 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::
}
}
void MKLDNNGraph::PullOutputData(BlobMap &out) {
void Graph::PullOutputData(BlobMap &out) {
if (!IsReady())
IE_THROW() << "Wrong state. Topology not ready.";
@ -846,7 +848,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
auto name = outputMap.first;
auto node = outputMap.second;
auto parentEdge = node->getParentEdgeAt(0);
const MKLDNNMemory& intr_blob = parentEdge->getMemory();
const Memory& intr_blob = parentEdge->getMemory();
const auto ext_blob_map = out.find(name);
const auto ext_blob = ext_blob_map->second;
@ -907,7 +909,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
auto outBlobDesc = expectedDesc.getLayout() == InferenceEngine::Layout::ANY
? DnnlBlockedMemoryDesc(expectedDesc.getPrecision(), Shape(expectedDesc.getDims()))
: MemoryDescUtils::convertToDnnlBlockedMemoryDesc(expectedDesc);
MKLDNNMemory outBloMem(eng);
Memory outBloMem(eng);
outBloMem.Create(outBlobDesc, ext_blob_ptr, false);
// branch for handling dynamic batch feature in new API
@ -915,7 +917,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
auto newDims = intr_blob.getStaticDims();
newDims[0] = outBloMem.getStaticDims()[0];
MKLDNNMemory tmpMem(eng);
Memory tmpMem(eng);
auto newDesc = intr_blob.getDesc().cloneWithNewDims(newDims, true);
tmpMem.Create(newDesc, intr_blob.GetData(), false);
@ -940,7 +942,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
}
}
inline void MKLDNNGraph::ExecuteNode(const MKLDNNNodePtr& node, const mkldnn::stream& stream) const {
inline void Graph::ExecuteNode(const NodePtr& node, const mkldnn::stream& stream) const {
DUMP(node, config, infer_count);
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, node->profiling.execute);
@ -951,7 +953,7 @@ inline void MKLDNNGraph::ExecuteNode(const MKLDNNNodePtr& node, const mkldnn::st
}
}
void MKLDNNGraph::Infer(MKLDNNInferRequestBase* request) {
void Graph::Infer(InferRequestBase* request) {
if (!IsReady()) {
IE_THROW() << "Wrong state. Topology is not ready.";
}
@ -970,7 +972,7 @@ void MKLDNNGraph::Infer(MKLDNNInferRequestBase* request) {
if (infer_count != -1) infer_count++;
}
void MKLDNNGraph::VisitNode(MKLDNNNodePtr node, std::vector<MKLDNNNodePtr>& sortedNodes) {
void Graph::VisitNode(NodePtr node, std::vector<NodePtr>& sortedNodes) {
if (node->temporary) {
return;
}
@ -991,14 +993,14 @@ void MKLDNNGraph::VisitNode(MKLDNNNodePtr node, std::vector<MKLDNNNodePtr>& sort
sortedNodes.insert(sortedNodes.begin(), node);
}
void MKLDNNGraph::SortTopologically() {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "MKLDNNGraph::SortTopologically");
void Graph::SortTopologically() {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::SortTopologically");
std::vector<MKLDNNNodePtr> unsorted;
std::vector<MKLDNNNodePtr> sorted;
std::vector<NodePtr> unsorted;
std::vector<NodePtr> sorted;
for (int i = 0; i < graphNodes.size(); i++) {
MKLDNNNodePtr node = graphNodes[i];
NodePtr node = graphNodes[i];
node->permanent = false;
node->temporary = false;
@ -1007,7 +1009,7 @@ void MKLDNNGraph::SortTopologically() {
}
while (!unsorted.empty()) {
MKLDNNNodePtr node = unsorted.at(0);
NodePtr node = unsorted.at(0);
unsorted.erase(unsorted.begin());
VisitNode(node, sorted);
@ -1027,7 +1029,7 @@ void MKLDNNGraph::SortTopologically() {
for (auto &node : graphNodes) {
{
int port_num = node->inputShapes.size();
std::vector<MKLDNNEdgePtr> res(port_num);
std::vector<EdgePtr> res(port_num);
for (int i = 0; i < node->parentEdges.size(); i++) {
auto edge = node->getParentEdgeAt(i);
@ -1041,7 +1043,7 @@ void MKLDNNGraph::SortTopologically() {
}
{
int port_num = node->outputShapes.size();
std::vector<MKLDNNEdgePtr> res(port_num);
std::vector<EdgePtr> res(port_num);
for (int i = 0; i < node->childEdges.size(); i++) {
auto edge = node->getChildEdgeAt(i);
@ -1056,10 +1058,10 @@ void MKLDNNGraph::SortTopologically() {
}
}
void MKLDNNGraph::GetPerfData(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) const {
void Graph::GetPerfData(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) const {
unsigned i = 0;
std::function<void(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &, const MKLDNNNodePtr&)>
getPerfMapFor = [&](std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap, const MKLDNNNodePtr& node) {
std::function<void(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &, const NodePtr&)>
getPerfMapFor = [&](std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap, const NodePtr& node) {
InferenceEngine::InferenceEngineProfileInfo &pc = perfMap[node->getName()];
pc.execution_index = i++;
// TODO: Why time counter is signed?
@ -1088,23 +1090,23 @@ void MKLDNNGraph::GetPerfData(std::map<std::string, InferenceEngine::InferenceEn
}
}
void MKLDNNGraph::setConfig(const Config &cfg) {
void Graph::setConfig(const Config &cfg) {
config = cfg;
}
const Config& MKLDNNGraph::getConfig() const {
const Config& Graph::getConfig() const {
return config;
}
void MKLDNNGraph::setProperty(const std::map<std::string, std::string>& properties) {
void Graph::setProperty(const std::map<std::string, std::string>& properties) {
config.readProperties(properties);
}
Config MKLDNNGraph::getProperty() const {
Config Graph::getProperty() const {
return config;
}
void MKLDNNGraph::RemoveEdge(MKLDNNEdgePtr& edge) {
void Graph::RemoveEdge(EdgePtr& edge) {
for (auto it = graphEdges.begin(); it != graphEdges.end(); it++) {
if ((*it) == edge) {
edge->drop();
@ -1114,7 +1116,7 @@ void MKLDNNGraph::RemoveEdge(MKLDNNEdgePtr& edge) {
}
}
void MKLDNNGraph::DropNode(const MKLDNNNodePtr &node) {
void Graph::DropNode(const NodePtr &node) {
auto children = node->childEdges;
auto parents = node->parentEdges;
@ -1131,7 +1133,7 @@ void MKLDNNGraph::DropNode(const MKLDNNNodePtr &node) {
if (!child)
continue;
MKLDNNEdgePtr &remEdge = p_edge;
EdgePtr &remEdge = p_edge;
int inNum = 0;
if (remEdge) {
inNum = remEdge->getInputNum();
@ -1145,14 +1147,14 @@ void MKLDNNGraph::DropNode(const MKLDNNNodePtr &node) {
remEdge->drop();
RemoveEdge(remEdge);
}
MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum));
EdgePtr newEdge(new Edge(parent, child, inNum, outNum));
graphEdges.push_back(newEdge);
parent->addEdge(newEdge);
}
}
}
void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) {
void Graph::DropDWConvNode(const NodePtr &node) {
auto children = node->childEdges;
auto parents = node->parentEdges;
@ -1176,7 +1178,7 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) {
if (!child)
continue;
MKLDNNEdgePtr &remEdge = p_edge;
EdgePtr &remEdge = p_edge;
int inNum = 0;
if (remEdge) {
inNum = remEdge->getInputNum();
@ -1190,7 +1192,7 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) {
remEdge->drop();
RemoveEdge(remEdge);
}
MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum));
EdgePtr newEdge(new Edge(parent, child, inNum, outNum));
graphEdges.push_back(newEdge);
parent->addEdge(newEdge);
}
@ -1202,7 +1204,7 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) {
auto parent = p_edge->getParent();
if (!parent) continue;
MKLDNNEdgePtr &remEdge = p_edge;
EdgePtr &remEdge = p_edge;
int inNum = 0;
int portCandidate = 0;
if (remEdge) {
@ -1213,7 +1215,7 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) {
}
int outNum = parentConv->parentEdges.size();
MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentConv, inNum, outNum));
EdgePtr newEdge(new Edge(parent, parentConv, inNum, outNum));
graphEdges.push_back(newEdge);
parent->addEdge(newEdge);
parentConv->inputShapes.push_back(node->getInputShapeAtPort(portCandidate));
@ -1221,7 +1223,7 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) {
parentConv->outputShapes[0] = node->getOutputShapeAtPort(0);
}
void MKLDNNGraph::RemoveDroppedNodes() {
void Graph::RemoveDroppedNodes() {
auto& nodes = this->GetNodes();
auto it = nodes.begin();
@ -1235,7 +1237,7 @@ void MKLDNNGraph::RemoveDroppedNodes() {
}
}
void MKLDNNGraph::RemoveDroppedEdges() {
void Graph::RemoveDroppedEdges() {
auto& edges = this->GetEdges();
auto it = edges.begin();
@ -1249,20 +1251,20 @@ void MKLDNNGraph::RemoveDroppedEdges() {
}
}
MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const MemoryDesc& inDesc, const MemoryDesc& outDesc,
NodePtr Graph::InsertReorder(EdgePtr edge, std::string layerName, const MemoryDesc& inDesc, const MemoryDesc& outDesc,
bool isOptimized) {
MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layerName, getEngine(), weightsCache));
auto *reorderPtr = dynamic_cast<MKLDNNReorderNode *>(newReorder.get());
NodePtr newReorder(new node::Reorder(layerName, getEngine(), weightsCache));
auto *reorderPtr = dynamic_cast<node::Reorder *>(newReorder.get());
if (reorderPtr == nullptr) {
IE_THROW() << "MKLDNNGraph::InsertReorder: Cannot cast to MKLDNNReorderNode";
IE_THROW() << "Graph::InsertReorder: Cannot cast to Reorder";
}
reorderPtr->setDescs(inDesc, outDesc);
reorderPtr->setOptimized(isOptimized);
InsertNode(edge, newReorder, true);
// Using the method MKLDNNEdge::getDesc() we can check that input and output tensor descriptors are equal.
// Due to the specificity of MKLDNNGraphOptimizer::MergeTransposeAndReorder() that isOptimized flag uses, we shouldn't do these checks.
// Using the method Edge::getDesc() we can check that input and output tensor descriptors are equal.
// Due to the specificity of GraphOptimizer::MergeTransposeAndReorder(), which relies on the isOptimized flag, we shouldn't do these checks.
if (!isOptimized) {
newReorder->getParentEdgeAt(0)->getDesc();
newReorder->getChildEdgeAt(0)->getDesc();
@ -1271,7 +1273,7 @@ MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerNa
return newReorder;
}
bool MKLDNNGraph::InsertNode(MKLDNNEdgePtr edge, MKLDNNNodePtr node, bool initNode) {
bool Graph::InsertNode(EdgePtr edge, NodePtr node, bool initNode) {
auto oIndex = edge->getOutputNum();
auto iIndex = edge->getInputNum();
if (iIndex < 0 || oIndex < 0)
@ -1284,9 +1286,9 @@ bool MKLDNNGraph::InsertNode(MKLDNNEdgePtr edge, MKLDNNNodePtr node, bool initNo
return InsertNode(edge->getParent(), edge->getChild(), node, iIndex, oIndex, initNode);
}
bool MKLDNNGraph::InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNodePtr node, int parentPort, int childPort, bool initNode) {
MKLDNNEdgePtr beforeNode(new MKLDNNEdge(parent, node, parentPort, 0));
MKLDNNEdgePtr afterNode(new MKLDNNEdge(node, child, 0, childPort));
bool Graph::InsertNode(NodePtr parent, NodePtr child, NodePtr node, int parentPort, int childPort, bool initNode) {
EdgePtr beforeNode(new Edge(parent, node, parentPort, 0));
EdgePtr afterNode(new Edge(node, child, 0, childPort));
// Add edge for beforeNode
beforeNode->getChild()->parentEdges.push_back(beforeNode);
@ -1316,29 +1318,28 @@ bool MKLDNNGraph::InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNo
}
// Set the precision of all non-const data paths to BF16
void MKLDNNGraph::EnforceBF16() {
void Graph::EnforceBF16() {
// Floating point parts of FP32 + INT8 or FP32 + BIN mixed precision models will be executed in BF16 precision
// only if enforceBF16 flag was set manually because current performance is not good enough to enable it by default
if (!implication(isQuantized(), config.manualEnforceBF16))
return;
/* list of node types that must be forced to be executed in BF16 precision
* because of performance gains */
static const std::unordered_set<Type, std::hash<int>> significantNodes { // std::hash<int> is necessary old compilers (defect in C++11 standart)
Convolution, // conv nets
FullyConnected, // conv / bert nets
RNNCell, // recurent nets
RNNSeq, // recurent nets
MatMul, // bert nets
ROIPooling, // object detection nets
Interpolate, // super resolution nets
};
std::function<void(const MKLDNNNodePtr&, std::unordered_set<MKLDNNNodePtr>& skipNodes)> searchForNodesToSkip;
searchForNodesToSkip = [&](const MKLDNNNodePtr& node, std::unordered_set<MKLDNNNodePtr>& skipNodes) -> void {
std::function<void(const NodePtr&, std::unordered_set<NodePtr>& skipNodes)> searchForNodesToSkip;
searchForNodesToSkip = [&](const NodePtr& node, std::unordered_set<NodePtr>& skipNodes) -> void {
for (size_t i = 0; i < node->getParentEdges().size(); i++) {
const auto& parent = node->getParentEdgeAt(i)->getParent();
if (significantNodes.count(parent->getType())) // stop at significant nodes
continue;
/* list of node types that must be forced to be executed in BF16 precision
* because of performance gains */
if (one_of(parent->getType(),
Type::Convolution, // conv nets
Type::FullyConnected, // conv / bert nets
Type::RNNCell, // recurent nets
Type::RNNSeq, // recurent nets
Type::MatMul, // bert nets
Type::ROIPooling, // object detection nets
Type::Interpolate)) // super resolution nets
continue; // stop at significant nodes
const auto res = skipNodes.insert(parent);
if (res.second) // node not visited yet
@ -1349,7 +1350,7 @@ void MKLDNNGraph::EnforceBF16() {
/* Skip BF16 enforcement for tail of the graph by forming set of nodes to skip.
* Necessary to maintain accuracy.
* Experiments show zero performance impact on average */
std::unordered_set<MKLDNNNodePtr> nodesToSkip;
std::unordered_set<NodePtr> nodesToSkip;
// starting from output nodes
for (const auto& entry : outputNodesMap) {
const auto& node = entry.second;
@ -1360,15 +1361,17 @@ void MKLDNNGraph::EnforceBF16() {
if (nodesToSkip.count(node) && !node->enforceBF16evenForGraphTail)
continue;
if (node->getType() != Input && node->getType() != Output) {
if (node->getType() != Type::Input && node->getType() != Type::Output) {
for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) {
const auto &parent = node->getParentEdgesAtPort(i)[0]->getParent();
/* Skip BF16 enforcement for nodes after Constant Inputs to maintain precision for fusing.
* Precision conversion to BF16 is done automatically if a convolution follows Constant Inputs
* and the activation is BF16 */
if (!(parent->getType() == Input && parent->isConstant() &&
node->getType() != Concatenation) && // Concatenation node is exception because it doesn't change an accuracy for BF16 activation
!(parent->getType() == Input && node->getType() == Eltwise) && // exclude Eltwise after Input since it supports conversion to BF16
if (!(parent->getType() == Type::Input && parent->isConstant() &&
// Concatenation node is an exception because it doesn't change the accuracy for a BF16 activation
node->getType() != Type::Concatenation) &&
// exclude Eltwise after Input since it supports conversion to BF16
!(parent->getType() == Type::Input && node->getType() == Type::Eltwise) &&
node->getOriginalInputPrecisionAtPort(i) == Precision::FP32)
node->setOriginalInputPrecisionAtPort(i, Precision::BF16);
}
@ -1381,6 +1384,9 @@ void MKLDNNGraph::EnforceBF16() {
}
}
std::shared_ptr<ngraph::Function> MKLDNNGraph::dump() const {
std::shared_ptr<ngraph::Function> Graph::dump() const {
return dump_graph_as_ie_ngraph_net(*this);
}
} // namespace intel_cpu
} // namespace ov
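For readers unfamiliar with the BF16 enforcement pass touched above, the sketch below reproduces its tail-skip idea in a standalone form: starting from the graph outputs, walk parent edges and collect nodes until a "significant" node type (Convolution, FullyConnected, MatMul, ...) is reached; the collected tail is then excluded from BF16 enforcement. The ToyNode/ToyType types and the collectTailToSkip helper are illustrative only and are not part of the plugin.

#include <memory>
#include <unordered_set>
#include <vector>

// Toy stand-in for the plugin's node/type machinery; only the traversal pattern matters here.
enum class ToyType { Input, Output, Convolution, FullyConnected, MatMul, Eltwise, Transpose };

struct ToyNode {
    ToyType type;
    std::vector<std::shared_ptr<ToyNode>> parents;  // mirrors getParentEdgeAt(i)->getParent()
};
using ToyNodePtr = std::shared_ptr<ToyNode>;

// Walk upwards from `node`, collecting everything until a "significant" node type is hit,
// the same way Graph::EnforceBF16() builds its nodesToSkip set.
void collectTailToSkip(const ToyNodePtr& node, std::unordered_set<ToyNodePtr>& skip) {
    for (const auto& parent : node->parents) {
        if (parent->type == ToyType::Convolution || parent->type == ToyType::FullyConnected ||
            parent->type == ToyType::MatMul)
            continue;  // stop at significant nodes: they stay in BF16
        if (skip.insert(parent).second)  // node not visited yet
            collectTailToSkip(parent, skip);
    }
}

int main() {
    // conv -> eltwise -> output: the eltwise tail ends up in `skip`, the conv does not.
    auto conv = std::make_shared<ToyNode>(ToyNode{ToyType::Convolution, {}});
    auto elt  = std::make_shared<ToyNode>(ToyNode{ToyType::Eltwise, {conv}});
    auto out  = std::make_shared<ToyNode>(ToyNode{ToyType::Output, {elt}});

    std::unordered_set<ToyNodePtr> skip;
    collectTailToSkip(out, skip);
    return skip.count(elt) && !skip.count(conv) ? 0 : 1;
}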

View File

@ -20,18 +20,20 @@
namespace ov {
namespace intel_cpu {
class MKLDNNInferRequestBase;
class MKLDNNGraph {
class InferRequestBase;
class InferRequest;
class Graph {
public:
typedef std::shared_ptr<MKLDNNGraph> Ptr;
MKLDNNWeightsSharing::Ptr weightsCache;
typedef std::shared_ptr<Graph> Ptr;
WeightsSharing::Ptr weightsCache;
enum Status {
NotReady = 0,
Ready = 1,
};
MKLDNNGraph() = default;
Graph() = default;
Status GetStatus() {
return status;
@ -49,12 +51,12 @@ public:
template<typename NET>
void CreateGraph(NET &network,
const MKLDNNExtensionManager::Ptr& extMgr,
MKLDNNWeightsSharing::Ptr &w_cache);
const ExtensionManager::Ptr& extMgr,
WeightsSharing::Ptr &w_cache);
void CreateGraph(const std::vector<MKLDNNNodePtr> &graphNodes,
const std::vector<MKLDNNEdgePtr> &graphEdges,
MKLDNNWeightsSharing::Ptr &w_cache,
void CreateGraph(const std::vector<NodePtr> &graphNodes,
const std::vector<EdgePtr> &graphEdges,
WeightsSharing::Ptr &w_cache,
std::string name);
bool hasMeanImageFor(const std::string& name) {
@ -64,13 +66,13 @@ public:
void PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in);
void PullOutputData(InferenceEngine::BlobMap &out);
void Infer(MKLDNNInferRequestBase* request = nullptr);
void Infer(InferRequestBase* request = nullptr);
const std::vector<MKLDNNNodePtr>& GetNodes() const {
const std::vector<NodePtr>& GetNodes() const {
return graphNodes;
}
std::vector<MKLDNNNodePtr>& GetNodes() {
std::vector<NodePtr>& GetNodes() {
return graphNodes;
}
@ -78,26 +80,26 @@ public:
return _name;
}
std::vector<MKLDNNEdgePtr>& GetEdges() {
std::vector<EdgePtr>& GetEdges() {
return graphEdges;
}
std::map<std::string, MKLDNNNodePtr>& GetInputNodesMap() {
std::map<std::string, NodePtr>& GetInputNodesMap() {
return inputNodesMap;
}
std::map<std::string, MKLDNNNodePtr>& GetOutputNodesMap() {
std::map<std::string, NodePtr>& GetOutputNodesMap() {
return outputNodesMap;
}
MKLDNNNodePtr getInputNodeByName(const std::string &name) {
NodePtr getInputNodeByName(const std::string &name) {
auto input = inputNodesMap.find(name);
if (input == inputNodesMap.end())
IE_THROW() << "CPU execution graph doesn't contain input node with name: " << name;
return input->second;
}
MKLDNNNodePtr getOutputNodeByName(const std::string &name) {
NodePtr getOutputNodeByName(const std::string &name) {
auto output = outputNodesMap.find(name);
if (output == outputNodesMap.end())
IE_THROW() << "CPU execution graph doesn't contain output node with name: " << name;
@ -116,9 +118,9 @@ public:
void RemoveDroppedNodes();
void RemoveDroppedEdges();
void RemoveEdge(MKLDNNEdgePtr& edge);
void DropNode(const MKLDNNNodePtr& node);
void DropDWConvNode(const MKLDNNNodePtr& node);
void RemoveEdge(EdgePtr& edge);
void DropNode(const NodePtr& node);
void DropDWConvNode(const NodePtr& node);
/**
* @brief Insert Reorder node at the edge-specified location.
@ -139,11 +141,11 @@ public:
* pointer to the blob containing scales
* @return pointer to the new Reorder node.
*/
MKLDNNNodePtr InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const MemoryDesc& inDesc,
NodePtr InsertReorder(EdgePtr edge, std::string layerName, const MemoryDesc& inDesc,
const MemoryDesc& outDesc, bool isOptimized = false);
/**
* @brief Insert MKLDNNNode at the edge-specified location.
* @brief Insert Node at the edge-specified location.
* This method supports two regimes. First, the node is inserted with initialization (i.e. supported descriptors initialization,
* supported primitive descriptors selection, etc.), which can be useful after the InitEdges() completes. The second is just inserting the
* node without initialization.
@ -155,10 +157,10 @@ public:
* parameter that determines whether the node needs to be initialized
* @return true in case of success, false otherwise.
*/
bool InsertNode(MKLDNNEdgePtr edge, MKLDNNNodePtr node, bool initNode = false);
bool InsertNode(EdgePtr edge, NodePtr node, bool initNode = false);
/**
* @brief Insert MKLDNNNode between two specified nodes.
* @brief Insert Node between two specified nodes.
* This procedure creates two edges that link the parent and child nodes to the inserted one and adds all created objects to the graph.
* This method supports two regimes. First, the node is inserted with initialization (i.e. supported descriptors initialization,
* supported primitive descriptors selection, etc.), which can be useful after the InitEdges() completes. The second is just inserting the
@ -175,7 +177,7 @@ public:
* parameter that determines whether the node needs to be initialized
* @return true in case of success, false otherwise.
*/
bool InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNodePtr node, int parentPort, int childPort, bool initNode = false);
bool InsertNode(NodePtr parent, NodePtr child, NodePtr node, int parentPort, int childPort, bool initNode = false);
std::shared_ptr<ngraph::Function> dump() const;
@ -192,7 +194,7 @@ public:
}
protected:
void VisitNode(MKLDNNNodePtr node, std::vector<MKLDNNNodePtr>& sortedNodes);
void VisitNode(NodePtr node, std::vector<NodePtr>& sortedNodes);
void ForgetGraphData() {
status = NotReady;
@ -213,10 +215,10 @@ protected:
bool reuse_io_tensors = true;
MKLDNNMemoryPtr memWorkspace;
MemoryPtr memWorkspace;
std::vector<MKLDNNNodePtr> graphNodes;
std::vector<MKLDNNEdgePtr> graphEdges;
std::vector<NodePtr> graphNodes;
std::vector<EdgePtr> graphEdges;
std::map<std::string, NormalizePreprocess> _normalizePreprocMap;
std::string _name;
@ -226,8 +228,8 @@ protected:
static mkldnn::engine eng;
void Replicate(const InferenceEngine::CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr);
void Replicate(const std::shared_ptr<const ov::Model> &subgraph, const MKLDNNExtensionManager::Ptr& extMgr);
void Replicate(const InferenceEngine::CNNNetwork &network, const ExtensionManager::Ptr& extMgr);
void Replicate(const std::shared_ptr<const ov::Model> &subgraph, const ExtensionManager::Ptr& extMgr);
void InitGraph();
void InitNodes();
void InitDescriptors();
@ -237,24 +239,24 @@ protected:
void AllocateWithReuse();
void CreatePrimitives();
void ExtractConstantAndExecutableNodes();
void ExecuteNode(const MKLDNNNodePtr& node, const mkldnn::stream& stream) const;
void ExecuteNode(const NodePtr& node, const mkldnn::stream& stream) const;
void ExecuteConstantNodesOnly() const;
friend class MKLDNNInferRequestBase;
friend class MKLDNNLegacyInferRequest;
friend class MKLDNNInferRequest;
friend std::shared_ptr<ngraph::Function> dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph);
friend class LegacyInferRequest;
friend class intel_cpu::InferRequest;
friend class intel_cpu::InferRequestBase;
friend std::shared_ptr<ngraph::Function> dump_graph_as_ie_ngraph_net(const Graph &graph);
private:
// TODO: change std::map to std::unordered_map
std::map<std::string, MKLDNNNodePtr> inputNodesMap;
std::map<std::string, MKLDNNNodePtr> outputNodesMap;
std::map<std::string, NodePtr> inputNodesMap;
std::map<std::string, NodePtr> outputNodesMap;
// these node pointers (from graphNodes) are cached to avoid repeatedly checking node constantness
// in ExecuteConstantNodesOnly and Infer, and to avoid calling non-executable (optimized out) nodes,
// such as Input, Reshape, etc.
std::vector<MKLDNNNodePtr> constantGraphNodes;
std::vector<MKLDNNNodePtr> executableGraphNodes;
std::vector<NodePtr> constantGraphNodes;
std::vector<NodePtr> executableGraphNodes;
MultiCachePtr rtParamsCache;
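To make the renamed insertion API above concrete, here is a minimal in-tree style sketch (not a drop-in optimizer pass) of how a caller might place a Reorder on an edge via the Graph::InsertReorder() declaration shown in this header. The helper name, the reorder naming convention, and the way the two MemoryDesc arguments are obtained are assumptions; only the InsertReorder signature comes from the code above, and the snippet compiles only against the plugin-internal headers, not the public OpenVINO API.

#include <string>
#include "graph.h"   // ov::intel_cpu::Graph, EdgePtr, NodePtr, MemoryDesc (plugin-internal)

namespace ov {
namespace intel_cpu {

// Hypothetical helper: insert a non-optimized Reorder that converts srcDesc to dstDesc on `edge`.
NodePtr insertLayoutConversion(Graph& graph, const EdgePtr& edge,
                               const MemoryDesc& srcDesc, const MemoryDesc& dstDesc) {
    const std::string name = edge->getParent()->getName() + "_reorder_" + edge->getChild()->getName();
    // isOptimized = false keeps the descriptor checks mentioned for Graph::InsertReorder() enabled.
    return graph.InsertReorder(edge, name, srcDesc, dstDesc, /*isOptimized=*/false);
}

}  // namespace intel_cpu
}  // namespace ov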

View File

@ -24,18 +24,18 @@ using namespace InferenceEngine;
namespace ov {
namespace intel_cpu {
void serializeToCout(const MKLDNNGraph &graph);
void serializeToXML(const MKLDNNGraph &graph, const std::string& path);
void serializeToCout(const Graph &graph);
void serializeToXML(const Graph &graph, const std::string& path);
namespace {
std::map<std::string, std::string> extract_node_metadata(const MKLDNNNodePtr &node) {
std::map<std::string, std::string> extract_node_metadata(const NodePtr &node) {
std::map<std::string, std::string> serialization_info;
if (node->getType() == Input && node->isConstant()) {
if (node->getType() == Type::Input && node->isConstant()) {
// We need to separate Input and Const layers
serialization_info[ExecGraphInfoSerialization::LAYER_TYPE] = "Const";
} else if (node->getType() == Generic) {
} else if (node->getType() == Type::Generic) {
// Path to print actual name for extension layers
serialization_info[ExecGraphInfoSerialization::LAYER_TYPE] = node->getTypeStr();
} else {
@ -114,14 +114,14 @@ std::map<std::string, std::string> extract_node_metadata(const MKLDNNNodePtr &no
} // namespace
std::shared_ptr<ngraph::Function> dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph) {
std::map<MKLDNNNodePtr, std::shared_ptr<ngraph::Node> > node2layer;
std::shared_ptr<ngraph::Function> dump_graph_as_ie_ngraph_net(const Graph &graph) {
std::map<NodePtr, std::shared_ptr<ngraph::Node> > node2layer;
ngraph::ResultVector results;
ngraph::ParameterVector params;
ngraph::NodeVector to_hold;
auto get_inputs = [&] (const MKLDNNNodePtr & node) {
auto get_inputs = [&] (const NodePtr & node) {
auto pr_edges = node->getParentEdges();
ngraph::OutputVector inputs(pr_edges.size());
@ -140,7 +140,7 @@ std::shared_ptr<ngraph::Function> dump_graph_as_ie_ngraph_net(const MKLDNNGraph
return inputs;
};
auto create_ngraph_node = [&](const MKLDNNNodePtr &node) {
auto create_ngraph_node = [&](const NodePtr &node) {
bool is_input = false, is_output = false, should_be_hold = false;
for (auto && kvp : graph.inputNodesMap) {
if (kvp.second == node) {
@ -209,7 +209,7 @@ std::shared_ptr<ngraph::Function> dump_graph_as_ie_ngraph_net(const MKLDNNGraph
}
#ifdef CPU_DEBUG_CAPS
void serialize(const MKLDNNGraph &graph) {
void serialize(const Graph &graph) {
const std::string& path = graph.getConfig().execGraphPath;
if (path.empty())
@ -223,7 +223,7 @@ void serialize(const MKLDNNGraph &graph) {
IE_THROW() << "Unknown serialize format. Should be either 'cout' or '*.xml'. Got " << path;
}
void serializeToXML(const MKLDNNGraph &graph, const std::string& path) {
void serializeToXML(const Graph &graph, const std::string& path) {
if (path.empty())
return;
@ -235,7 +235,7 @@ void serializeToXML(const MKLDNNGraph &graph, const std::string& path) {
manager.run_passes(graph.dump());
}
void serializeToCout(const MKLDNNGraph &graph) {
void serializeToCout(const Graph &graph) {
for (const auto& node : graph.GetNodes()) {
std::cout << "name: " << node->getName() << " [ ";
auto nodeDesc = node->getSelectedPrimitiveDescriptor();
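The dumper above is what ultimately backs the execution-graph report an application can request through the Inference Engine API. As a usage illustration (the model path and output file name are placeholders), one way to obtain and store that graph is:

#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core ie;
    auto network = ie.ReadNetwork("model.xml");            // placeholder model path
    auto execNetwork = ie.LoadNetwork(network, "CPU");

    // Returns the executed (runtime) graph built by dump_graph_as_ie_ngraph_net() above.
    InferenceEngine::CNNNetwork execGraph = execNetwork.GetExecGraphInfo();
    execGraph.serialize("exec_graph.xml");                  // layers carry ExecGraphInfoSerialization metadata
    return 0;
}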

View File

@ -13,9 +13,9 @@
namespace ov {
namespace intel_cpu {
std::shared_ptr<ngraph::Function> dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph);
std::shared_ptr<ngraph::Function> dump_graph_as_ie_ngraph_net(const Graph &graph);
#ifdef CPU_DEBUG_CAPS
void serialize(const MKLDNNGraph &graph);
void serialize(const Graph &graph);
#endif // CPU_DEBUG_CAPS
} // namespace intel_cpu

File diff suppressed because it is too large

View File

@ -11,38 +11,38 @@
namespace ov {
namespace intel_cpu {
class MKLDNNGraphOptimizer {
class GraphOptimizer {
public:
MKLDNNGraphOptimizer();
GraphOptimizer();
public:
void ApplyCommonGraphOptimizations(MKLDNNGraph& graph);
void ApplyImplSpecificGraphOptimizations(MKLDNNGraph& graph);
void ApplyCommonGraphOptimizations(Graph& graph);
void ApplyImplSpecificGraphOptimizations(Graph& graph);
private:
void FuseConvolutionMatMulAndBias(MKLDNNGraph &graph);
void FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &graph);
void FuseMultiplyAndAdd(MKLDNNGraph &graph);
void FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph);
void FuseMatMulAndSimpleOperation(MKLDNNGraph &graph);
void FuseConvolutionAndSimpleOperationThroughMaxPool(MKLDNNGraph &graph);
void FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph);
void FuseConvolutionAndDWConvolution(MKLDNNGraph &graph);
void FusePoolingAndFakeQuantize(MKLDNNGraph &graph);
void FuseConvolutionSumAndConvolutionSumActivation(MKLDNNGraph &graph);
void FuseMVNAndSimpleOperation(MKLDNNGraph &graph);
void FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph);
void FuseNormalizeL2AndSimpleOperation(MKLDNNGraph &graph);
void FuseReduceAndSimpleOperation(MKLDNNGraph &graph);
void FuseConvolutionMatMulAndBias(Graph &graph);
void FuseDeconvolutionAndSimpleOperation(Graph &graph);
void FuseMultiplyAndAdd(Graph &graph);
void FuseFullyConnectedAndSimpleOperation(Graph &graph);
void FuseMatMulAndSimpleOperation(Graph &graph);
void FuseConvolutionAndSimpleOperationThroughMaxPool(Graph &graph);
void FuseConvolutionAndSimpleOperation(Graph &graph);
void FuseConvolutionAndDWConvolution(Graph &graph);
void FusePoolingAndFakeQuantize(Graph &graph);
void FuseConvolutionSumAndConvolutionSumActivation(Graph &graph);
void FuseMVNAndSimpleOperation(Graph &graph);
void FuseInterpolateAndSimpleOperation(Graph &graph);
void FuseNormalizeL2AndSimpleOperation(Graph &graph);
void FuseReduceAndSimpleOperation(Graph &graph);
void DropDoubleReorders(MKLDNNGraph& graph);
void FuseConvolutionAndZeroPoints(MKLDNNGraph &graph);
void FuseBroadcastAndEltwise(MKLDNNGraph &graph);
void FuseEltwiseAndSimple(MKLDNNGraph &graph);
void FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph &graph);
void FuseClampAndFakeQuantize(MKLDNNGraph &graph);
void MergeTransposeAndReorder(MKLDNNGraph &graph);
void reshapeRnnSeq(MKLDNNGraph &graph);
void DropDoubleReorders(Graph& graph);
void FuseConvolutionAndZeroPoints(Graph &graph);
void FuseBroadcastAndEltwise(Graph &graph);
void FuseEltwiseAndSimple(Graph &graph);
void FusePerformedAsScaleShiftAndFakeQuantize(Graph &graph);
void FuseClampAndFakeQuantize(Graph &graph);
void MergeTransposeAndReorder(Graph &graph);
void reshapeRnnSeq(Graph &graph);
};
} // namespace intel_cpu

View File

@ -3,7 +3,7 @@
//
#include "infer_request.h"
#include "extension_utils.h"
#include "dnnl_extension_utils.h"
#include <vector>
#include <string>
#include <map>
@ -26,9 +26,12 @@
#include <transformations/utils/utils.hpp>
#include <ie_ngraph_utils.hpp>
void ov::intel_cpu::MKLDNNInferRequestBase::CreateInferRequest() {
namespace ov {
namespace intel_cpu {
void InferRequestBase::CreateInferRequest() {
auto id = (execNetwork->_numRequests)++;
profilingTask = openvino::itt::handle("MKLDNN_INFER_" + execNetwork->_name + "_" + std::to_string(id));
profilingTask = openvino::itt::handle("INTEL_CPU_INFER_" + execNetwork->_name + "_" + std::to_string(id));
if (execNetwork->_graphs.size() == 0)
IE_THROW() << "No graph was found";
@ -40,10 +43,10 @@ void ov::intel_cpu::MKLDNNInferRequestBase::CreateInferRequest() {
// of MemoryLayer implementation. It uses output edge of MemoryLayer
// producer as storage for tensor to keep it between infer calls.
for (auto& node : graph->GetNodes()) {
if (node->getType() == MemoryInput) {
auto memoryNode = dynamic_cast<MKLDNNMemoryInputNode*>(node.get());
if (node->getType() == Type::MemoryInput) {
auto memoryNode = dynamic_cast<node::MemoryInput*>(node.get());
if (!memoryNode) {
IE_THROW() << "Cannot cast " << node->getName() << " to MKLDNNMemoryInputNode";
IE_THROW() << "Cannot cast " << node->getName() << " to MemoryInput";
}
auto state_store = memoryNode->getStore();
auto state_name = memoryNode->getId();
@ -53,16 +56,16 @@ void ov::intel_cpu::MKLDNNInferRequestBase::CreateInferRequest() {
if (suffix_idx != std::string::npos)
state_name = state_name.substr(0, suffix_idx);
memoryStates.emplace_back(new MKLDNNVariableState(state_name, state_store));
memoryStates.emplace_back(new VariableState(state_name, state_store));
}
}
}
ov::intel_cpu::MKLDNNInferRequestBase::~MKLDNNInferRequestBase() {
InferRequestBase::~InferRequestBase() {
--(execNetwork->_numRequests);
}
void ov::intel_cpu::MKLDNNInferRequestBase::pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob, InferenceEngine::Precision inPrec) {
void InferRequestBase::pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob, InferenceEngine::Precision inPrec) {
auto& tensorDesc = inputBlob->getTensorDesc();
bool needConvert = inPrec != tensorDesc.getPrecision();
@ -89,12 +92,12 @@ void ov::intel_cpu::MKLDNNInferRequestBase::pushInput(const std::string& inputNa
graph->PushInputData(inputName, needConvert ? iconv : inputBlob);
}
void ov::intel_cpu::MKLDNNInferRequestBase::PushStates() {
void InferRequestBase::PushStates() {
for (auto &node : graph->GetNodes()) {
if (node->getType() == MemoryInput) {
auto cur_node = dynamic_cast<MKLDNNMemoryInputNode*>(node.get());
if (node->getType() == Type::MemoryInput) {
auto cur_node = dynamic_cast<node::MemoryInput*>(node.get());
if (!cur_node) {
IE_THROW() << "Cannot cast " << node->getName() << " to MKLDNNMemoryInputNode";
IE_THROW() << "Cannot cast " << node->getName() << " to MemoryInput";
}
auto cur_id = cur_node->getId();
for (const auto& state : memoryStates) {
@ -111,12 +114,12 @@ void ov::intel_cpu::MKLDNNInferRequestBase::PushStates() {
}
}
void ov::intel_cpu::MKLDNNInferRequestBase::PullStates() {
void InferRequestBase::PullStates() {
for (auto &node : graph->GetNodes()) {
if (node->getType() == MemoryInput) {
auto cur_node = dynamic_cast<MKLDNNMemoryInputNode*>(node.get());
if (node->getType() == Type::MemoryInput) {
auto cur_node = dynamic_cast<node::MemoryInput*>(node.get());
if (!cur_node) {
IE_THROW() << "Cannot cast " << node->getName() << " to MKLDNNMemoryInputNode";
IE_THROW() << "Cannot cast " << node->getName() << " to MemoryInput";
}
auto cur_id = cur_node->getId();
for (const auto& state : memoryStates) {
@ -133,7 +136,7 @@ void ov::intel_cpu::MKLDNNInferRequestBase::PullStates() {
}
}
void ov::intel_cpu::MKLDNNInferRequestBase::redefineMemoryForInputNodes() {
void InferRequestBase::redefineMemoryForInputNodes() {
const auto cpuInputNodes = graph->GetInputNodesMap();
for (const auto &blob : _inputs) {
@ -146,7 +149,7 @@ void ov::intel_cpu::MKLDNNInferRequestBase::redefineMemoryForInputNodes() {
}
}
void ov::intel_cpu::MKLDNNInferRequestBase::InferImpl() {
void InferRequestBase::InferImpl() {
using namespace openvino::itt;
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, profilingTask);
auto graphLock = execNetwork->GetGraph();
@ -184,7 +187,7 @@ void ov::intel_cpu::MKLDNNInferRequestBase::InferImpl() {
graph->PullOutputData(_outputs);
}
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> ov::intel_cpu::MKLDNNInferRequestBase::GetPerformanceCounts() const {
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> InferRequestBase::GetPerformanceCounts() const {
if (!graph || !graph->IsReady())
IE_THROW() << "Graph is not ready!";
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perfMap;
@ -192,16 +195,16 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> ov::intel_cpu
return perfMap;
}
static inline void changeEdgePtr(const ov::intel_cpu::MKLDNNEdgePtr &edge, void *newPtr) {
static inline void changeEdgePtr(const EdgePtr &edge, void *newPtr) {
edge->getMemoryPtr()->setDataHandle(newPtr);
}
void ov::intel_cpu::MKLDNNInferRequestBase::changeDefaultPtr() {
void InferRequestBase::changeDefaultPtr() {
for (auto& it : externalPtr) {
const auto& inputNodesMap = graph->GetInputNodesMap();
auto input = inputNodesMap.find(it.first);
if (input != inputNodesMap.end()) {
MKLDNNNodePtr inputNodePtr = input->second;
NodePtr inputNodePtr = input->second;
if (inputNodePtr->getChildEdgeAt(0)->getMemory().GetData() == it.second)
continue;
auto& childEdges = inputNodePtr->getChildEdges();
@ -219,8 +222,8 @@ void ov::intel_cpu::MKLDNNInferRequestBase::changeDefaultPtr() {
break;
}
if (child->getType() == Concatenation) {
auto concat = dynamic_cast<MKLDNNConcatNode*>(child.get());
if (child->getType() == Type::Concatenation) {
auto concat = dynamic_cast<node::Concat*>(child.get());
if (concat && concat->isOptimized()) {
canBeInPlace = false;
break;
@ -228,7 +231,7 @@ void ov::intel_cpu::MKLDNNInferRequestBase::changeDefaultPtr() {
}
// Cannot be in-place before split because split is using different ptrs without offsets
if (child->getType() == Split) {
if (child->getType() == Type::Split) {
canBeInPlace = false;
break;
}
@ -277,7 +280,7 @@ void ov::intel_cpu::MKLDNNInferRequestBase::changeDefaultPtr() {
void* defaultPtr = parentEdge->getMemory().GetData();
// Cannot be in-place after concat because concat is using different ptrs without offsets
auto parent = parentEdge->getParent();
MKLDNNNodePtr previousParent;
NodePtr previousParent;
do {
previousParent = parent;
if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInPlace()) {
@ -305,22 +308,22 @@ void ov::intel_cpu::MKLDNNInferRequestBase::changeDefaultPtr() {
}
}
std::vector<InferenceEngine::IVariableStateInternal::Ptr> ov::intel_cpu::MKLDNNInferRequestBase::QueryState() {
std::vector<InferenceEngine::IVariableStateInternal::Ptr> InferRequestBase::QueryState() {
return memoryStates;
}
void ov::intel_cpu::MKLDNNInferRequestBase::SetAsyncRequest(MKLDNNAsyncInferRequest* asyncRequest) {
void InferRequestBase::SetAsyncRequest(AsyncInferRequest* asyncRequest) {
_asyncRequest = asyncRequest;
}
void ov::intel_cpu::MKLDNNInferRequestBase::ThrowIfCanceled() const {
void InferRequestBase::ThrowIfCanceled() const {
if (_asyncRequest != nullptr) {
_asyncRequest->ThrowIfCanceled();
}
}
InferenceEngine::Precision
ov::intel_cpu::MKLDNNInferRequestBase::normToInputSupportedPrec(const std::pair<const std::string, InferenceEngine::Blob::Ptr>& input) const {
InferRequestBase::normToInputSupportedPrec(const std::pair<const std::string, InferenceEngine::Blob::Ptr>& input) const {
const auto& inputTensorDesc = input.second->getTensorDesc();
auto inPrec = inputTensorDesc.getPrecision();
if (graph->hasMeanImageFor(input.first) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) {
@ -336,24 +339,24 @@ ov::intel_cpu::MKLDNNInferRequestBase::normToInputSupportedPrec(const std::pair<
return inPrec;
}
/* ========================================== MKLDNNLegacyInferRequest ========================================== */
ov::intel_cpu::MKLDNNLegacyInferRequest::MKLDNNLegacyInferRequest(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs,
std::shared_ptr<MKLDNNExecNetwork> execNetwork)
: MKLDNNInferRequestBase(networkInputs, networkOutputs, execNetwork) {
/* ========================================== LegacyInferRequest ========================================== */
LegacyInferRequest::LegacyInferRequest(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs,
std::shared_ptr<ExecNetwork> execNetwork)
: InferRequestBase(networkInputs, networkOutputs, execNetwork) {
CreateInferRequest();
}
void ov::intel_cpu::MKLDNNLegacyInferRequest::initBlobs() {
void LegacyInferRequest::initBlobs() {
for (const auto& it : _networkInputs) {
MKLDNNLegacyInferRequest::GetBlob(it.first);
LegacyInferRequest::GetBlob(it.first);
}
for (const auto& it : _networkOutputs) {
MKLDNNLegacyInferRequest::GetBlob(it.first);
LegacyInferRequest::GetBlob(it.first);
}
}
void ov::intel_cpu::MKLDNNLegacyInferRequest::SetBatch(int new_batch) {
void LegacyInferRequest::SetBatch(int new_batch) {
if (!graph->getProperty().enableDynamicBatch)
IE_THROW() << "Dynamic batch is not enabled.";
@ -369,7 +372,7 @@ void ov::intel_cpu::MKLDNNLegacyInferRequest::SetBatch(int new_batch) {
}
}
void ov::intel_cpu::MKLDNNLegacyInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) {
void LegacyInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) {
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "SetBlobLegacy");
if (name.empty()) {
IE_THROW(NotFound) << "Failed to set blob with empty name";
@ -479,7 +482,7 @@ void ov::intel_cpu::MKLDNNLegacyInferRequest::SetBlob(const std::string& name, c
}
}
InferenceEngine::Blob::Ptr ov::intel_cpu::MKLDNNLegacyInferRequest::GetBlob(const std::string& name) {
InferenceEngine::Blob::Ptr LegacyInferRequest::GetBlob(const std::string& name) {
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "GetBlobLegacy");
if (!graph || !graph->IsReady())
@ -595,7 +598,7 @@ InferenceEngine::Blob::Ptr ov::intel_cpu::MKLDNNLegacyInferRequest::GetBlob(cons
return data;
}
void ov::intel_cpu::MKLDNNLegacyInferRequest::PushInputData() {
void LegacyInferRequest::PushInputData() {
for (auto input : _inputs) {
auto inputName = input.first;
if (!_networkInputs[inputName]) {
@ -613,11 +616,11 @@ void ov::intel_cpu::MKLDNNLegacyInferRequest::PushInputData() {
}
}
/* ========================================== MKLDNNInferRequest ========================================== */
ov::intel_cpu::MKLDNNInferRequest::MKLDNNInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
MKLDNNExecNetwork::Ptr execNetwork)
: MKLDNNInferRequestBase(inputs, outputs, execNetwork) {
/* ========================================== InferRequest ========================================== */
InferRequest::InferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
ExecNetwork::Ptr execNetwork)
: InferRequestBase(inputs, outputs, execNetwork) {
for (const std::shared_ptr<const ov::Node>& in : inputs) {
modelInputsMap[ngraph::op::util::get_ie_output_name(ngraph::Output<const ngraph::Node>(in))] = in;
}
@ -628,16 +631,16 @@ ov::intel_cpu::MKLDNNInferRequest::MKLDNNInferRequest(const std::vector<std::sha
CreateInferRequest();
}
void ov::intel_cpu::MKLDNNInferRequest::initBlobs() {
void InferRequest::initBlobs() {
for (const auto& it : modelInputsMap) {
MKLDNNInferRequest::GetBlob(it.first);
InferRequest::GetBlob(it.first);
}
for (const auto& it : modelOutputsMap) {
MKLDNNInferRequest::GetBlob(it.first);
InferRequest::GetBlob(it.first);
}
}
void ov::intel_cpu::MKLDNNInferRequest::SetBatch(int new_batch) {
void InferRequest::SetBatch(int new_batch) {
if (!graph->getProperty().batchLimit || modelInputsMap.begin()->second->get_output_partial_shape(0).is_static()) {
IE_THROW() << "Can't SetBatch for model that can't be executed via legacy dynamic batch or for static model";
}
@ -653,7 +656,7 @@ void ov::intel_cpu::MKLDNNInferRequest::SetBatch(int new_batch) {
}
}
void ov::intel_cpu::MKLDNNInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) {
void InferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) {
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "SetBlob");
if (name.empty()) {
IE_THROW(NotFound) << "Failed to set blob with empty name";
@ -751,7 +754,7 @@ void ov::intel_cpu::MKLDNNInferRequest::SetBlob(const std::string& name, const I
}
}
InferenceEngine::Blob::Ptr ov::intel_cpu::MKLDNNInferRequest::GetBlob(const std::string& name) {
InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) {
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "GetBlob");
if (!graph || !graph->IsReady())
@ -790,7 +793,7 @@ InferenceEngine::Blob::Ptr ov::intel_cpu::MKLDNNInferRequest::GetBlob(const std:
externalPtr[name] = _inputs[name]->buffer();
}
} else {
IE_THROW() << "Blob with name: " << name << " exists in MKLDNN graph, but absents in network inputs";
IE_THROW() << "Blob with name: " << name << " exists in CPU plugin graph, but absents in network inputs";
}
}
data = _inputs[name];
@ -839,7 +842,7 @@ InferenceEngine::Blob::Ptr ov::intel_cpu::MKLDNNInferRequest::GetBlob(const std:
externalPtr[name] = data->buffer();
}
} else {
IE_THROW() << "Blob with name: " << name << " exists in MKLDNN graph, but absents in network outputs";
IE_THROW() << "Blob with name: " << name << " exists in CPU plugin graph, but absents in network outputs";
}
}
data = _outputs[name];
@ -852,7 +855,7 @@ InferenceEngine::Blob::Ptr ov::intel_cpu::MKLDNNInferRequest::GetBlob(const std:
return data;
}
void ov::intel_cpu::MKLDNNInferRequest::PushInputData() {
void InferRequest::PushInputData() {
for (auto input : _inputs) {
auto inputName = input.first;
if (!modelInputsMap[inputName]) {
@ -862,3 +865,6 @@ void ov::intel_cpu::MKLDNNInferRequest::PushInputData() {
pushInput(inputName, input.second, normToInputSupportedPrec(input));
}
}
} // namespace intel_cpu
} // namespace ov
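The MemoryInput/VariableState plumbing above is what an application reaches through the query-state API. A short usage sketch follows; it assumes the loaded network is stateful (contains ReadValue/Assign operations), and the helper name is illustrative.

#include <inference_engine.hpp>
#include <iostream>

void resetAllStates(InferenceEngine::ExecutableNetwork& execNetwork) {
    auto request = execNetwork.CreateInferRequest();
    // Each entry is backed by a VariableState object created in CreateInferRequest() above.
    for (auto&& state : request.QueryState()) {
        std::cout << "state: " << state.GetName() << std::endl;
        state.Reset();   // zero-fills the stored blob, like VariableState::Reset() in this commit
    }
    request.Infer();
}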

View File

@ -13,12 +13,12 @@
namespace ov {
namespace intel_cpu {
class MKLDNNExecNetwork;
class MKLDNNAsyncInferRequest;
class ExecNetwork;
class AsyncInferRequest;
class MKLDNNInferRequestBase : public InferenceEngine::IInferRequestInternal {
class InferRequestBase : public InferenceEngine::IInferRequestInternal {
public:
virtual ~MKLDNNInferRequestBase();
virtual ~InferRequestBase();
void InferImpl() override;
@ -30,7 +30,7 @@ public:
* @brief Sets the pointer to asynchronous inference request that holds this request
* @param[in] asyncRequest Pointer to asynchronous inference request
*/
void SetAsyncRequest(MKLDNNAsyncInferRequest* asyncRequest);
void SetAsyncRequest(AsyncInferRequest* asyncRequest);
/**
* @brief If `_asyncRequest` is initialized throw exception with `InferenceEngine::INFER_CANCELLED` status if inference request is canceled
@ -38,14 +38,14 @@ public:
void ThrowIfCanceled() const;
protected:
MKLDNNInferRequestBase(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs,
std::shared_ptr<MKLDNNExecNetwork> execNetwork_)
InferRequestBase(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs,
std::shared_ptr<ExecNetwork> execNetwork_)
: IInferRequestInternal(networkInputs, networkOutputs), execNetwork(execNetwork_) {}
MKLDNNInferRequestBase(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
std::shared_ptr<MKLDNNExecNetwork> execNetwork_)
InferRequestBase(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
std::shared_ptr<ExecNetwork> execNetwork_)
: IInferRequestInternal(inputs, outputs), execNetwork(execNetwork_) {}
void CreateInferRequest();
@ -55,7 +55,7 @@ protected:
virtual void initBlobs() = 0;
virtual void PushInputData() = 0;
MKLDNNGraph* graph = nullptr;
Graph* graph = nullptr;
std::unordered_map<std::string, void*> externalPtr;
private:
@ -64,34 +64,36 @@ private:
void redefineMemoryForInputNodes();
void changeDefaultPtr();
std::shared_ptr<MKLDNNExecNetwork> execNetwork;
std::shared_ptr<ExecNetwork> execNetwork;
openvino::itt::handle_t profilingTask;
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> memoryStates;
MKLDNNAsyncInferRequest* _asyncRequest = nullptr;
AsyncInferRequest* _asyncRequest = nullptr;
};
class MKLDNNLegacyInferRequest : public MKLDNNInferRequestBase {
class LegacyInferRequest : public InferRequestBase {
public:
MKLDNNLegacyInferRequest(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs,
std::shared_ptr<MKLDNNExecNetwork> execNetwork);
LegacyInferRequest(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs,
std::shared_ptr<ExecNetwork> execNetwork);
void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) override;
InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
private:
void PushInputData() override;
void initBlobs() override;
void SetBatch(int batch = -1) override;
};
class MKLDNNInferRequest : public MKLDNNInferRequestBase {
class InferRequest : public InferRequestBase {
public:
MKLDNNInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
std::shared_ptr<MKLDNNExecNetwork> execNetwork);
InferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
std::shared_ptr<ExecNetwork> execNetwork);
void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) override;
InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
private:
void PushInputData() override;
void initBlobs() override;

View File

@ -5,7 +5,8 @@
#include "blocked_memory_desc.h"
#include "utils/general_utils.h"
using namespace ov::intel_cpu;
namespace ov {
namespace intel_cpu {
bool BlockedMemoryDesc::isCompatibleInternal(const BlockedMemoryDesc &rhs, CmpMask cmpMask) const {
if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision())
@ -66,3 +67,6 @@ std::string BlockedMemoryDesc::serializeFormat() const {
return result.str();
}
} // namespace intel_cpu
} // namespace ov

View File

@ -6,7 +6,8 @@
#include <cpu_memory.h>
#include "dnnl_blocked_memory_desc.h"
using namespace ov::intel_cpu;
namespace ov {
namespace intel_cpu {
static VectorDims makeRange(size_t size) {
VectorDims retVec(size, 0);
@ -308,3 +309,6 @@ MemoryDescPtr CpuBlockedMemoryDesc::cloneWithNewPrecision(const InferenceEngine:
newDesc->setPrecision(prec);
return newDesc;
}
} // namespace intel_cpu
} // namespace ov

View File

@ -22,6 +22,9 @@
namespace ov {
namespace intel_cpu {
namespace node {
class Split;
} // namespace node
class MemoryDesc;
@ -173,7 +176,7 @@ protected:
friend class BlobDumper;
// WA: optimizedNspc2Ncsp uses getElementOffset inside its implementation
friend class MKLDNNSplitNode;
friend class node::Split;
};
} // namespace intel_cpu

View File

@ -15,7 +15,6 @@
#include <dnnl_types.h>
using namespace mkldnn;
using namespace ov::intel_cpu;
using namespace InferenceEngine;
namespace ov {
@ -90,7 +89,7 @@ BlockedMemoryDescPtr MemoryDescUtils::convertToBlockedMemoryDesc(const MemoryDes
}
}
InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const MKLDNNMemory &mem) {
InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const Memory &mem) {
// TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor
auto& memDesc = mem.getDesc();
InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc);

View File

@ -18,7 +18,7 @@ class DnnlMemoryDesc;
class BlockedMemoryDesc;
class DnnlBlockedMemoryDesc;
class CpuBlockedMemoryDesc;
class MKLDNNMemory;
class Memory;
class MemoryDescUtils {
public:
@ -60,11 +60,11 @@ public:
static std::shared_ptr<BlockedMemoryDesc> convertToBlockedMemoryDesc(const std::shared_ptr<MemoryDesc> &desc);
/**
* @brief Creates InferenceEngine::Blob from MKLDNNMemory with the memory reuse
* @param desc MKLDNNMemory from which will be created InferenceEngine::Blob
* @brief Creates InferenceEngine::Blob from Memory with memory reuse
* @param mem Memory from which the InferenceEngine::Blob will be created
* @return pointer to InferenceEngine::Blob
*/
static InferenceEngine::Blob::Ptr interpretAsBlob(const MKLDNNMemory& mem);
static InferenceEngine::Blob::Ptr interpretAsBlob(const Memory& mem);
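A minimal in-tree sketch of the helper declared above; the wrapper function is hypothetical and assumes an already allocated ov::intel_cpu::Memory object coming from the surrounding plugin code.

// Sketch only (plugin-internal): expose an already allocated Memory object as an IE Blob.
InferenceEngine::Blob::Ptr asBlob(const ov::intel_cpu::Memory& mem) {
    // The returned Blob reuses mem's buffer without copying, so it must not outlive `mem`.
    return ov::intel_cpu::MemoryDescUtils::interpretAsBlob(mem);
}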
/**
* @brief Converts MemoryDesc to InferenceEngine::TensorDesc

View File

@ -6,9 +6,11 @@
#include <dnnl_types.h>
#include <common/memory_desc_wrapper.hpp>
using namespace ov::intel_cpu;
using namespace InferenceEngine;
namespace ov {
namespace intel_cpu {
DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const VectorDims& strides)
: MemoryDesc(shape, DnnlBlocked) {
const auto ndims = shape.getRank();
@ -18,9 +20,9 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
if (shape.hasZeroDims() && std::any_of(strides.begin(), strides.end(), [](size_t stride) { return stride != 0; } )) {
IE_THROW() << "Can't create DnnlBlockedMemoryDesc with zero dim, but with non zero strides";
}
desc = {MKLDNNExtensionUtils::convertToDnnlDims(dims),
MKLDNNExtensionUtils::IEPrecisionToDataType(prc),
MKLDNNExtensionUtils::convertToDnnlDims(strides)};
desc = {DnnlExtensionUtils::convertToDnnlDims(dims),
DnnlExtensionUtils::IEPrecisionToDataType(prc),
DnnlExtensionUtils::convertToDnnlDims(strides)};
} else {
mkldnn::memory::dims plain_strides;
if (shape.hasZeroDims()) {
@ -34,7 +36,7 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
}
}
desc = {MKLDNNExtensionUtils::convertToDnnlDims(dims), MKLDNNExtensionUtils::IEPrecisionToDataType(prc), plain_strides};
desc = {DnnlExtensionUtils::convertToDnnlDims(dims), DnnlExtensionUtils::IEPrecisionToDataType(prc), plain_strides};
}
order.resize(ndims);
@ -69,13 +71,13 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
// scalar case
if (shape.getRank() == 0) {
desc.data.format_kind = dnnl_blocked;
desc.data.data_type = memory::convert_to_c(MKLDNNExtensionUtils::IEPrecisionToDataType(prc));
desc.data.data_type = memory::convert_to_c(DnnlExtensionUtils::IEPrecisionToDataType(prc));
desc.data.ndims = 1;
desc.data.dims[0] = 1;
desc.data.padded_dims[0] = 1;
desc.data.format_desc.blocking.strides[0] = 1;
desc.data.padded_offsets[0] = 0;
desc.data.offset0 = MKLDNNExtensionUtils::convertToDnnlDim(offsetPadding);
desc.data.offset0 = DnnlExtensionUtils::convertToDnnlDim(offsetPadding);
return;
}
@ -99,7 +101,7 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
IE_THROW() << "DnnlBlockedMemoryDesc doesn't support undefined or zero blockedDims.";
}
auto dims = MKLDNNExtensionUtils::convertToDnnlDims(shape.getDims());
auto dims = DnnlExtensionUtils::convertToDnnlDims(shape.getDims());
size_t outer_ndims = dims.size();
@ -141,9 +143,9 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
// Fill general memory desc fields
desc.data.format_kind = dnnl_blocked;
desc.data.extra.flags = 0;
desc.data.data_type = memory::convert_to_c(MKLDNNExtensionUtils::IEPrecisionToDataType(prc));
desc.data.data_type = memory::convert_to_c(DnnlExtensionUtils::IEPrecisionToDataType(prc));
desc.data.ndims = dims.size();
desc.data.offset0 = MKLDNNExtensionUtils::convertToDnnlDim(offsetPadding);
desc.data.offset0 = DnnlExtensionUtils::convertToDnnlDim(offsetPadding);
std::copy(dims.begin(), dims.end(), desc.data.dims);
if (!offsetPaddingToData.empty()) {
@ -152,14 +154,14 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
if (!inner_pad_offsets_is_zero)
IE_THROW() << "Can not construct DnnlBlockedMemoryDesc, inner pad offsets is not zero: " << vec2str(offsetPaddingToData);
auto dnnlPaddedOffsets = MKLDNNExtensionUtils::convertToDnnlDims(offsetPaddingToData);
auto dnnlPaddedOffsets = DnnlExtensionUtils::convertToDnnlDims(offsetPaddingToData);
std::copy(dnnlPaddedOffsets.begin(), dnnlPaddedOffsets.begin() + outer_ndims, desc.data.padded_offsets);
} else {
std::fill(std::begin(desc.data.padded_offsets), std::begin(desc.data.padded_offsets) + outer_ndims, 0);
}
std::fill(desc.data.padded_dims, desc.data.padded_dims + outer_ndims, 1);
auto dnnlBlkDims = MKLDNNExtensionUtils::convertToDnnlDims(blockedDims);
auto dnnlBlkDims = DnnlExtensionUtils::convertToDnnlDims(blockedDims);
for (size_t i = 0; i < order.size(); i++) {
auto idx = order[i];
@ -184,7 +186,7 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
this->recomputeDefaultStrides();
} else {
for (size_t i = 0; i < outer_ndims; i++) {
auto dnnlStrides = MKLDNNExtensionUtils::convertToDnnlDims(strides);
auto dnnlStrides = DnnlExtensionUtils::convertToDnnlDims(strides);
dnn_blk_desc.strides[order[i]] = dnnlStrides[i];
}
initStrides();
@ -201,7 +203,7 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const Shape& shape, mkldnn::memory:
if (format == memory::format_tag::x && shape.getRank() == 0) {
desc = mkldnn::memory::desc(mkldnn::memory::dims(1, 1), dataType, format);
} else {
desc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(dims), dataType, format);
desc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(dims), dataType, format);
}
VectorDims perm;
@ -313,7 +315,7 @@ static VectorDims extractOrder(const mkldnn::memory::desc& desc) {
}
DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc) :
MemoryDesc(MKLDNNExtensionUtils::convertToVectorDims(mdesc.dims()), DnnlBlocked) {
MemoryDesc(DnnlExtensionUtils::convertToVectorDims(mdesc.dims()), DnnlBlocked) {
desc = mdesc;
if (desc.data.format_kind == dnnl::impl::format_kind::any)
IE_THROW(Unexpected) << "Memory format any is prohibited!";
@ -400,7 +402,7 @@ bool DnnlBlockedMemoryDesc::isTailCFormat() const {
static mkldnn::memory::desc cloneDescWithNewDims(const mkldnn::memory::desc& desc, const VectorDims& dims, const VectorDims& order) {
using namespace dnnl::impl::utils;
auto mklDims = MKLDNNExtensionUtils::convertToDnnlDims(dims);
auto mklDims = DnnlExtensionUtils::convertToDnnlDims(dims);
const auto offsetPadding = desc.data.offset0;
mkldnn::memory::desc newMklDesc = desc;
array_copy(newMklDesc.data.dims, mklDims.data(), mklDims.size());
@ -562,7 +564,7 @@ void DnnlBlockedMemoryDesc::initBlockDims() {
}
// blocked dims
// [dims via new_outer_order with auto pad] U [inner_blk_dims]
VectorDims outer_block_dims = MKLDNNExtensionUtils::convertToVectorDims(dims);
VectorDims outer_block_dims = DnnlExtensionUtils::convertToVectorDims(dims);
for (size_t i = 0; i < outer_block_dims.size(); i++) {
if (outer_block_dims[i] != Shape::UNDEFINED_DIM) {
outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]);
@ -650,7 +652,7 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc,
if (!descWrapped.is_blocking_desc())
IE_THROW(Unexpected) << "Can't create DnnlBlockedMemoryDesc from not blocking desc";
if (!shape.isCompatible(MKLDNNExtensionUtils::convertToVectorDims(mdesc.dims()))) {
if (!shape.isCompatible(DnnlExtensionUtils::convertToVectorDims(mdesc.dims()))) {
IE_THROW(ParameterMismatch) << "Can not create DnnlBlockedMemoryDesc. memory::desc dims: " << vec2str(mdesc.dims()) <<
" are incompatible with provided shape: " << shape.toString() << ".";
}
@ -670,3 +672,6 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc,
std::string DnnlBlockedMemoryDesc::serializeFormat() const {
return BlockedMemoryDesc::serializeFormat();
}
} // namespace intel_cpu
} // namespace ov

View File

@ -6,7 +6,7 @@
#include "blocked_memory_desc.h"
#include <cpu_memory.h>
#include <extension_utils.h>
#include <dnnl_extension_utils.h>
namespace ov {
namespace intel_cpu {
@ -40,7 +40,7 @@ public:
}
size_t getOffsetPadding() const override {
return MKLDNNExtensionUtils::convertToDim(desc.data.offset0);
return DnnlExtensionUtils::convertToDim(desc.data.offset0);
}
const VectorDims& getStrides() const override {
@ -93,8 +93,8 @@ private:
void recomputeDefaultStrides();
friend DnnlMemoryDescPtr MKLDNNExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc);
friend std::shared_ptr<DnnlBlockedMemoryDesc> MKLDNNExtensionUtils::makeUndefinedDesc(const mkldnn::memory::desc &desc, const Shape& shape);
friend DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc);
friend std::shared_ptr<DnnlBlockedMemoryDesc> DnnlExtensionUtils::makeUndefinedDesc(const mkldnn::memory::desc &desc, const Shape& shape);
friend class MemoryDescUtils;
};

View File

@ -3,7 +3,7 @@
//
#include "dnnl_memory_desc.h"
#include <extension_utils.h>
#include <dnnl_extension_utils.h>
#include <common/memory_desc_wrapper.hpp>
#include "mkldnn/ie_mkldnn.h"
@ -11,7 +11,7 @@ namespace ov {
namespace intel_cpu {
DnnlMemoryDesc::DnnlMemoryDesc(const mkldnn::memory::desc& desc) :
MemoryDesc(Shape(MKLDNNExtensionUtils::convertToVectorDims(desc.dims())), Mkldnn), desc(desc) {
MemoryDesc(Shape(DnnlExtensionUtils::convertToVectorDims(desc.dims())), Mkldnn), desc(desc) {
if (desc.data.format_kind == dnnl::impl::format_kind::any)
IE_THROW(Unexpected) << "Memory format any is prohibited!";
}
@ -21,7 +21,7 @@ bool DnnlMemoryDesc::canComputeMemSizeZeroDims() const {
}
size_t DnnlMemoryDesc::getCurrentMemSizeImp() const {
return MKLDNNExtensionUtils::getMemSizeForDnnlDesc(desc);
return DnnlExtensionUtils::getMemSizeForDnnlDesc(desc);
}
size_t DnnlMemoryDesc::getElementOffset(size_t elemNumber) const {
@ -62,7 +62,7 @@ bool DnnlMemoryDesc::isDefinedImp() const {
}
InferenceEngine::Precision DnnlMemoryDesc::getPrecision() const {
return MKLDNNExtensionUtils::DataTypeToIEPrecision(desc.data_type());
return DnnlExtensionUtils::DataTypeToIEPrecision(desc.data_type());
}
MemoryDescPtr DnnlMemoryDesc::cloneWithNewDimsImp(const VectorDims &dims) const {

View File

@ -5,7 +5,7 @@
#pragma once
#include "cpu_blocked_memory_desc.h"
#include <extension_utils.h>
#include <dnnl_extension_utils.h>
namespace ov {
namespace intel_cpu {
@ -56,7 +56,7 @@ protected:
mkldnn::memory::desc desc;
void setPrecision(InferenceEngine::Precision prc) override {
desc.data.data_type = static_cast<dnnl_data_type_t>(MKLDNNExtensionUtils::IEPrecisionToDataType(prc));
desc.data.data_type = static_cast<dnnl_data_type_t>(DnnlExtensionUtils::IEPrecisionToDataType(prc));
}
private:
@ -69,7 +69,7 @@ private:
bool isDefinedImp() const override;
MemoryDescPtr cloneWithNewDimsImp(const VectorDims& dims) const override;
friend DnnlMemoryDescPtr MKLDNNExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc);
friend DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc);
};
} // namespace intel_cpu

View File

@ -3,7 +3,7 @@
//
#include "memory_state.h"
#include "extension_utils.h"
#include "dnnl_extension_utils.h"
#include "blob_factory.hpp"
using namespace InferenceEngine;
@ -11,7 +11,7 @@ using namespace InferenceEngine;
namespace ov {
namespace intel_cpu {
void MKLDNNVariableState::Reset() {
void VariableState::Reset() {
std::memset(state->buffer(), 0, state->byteSize());
}

View File

@ -15,10 +15,10 @@
namespace ov {
namespace intel_cpu {
class MKLDNNVariableState : public InferenceEngine::IVariableStateInternal {
class VariableState : public InferenceEngine::IVariableStateInternal {
public:
MKLDNNVariableState(std::string name, MKLDNNMemoryPtr storage) :
InferenceEngine::IVariableStateInternal{name} {
VariableState(std::string name, MemoryPtr storage)
: InferenceEngine::IVariableStateInternal{name} {
state = make_blob_with_precision(MemoryDescUtils::convertToTensorDesc(storage->getDesc()));
state->allocate();
cpu_memcpy(state->buffer(), storage->GetData(), storage->GetSize());

View File

@ -4,9 +4,10 @@
#include "iml_type_mapper.h"
using namespace ov::intel_cpu;
namespace ov {
namespace intel_cpu {
impl_desc_type ov::intel_cpu::parse_impl_name(std::string impl_desc_name) {
impl_desc_type parse_impl_name(std::string impl_desc_name) {
impl_desc_type res = impl_desc_type::unknown;
#define REPLACE_WORD(_wrd, _sub) auto pos = impl_desc_name.find(#_wrd); \
@ -55,7 +56,7 @@ impl_desc_type ov::intel_cpu::parse_impl_name(std::string impl_desc_name) {
return res;
}
const char* ov::intel_cpu::impl_type_to_string(impl_desc_type type) {
const char* impl_type_to_string(impl_desc_type type) {
#define CASE(_type) do { \
if (type == _type) return #_type; \
} while (0)
@ -111,3 +112,6 @@ const char* ov::intel_cpu::impl_type_to_string(impl_desc_type type) {
#undef CASE
return "unknown";
}
} // namespace intel_cpu
} // namespace ov
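As a small usage illustration of the two helpers above: "jit_avx2" is just an example implementation name, and the full set of recognized tokens is the one listed in parse_impl_name(); the include path matches the one used elsewhere in this commit.

#include <iostream>
#include "mkldnn/iml_type_mapper.h"

int main() {
    using namespace ov::intel_cpu;
    const impl_desc_type type = parse_impl_name("jit_avx2");
    std::cout << impl_type_to_string(type) << std::endl;   // expected to print "jit_avx2"
    return 0;
}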

View File

@ -97,4 +97,3 @@ impl_desc_type parse_impl_name(std::string impl_desc_name);
} // namespace intel_cpu
} // namespace ov

View File

@ -46,7 +46,7 @@
#include "nodes/shuffle_channels.h"
#include "nodes/reference.h"
#include "nodes/fake_quantize.h"
#include "extension_utils.h"
#include "dnnl_extension_utils.h"
#include "mkldnn/iml_type_mapper.h"
#include "nodes/common/cpu_memcpy.h"
@ -63,17 +63,20 @@
#include "memory_desc/dnnl_blocked_memory_desc.h"
using namespace mkldnn;
using namespace ov::intel_cpu;
using namespace openvino;
using namespace ov::intel_cpu::node;
using namespace InferenceEngine::details;
MKLDNNNode::NodesFactory & MKLDNNNode::factory() {
namespace ov {
namespace intel_cpu {
Node::NodesFactory & Node::factory() {
static NodesFactory factoryInstance;
return factoryInstance;
}
MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache)
Node::Node(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &w_cache)
: selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown),
weightCache(w_cache), engine(eng), name(op->get_friendly_name()), typeStr(op->get_type_name()),
type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) {
@ -138,7 +141,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
}
}
std::string inputMemoryFormats = ngraph::getMKLDNNInputMemoryFormats(op);
std::string inputMemoryFormats = getInputMemoryFormats(op);
if (!inputMemoryFormats.empty()) {
std::istringstream stream(inputMemoryFormats);
std::string str;
@ -149,7 +152,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
}
}
std::string outputMemoryFormats = ngraph::getMKLDNNOutputMemoryFormats(op);
std::string outputMemoryFormats = getOutputMemoryFormats(op);
if (!outputMemoryFormats.empty()) {
std::istringstream stream(outputMemoryFormats);
std::string str;
@ -166,14 +169,14 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
}
}
MKLDNNNode::MKLDNNNode(const std::string& type, const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache)
Node::Node(const std::string& type, const std::string& name, const mkldnn::engine& eng, WeightsSharing::Ptr &w_cache)
: selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown),
weightCache(w_cache), engine(eng), fusingPort(-1), name(name), typeStr(type),
type(TypeFromName(type)), profiling(name) {
// TODO [NM]: What about filling inDims and outDims?
}
void MKLDNNNode::addEdge(const MKLDNNEdgeWeakPtr& edge) {
void Node::addEdge(const EdgeWeakPtr& edge) {
auto edgePtr = edge.lock();
if (!edgePtr)
return;
@ -186,7 +189,7 @@ void MKLDNNNode::addEdge(const MKLDNNEdgeWeakPtr& edge) {
childPtr->parentEdges.push_back(edge);
}
void MKLDNNNode::removeEdge(const MKLDNNEdgeWeakPtr& edge) {
void Node::removeEdge(const EdgeWeakPtr& edge) {
auto edgePtr = edge.lock();
if (!edgePtr)
return;
@ -210,7 +213,7 @@ void MKLDNNNode::removeEdge(const MKLDNNEdgeWeakPtr& edge) {
}
}
void MKLDNNNode::remove() {
void Node::remove() {
auto parent_edges = parentEdges;
for (const auto &parentEdge : parent_edges) {
removeEdge(parentEdge);
@ -221,7 +224,7 @@ void MKLDNNNode::remove() {
}
}
bool MKLDNNNode::isEdgesEmpty(const std::vector<MKLDNNEdgeWeakPtr>& edges) const {
bool Node::isEdgesEmpty(const std::vector<EdgeWeakPtr>& edges) const {
for (auto &edge : edges) {
if (edge.lock())
return false;
@ -229,7 +232,7 @@ bool MKLDNNNode::isEdgesEmpty(const std::vector<MKLDNNEdgeWeakPtr>& edges) const
return true;
}
void MKLDNNNode::createPrimitive() {
void Node::createPrimitive() {
if (inputShapesDefined() && isExecutable()) {
if (needPrepareParams()) {
prepareParams();
@ -238,11 +241,11 @@ void MKLDNNNode::createPrimitive() {
}
}
void MKLDNNNode::selectOptimalPrimitiveDescriptor() {
void Node::selectOptimalPrimitiveDescriptor() {
selectPreferPrimitiveDescriptor(getPrimitivesPriority(), false);
}
void MKLDNNNode::selectPreferPrimitiveDescriptor(const std::vector<impl_desc_type>& priority, bool ignoreConstInputs) {
void Node::selectPreferPrimitiveDescriptor(const std::vector<impl_desc_type>& priority, bool ignoreConstInputs) {
for (auto& type : priority) {
int selectedPrimitive = -1;
int equalsFormatCount = -1;
@ -295,7 +298,7 @@ void MKLDNNNode::selectPreferPrimitiveDescriptor(const std::vector<impl_desc_typ
selectPrimitiveDescriptorByIndex(0);
}
bool MKLDNNNode::canBeInPlace() const {
bool Node::canBeInPlace() const {
// TODO [DS]: enable inPlace for dynamic shapes
if (isDynamicNode()) {
return false;
@ -306,7 +309,7 @@ bool MKLDNNNode::canBeInPlace() const {
return false;
// TODO: we need to extend this logic to properly handle all possible inplace conflicts
if (getParentEdges().size() == 1 && getParentEdgeAt(0)->getParent()->getType() == Reshape) {
if (getParentEdges().size() == 1 && getParentEdgeAt(0)->getParent()->getType() == Type::Reshape) {
auto reshapeNode = getParentEdgeAt(0)->getParent();
if (reshapeNode->getParentEdgeAt(0)->getParent()->getChildEdges().size() != 1)
return false;
@ -321,37 +324,37 @@ bool MKLDNNNode::canBeInPlace() const {
return true;
}
void MKLDNNNode::resolveInPlaceEdges() {
void Node::resolveInPlaceEdges() {
const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
if (!selected_pd)
IE_THROW() << "Cannot find selected primitive descriptor for node: " << getName();
for (size_t i = 0; i < getParentEdges().size() && i < selected_pd->getConfig().inConfs.size(); i++) {
auto parentEdge = getParentEdgeAt(i);
if (parentEdge->getStatus() != MKLDNNEdge::Status::NotAllocated || selected_pd->getConfig().inConfs[i].inPlace() < 0)
if (parentEdge->getStatus() != Edge::Status::NotAllocated || selected_pd->getConfig().inConfs[i].inPlace() < 0)
continue;
auto memMgr = parentEdge->getMemory().getDnnlMemoryMngr();
parentEdge->getMemoryPtr().reset(new MKLDNNMemory(getEngine()));
parentEdge->getMemoryPtr().reset(new Memory(getEngine()));
parentEdge->getMemoryPtr()->Create(selected_pd->getConfig().inConfs[i].getMemDesc(), memMgr);
parentEdge->changeStatus(MKLDNNEdge::Status::Allocated);
parentEdge->changeStatus(Edge::Status::Allocated);
}
for (size_t i = 0; i < getChildEdges().size() && i < selected_pd->getConfig().outConfs.size(); i++) {
auto childEdge = getChildEdgeAt(i);
if (childEdge->getStatus() != MKLDNNEdge::Status::NotAllocated || selected_pd->getConfig().outConfs[i].inPlace() < 0)
if (childEdge->getStatus() != Edge::Status::NotAllocated || selected_pd->getConfig().outConfs[i].inPlace() < 0)
continue;
auto memMgr = childEdge->getMemory().getDnnlMemoryMngr();
childEdge->getMemoryPtr().reset(new MKLDNNMemory(getEngine()));
childEdge->getMemoryPtr().reset(new Memory(getEngine()));
childEdge->getMemoryPtr()->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMgr);
childEdge->changeStatus(MKLDNNEdge::Status::Allocated);
childEdge->changeStatus(Edge::Status::Allocated);
}
}
MemoryDescPtr MKLDNNNode::getBaseMemDescAtInputPort(size_t portNum) const {
MemoryDescPtr Node::getBaseMemDescAtInputPort(size_t portNum) const {
if (auto primDesc = getSelectedPrimitiveDescriptor()) {
const auto& inConfs = primDesc->getConfig().inConfs;
if (inConfs.size() < portNum) {
@ -362,7 +365,7 @@ MemoryDescPtr MKLDNNNode::getBaseMemDescAtInputPort(size_t portNum) const {
IE_THROW() << "Can't get input memory desc, primitive descriptor is not selected";
}
MemoryDescPtr MKLDNNNode::getBaseMemDescAtOutputPort(size_t portNum) const {
MemoryDescPtr Node::getBaseMemDescAtOutputPort(size_t portNum) const {
if (auto primDesc = getSelectedPrimitiveDescriptor()) {
const auto& outConfs = primDesc->getConfig().outConfs;
if (outConfs.size() < portNum) {
@ -373,7 +376,7 @@ MemoryDescPtr MKLDNNNode::getBaseMemDescAtOutputPort(size_t portNum) const {
IE_THROW() << "Can't get output memory desc, primitive descriptor is not selected";
}
std::string MKLDNNNode::getPrimitiveDescriptorType() {
std::string Node::getPrimitiveDescriptorType() {
auto selectedPrimitiveDesc = getSelectedPrimitiveDescriptor();
impl_desc_type type = impl_desc_type::undef;
@ -442,7 +445,7 @@ std::string MKLDNNNode::getPrimitiveDescriptorType() {
return str_type;
}
const MKLDNNEdgePtr MKLDNNNode::getParentEdgeAt(size_t idx) const {
const EdgePtr Node::getParentEdgeAt(size_t idx) const {
if (idx >= parentEdges.size())
IE_THROW() << "Node " << getName() << " contains less parent edges than " << idx;
auto parentEdgePtr = parentEdges[idx].lock();
@ -451,7 +454,7 @@ const MKLDNNEdgePtr MKLDNNNode::getParentEdgeAt(size_t idx) const {
return parentEdgePtr;
}
const MKLDNNEdgePtr MKLDNNNode::getChildEdgeAt(size_t idx) const {
const EdgePtr Node::getChildEdgeAt(size_t idx) const {
if (idx >= childEdges.size())
IE_THROW() << "Node " << getName() << " contains less child edges than " << idx;
auto childEdgePtr = childEdges[idx].lock();
@ -460,11 +463,11 @@ const MKLDNNEdgePtr MKLDNNNode::getChildEdgeAt(size_t idx) const {
return childEdgePtr;
}
const std::vector<MKLDNNEdgePtr> MKLDNNNode::getParentEdgesAtPort(size_t idx) const {
const std::vector<EdgePtr> Node::getParentEdgesAtPort(size_t idx) const {
if (idx >= inputShapes.size())
IE_THROW() << "Node " << getName() << " contains less input ports than " << idx;
std::vector<MKLDNNEdgePtr> res;
std::vector<EdgePtr> res;
for (auto &edge_w : parentEdges) {
auto edge = edge_w.lock();
if (!edge)
@ -474,11 +477,11 @@ const std::vector<MKLDNNEdgePtr> MKLDNNNode::getParentEdgesAtPort(size_t idx) co
return res;
}
const std::vector<MKLDNNEdgePtr> MKLDNNNode::getChildEdgesAtPort(size_t idx) const {
const std::vector<EdgePtr> Node::getChildEdgesAtPort(size_t idx) const {
if (idx >= outputShapes.size())
IE_THROW() << "Node " << getName() << " contains less output ports than " << idx;
std::vector<MKLDNNEdgePtr> res;
std::vector<EdgePtr> res;
for (auto &edge_w : childEdges) {
auto edge = edge_w.lock();
if (!edge)
@ -489,7 +492,7 @@ const std::vector<MKLDNNEdgePtr> MKLDNNNode::getChildEdgesAtPort(size_t idx) con
}
std::vector<memory::format_tag> MKLDNNNode::getAvailableFormatsForDims(const Shape &dims) const {
std::vector<memory::format_tag> Node::getAvailableFormatsForDims(const Shape &dims) const {
if (dims.getRank() == 0)
return {memory::format_tag::x};
else if (dims.getRank() == 1)
@ -506,13 +509,13 @@ std::vector<memory::format_tag> MKLDNNNode::getAvailableFormatsForDims(const Sha
return {memory::format_tag::any};
}
void MKLDNNNode::execute(mkldnn::stream strm) {
void Node::execute(mkldnn::stream strm) {
if (prim) {
(*prim).execute(strm, primArgs);
}
}
void MKLDNNNode::executeDynamic(mkldnn::stream strm) {
void Node::executeDynamic(mkldnn::stream strm) {
if (needShapeInfer()) {
redefineOutputMemory(shapeInfer());
}
@ -527,7 +530,7 @@ void MKLDNNNode::executeDynamic(mkldnn::stream strm) {
updateLastInputDims();
}
void MKLDNNNode::redefineOutputMemory(const std::vector<VectorDims> &newOutputShapes) {
void Node::redefineOutputMemory(const std::vector<VectorDims> &newOutputShapes) {
if (newOutputShapes.size() != outputShapes.size()) {
IE_THROW() << "Number shapes mismatch with real outputs number for node with name: " << getName();
}
@ -551,7 +554,7 @@ void MKLDNNNode::redefineOutputMemory(const std::vector<VectorDims> &newOutputSh
}
}
void MKLDNNNode::initSupportedPrimitiveDescriptors() {
void Node::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -602,11 +605,11 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNNode::filterSupportedPrimitiveDescriptors() {
void Node::filterSupportedPrimitiveDescriptors() {
// Compare by format tag
auto areCompatible = [](const MemoryDesc& desc, mkldnn::memory::format_tag fmt) -> bool {
auto fmt_tdesc = DnnlBlockedMemoryDesc(desc.getShape(),
MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()),
DnnlExtensionUtils::IEPrecisionToDataType(desc.getPrecision()),
fmt);
return desc.isCompatible(fmt_tdesc);
};
@ -636,7 +639,7 @@ void MKLDNNNode::filterSupportedPrimitiveDescriptors() {
}
}
void MKLDNNNode::initDescriptor(const NodeConfig& config) {
void Node::initDescriptor(const NodeConfig& config) {
if (!getSelectedPrimitiveDescriptor()) {
return;
}
@ -717,7 +720,7 @@ void MKLDNNNode::initDescriptor(const NodeConfig& config) {
selectedPD->setConfig(rightConfig);
}
void MKLDNNNode::prepareMemory(mkldnn::primitive_desc_iterator& itpd) {
void Node::prepareMemory(mkldnn::primitive_desc_iterator& itpd) {
for (size_t i = 0; i < getChildEdges().size(); i++) {
auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->isAllocated())
@ -742,17 +745,17 @@ void MKLDNNNode::prepareMemory(mkldnn::primitive_desc_iterator& itpd) {
// TODO [DS]: internal blobs should be removed or rewritten using Memory object
auto newDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(internalBlob->getTensorDesc());
MKLDNNMemory memory{ engine };
Memory memory{ engine };
memory.Create(newDesc, internalBlob->buffer());
MKLDNNMemoryPtr _ptr = MKLDNNMemoryPtr(new MKLDNNMemory(engine));
MemoryPtr _ptr = MemoryPtr(new Memory(engine));
_ptr->Create(*intDescs[i]);
_ptr->SetData(memory);
return _ptr;
};
MKLDNNMemoryPtr ptr;
MemoryPtr ptr;
if (weightCache != nullptr) {
const uint64_t data_hash = weightCache->GetHashFunc().hash(
internalBlob->buffer(), internalBlob->byteSize());
@ -770,7 +773,7 @@ void MKLDNNNode::prepareMemory(mkldnn::primitive_desc_iterator& itpd) {
}
}
bool MKLDNNNode::isInPlace() {
bool Node::isInPlace() {
if (inplace == InPlaceType::Unknown) {
auto selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
@ -795,9 +798,9 @@ bool MKLDNNNode::isInPlace() {
return inplace == InPlaceType::InPlace;
}
bool MKLDNNNode::isConstant() {
bool Node::isConstant() {
if (constant == ConstantType::Unknown) {
std::vector<MKLDNNNodePtr> checkNodes;
std::vector<NodePtr> checkNodes;
for (size_t i = 0; i < getChildEdges().size(); i++) {
checkNodes.push_back(getChildEdgeAt(i)->getChild());
}
@ -822,7 +825,7 @@ bool MKLDNNNode::isConstant() {
return constant == ConstantType::Const;
}
MKLDNNNode::ConstantType MKLDNNNode::checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes) {
Node::ConstantType Node::checkConstant(LOOK look, std::vector<NodePtr>& checkNodes) {
if (constant == ConstantType::Unknown) {
if (look == LOOK_DOWN) {
for (size_t i = 0; i < getChildEdges().size(); i++) {
@ -839,7 +842,7 @@ MKLDNNNode::ConstantType MKLDNNNode::checkConstant(LOOK look, std::vector<MKLDNN
return constant;
}
void MKLDNNNode::addOriginalLayer(const std::string& layerName) {
void Node::addOriginalLayer(const std::string& layerName) {
if (layerName.empty()) return;
if (originalLayers.empty()) {
originalLayers = layerName;
@ -848,7 +851,7 @@ void MKLDNNNode::addOriginalLayer(const std::string& layerName) {
}
}
void MKLDNNNode::cleanup() {
void Node::cleanup() {
internalBlobs.clear();
for (auto it : fusedWith) {
@ -860,7 +863,7 @@ void MKLDNNNode::cleanup() {
}
}
const std::vector<impl_desc_type>& MKLDNNNode::getPrimitivesPriority() {
const std::vector<impl_desc_type>& Node::getPrimitivesPriority() {
std::vector<impl_desc_type> priorities = {
impl_desc_type::unknown,
impl_desc_type::brgconv_avx512_amx_1x1,
@ -903,7 +906,7 @@ const std::vector<impl_desc_type>& MKLDNNNode::getPrimitivesPriority() {
return implPriorities;
}
PortDescBasePtr MKLDNNNode::getConsistentInputDesc(const NodeConfig &config, size_t idx) const {
PortDescBasePtr Node::getConsistentInputDesc(const NodeConfig &config, size_t idx) const {
int num = getParentEdgeAt(idx)->getInputNum();
auto *selectedPD = getParentEdgeAt(idx)->getParent()->getSelectedPrimitiveDescriptor();
if (!selectedPD)
@ -937,7 +940,7 @@ PortDescBasePtr MKLDNNNode::getConsistentInputDesc(const NodeConfig &config, siz
return config.inConfs[idx].getPortDesc();
}
PortDescBasePtr MKLDNNNode::getConsistentOutputDesc(const NodeConfig &config, size_t idx) const {
PortDescBasePtr Node::getConsistentOutputDesc(const NodeConfig &config, size_t idx) const {
int num = getChildEdgeAt(idx)->getOutputNum();
auto *selectedPD = getChildEdgeAt(idx)->getChild()->getSelectedPrimitiveDescriptor();
if (!selectedPD)
@ -971,7 +974,7 @@ PortDescBasePtr MKLDNNNode::getConsistentOutputDesc(const NodeConfig &config, si
return config.outConfs[idx].getPortDesc();
}
void MKLDNNNode::initOptimalPrimitiveDescriptor() {
void Node::initOptimalPrimitiveDescriptor() {
auto selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
IE_THROW() << "Preferable primitive descriptor is not set.";
@ -996,12 +999,12 @@ void MKLDNNNode::initOptimalPrimitiveDescriptor() {
config.outConfs[i].setMemDesc(outPortDesc->getMemDesc());
}
}
if (getType() != RNNSeq && getType() != RNNCell) {
if (getType() != Type::RNNSeq && getType() != Type::RNNCell) {
initDescriptor(config);
}
}
bool MKLDNNNode::isConfigDefined(const NodeConfig &config) const {
bool Node::isConfigDefined(const NodeConfig &config) const {
for (const auto& configs : {config.inConfs, config.outConfs}) {
for (const auto &dc : configs) {
if (!dc.getMemDesc()->isDefined())
@ -1011,26 +1014,26 @@ bool MKLDNNNode::isConfigDefined(const NodeConfig &config) const {
return true;
}
MemoryDescPtr MKLDNNNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
MemoryDescPtr Node::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
if (getInputShapeAtPort(idx).isDynamic()) {
return MKLDNNExtensionUtils::makeUndefinedDesc(primitive_desc_it.src_desc(idx), getInputShapeAtPort(idx));
return DnnlExtensionUtils::makeUndefinedDesc(primitive_desc_it.src_desc(idx), getInputShapeAtPort(idx));
}
return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.src_desc(idx));
return DnnlExtensionUtils::makeDescriptor(primitive_desc_it.src_desc(idx));
}
MemoryDescPtr MKLDNNNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
MemoryDescPtr Node::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
if (getOutputShapeAtPort(idx).isDynamic()) {
return MKLDNNExtensionUtils::makeUndefinedDesc(primitive_desc_it.dst_desc(idx), getOutputShapeAtPort(idx));
return DnnlExtensionUtils::makeUndefinedDesc(primitive_desc_it.dst_desc(idx), getOutputShapeAtPort(idx));
}
return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.dst_desc(idx));
return DnnlExtensionUtils::makeDescriptor(primitive_desc_it.dst_desc(idx));
}
int MKLDNNNode::batchToProcess() const {
int Node::batchToProcess() const {
return dynBatchLim == 0 ? getMaxBatch() : std::min<int>(getMaxBatch(), dynBatchLim);
}
// TODO [DS]: how should we process this for dynamic shapes?
size_t MKLDNNNode::getMaxBatch() const {
size_t Node::getMaxBatch() const {
// FIXME: batch != 0 dims number
if (!inputShapes.empty()) {
if (inputShapes[0].getRank())
@ -1047,7 +1050,7 @@ size_t MKLDNNNode::getMaxBatch() const {
return 0;
}
void MKLDNNNode::setDynamicBatchLim(int lim) {
void Node::setDynamicBatchLim(int lim) {
dynBatchLim = lim;
auto setDynamicBatch = [this](int argType, int newBatch) {
@ -1071,9 +1074,9 @@ void MKLDNNNode::setDynamicBatchLim(int lim) {
}
}
void MKLDNNNode::appendPostOpArgs(const mkldnn::primitive_attr& attr,
void Node::appendPostOpArgs(const mkldnn::primitive_attr& attr,
std::unordered_map<int, mkldnn::memory>& primArgs,
const std::vector<MKLDNNMemoryPtr>& postOpsArgs) {
const std::vector<MemoryPtr>& postOpsArgs) {
constexpr size_t maxPrimArgsCapacity = 32;
auto post_ops = attr.get_post_ops();
int idx = 0;
@ -1099,7 +1102,7 @@ void MKLDNNNode::appendPostOpArgs(const mkldnn::primitive_attr& attr,
}
}
bool MKLDNNNode::isFusedWith(Type fusedNodeType) const {
bool Node::isFusedWith(Type fusedNodeType) const {
for (auto fusedNode : fusedWith) {
if (fusedNode->type == fusedNodeType)
return true;
@ -1108,7 +1111,7 @@ bool MKLDNNNode::isFusedWith(Type fusedNodeType) const {
return false;
}
InferenceEngine::Layout MKLDNNNode::getWeightsLayoutByDims(SizeVector dims, bool isGrouped) {
InferenceEngine::Layout Node::getWeightsLayoutByDims(SizeVector dims, bool isGrouped) {
switch (dims.size()) {
case 0:
return InferenceEngine::Layout::SCALAR;
@ -1129,41 +1132,41 @@ InferenceEngine::Layout MKLDNNNode::getWeightsLayoutByDims(SizeVector dims, bool
}
}
void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector<MKLDNNMemoryPtr>& postOpsMem) {
void Node::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector<MemoryPtr>& postOpsMem) {
IE_THROW() << "Fusing of " << NameFromType(this->getType()) << " operation is not implemented";
}
void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector<const void*>& postOpsMem) {
void Node::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector<const void*>& postOpsMem) {
IE_THROW() << "Fusing of " << NameFromType(this->getType()) << " operation is not implemented";
}
void MKLDNNNode::appendBinPostOps(mkldnn::post_ops& ops, const std::vector<size_t>& binaryShape, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) {
void Node::appendBinPostOps(mkldnn::post_ops& ops, const std::vector<size_t>& binaryShape, std::vector<MemoryPtr>& binaryPostOpsMem) {
IE_THROW() << "Binary fusing of " << NameFromType(this->getType()) << " operation is not implemented";
}
std::vector<InferenceEngine::Precision> MKLDNNNode::getInputPrecisions() const {
std::vector<InferenceEngine::Precision> Node::getInputPrecisions() const {
std::vector<InferenceEngine::Precision> inputPrecisions;
for (size_t i = 0; i < getParentEdges().size(); i++) {
auto parentEdge = getParentEdgeAt(i);
if (parentEdge && parentEdge->getStatus() == MKLDNNEdge::Status::Validated) {
inputPrecisions.emplace_back(MKLDNNExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType())));
if (parentEdge && parentEdge->getStatus() == Edge::Status::Validated) {
inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType())));
}
}
return inputPrecisions;
}
std::vector<InferenceEngine::Precision> MKLDNNNode::getOutputPrecisions() const {
std::vector<InferenceEngine::Precision> Node::getOutputPrecisions() const {
std::vector<InferenceEngine::Precision> outputPrecisions;
for (size_t i = 0; i < getChildEdges().size(); i++) {
auto childEdge = getChildEdgeAt(i);
if (childEdge && childEdge->getStatus() == MKLDNNEdge::Status::Validated) {
outputPrecisions.emplace_back(MKLDNNExtensionUtils::DataTypeToIEPrecision((childEdge->getMemoryPtr()->GetDataType())));
if (childEdge && childEdge->getStatus() == Edge::Status::Validated) {
outputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((childEdge->getMemoryPtr()->GetDataType())));
}
}
return outputPrecisions;
}
InferenceEngine::Precision MKLDNNNode::getRuntimePrecision() const {
InferenceEngine::Precision Node::getRuntimePrecision() const {
// The base implementation considers precision only on the data path and
// assumes it is placed on the 0-th port (which is true for almost all layers)
InferenceEngine::Precision runtimePrecision = Precision::UNSPECIFIED;
@ -1180,8 +1183,8 @@ InferenceEngine::Precision MKLDNNNode::getRuntimePrecision() const {
return runtimePrecision;
}
MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache) {
Node* Node::NodesFactory::create(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
const ExtensionManager::Ptr& extMgr, WeightsSharing::Ptr &w_cache) {
// getExceptionDescWithoutStatus removes redundant information from the exception message. For instance, the NotImplemented
// exception is generated in the form: full_path_to_src_file:line_number [ NOT_IMPLEMENTED ] reason.
// An example for gather node:
@ -1200,17 +1203,17 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr<ngraph::Node>
}
return desc;
};
MKLDNNNode *newNode = nullptr;
Node *newNode = nullptr;
std::string errorMessage;
{
std::unique_ptr<MKLDNNNode> ol(createNodeIfRegistered(intel_cpu, Generic, op, eng, w_cache));
std::unique_ptr<Node> ol(createNodeIfRegistered(intel_cpu, Type::Generic, op, eng, w_cache));
if (ol != nullptr && ol->created(extMgr))
newNode = ol.release();
}
if (newNode == nullptr) {
try {
std::unique_ptr<MKLDNNNode> ol(createNodeIfRegistered(intel_cpu, TypeFromName(op->get_type_name()), op, eng, w_cache));
std::unique_ptr<Node> ol(createNodeIfRegistered(intel_cpu, TypeFromName(op->get_type_name()), op, eng, w_cache));
if (ol != nullptr && ol->created(extMgr))
newNode = ol.release();
} catch (const InferenceEngine::Exception& ex) {
@ -1224,7 +1227,7 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr<ngraph::Node>
if (newNode == nullptr) {
try {
std::unique_ptr<MKLDNNNode> ol(new MKLDNNReferenceNode(op, eng, w_cache, errorMessage));
std::unique_ptr<Node> ol(new Reference(op, eng, w_cache, errorMessage));
if (ol != nullptr && ol->created(extMgr))
newNode = ol.release();
} catch (const InferenceEngine::Exception& ex) {
@ -1241,11 +1244,11 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr<ngraph::Node>
// WA-start : TI node requires all attributes to construct internal subgraph
// including extManager, socket and mkldnn::eng.
if (newNode) {
if (newNode->getType() == TensorIterator) {
if (auto ti = dynamic_cast<MKLDNNTensorIteratorNode*>(newNode))
if (newNode->getType() == Type::TensorIterator) {
if (auto ti = dynamic_cast<TensorIterator*>(newNode))
ti->setExtManager(extMgr);
} else if (newNode->getType() == If) {
if (auto ifNode = dynamic_cast<MKLDNNIfNode*>(newNode))
} else if (newNode->getType() == Type::If) {
if (auto ifNode = dynamic_cast<If*>(newNode))
ifNode->setExtManager(extMgr);
}
}
@ -1262,14 +1265,14 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr<ngraph::Node>
return newNode;
}
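For orientation, the creation fallback implemented above can be summarized as follows (a comment-only sketch, not part of the commit):

    // Node creation order in NodesFactory::create():
    //   1. Type::Generic              - a node supplied by a loaded extension, kept if created(extMgr) succeeds
    //   2. TypeFromName(op type name) - the regular registered implementation for this operation type
    //   3. Reference                  - the ngraph reference fallback, constructed with the collected errorMessage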
bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const {
bool Node::canBePerformedAsScaleShift(const Node *parentNode) const {
IE_ASSERT(parentNode);
size_t fusingPort = 0;
const size_t channelAxis = parentNode->getFusingAxis();
for (size_t i = 0; i < getParentEdges().size(); i++) {
MKLDNNNode *node = getParentEdgesAtPort(i)[0]->getParent().get();
Node *node = getParentEdgesAtPort(i)[0]->getParent().get();
if (node == nullptr) {
IE_THROW() << "Cannot get parent node for " << getName() << " on " << i << " port";
}
@ -1277,7 +1280,7 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
fusingPort = i;
continue;
}
if (node->getType() != Input || !node->isConstant()) {
if (node->getType() != Type::Input || !node->isConstant()) {
return false;
}
}
@ -1296,54 +1299,59 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
};
const auto isConvertablePowerStatic = [&]() {
if (getAlgorithm() == EltwisePowerStatic) {
const auto eltwise = dynamic_cast<const MKLDNNEltwiseNode *>(this);
if (getAlgorithm() == Algorithm::EltwisePowerStatic) {
const auto eltwise = dynamic_cast<const Eltwise *>(this);
if (!eltwise) {
IE_THROW() << "Cannot cast " << getName() << " to MKLDNNEltwiseNode";
IE_THROW() << "Cannot cast " << getName() << " to Eltwise";
}
return eltwise->getAlpha() == 1.0f;
}
return false;
};
return (one_of(getAlgorithm(), EltwiseAdd, EltwiseMultiply, EltwiseSubtract, EltwiseDivide, EltwisePrelu, EltwiseMulAdd) && isBroadcastableToDataInput())
return (one_of(getAlgorithm(), Algorithm::EltwiseAdd,
Algorithm::EltwiseMultiply,
Algorithm::EltwiseSubtract,
Algorithm::EltwiseDivide,
Algorithm::EltwisePrelu,
Algorithm::EltwiseMulAdd) && isBroadcastableToDataInput())
|| isConvertablePowerStatic();
}
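An illustrative pattern that satisfies this check (the layer names and constant shape are hypothetical, not taken from the commit):

    // Convolution ──► Add(const, shape [1, C, 1, 1])
    // Add is in the allowed algorithm list above, its non-data input is a constant Input node,
    // and the constant broadcasts along the fusing (channel) axis, so the Add can be folded
    // into the producer as a per-channel shift.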
// @todo shifts for Subtract and scales for Divide are replaced with
// Add (with opposite sign) and Multiply (with inverse value) for legacy depthwise post ops
// This can be avoided after depthwise post ops are gone
std::pair<std::vector<float>, std::vector<float>> MKLDNNNode::getScalesAndShifts(const MKLDNNNode *parentNode) const {
std::pair<std::vector<float>, std::vector<float>> Node::getScalesAndShifts(const Node *parentNode) const {
std::vector<float> scales, shifts;
const auto fillValuesFrom = [&](const MKLDNNNodePtr& constInput, std::vector<float>& buffer) {
auto *constInputNode = dynamic_cast<MKLDNNInputNode *>(constInput.get());
const auto fillValuesFrom = [&](const NodePtr& constInput, std::vector<float>& buffer) {
auto *constInputNode = dynamic_cast<node::Input *>(constInput.get());
if (!constInputNode) {
IE_THROW() << "Cannot cast " << constInput->getName() << " to MKLDNNInputNode";
IE_THROW() << "Cannot cast " << constInput->getName() << " to Input";
}
auto constBlob = constInputNode->getMemoryPtr();
const auto elementsCount = constBlob->GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
buffer.resize(elementsCount);
cpu_convert(constBlob->GetPtr(),
&buffer[0],
MKLDNNExtensionUtils::DataTypeToIEPrecision(constBlob->GetDataType()),
DnnlExtensionUtils::DataTypeToIEPrecision(constBlob->GetDataType()),
Precision::FP32,
elementsCount);
};
const auto constPort = getParentEdgesAtPort(0)[0]->getParent().get() == parentNode ? 1 : 0;
if (one_of(getAlgorithm(), EltwiseMultiply, EltwiseDivide, EltwisePrelu)) {
if (one_of(getAlgorithm(), Algorithm::EltwiseMultiply, Algorithm::EltwiseDivide, Algorithm::EltwisePrelu)) {
fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), scales);
} else if (one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract)) {
} else if (one_of(getAlgorithm(), Algorithm::EltwiseAdd, Algorithm::EltwiseSubtract)) {
fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), shifts);
} else if (one_of(getAlgorithm(), EltwiseMulAdd)) {
} else if (one_of(getAlgorithm(), Algorithm::EltwiseMulAdd)) {
fillValuesFrom(getParentEdgesAtPort(1)[0]->getParent(), scales);
fillValuesFrom(getParentEdgesAtPort(2)[0]->getParent(), shifts);
} else if (one_of(getAlgorithm(), EltwisePowerStatic)) {
const auto power = dynamic_cast<const MKLDNNEltwiseNode *>(this);
} else if (one_of(getAlgorithm(), Algorithm::EltwisePowerStatic)) {
const auto power = dynamic_cast<const Eltwise *>(this);
if (!power) {
IE_THROW() << "Cannot cast " << getName() << " to MKLDNNEltwiseNode";
IE_THROW() << "Cannot cast " << getName() << " to Eltwise";
}
scales.push_back(power->getBeta());
shifts.push_back(power->getGamma());
@ -1352,20 +1360,20 @@ std::pair<std::vector<float>, std::vector<float>> MKLDNNNode::getScalesAndShifts
}
switch (getAlgorithm()) {
case EltwiseAdd: {
case Algorithm::EltwiseAdd: {
scales.resize(shifts.size(), 1.0f);
break;
}
case EltwiseSubtract: {
case Algorithm::EltwiseSubtract: {
scales.resize(shifts.size(), 1.0f);
std::transform(shifts.begin(), shifts.end(), shifts.begin(), [](float shift){ return -1.0f * shift; });
break;
}
case EltwiseMultiply: {
case Algorithm::EltwiseMultiply: {
shifts.resize(scales.size(), 0.0f);
break;
}
case EltwiseDivide: {
case Algorithm::EltwiseDivide: {
shifts.resize(scales.size(), 0.0f);
std::transform(scales.begin(), scales.end(), scales.begin(), [](float scale){ return 1.0f / scale; });
break;
@ -1376,21 +1384,21 @@ std::pair<std::vector<float>, std::vector<float>> MKLDNNNode::getScalesAndShifts
return {scales, shifts};
}
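The net effect of the switch above, with made-up constant values for illustration:

    // EltwiseSubtract by 2.5                       -> scales = {1.0f},  shifts = {-2.5f}  (shift negated for the legacy Add form)
    // EltwiseDivide by 4.0                         -> scales = {0.25f}, shifts = {0.0f}   (scale inverted for the legacy Multiply form)
    // EltwisePowerStatic(alpha=1, beta=3, gamma=1) -> scales = {3.0f},  shifts = {1.0f}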
bool MKLDNNNode::isInputTensorAtPortEmpty(size_t port) const {
bool Node::isInputTensorAtPortEmpty(size_t port) const {
if (inputShapes.size() <= port) {
IE_THROW() << "Incorrect input port number for node " << getName();
}
return getParentEdgesAtPort(port)[0]->getMemory().GetShape().hasZeroDims();
}
bool MKLDNNNode::isOutputTensorAtPortEmpty(size_t port) const {
bool Node::isOutputTensorAtPortEmpty(size_t port) const {
if (outputShapes.size() <= port) {
IE_THROW() << "Incorrect output port number for node " << getName();
}
return getChildEdgesAtPort(port)[0]->getMemory().GetShape().hasZeroDims();
}
bool MKLDNNNode::hasEmptyInputTensors() const {
bool Node::hasEmptyInputTensors() const {
for (size_t i = 0; i < getParentEdges().size(); i++) {
if (isInputTensorAtPortEmpty(i))
return true;
@ -1398,7 +1406,7 @@ bool MKLDNNNode::hasEmptyInputTensors() const {
return false;
}
bool MKLDNNNode::hasEmptyOutputTensors() const {
bool Node::hasEmptyOutputTensors() const {
for (size_t i = 0; i < outputShapes.size(); i++) {
if (isOutputTensorAtPortEmpty(i))
return true;
@ -1406,7 +1414,7 @@ bool MKLDNNNode::hasEmptyOutputTensors() const {
return false;
}
bool MKLDNNNode::inputShapesDefined() const {
bool Node::inputShapesDefined() const {
for (size_t i = 0; i < getParentEdges().size(); i++) {
if (!getParentEdgesAtPort(i)[0]->getMemory().getDesc().isDefined()) {
return false;
@ -1415,7 +1423,7 @@ bool MKLDNNNode::inputShapesDefined() const {
return true;
}
bool MKLDNNNode::outputShapesDefined() const {
bool Node::outputShapesDefined() const {
for (size_t i = 0; i < outputShapes.size(); i++) {
if (!getChildEdgesAtPort(i)[0]->getMemory().getDesc().isDefined()) {
return false;
@ -1424,15 +1432,15 @@ bool MKLDNNNode::outputShapesDefined() const {
return true;
}
bool MKLDNNNode::shapesDefined() const {
bool Node::shapesDefined() const {
return inputShapesDefined() && outputShapesDefined();
}
bool MKLDNNNode::needPrepareParams() const {
bool Node::needPrepareParams() const {
return inputShapesModified();
}
bool MKLDNNNode::inputShapesModified() const {
bool Node::inputShapesModified() const {
if (lastInputDims.size() != getParentEdges().size()) {
if (lastInputDims.empty())
return true;
@ -1446,16 +1454,16 @@ bool MKLDNNNode::inputShapesModified() const {
return false;
}
bool MKLDNNNode::needShapeInfer() const {
bool Node::needShapeInfer() const {
return inputShapesModified();
}
std::vector<VectorDims> MKLDNNNode::shapeInfer() const {
std::vector<VectorDims> Node::shapeInfer() const {
return shapeInferGeneric();
}
std::vector<VectorDims> MKLDNNNode::shapeInferGeneric(const std::vector<ov::StaticShape>& input_shapes,
uint32_t input_value_port_mask) const {
std::vector<VectorDims> Node::shapeInferGeneric(const std::vector<StaticShape>& input_shapes,
uint32_t input_value_port_mask) const {
// collect input values
std::map<size_t, std::shared_ptr<ngraph::runtime::HostTensor>> input_values;
if (input_value_port_mask) {
@ -1480,19 +1488,19 @@ std::vector<VectorDims> MKLDNNNode::shapeInferGeneric(const std::vector<ov::Stat
}
// call shape inference API
std::vector<ov::StaticShape> output_shapes = shapeInference->infer(input_shapes, input_values);
std::vector<StaticShape> output_shapes = shapeInference->infer(input_shapes, input_values);
std::vector<VectorDims> result(output_shapes.size());
std::transform(output_shapes.begin(), output_shapes.end(), result.begin(), [](const ov::StaticShape& s) {
std::transform(output_shapes.begin(), output_shapes.end(), result.begin(), [](const StaticShape& s) {
return s.to_shape();
});
return result;
}
std::vector<VectorDims> MKLDNNNode::shapeInferGeneric(const std::vector<Shape>& shapes,
std::vector<VectorDims> Node::shapeInferGeneric(const std::vector<Shape>& shapes,
uint32_t input_value_port_mask) const {
std::vector<ov::StaticShape> input_shapes;
std::vector<StaticShape> input_shapes;
input_shapes.reserve(shapes.size());
for (size_t i = 0; i < shapes.size(); i++)
@ -1501,8 +1509,8 @@ std::vector<VectorDims> MKLDNNNode::shapeInferGeneric(const std::vector<Shape>&
return shapeInferGeneric(input_shapes, input_value_port_mask);
}
std::vector<VectorDims> MKLDNNNode::shapeInferGeneric(uint32_t input_value_port_mask) const {
std::vector<ov::StaticShape> input_shapes;
std::vector<VectorDims> Node::shapeInferGeneric(uint32_t input_value_port_mask) const {
std::vector<StaticShape> input_shapes;
const auto & iranks = shapeInference->get_input_ranks();
input_shapes.reserve(iranks.size());
@ -1518,7 +1526,7 @@ std::vector<VectorDims> MKLDNNNode::shapeInferGeneric(uint32_t input_value_port_
return shapeInferGeneric(input_shapes, input_value_port_mask);
}
void MKLDNNNode::updateLastInputDims() {
void Node::updateLastInputDims() {
if (lastInputDims.size() != getParentEdges().size()) {
if (!lastInputDims.empty())
IE_THROW() << "Input dims and parent edges number mismatch!";
@ -1529,23 +1537,38 @@ void MKLDNNNode::updateLastInputDims() {
lastInputDims[i] = getParentEdgesAtPort(i)[0]->getMemory().getStaticDims();
}
bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const {
if (node->getType() == FakeQuantize) {
bool ret = node->getAlgorithm() != FQBinarization;
bool Node::canFuseSimpleOperation(const NodePtr& node) const {
if (node->getType() == Type::FakeQuantize) {
bool ret = node->getAlgorithm() != Algorithm::FQBinarization;
for (size_t i = 1; i < node->getParentEdges().size(); i++) {
ret &= node->getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() == 1;
}
return ret;
} else if (node->getType() == Eltwise) {
} else if (node->getType() == Type::Eltwise) {
return one_of(node->getAlgorithm(),
EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh,
EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu) ||
Algorithm::EltwiseRelu,
Algorithm::EltwiseGelu,
Algorithm::EltwiseElu,
Algorithm::EltwiseSigmoid,
Algorithm::EltwiseClamp,
Algorithm::EltwiseTanh,
Algorithm::EltwiseSwish,
Algorithm::EltwiseHswish,
Algorithm::EltwiseMish,
Algorithm::EltwiseHsigmoid,
Algorithm::EltwiseRoundHalfToEven,
Algorithm::EltwiseRoundHalfAwayFromZero,
Algorithm::EltwiseAbs,
Algorithm::EltwiseSqrt,
Algorithm::EltwiseSoftRelu) ||
node->canBePerformedAsScaleShift(this);
}
return false;
}
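Typical outcomes of this predicate, with hypothetical producer/consumer pairs for illustration:

    // Convolution ──► Eltwise(Relu)                   -> fusible: Algorithm::EltwiseRelu is in the list above
    // Convolution ──► FakeQuantize(FQBinarization)    -> not fusible: the binarization algorithm is excluded
    // Convolution ──► Eltwise(Add, per-channel const) -> fusible via canBePerformedAsScaleShift(this)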
void MKLDNNNode::addFusedNode(const MKLDNNNodePtr &fusingNode) {
void Node::addFusedNode(const NodePtr &fusingNode) {
fusedWith.push_back(fusingNode);
}
} // namespace intel_cpu
} // namespace ov

View File

@ -13,7 +13,7 @@
#include <caseless.hpp>
#include "cpu_memory.h"
#include "edge.h"
#include "descriptor.h"
#include "dnnl_descriptor.h"
#include "selective_build.h"
#include "mkldnn/iml_type_mapper.h"
#include "extension_mngr.h"
@ -37,9 +37,9 @@
namespace ov {
namespace intel_cpu {
using MKLDNNNodePtr = std::shared_ptr<MKLDNNNode>;
using MKLDNNNodeConstPtr = std::shared_ptr<const MKLDNNNode>;
using MKLDNNNodeWeakPtr = std::weak_ptr<MKLDNNNode>;
using NodePtr = std::shared_ptr<Node>;
using NodeConstPtr = std::shared_ptr<const Node>;
using NodeWeakPtr = std::weak_ptr<Node>;
class PortConfigurator {
public:
@ -94,10 +94,10 @@ private:
impl_desc_type implementationType;
};
class MKLDNNNode {
class Node {
public:
MKLDNNNode(const MKLDNNNode &) = delete;
MKLDNNNode & operator = (const MKLDNNNode &) = delete;
Node(const Node &) = delete;
Node & operator = (const Node &) = delete;
using AttrPtr = std::shared_ptr<mkldnn::primitive_attr>;
@ -108,12 +108,12 @@ public:
struct PerfCounters {
PerfCounters(std::string const& name)
: execute(openvino::itt::handle(name))
, getSupportedDescriptors(openvino::itt::handle<Tag<MKLDNNNode, 0>>("MKLDNNNode::getSupportedDescriptors"))
, initSupportedPrimitiveDescriptors(openvino::itt::handle<Tag<MKLDNNNode, 1>>("MKLDNNNode::initSupportedPrimitiveDescriptors"))
, filterSupportedPrimitiveDescriptors(openvino::itt::handle<Tag<MKLDNNNode, 2>>("MKLDNNNode::filterSupportedPrimitiveDescriptors"))
, selectOptimalPrimitiveDescriptor(openvino::itt::handle<Tag<MKLDNNNode, 3>>("MKLDNNNode::selectOptimalPrimitiveDescriptor"))
, createPrimitive(openvino::itt::handle<Tag<MKLDNNNode, 4>>("MKLDNNNode::createPrimitive"))
, initOptimalPrimitiveDescriptor(openvino::itt::handle<Tag<MKLDNNNode, 5>>("MKLDNNNode::initOptimalPrimitiveDescriptor"))
, getSupportedDescriptors(openvino::itt::handle<Tag<Node, 0>>("Node::getSupportedDescriptors"))
, initSupportedPrimitiveDescriptors(openvino::itt::handle<Tag<Node, 1>>("Node::initSupportedPrimitiveDescriptors"))
, filterSupportedPrimitiveDescriptors(openvino::itt::handle<Tag<Node, 2>>("Node::filterSupportedPrimitiveDescriptors"))
, selectOptimalPrimitiveDescriptor(openvino::itt::handle<Tag<Node, 3>>("Node::selectOptimalPrimitiveDescriptor"))
, createPrimitive(openvino::itt::handle<Tag<Node, 4>>("Node::createPrimitive"))
, initOptimalPrimitiveDescriptor(openvino::itt::handle<Tag<Node, 5>>("Node::initOptimalPrimitiveDescriptor"))
{}
template<typename NodeType>
@ -138,27 +138,27 @@ public:
class NodesFactory;
static NodesFactory & factory();
virtual ~MKLDNNNode() = default;
virtual ~Node() = default;
void addEdge(const MKLDNNEdgeWeakPtr& edge);
void removeEdge(const MKLDNNEdgeWeakPtr& edge);
void addEdge(const EdgeWeakPtr& edge);
void removeEdge(const EdgeWeakPtr& edge);
virtual void cleanup();
void remove();
const std::vector<MKLDNNEdgeWeakPtr> &getParentEdges() const noexcept {
const std::vector<EdgeWeakPtr> &getParentEdges() const noexcept {
return parentEdges;
}
const std::vector<MKLDNNEdgeWeakPtr> &getChildEdges() const noexcept {
const std::vector<EdgeWeakPtr> &getChildEdges() const noexcept {
return childEdges;
}
const MKLDNNEdgePtr getParentEdgeAt(size_t idx) const;
virtual const MKLDNNEdgePtr getChildEdgeAt(size_t idx) const;
const EdgePtr getParentEdgeAt(size_t idx) const;
virtual const EdgePtr getChildEdgeAt(size_t idx) const;
const std::vector<MKLDNNEdgePtr> getParentEdgesAtPort(size_t idx) const;
const std::vector<MKLDNNEdgePtr> getChildEdgesAtPort(size_t idx) const;
const std::vector<EdgePtr> getParentEdgesAtPort(size_t idx) const;
const std::vector<EdgePtr> getChildEdgesAtPort(size_t idx) const;
bool isDropped() {
return (isEdgesEmpty(childEdges) && isEdgesEmpty(parentEdges));
@ -170,7 +170,7 @@ public:
bool isInPlace();
// must be called only after MKLDNNGraph::InitEdges()
// must be called only after Graph::InitEdges()
virtual bool isExecutable() const {
return !hasEmptyInputTensors();
}
@ -183,13 +183,13 @@ public:
static void appendPostOpArgs(const mkldnn::primitive_attr& attr,
std::unordered_map<int, mkldnn::memory>& primArgs,
const std::vector<MKLDNNMemoryPtr>& postOpsArgs);
const std::vector<MemoryPtr>& postOpsArgs);
bool isFusedWith(Type type) const;
virtual void addFusedNode(const MKLDNNNodePtr &fusingNode);
virtual void addFusedNode(const NodePtr &fusingNode);
virtual void fuseInto(MKLDNNNodePtr& parentNode) {
virtual void fuseInto(NodePtr& parentNode) {
// The graph supports fusing only consecutive nodes, and some graph logic needs to know through which input port a node was fused into its parent.
for (int i = 0; i < getParentEdges().size(); i++) {
if (getParentEdgesAtPort(i)[0]->getParent().get() == parentNode.get()) {
@ -220,15 +220,15 @@ public:
fusedWith.clear();
}
void mergeWith(const MKLDNNNodePtr &merge) {
void mergeWith(const NodePtr &merge) {
mergedWith.push_back(merge);
}
const std::vector <MKLDNNNodePtr> &getMergeWith() {
const std::vector <NodePtr> &getMergeWith() {
return mergedWith;
}
const std::vector <MKLDNNNodePtr> &getFusedWith() {
const std::vector <NodePtr> &getFusedWith() {
return fusedWith;
}
@ -317,7 +317,7 @@ public:
selectedPrimitiveDescriptorIndex = index;
// Each primitive descriptor has its own InPlace status. So after new primitive descriptor selection
// we should reset InPlace type to definite new status for node using MKLDNNNode::isInPlace()
// we should reset the InPlace type so that the new status for the node is determined via Node::isInPlace()
inplace = InPlaceType::Unknown;
}
@ -352,7 +352,7 @@ public:
const std::vector<MemoryDescPtr>& outputDesc) {}
virtual void initDescriptor(const NodeConfig& config);
virtual bool created() const = 0;
virtual bool created(const MKLDNNExtensionManager::Ptr& extMgr) {
virtual bool created(const ExtensionManager::Ptr& extMgr) {
return created();
}
@ -422,11 +422,11 @@ public:
this->typeStr = typeStr;
}
virtual size_t descInputNumbers(MKLDNNDescriptor desc) {
virtual size_t descInputNumbers(DnnlDesriptor desc) {
return desc.inputNumbers();
}
virtual size_t descOutputNumbers(MKLDNNDescriptor desc) {
virtual size_t descOutputNumbers(DnnlDesriptor desc) {
return desc.outputNumbers();
}
@ -515,7 +515,7 @@ public:
algorithm = alg;
}
virtual bool canFuse(const MKLDNNNodePtr& node) const {
virtual bool canFuse(const NodePtr& node) const {
return false;
}
@ -523,7 +523,7 @@ public:
isInQuantizedGraph = flag;
}
bool canBePerformedAsScaleShift(const MKLDNNNode *parentNode = nullptr) const;
bool canBePerformedAsScaleShift(const Node *parentNode = nullptr) const;
bool isDynamicNode() const {
return isDynamic;
@ -555,24 +555,24 @@ public:
* node from which data comes
* @return pair of scales and shifts
*/
std::pair<std::vector<float>, std::vector<float>> getScalesAndShifts(const MKLDNNNode *parentNode) const;
std::pair<std::vector<float>, std::vector<float>> getScalesAndShifts(const Node *parentNode) const;
/**
* @brief Appends a new item to the ops list with information on how the node should be executed as a post operation.
* The seed node should call this routine and pass its post-operations list as a parameter.
* @param ops List of fused post operations
*/
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MKLDNNMemoryPtr>& postOpsMem);
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MemoryPtr>& postOpsMem);
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<const void*>& postOpsMem);
virtual void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem);
virtual void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MemoryPtr>& binaryPostOpsMem);
void setRuntimeCache(MultiCachePtr cache) {
rtParamsCache = cache;
}
protected:
bool canFuseSimpleOperation(const MKLDNNNodePtr& node) const;
bool canFuseSimpleOperation(const NodePtr& node) const;
void setType(Type type) {
this->type = type;
@ -595,8 +595,8 @@ protected:
std::vector<Shape> inputShapes;
std::vector<Shape> outputShapes;
std::vector <MKLDNNNodePtr> fusedWith;
std::vector <MKLDNNNodePtr> mergedWith;
std::vector <NodePtr> fusedWith;
std::vector <NodePtr> mergedWith;
std::vector <impl_desc_type> implPriorities;
std::vector <mkldnn::memory::format_tag> inputMemoryFormatsFilter;
std::vector <mkldnn::memory::format_tag> outputMemoryFormatsFilter;
@ -604,8 +604,8 @@ protected:
std::string originalLayers; // contains names of the original layers separated by comma
MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache);
MKLDNNNode(const std::string& type, const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache);
Node(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &w_cache);
Node(const std::string& type, const std::string& name, const mkldnn::engine& eng, WeightsSharing::Ptr &w_cache);
int selectedPrimitiveDescriptorIndex = -1;
bool permanent = false;
@ -624,22 +624,22 @@ protected:
InPlaceType inplace = InPlaceType::Unknown;
ConstantType constant = ConstantType::Unknown;
std::vector<InferenceEngine::Blob::Ptr> internalBlobs;
std::vector<MKLDNNMemoryPtr> internalBlobMemory;
std::vector<MemoryPtr> internalBlobMemory;
std::vector<NodeDesc> supportedPrimitiveDescriptors;
std::unordered_map<int, mkldnn::memory> primArgs;
std::vector<MKLDNNMemoryPtr> postOpsArgs;
MKLDNNPrimitive prim;
std::vector<MKLDNNDescriptor> descs;
std::vector<MemoryPtr> postOpsArgs;
Primitive prim;
std::vector<DnnlDesriptor> descs;
MKLDNNWeightsSharing::Ptr weightCache;
WeightsSharing::Ptr weightCache;
Algorithm algorithm = Algorithm::Default;
bool isInQuantizedGraph = false;
friend class MKLDNNEdge;
friend class MKLDNNGraph;
friend class MKLDNNGraphOptimizer;
friend class Edge;
friend class Graph;
friend class GraphOptimizer;
void selectPreferPrimitiveDescriptor(const std::vector<impl_desc_type>& priority, bool ignoreConstInputs);
bool isConfigDefined(const NodeConfig &config) const;
@ -745,8 +745,8 @@ protected:
std::shared_ptr<IShapeInfer> shapeInference;
private:
std::vector<MKLDNNEdgeWeakPtr> parentEdges;
std::vector<MKLDNNEdgeWeakPtr> childEdges;
std::vector<EdgeWeakPtr> parentEdges;
std::vector<EdgeWeakPtr> childEdges;
std::vector<InferenceEngine::Precision> originalInputPrecisions;
std::vector<InferenceEngine::Precision> originalOutputPrecisions;
@ -767,11 +767,11 @@ private:
MultiCachePtr rtParamsCache;
bool isEdgesEmpty(const std::vector<MKLDNNEdgeWeakPtr>& edges) const;
bool isEdgesEmpty(const std::vector<EdgeWeakPtr>& edges) const;
template <class PD, class D, typename FPD>
typename std::enable_if<!std::is_same<FPD, bool>::value, PD>::type
createPd(MKLDNNDescriptor desc) {
createPd(DnnlDesriptor desc) {
std::shared_ptr<D> selected_desc_ptr = desc;
std::shared_ptr<FPD> backward_prim_desc_ptr = desc;
return PD(*selected_desc_ptr, engine, *backward_prim_desc_ptr);
@ -779,15 +779,15 @@ private:
template <class PD, class D, typename FPD>
typename std::enable_if<std::is_same<FPD, bool>::value, PD>::type
createPd(MKLDNNDescriptor desc) {
createPd(DnnlDesriptor desc) {
std::shared_ptr<D> selected_desc_ptr = desc;
return PD(*selected_desc_ptr, engine);
}
enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 };
ConstantType checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes);
ConstantType checkConstant(LOOK look, std::vector<NodePtr>& checkNodes);
std::vector<VectorDims> shapeInferGeneric(const std::vector<ov::StaticShape>& input_shapes,
std::vector<VectorDims> shapeInferGeneric(const std::vector<StaticShape>& input_shapes,
uint32_t input_value_port_mask) const;
#ifdef CPU_DEBUG_CAPS
@ -804,26 +804,24 @@ constexpr uint64_t PortMask(int n, T... rest) {
return PortMask(rest...) | (1 << n);
}
class MKLDNNNode::NodesFactory : public openvino::cc::Factory<Type,
MKLDNNNode*(const std::shared_ptr<ngraph::Node>& op,
const mkldnn::engine &,
MKLDNNWeightsSharing::Ptr &)> {
class Node::NodesFactory : public openvino::cc::Factory<Type,
Node*(const std::shared_ptr<ngraph::Node>& op,
const mkldnn::engine &,
WeightsSharing::Ptr &)> {
public:
NodesFactory();
MKLDNNNode* create(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache);
Node* create(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
const ExtensionManager::Ptr& extMgr, WeightsSharing::Ptr &w_cache);
};
template<typename MKLDNNNodeType>
struct MKLDNNNodeImpl : public MKLDNNNodeType {
MKLDNNNodeImpl(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNodeType(op, eng, cache) {
MKLDNNNodeType::perfCounters().template buildClassCounters<MKLDNNNodeType>(NameFromType(MKLDNNNodeType::getType()));
template<typename NodeType>
struct NodeImpl : public NodeType {
NodeImpl(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache)
: NodeType(op, eng, cache) {
NodeType::perfCounters().template buildClassCounters<NodeType>(NameFromType(NodeType::getType()));
}
};
} // namespace intel_cpu
} // namespace ov
#define REG_MKLDNN_PRIM_FOR(__prim, __type)

View File

@ -7,7 +7,7 @@
#include <cpu/x64/cpu_isa_traits.hpp>
#include <math.h>
#include <mkldnn.hpp>
#include <extension_utils.h>
#include <dnnl_extension_utils.h>
#include <selective_build.h>
#include <mkldnn_types.h>
#include <ngraph/opsets/opset8.hpp>
@ -16,12 +16,15 @@
#include <utils/general_utils.h>
#include <vector>
using namespace ov::intel_cpu;
using namespace InferenceEngine;
using namespace mkldnn;
using namespace mkldnn::impl::cpu::x64;
bool MKLDNNAdaptivePoolingNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
namespace ov {
namespace intel_cpu {
namespace node {
bool AdaptivePooling::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveAvgPool::get_type_info_static())) {
auto adaPool = std::dynamic_pointer_cast<const ngraph::opset8::AdaptiveAvgPool>(op);
@ -45,8 +48,8 @@ bool MKLDNNAdaptivePoolingNode::isSupportedOperation(const std::shared_ptr<const
return true;
}
MKLDNNAdaptivePoolingNode::MKLDNNAdaptivePoolingNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
AdaptivePooling::AdaptivePooling(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
WeightsSharing::Ptr &cache) : Node(op, eng, cache) {
std::string errorMessage;
if (isSupportedOperation(op, errorMessage)) {
errorPrefix = "Adaptive Pooling layer with name '" + getName() + "' ";
@ -62,13 +65,13 @@ MKLDNNAdaptivePoolingNode::MKLDNNAdaptivePoolingNode(const std::shared_ptr<ngrap
spatialDimsValue.resize(spatialDimsCount);
}
void MKLDNNAdaptivePoolingNode::getSupportedDescriptors() {
void AdaptivePooling::getSupportedDescriptors() {
if (!descs.empty())
return;
if (getParentEdges().size() != 2)
IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getParentEdges().size();
if (getChildEdges().size() < (algorithm == AdaptivePoolingMax ? 2 : 1))
if (getChildEdges().size() < (algorithm == Algorithm::AdaptivePoolingMax ? 2 : 1))
IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getChildEdges().size();
auto srcRank = getInputShapeAtPort(0).getRank();
@ -85,16 +88,16 @@ void MKLDNNAdaptivePoolingNode::getSupportedDescriptors() {
}
}
bool MKLDNNAdaptivePoolingNode::needShapeInfer() const {
bool AdaptivePooling::needShapeInfer() const {
const auto newSpatialDimsPtr = reinterpret_cast<int32_t *>(getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPtr());
for (size_t i = 0; i < spatialDimsCount; i++) {
if (spatialDimsValue[i] != newSpatialDimsPtr[i])
return true;
}
return MKLDNNNode::needShapeInfer();
return Node::needShapeInfer();
}
std::vector<VectorDims> MKLDNNAdaptivePoolingNode::shapeInfer() const {
std::vector<VectorDims> AdaptivePooling::shapeInfer() const {
const auto inputDims = getParentEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims();
const auto spatialDims = getParentEdgesAtPort(1)[0]->getMemory().GetShape().getStaticDims();
const auto inputRank = inputDims.size();
@ -113,7 +116,7 @@ std::vector<VectorDims> MKLDNNAdaptivePoolingNode::shapeInfer() const {
return result;
}
void MKLDNNAdaptivePoolingNode::initSupportedPrimitiveDescriptors() {
void AdaptivePooling::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -145,11 +148,11 @@ void MKLDNNAdaptivePoolingNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNAdaptivePoolingNode::executeDynamicImpl(mkldnn::stream strm) {
void AdaptivePooling::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNAdaptivePoolingNode::execute(mkldnn::stream strm) {
void AdaptivePooling::execute(mkldnn::stream strm) {
auto inputPrec = getParentEdgeAt(0)->getMemory().GetDataType();
auto outputPrec = getChildEdgeAt(0)->getMemory().GetDataType();
if (!(inputPrec == mkldnn_f32 && outputPrec == mkldnn_f32))
@ -283,13 +286,15 @@ void MKLDNNAdaptivePoolingNode::execute(mkldnn::stream strm) {
}});
}
bool MKLDNNAdaptivePoolingNode::created() const {
return getType() == AdaptivePooling;
bool AdaptivePooling::created() const {
return getType() == Type::AdaptivePooling;
}
inline void MKLDNNAdaptivePoolingNode::setBinBorders(size_t *startPtr, size_t *endPtr, size_t idx, size_t inputLength, size_t outputLength) {
inline void AdaptivePooling::setBinBorders(size_t *startPtr, size_t *endPtr, size_t idx, size_t inputLength, size_t outputLength) {
*(startPtr) = idx * inputLength / outputLength;
*(endPtr) = ceil(static_cast<float>((idx + 1) * inputLength) / outputLength);
}
REG_MKLDNN_PRIM_FOR(MKLDNNAdaptivePoolingNode, AdaptivePooling)
} // namespace node
} // namespace intel_cpu
} // namespace ov
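A self-contained sketch of the bin-border arithmetic used by AdaptivePooling::setBinBorders above, with one worked example (the sizes below are illustrative):

    #include <cmath>
    #include <cstddef>

    // Bin idx covers [idx * inLen / outLen, ceil((idx + 1) * inLen / outLen)), as in setBinBorders().
    static void binBorders(std::size_t* start, std::size_t* end,
                           std::size_t idx, std::size_t inLen, std::size_t outLen) {
        *start = idx * inLen / outLen;
        *end = static_cast<std::size_t>(std::ceil(static_cast<float>((idx + 1) * inLen) / outLen));
    }

    // For inLen = 5 and outLen = 3 the bins are [0,2), [1,4), [3,5):
    // neighbouring bins may overlap, so every input element contributes to some output position.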

View File

@ -8,14 +8,15 @@
#include <string>
#include <memory>
#include <vector>
#include <extension_utils.h>
#include <dnnl_extension_utils.h>
namespace ov {
namespace intel_cpu {
namespace node {
class MKLDNNAdaptivePoolingNode : public MKLDNNNode {
class AdaptivePooling : public Node {
public:
MKLDNNAdaptivePoolingNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
AdaptivePooling(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
@ -39,5 +40,6 @@ protected:
void executeDynamicImpl(mkldnn::stream strm) override;
};
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -11,10 +11,13 @@
#include <nodes/common/blocked_desc_creator.h>
#include <ngraph/opsets/opset2.hpp>
using namespace ov::intel_cpu;
using namespace InferenceEngine;
bool MKLDNNBatchToSpaceNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
namespace ov {
namespace intel_cpu {
namespace node {
bool BatchToSpace::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto batchToSpace = std::dynamic_pointer_cast<const ngraph::opset2::BatchToSpace>(op);
if (!batchToSpace) {
@ -33,8 +36,8 @@ bool MKLDNNBatchToSpaceNode::isSupportedOperation(const std::shared_ptr<const ng
return true;
}
MKLDNNBatchToSpaceNode::MKLDNNBatchToSpaceNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
BatchToSpace::BatchToSpace(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
WeightsSharing::Ptr &cache) : Node(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
@ -56,7 +59,7 @@ MKLDNNBatchToSpaceNode::MKLDNNBatchToSpaceNode(const std::shared_ptr<ngraph::Nod
cropsBeginIn = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(op->get_input_node_shared_ptr(2))->cast_vector<size_t>();
}
void MKLDNNBatchToSpaceNode::initSupportedPrimitiveDescriptors() {
void BatchToSpace::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -96,8 +99,8 @@ void MKLDNNBatchToSpaceNode::initSupportedPrimitiveDescriptors() {
}
}
std::vector<VectorDims> MKLDNNBatchToSpaceNode::shapeInfer() const {
return MKLDNNNode::shapeInferGeneric(PortMask(1, 2, 3));
std::vector<VectorDims> BatchToSpace::shapeInfer() const {
return Node::shapeInferGeneric(PortMask(1, 2, 3));
}
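PortMask() from node.h simply accumulates a bitmask of the input ports whose constant data is needed during shape inference; a minimal check, assuming the zero-argument base overload of PortMask() returns 0:

    // PortMask(1, 2, 3) == (1 << 1) | (1 << 2) | (1 << 3) == 0b1110, so shapeInferGeneric()
    // reads the values on ports 1-3 (block_shape, crops_begin, crops_end) while inferring the output shape.
    static_assert(PortMask(1, 2, 3) == 0b1110, "ports 1, 2 and 3 are value inputs for shape inference");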
static std::vector<size_t> getShape5D(const SizeVector &shape) {
@ -111,7 +114,7 @@ static std::vector<size_t> getShape5D(const SizeVector &shape) {
}
template<typename T>
void MKLDNNBatchToSpaceNode::batchToSpaceKernel() {
void BatchToSpace::batchToSpaceKernel() {
const auto *srcData = reinterpret_cast<const T *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
auto *dstData = reinterpret_cast<T *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
@ -229,11 +232,11 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() {
});
}
void MKLDNNBatchToSpaceNode::executeDynamicImpl(mkldnn::stream strm) {
void BatchToSpace::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNBatchToSpaceNode::execute(mkldnn::stream strm) {
void BatchToSpace::execute(mkldnn::stream strm) {
switch (getParentEdgeAt(0)->getMemory().getDesc().getPrecision().size()) {
case 1: batchToSpaceKernel<PrecisionTrait<Precision::U8>::value_type>(); break;
case 2: batchToSpaceKernel<PrecisionTrait<Precision::U16>::value_type>(); break;
@ -244,8 +247,10 @@ void MKLDNNBatchToSpaceNode::execute(mkldnn::stream strm) {
}
}
bool MKLDNNBatchToSpaceNode::created() const {
return getType() == BatchToSpace;
bool BatchToSpace::created() const {
return getType() == Type::BatchToSpace;
}
REG_MKLDNN_PRIM_FOR(MKLDNNBatchToSpaceNode, BatchToSpace)
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -12,10 +12,11 @@
namespace ov {
namespace intel_cpu {
namespace node {
class MKLDNNBatchToSpaceNode : public MKLDNNNode {
class BatchToSpace : public Node {
public:
MKLDNNBatchToSpaceNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
BatchToSpace(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
@ -39,5 +40,6 @@ private:
std::string errorPrefix;
};
} // namespace node
} // namespace intel_cpu
} // namespace ov
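Taken together, the source and header above show the renaming pattern this commit applies to every CPU plugin node: the MKLDNN prefix and Node suffix are dropped from the class name, the code moves into the ov::intel_cpu::node namespace, base types lose their MKLDNN prefix (Node, WeightsSharing), bare enum values become scoped (Type::BatchToSpace, Algorithm::...), and the REG_MKLDNN_PRIM_FOR registration macro is removed. A minimal sketch of a node skeleton after the change, not the literal file contents, with the plugin-internal headers that define Node, WeightsSharing and Type assumed rather than shown:

// Hypothetical skeleton following the post-refactoring naming conventions above.
// The plugin headers providing Node, WeightsSharing, Type and mkldnn::engine are assumed.
#include <memory>

namespace ov {
namespace intel_cpu {
namespace node {

class BatchToSpace : public Node {                       // was MKLDNNBatchToSpaceNode : public MKLDNNNode
public:
    BatchToSpace(const std::shared_ptr<ngraph::Node>& op,
                 const mkldnn::engine& eng,
                 WeightsSharing::Ptr& cache)             // was MKLDNNWeightsSharing::Ptr
        : Node(op, eng, cache) {}

    bool created() const override {
        return getType() == Type::BatchToSpace;          // scoped enum instead of a bare value
    }
};

}  // namespace node
}  // namespace intel_cpu
}  // namespace ov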

View File

@ -11,7 +11,7 @@
#include <string>
#include <vector>
#include <mkldnn_types.h>
#include <extension_utils.h>
#include <dnnl_extension_utils.h>
#include "ie_parallel.hpp"
#include "cpu/x64/jit_generator.hpp"
#include "cpu/x64/injectors/jit_uni_eltwise_injector.hpp"
@ -31,7 +31,6 @@
# endif
#endif
using namespace ov::intel_cpu;
using namespace InferenceEngine;
using namespace mkldnn;
using namespace mkldnn::impl;
@ -40,6 +39,10 @@ using namespace mkldnn::impl::cpu::x64;
using namespace mkldnn::impl::utils;
using namespace Xbyak;
namespace ov {
namespace intel_cpu {
namespace node {
#define GET_OFF(field) offsetof(jit_bin_conv_call_args, field)
template <cpu_isa_t isa>
@ -103,13 +106,13 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_
solve_common(1, jcp_.oc_block);
sub(reg_oc_work, jcp_.oc_block);
add(reg_kernel_base, jcp_.oc_block * jcp_.nb_ic * jcp_.kh * jcp_.kw * ov::intel_cpu::div_up(jcp_.ic_block, nbits) * jcp_.typesize_in);
add(reg_kernel_base, jcp_.oc_block * jcp_.nb_ic * jcp_.kh * jcp_.kw * div_up(jcp_.ic_block, nbits) * jcp_.typesize_in);
if (jcp_.with_dw_conv) {
add(reg_output_base, jcp_.oc_block * jcp_dw_conv_.kh * jcp_.ow * jcp_.typesize_out);
} else {
if (jcp_.with_binarization)
add(reg_output_base, ov::intel_cpu::div_up(jcp_.oc_block, nbits) * jcp_.typesize_out);
add(reg_output_base, div_up(jcp_.oc_block, nbits) * jcp_.typesize_out);
else
add(reg_output_base, jcp_.oc_block * jcp_.typesize_out);
}
@ -315,16 +318,16 @@ private:
int nbits = 8;
for (int ki = 0; ki < kw; ki++) {
int jj_start = nstl::max(0, ov::intel_cpu::div_up(pad_l - ki * dilate_w, stride_w));
int jj_end = ur_w - nstl::max(0, ov::intel_cpu::div_up(ki*dilate_w+pad_r-(kw-1)*dilate_w, stride_w));
int jj_start = nstl::max(0, div_up(pad_l - ki * dilate_w, stride_w));
int jj_end = ur_w - nstl::max(0, div_up(ki*dilate_w+pad_r-(kw-1)*dilate_w, stride_w));
int _start = (!jcp_.exclude_pad) ? 0 : jj_start;
int _end = (!jcp_.exclude_pad) ? ur_w : jj_end;
for (int ifm2 = 0; ifm2 < ic_blocks; ifm2++) {
for (int jj = _start; jj < _end; jj++) {
int inp_off = ((ki*dilate_w + jj*stride_w - pad_l)*ov::intel_cpu::div_up(jcp_.ic, nbits) +
ifm2 * ov::intel_cpu::div_up(ic_blk, nbits)) * jcp_.typesize_in;
int inp_off = ((ki*dilate_w + jj*stride_w - pad_l)*div_up(jcp_.ic, nbits) +
ifm2 * div_up(ic_blk, nbits)) * jcp_.typesize_in;
if (h_padded || jj < jj_start || jj >= jj_end) {
uni_vmovups(vmm_src, ptr[reg_table + 8 * vlen]);
@ -334,10 +337,10 @@ private:
for (int r = 0; r < repeats; r++) {
for (int ii = 0; ii < oc_blocks; ii++) {
int ker_off = (ifm2 * kh * kw * ov::intel_cpu::div_up(ic_blk, nbits) * oc_blk
+ ii * jcp_.nb_ic * ov::intel_cpu::div_up(ic_blk, nbits) * kh * kw * oc_blk
+ ki * ov::intel_cpu::div_up(ic_blk, nbits) * oc_blk
+ r * ov::intel_cpu::div_up(ic_blk, nbits) * (oc_blk / 2)) * jcp_.typesize_in;
int ker_off = (ifm2 * kh * kw * div_up(ic_blk, nbits) * oc_blk
+ ii * jcp_.nb_ic * div_up(ic_blk, nbits) * kh * kw * oc_blk
+ ki * div_up(ic_blk, nbits) * oc_blk
+ r * div_up(ic_blk, nbits) * (oc_blk / 2)) * jcp_.typesize_in;
uni_vmovups(vmm_tmp, ptr[aux1_reg_kernel + ker_off]);
@ -393,7 +396,7 @@ private:
int kw = jcp_.kw;
int nbits = 8;
int inp_mult = ov::intel_cpu::div_up(jcp_.ic_block, nbits);
int inp_mult = div_up(jcp_.ic_block, nbits);
int out_mult = jcp_.oc_block;
Label icb_main_loop;
@ -427,7 +430,7 @@ private:
int dilate_h = jcp_.dilate_h + 1;
int nbits = 8;
const int inp_mult = dilate_h * ov::intel_cpu::div_up(jcp_.ic, nbits);
const int inp_mult = dilate_h * div_up(jcp_.ic, nbits);
Label t_overflow_label, no_t_overflow_label,
b_overflow_label, no_b_overflow_label;
@ -447,7 +450,7 @@ private:
L(t_overflow_label); {
oh_step_unroll_kw(ur_w, pad_l, pad_r, oc_blocks, oc_step, true);
add(aux_reg_kernel, jcp_.typesize_in * kw * jcp_.oc_block * ov::intel_cpu::div_up(jcp_.ic_block, nbits));
add(aux_reg_kernel, jcp_.typesize_in * kw * jcp_.oc_block * div_up(jcp_.ic_block, nbits));
dec(reg_overflow);
cmp(reg_overflow, 0);
jg(t_overflow_label, T_NEAR);
@ -468,7 +471,7 @@ private:
{
oh_step_unroll_kw(ur_w, pad_l, pad_r, oc_blocks, oc_step, false);
add(aux_reg_kernel, jcp_.typesize_in * kw * jcp_.oc_block * ov::intel_cpu::div_up(jcp_.ic_block, nbits));
add(aux_reg_kernel, jcp_.typesize_in * kw * jcp_.oc_block * div_up(jcp_.ic_block, nbits));
add(aux_reg_input, jcp_.typesize_in * iw * inp_mult);
dec(reg_kh);
@ -485,7 +488,7 @@ private:
L(b_overflow_label); {
oh_step_unroll_kw(ur_w, pad_l, pad_r, oc_blocks, oc_step, true);
add(aux_reg_kernel, jcp_.typesize_in * kw * jcp_.oc_block * ov::intel_cpu::div_up(jcp_.ic_block, nbits));
add(aux_reg_kernel, jcp_.typesize_in * kw * jcp_.oc_block * div_up(jcp_.ic_block, nbits));
dec(reg_overflow);
cmp(reg_overflow, 0);
jg(b_overflow_label, T_NEAR);
@ -528,8 +531,8 @@ private:
kw_padding[jj] = 0;
for (int ki = 0; ki < jcp_.kw; ki++) {
int jj_start = nstl::max(0, ov::intel_cpu::div_up(pad_l - ki * (jcp_.dilate_w + 1), jcp_.stride_w));
int jj_end = ur_w - nstl::max(0, ov::intel_cpu::div_up(ki * (jcp_.dilate_w + 1) + pad_r -
int jj_start = nstl::max(0, div_up(pad_l - ki * (jcp_.dilate_w + 1), jcp_.stride_w));
int jj_end = ur_w - nstl::max(0, div_up(ki * (jcp_.dilate_w + 1) + pad_r -
(jcp_.kw - 1) * (jcp_.dilate_w + 1), jcp_.stride_w));
for (int jj = jj_start; jj < jj_end; jj++) {
kw_padding[jj]++;
@ -677,10 +680,10 @@ private:
if (r == repeats - 1) {
if (isa == x64::avx512_common && oc_step > nbits) {
const size_t o_off = (2 * ii + jj * ov::intel_cpu::div_up(jcp_.oc, nbits));
const size_t o_off = (2 * ii + jj * div_up(jcp_.oc, nbits));
mov(ptr[reg_output + o_off * jcp_.typesize_out], reg_tmp_16);
} else {
const size_t o_off = (ii + jj * ov::intel_cpu::div_up(jcp_.oc, nbits));
const size_t o_off = (ii + jj * div_up(jcp_.oc, nbits));
mov(ptr[reg_output + o_off * jcp_.typesize_out], reg_tmp_8);
}
}
@ -754,8 +757,8 @@ private:
int str_w = jcp_.stride_w;
int nbits = 8;
const int inp_mult = ov::intel_cpu::div_up(jcp_.ic, nbits);
const int out_mult = jcp_.with_dw_conv ? jcp_.oc_block : jcp_.with_binarization ? ov::intel_cpu::div_up(jcp_.oc, nbits) : jcp_.oc;
const int inp_mult = div_up(jcp_.ic, nbits);
const int out_mult = jcp_.with_dw_conv ? jcp_.oc_block : jcp_.with_binarization ? div_up(jcp_.oc, nbits) : jcp_.oc;
int l_pad = jcp_.l_pad;
int r_pad = nstl::max(0, (jcp_.ow - 1) * str_w + (kw - 1) * dilate_w
@ -872,7 +875,7 @@ private:
}
};
bool MKLDNNBinaryConvolutionNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
bool BinaryConvolution::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
@ -894,9 +897,9 @@ bool MKLDNNBinaryConvolutionNode::isSupportedOperation(const std::shared_ptr<con
return true;
}
MKLDNNBinaryConvolutionNode::MKLDNNBinaryConvolutionNode(const std::shared_ptr<ngraph::Node>& op,
const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(op, eng, cache) {
BinaryConvolution::BinaryConvolution(const std::shared_ptr<ngraph::Node>& op,
const mkldnn::engine& eng, WeightsSharing::Ptr &cache)
: Node(op, eng, cache) {
std::string errorMessage;
if (isSupportedOperation(op, errorMessage)) {
errorPrefix = "BinaryConvolution node with name '" + getName() + "' ";
@ -926,15 +929,15 @@ MKLDNNBinaryConvolutionNode::MKLDNNBinaryConvolutionNode(const std::shared_ptr<n
}
}
void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() {
void BinaryConvolution::getSupportedDescriptors() {
if (!descs.empty())
return;
withBinarization = isFusedWith(FakeQuantize);
withBinarization = isFusedWith(Type::FakeQuantize);
withSum = false;
int expectedInputEdgesNum = 2;
for (int i = 0; i < fusedWith.size(); i++) {
auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
auto *eltwiseNode = dynamic_cast<Eltwise *>(fusedWith[i].get());
if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) {
withSum = true;
expectedInputEdgesNum++;
@ -960,7 +963,7 @@ void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() {
}
}
void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() {
void BinaryConvolution::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -1015,7 +1018,7 @@ void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNBinaryConvolutionNode::createPrimitive() {
void BinaryConvolution::createPrimitive() {
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
IE_THROW() << "CPU binary convolution with name '" << getName() << "' doesn't have primitive descriptors.";
@ -1079,7 +1082,7 @@ void MKLDNNBinaryConvolutionNode::createPrimitive() {
auto srcPrecision = getParentEdgeAt(0)->getMemory().getDesc().getPrecision();
auto dstPrecision = getChildEdgeAt(0)->getMemory().getDesc().getPrecision();
jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(dstPrecision);
jcp.dst_dt = DnnlExtensionUtils::IEPrecisionToDataType(dstPrecision);
jcp.typesize_in = srcPrecision == Precision::BIN ? 1 : srcPrecision.size();
jcp.typesize_out = dstPrecision == Precision::BIN ? 1 : dstPrecision.size();
@ -1102,16 +1105,16 @@ void MKLDNNBinaryConvolutionNode::createPrimitive() {
bin_conv_kernel->create_ker();
}
bool MKLDNNBinaryConvolutionNode::canFuse(const MKLDNNNodePtr& node) const {
bool BinaryConvolution::canFuse(const NodePtr& node) const {
if (implType == impl_desc_type::ref)
return false;
// Binarization has to be the last operation in the fusing chain
if (isFusedWith(FakeQuantize))
if (isFusedWith(Type::FakeQuantize))
return false;
if (node->getType() == FakeQuantize) {
bool ret = node->getAlgorithm() == FQBinarization;
if (node->getType() == Type::FakeQuantize) {
bool ret = node->getAlgorithm() == Algorithm::FQBinarization;
for (size_t i = 1; i < node->getParentEdges().size(); i++) {
ret &= node->getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() == 1;
}
@ -1121,12 +1124,12 @@ bool MKLDNNBinaryConvolutionNode::canFuse(const MKLDNNNodePtr& node) const {
}
}
void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
void BinaryConvolution::setPostOps(mkldnn::primitive_attr &attr) {
mkldnn::post_ops ops;
postOpsDataPtrs.clear();
for (auto &node : fusedWith) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
auto* eltwiseNode = dynamic_cast<Eltwise *>(node.get());
if (eltwiseNode) {
if (eltwiseNode->isSpecialConvolutionAddFusing()) {
ops.append_sum(1.0);
@ -1137,7 +1140,7 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
continue;
}
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
auto* fakeQuantizeNode = dynamic_cast<FakeQuantize *>(node.get());
if (fakeQuantizeNode) {
fakeQuantizeNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), postOpsDataPtrs);
continue;
@ -1149,13 +1152,13 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
attr.set_post_ops(ops);
}
void MKLDNNBinaryConvolutionNode::executeOptimized(const uint8_t* src, const uint8_t* weights, uint8_t* dst,
void BinaryConvolution::executeOptimized(const uint8_t* src, const uint8_t* weights, uint8_t* dst,
const std::vector<size_t>& s_str, const std::vector<size_t>& w_str, const std::vector<size_t>& d_str) {
auto dst_f32 = reinterpret_cast<float *>(dst);
const int MB = jcp.mb;
int ocb_work = ov::intel_cpu::div_up(jcp.nb_oc, jcp.nb_oc_blocking);
int ocb_work = div_up(jcp.nb_oc, jcp.nb_oc_blocking);
int nbits = 8;
parallel_for4d(MB, jcp.ngroups, ocb_work, jcp.oh, [&](int n, int g, int ocbb, int oh) {
@ -1165,8 +1168,8 @@ void MKLDNNBinaryConvolutionNode::executeOptimized(const uint8_t* src, const uin
auto par_conv = jit_bin_conv_call_args();
const int ij = oh * jcp.stride_h;
const int i_t_overflow = nstl::min(jcp.kh, ov::intel_cpu::div_up(nstl::max(0, jcp.t_pad - ij), (jcp.dilate_h+1)));
const int i_b_overflow = nstl::min(jcp.kh, ov::intel_cpu::div_up(nstl::max(jcp.ih, ij + (jcp.kh-1) * (jcp.dilate_h+1) -
const int i_t_overflow = nstl::min(jcp.kh, div_up(nstl::max(0, jcp.t_pad - ij), (jcp.dilate_h+1)));
const int i_b_overflow = nstl::min(jcp.kh, div_up(nstl::max(jcp.ih, ij + (jcp.kh-1) * (jcp.dilate_h+1) -
jcp.t_pad+1) - jcp.ih, (jcp.dilate_h + 1)));
const size_t _oc = g * jcp.nb_oc + ocb;
@ -1199,7 +1202,7 @@ void MKLDNNBinaryConvolutionNode::executeOptimized(const uint8_t* src, const uin
});
}
void MKLDNNBinaryConvolutionNode::executeReference(const uint8_t* src, const uint8_t* weights, uint8_t* dst,
void BinaryConvolution::executeReference(const uint8_t* src, const uint8_t* weights, uint8_t* dst,
const std::vector<size_t>& s_str, const std::vector<size_t>& w_str, const std::vector<size_t>& d_str) {
auto dst_fp = reinterpret_cast<float *>(dst);
@ -1276,12 +1279,12 @@ void MKLDNNBinaryConvolutionNode::executeReference(const uint8_t* src, const uin
const int i_left_overflow = nstl::max(0, (padL - ow * KSW));
const int i_right_overflow = nstl::max(IW, (ow * KSW + (KW - 1) * (KDW + 1) - padL + 1)) - IW;
const int kw_padding =
KW - ov::intel_cpu::div_up(i_left_overflow, (KDW + 1)) - ov::intel_cpu::div_up(i_right_overflow, (KDW + 1));
KW - div_up(i_left_overflow, (KDW + 1)) - div_up(i_right_overflow, (KDW + 1));
const int i_top_overflow = nstl::max(0, (padT - oh * KSH));
const int i_bottom_overflow = nstl::max(IH, (oh * KSH + (KH - 1) * (KDH + 1) - padT + 1)) - IH;
const int kh_padding =
KH - ov::intel_cpu::div_up(i_top_overflow, (KDH + 1)) - ov::intel_cpu::div_up(i_bottom_overflow, (KDH + 1));
KH - div_up(i_top_overflow, (KDH + 1)) - div_up(i_bottom_overflow, (KDH + 1));
base_value = IC * kh_padding * kw_padding;
} else {
@ -1294,7 +1297,7 @@ void MKLDNNBinaryConvolutionNode::executeReference(const uint8_t* src, const uin
});
}
void MKLDNNBinaryConvolutionNode::execute(mkldnn::stream strm) {
void BinaryConvolution::execute(mkldnn::stream strm) {
auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr();
auto &weightsMemory = getParentEdgeAt(1)->getMemoryPtr();
auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr();
@ -1333,8 +1336,10 @@ void MKLDNNBinaryConvolutionNode::execute(mkldnn::stream strm) {
}
}
bool MKLDNNBinaryConvolutionNode::created() const {
return getType() == BinaryConvolution;
bool BinaryConvolution::created() const {
return getType() == Type::BinaryConvolution;
}
REG_MKLDNN_PRIM_FOR(MKLDNNBinaryConvolutionNode, BinaryConvolution);
} // namespace node
} // namespace intel_cpu
} // namespace ov
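A side effect of moving this kernel inside namespace ov::intel_cpu, visible throughout the hunks above, is that the explicit ov::intel_cpu::div_up(...) qualification can simply be dropped: the unqualified name resolves to the same helper. The helper itself is not shown in this diff; it is assumed to be ordinary ceiling integer division, which is what the bit-packing arithmetic such as div_up(jcp.ic, nbits) relies on. A small sketch under that assumption:

#include <cstddef>

// Assumed semantics of the div_up helper used above: ceiling integer division.
// This is an illustrative re-statement, not the plugin's actual implementation.
constexpr std::size_t div_up_sketch(std::size_t a, std::size_t b) {
    return (a + b - 1) / b;
}
static_assert(div_up_sketch(17, 8) == 3, "17 binary channels pack into 3 bytes");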

View File

@ -12,6 +12,7 @@
namespace ov {
namespace intel_cpu {
namespace node {
struct jit_bin_conv_params {
int mb;
@ -74,9 +75,9 @@ struct jit_uni_bin_conv_kernel {
const mkldnn_primitive_attr &attr_;
};
class MKLDNNBinaryConvolutionNode : public MKLDNNNode {
class BinaryConvolution : public Node {
public:
MKLDNNBinaryConvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
BinaryConvolution(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
void getSupportedDescriptors() override;
void createPrimitive() override;
@ -87,7 +88,7 @@ public:
return false;
}
void setPostOps(mkldnn::primitive_attr &attr);
bool canFuse(const MKLDNNNodePtr& node) const override;
bool canFuse(const NodePtr& node) const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
@ -122,5 +123,6 @@ private:
std::string errorPrefix;
};
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -14,10 +14,13 @@
#include <ngraph/opsets/opset1.hpp>
#include "common/cpu_memcpy.h"
using namespace ov::intel_cpu;
using namespace InferenceEngine;
bool MKLDNNBroadcastNode::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
namespace ov {
namespace intel_cpu {
namespace node {
bool Broadcast::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
try {
if (!ov::is_type<ov::op::v1::Broadcast>(op)) {
errorMessage = "Only Broadcast operations from opset1 are supported.";
@ -46,8 +49,8 @@ bool MKLDNNBroadcastNode::isSupportedOperation(const std::shared_ptr<const ov::N
return true;
}
MKLDNNBroadcastNode::MKLDNNBroadcastNode(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
Broadcast::Broadcast(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng,
WeightsSharing::Ptr &cache) : Node(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
@ -81,7 +84,7 @@ MKLDNNBroadcastNode::MKLDNNBroadcastNode(const std::shared_ptr<ov::Node>& op, co
}
}
void MKLDNNBroadcastNode::getSupportedDescriptors() {
void Broadcast::getSupportedDescriptors() {
if (!isDynamicNode()) {
const auto& srcDims = getInputShapeAtPort(INPUT_DATA_IDX).getDims();
repeats.assign(targetShape.begin(), targetShape.end());
@ -100,18 +103,18 @@ void MKLDNNBroadcastNode::getSupportedDescriptors() {
}
}
void MKLDNNBroadcastNode::initSupportedPrimitiveDescriptors() {
void Broadcast::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
supportedPrimitiveDescriptors = getSupportedConfigs(this);
}
bool MKLDNNBroadcastNode::needPrepareParams() const {
bool Broadcast::needPrepareParams() const {
return needPrepareParamsVar;
}
void MKLDNNBroadcastNode::prepareParams() {
void Broadcast::prepareParams() {
if (!constMap[TARGET_SHAPE_IDX]) {
const auto& targetShapeMem = getParentEdgesAtPort(TARGET_SHAPE_IDX)[0]->getMemory();
const int32_t* targetShapeData = reinterpret_cast<const int32_t *>(targetShapeMem.GetPtr());
@ -149,7 +152,7 @@ void MKLDNNBroadcastNode::prepareParams() {
optimizedCase = prepareOptimizedParams(this, srcBlockedDims, dstBlockedDims);
}
bool MKLDNNBroadcastNode::needShapeInfer() const {
bool Broadcast::needShapeInfer() const {
needPrepareParamsVar = true;
if (inputShapesModified()) {
return true;
@ -181,19 +184,19 @@ bool MKLDNNBroadcastNode::needShapeInfer() const {
return false;
}
std::vector<VectorDims> MKLDNNBroadcastNode::shapeInfer() const {
return MKLDNNNode::shapeInferGeneric(PortMask(TARGET_SHAPE_IDX, AXES_MAPPING_IDX));
std::vector<VectorDims> Broadcast::shapeInfer() const {
return Node::shapeInferGeneric(PortMask(TARGET_SHAPE_IDX, AXES_MAPPING_IDX));
}
bool MKLDNNBroadcastNode::isExecutable() const {
bool Broadcast::isExecutable() const {
return !isInputTensorAtPortEmpty(0);
}
void MKLDNNBroadcastNode::executeDynamicImpl(mkldnn::stream strm) {
void Broadcast::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNBroadcastNode::execute(mkldnn::stream strm) {
void Broadcast::execute(mkldnn::stream strm) {
if (optimizedCase) {
optimizedExecute(getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(), getChildEdgeAt(0)->getMemoryPtr());
} else {
@ -201,7 +204,7 @@ void MKLDNNBroadcastNode::execute(mkldnn::stream strm) {
}
}
void MKLDNNBroadcastNode::plainExecute(mkldnn::stream strm) {
void Broadcast::plainExecute(mkldnn::stream strm) {
VectorDims srcDims = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().getStaticDims();
const auto& dstDims = getChildEdgeAt(0)->getMemory().getStaticDims();
const auto& dataSrcRank = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().GetShape().getRank();
@ -257,8 +260,10 @@ void MKLDNNBroadcastNode::plainExecute(mkldnn::stream strm) {
});
}
bool MKLDNNBroadcastNode::created() const {
return getType() == Broadcast;
bool Broadcast::created() const {
return getType() == Type::Broadcast;
}
REG_MKLDNN_PRIM_FOR(MKLDNNBroadcastNode, Broadcast)
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -10,13 +10,13 @@
#include <string>
#include <vector>
namespace ov {
namespace intel_cpu {
namespace node {
class MKLDNNBroadcastNode : public MKLDNNNode, public TileBroadcastCommon {
class Broadcast : public Node, public TileBroadcastCommon {
public:
MKLDNNBroadcastNode(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
Broadcast(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
@ -52,5 +52,6 @@ private:
std::string errorPrefix;
};
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -10,10 +10,13 @@
#include "ie_parallel.hpp"
#include "bucketize.h"
using namespace ov::intel_cpu;
using namespace InferenceEngine;
bool MKLDNNBucketizeNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
namespace ov {
namespace intel_cpu {
namespace node {
bool Bucketize::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto bucketsize = std::dynamic_pointer_cast<const ngraph::opset3::Bucketize>(op);
if (!bucketsize) {
@ -26,8 +29,8 @@ bool MKLDNNBucketizeNode::isSupportedOperation(const std::shared_ptr<const ngrap
return true;
}
MKLDNNBucketizeNode::MKLDNNBucketizeNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
Bucketize::Bucketize(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
WeightsSharing::Ptr &cache) : Node(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
@ -47,7 +50,7 @@ MKLDNNBucketizeNode::MKLDNNBucketizeNode(const std::shared_ptr<ngraph::Node>& op
with_right = bucketsize->get_with_right_bound();
}
void MKLDNNBucketizeNode::initSupportedPrimitiveDescriptors() {
void Bucketize::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -73,7 +76,7 @@ void MKLDNNBucketizeNode::initSupportedPrimitiveDescriptors() {
impl_desc_type::ref_any);
}
void MKLDNNBucketizeNode::execute(mkldnn::stream strm) {
void Bucketize::execute(mkldnn::stream strm) {
auto precision_mask = getPrecisionMask(input_precision, boundaries_precision, output_precision);
switch (precision_mask) {
@ -172,7 +175,7 @@ void MKLDNNBucketizeNode::execute(mkldnn::stream strm) {
}
}
void MKLDNNBucketizeNode::prepareParams() {
void Bucketize::prepareParams() {
auto& inputTensorMemPtr = getParentEdgeAt(INPUT_TENSOR_PORT)->getMemoryPtr();
auto& inputBinsMemPtr = getParentEdgeAt(INPUT_BINS_PORT)->getMemoryPtr();
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
@ -203,16 +206,16 @@ void MKLDNNBucketizeNode::prepareParams() {
std::accumulate(input_tensor_dims.begin(), input_tensor_dims.end(), size_t(1), std::multiplies<size_t>());
}
bool MKLDNNBucketizeNode::isExecutable() const {
bool Bucketize::isExecutable() const {
return !isInputTensorAtPortEmpty(0);
}
std::vector<VectorDims> MKLDNNBucketizeNode::shapeInfer() const {
std::vector<VectorDims> Bucketize::shapeInfer() const {
return {getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()};
}
template <typename T, typename T_BOUNDARIES, typename T_IND>
void MKLDNNBucketizeNode::bucketize() {
void Bucketize::bucketize() {
const auto *input_data = reinterpret_cast<const T *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
const auto *boundaries_data = reinterpret_cast<const T_BOUNDARIES *>(getParentEdgeAt(1)->getMemoryPtr()->GetPtr());
auto *output_data = reinterpret_cast<T_IND *>(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr());
@ -235,8 +238,10 @@ void MKLDNNBucketizeNode::bucketize() {
});
}
bool MKLDNNBucketizeNode::created() const {
return getType() == Bucketize;
bool Bucketize::created() const {
return getType() == Type::Bucketize;
}
REG_MKLDNN_PRIM_FOR(MKLDNNBucketizeNode, Bucketize)
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -9,10 +9,11 @@
namespace ov {
namespace intel_cpu {
namespace node {
class MKLDNNBucketizeNode : public MKLDNNNode {
class Bucketize : public Node {
public:
MKLDNNBucketizeNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
Bucketize(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
@ -47,5 +48,6 @@ private:
std::string errorPrefix;
};
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -19,6 +19,7 @@ using namespace Xbyak;
namespace ov {
namespace intel_cpu {
namespace node {
namespace {
std::tuple<Algorithm, std::string> getAlgorithmFor(const std::shared_ptr<const ngraph::Node>& op) {
@ -33,11 +34,11 @@ std::tuple<Algorithm, std::string> getAlgorithmFor(const std::shared_ptr<const n
return std::make_tuple(Algorithm::Default, std::string("Type ") + op->get_type_name() + " is not supported.");
}
class Converter : public MKLDNNColorConvertNode::Converter {
using Base = MKLDNNColorConvertNode::Converter;
class Converter : public ColorConvert::Converter {
using Base = ColorConvert::Converter;
public:
Converter(MKLDNNNode *node);
Converter(Node *node);
Shapes shapeInfer() const override;
bool singlePlane() const;
@ -46,14 +47,14 @@ public:
std::tuple<T, T, T> yuv_to_rgb(float y, float u, float v);
};
Converter::Converter(MKLDNNNode *node)
Converter::Converter(Node *node)
: Base(node, node->getAlgorithm() == Algorithm::ColorConvertNV12toRGB
|| node->getAlgorithm() == Algorithm::ColorConvertI420toRGB
? ColorFormat { { 0, 1, 2 } }
: ColorFormat { { 2, 1, 0 } }) {
}
MKLDNNColorConvertNode::Converter::Shapes
ColorConvert::Converter::Shapes
Converter::shapeInfer() const {
const auto & dims = inputDims(0);
if (dims.size() != 4)
@ -275,14 +276,14 @@ void jit_uni_converter::store_tail(const variable<T*> & dst,
namespace nv12 {
MKLDNNColorConvertNode::Converter::PrimitiveDescs supportedPrimitiveDescs(MKLDNNNode *node) {
ColorConvert::Converter::PrimitiveDescs supportedPrimitiveDescs(Node *node) {
const LayoutType layout = LayoutType::ncsp; // 0,1,2,3
const Precision precision = node->getOriginalInputPrecisionAtPort(0) == Precision::U8
? Precision::U8
: Precision::FP32;
MKLDNNColorConvertNode::Converter::PrimitiveDescs descs;
ColorConvert::Converter::PrimitiveDescs descs;
descs.emplace_back(std::vector<PortConfigurator> { node->getOriginalInputsNumber(), { layout, precision } },
std::vector<PortConfigurator> { { layout, precision } },
@ -301,7 +302,7 @@ class TwoPlaneConvert;
class RefConverter : public Converter {
public:
RefConverter(MKLDNNNode *node);
RefConverter(Node *node);
protected:
template<typename T>
@ -315,7 +316,7 @@ protected:
size_t stride_uv);
};
RefConverter::RefConverter(MKLDNNNode *node)
RefConverter::RefConverter(Node *node)
: Converter(node) {
if (node->getOriginalInputsNumber() != (singlePlane() ? 1: 2))
IE_THROW() <<"NV12Converter node has incorrect number of inputs";
@ -553,7 +554,7 @@ const jit_uni_converter & jit_converter_get() {
template<typename T>
class SinglePlaneConvert<T, impl_desc_type::jit_uni> : public Converter {
public:
SinglePlaneConvert(MKLDNNNode *node)
SinglePlaneConvert(Node *node)
: Converter(node) {
jit_converter_create<T>();
}
@ -588,7 +589,7 @@ public:
template<typename T>
class TwoPlaneConvert<T, impl_desc_type::jit_uni> : public Converter {
public:
TwoPlaneConvert(MKLDNNNode *node)
TwoPlaneConvert(Node *node)
: Converter(node) {
jit_converter_create<T>();
}
@ -624,14 +625,14 @@ public:
namespace i420 {
MKLDNNColorConvertNode::Converter::PrimitiveDescs supportedPrimitiveDescs(MKLDNNNode *node) {
ColorConvert::Converter::PrimitiveDescs supportedPrimitiveDescs(Node *node) {
const LayoutType layout = LayoutType::ncsp; // 0,1,2,3
const Precision precision = node->getOriginalInputPrecisionAtPort(0) == Precision::U8
? Precision::U8
: Precision::FP32;
MKLDNNColorConvertNode::Converter::PrimitiveDescs descs;
ColorConvert::Converter::PrimitiveDescs descs;
descs.emplace_back(std::vector<PortConfigurator> { node->getOriginalInputsNumber(), { layout, precision } },
std::vector<PortConfigurator> { { layout, precision } },
@ -650,7 +651,7 @@ class ThreePlaneConvert;
class RefConverter : public Converter {
public:
RefConverter(MKLDNNNode *node);
RefConverter(Node *node);
protected:
template<typename T>
@ -665,7 +666,7 @@ protected:
size_t stride_uv);
};
RefConverter::RefConverter(MKLDNNNode *node)
RefConverter::RefConverter(Node *node)
: Converter(node) {
if (node->getOriginalInputsNumber() != (singlePlane() ? 1: 3))
IE_THROW() <<"I420Converter node has incorrect number of inputs";
@ -902,7 +903,7 @@ const jit_uni_converter & jit_converter_get() {
template<typename T>
class SinglePlaneConvert<T, impl_desc_type::jit_uni> : public Converter {
public:
SinglePlaneConvert(MKLDNNNode *node)
SinglePlaneConvert(Node *node)
: Converter(node) {
jit_converter_create<T>();
}
@ -939,7 +940,7 @@ public:
template<typename T>
class ThreePlaneConvert<T, impl_desc_type::jit_uni> : public Converter {
public:
ThreePlaneConvert(MKLDNNNode *node)
ThreePlaneConvert(Node *node)
: Converter(node) {
jit_converter_create<T>();
}
@ -977,50 +978,50 @@ public:
} // namespace
MKLDNNColorConvertNode::Converter::Converter(MKLDNNNode *node, const ColorFormat & colorFormat)
ColorConvert::Converter::Converter(Node *node, const ColorFormat & colorFormat)
: _node(node)
, _colorFormat(colorFormat) {
}
InferenceEngine::Precision MKLDNNColorConvertNode::Converter::inputPrecision(size_t idx) const {
InferenceEngine::Precision ColorConvert::Converter::inputPrecision(size_t idx) const {
return _node->getParentEdgesAtPort(idx)[0]->getMemory().getDesc().getPrecision();
}
InferenceEngine::Precision MKLDNNColorConvertNode::Converter::outputPrecision(size_t idx) const {
InferenceEngine::Precision ColorConvert::Converter::outputPrecision(size_t idx) const {
return _node->getChildEdgesAtPort(idx)[0]->getMemory().getDesc().getPrecision();
}
const void * MKLDNNColorConvertNode::Converter::input(size_t idx) const {
const void * ColorConvert::Converter::input(size_t idx) const {
return _node->getParentEdgeAt(idx)->getMemoryPtr()->GetPtr();
}
void * MKLDNNColorConvertNode::Converter::output(size_t idx) const {
void * ColorConvert::Converter::output(size_t idx) const {
return _node->getChildEdgeAt(idx)->getMemoryPtr()->GetPtr();
}
const VectorDims & MKLDNNColorConvertNode::Converter::inputDims(size_t idx) const {
const VectorDims & ColorConvert::Converter::inputDims(size_t idx) const {
return _node->getParentEdgesAtPort(idx)[0]->getMemory().getStaticDims();
}
bool MKLDNNColorConvertNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
bool ColorConvert::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
Algorithm alg;
std::tie(alg, errorMessage) = getAlgorithmFor(op);
return alg != Algorithm::Default;
}
MKLDNNColorConvertNode::MKLDNNColorConvertNode(const std::shared_ptr<ngraph::Node>& op,
ColorConvert::ColorConvert(const std::shared_ptr<ngraph::Node>& op,
const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(op, eng, cache) {
WeightsSharing::Ptr &cache)
: Node(op, eng, cache) {
std::string errorMessage;
std::tie(algorithm, errorMessage) = getAlgorithmFor(op);
if (algorithm == Algorithm::Default)
IE_THROW(NotImplemented) << errorMessage;
}
void MKLDNNColorConvertNode::getSupportedDescriptors() {}
void ColorConvert::getSupportedDescriptors() {}
void MKLDNNColorConvertNode::initSupportedPrimitiveDescriptors() {
void ColorConvert::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -1054,9 +1055,9 @@ void MKLDNNColorConvertNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNColorConvertNode::initSupportedNV12Impls() {
void ColorConvert::initSupportedNV12Impls() {
#define SUPPORTED_IMPL(Impl, type, desc_type) \
[](MKLDNNNode *node) { \
[](Node *node) { \
return new nv12::Impl<type, impl_desc_type::desc_type>(node); \
};
@ -1081,9 +1082,9 @@ void MKLDNNColorConvertNode::initSupportedNV12Impls() {
#undef SUPPORTED_IMPL
}
void MKLDNNColorConvertNode::initSupportedI420Impls() {
void ColorConvert::initSupportedI420Impls() {
#define SUPPORTED_IMPL(Impl, type, desc_type) \
[](MKLDNNNode *node) { \
[](Node *node) { \
return new i420::Impl<type, impl_desc_type::desc_type>(node); \
};
@ -1108,7 +1109,7 @@ void MKLDNNColorConvertNode::initSupportedI420Impls() {
#undef SUPPORTED_IMPL
}
void MKLDNNColorConvertNode::createPrimitive() {
void ColorConvert::createPrimitive() {
const NodeDesc *desc = getSelectedPrimitiveDescriptor();
if (!desc)
IE_THROW() << getTypeStr() + " node with name '" + getName() + "' "
@ -1127,33 +1128,32 @@ void MKLDNNColorConvertNode::createPrimitive() {
}
}
void MKLDNNColorConvertNode::execute(mkldnn::stream strm) {
void ColorConvert::execute(mkldnn::stream strm) {
if (!_impl)
IE_THROW() << getTypeStr() + " node with name '" + getName() + "' "
<< "has no any implemented converter";
_impl->execute(strm);
}
bool MKLDNNColorConvertNode::created() const {
return getType() == ColorConvert;
bool ColorConvert::created() const {
return getType() == Type::ColorConvert;
}
std::vector<VectorDims> MKLDNNColorConvertNode::shapeInfer() const {
std::vector<VectorDims> ColorConvert::shapeInfer() const {
if (!_impl)
IE_THROW() << getTypeStr() + " node with name '" + getName() + "' "
<< "has no any implemented converter";
return _impl->shapeInfer();
}
bool MKLDNNColorConvertNode::needPrepareParams() const {
bool ColorConvert::needPrepareParams() const {
return false;
}
void MKLDNNColorConvertNode::executeDynamicImpl(mkldnn::stream strm) {
void ColorConvert::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
REG_MKLDNN_PRIM_FOR(MKLDNNColorConvertNode, ColorConvert);
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -12,12 +12,13 @@
namespace ov {
namespace intel_cpu {
namespace node {
class MKLDNNColorConvertNode : public MKLDNNNode {
class ColorConvert : public Node {
public:
MKLDNNColorConvertNode(const std::shared_ptr<ngraph::Node>& op,
const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache);
ColorConvert(const std::shared_ptr<ngraph::Node>& op,
const mkldnn::engine& eng,
WeightsSharing::Ptr &cache);
class Converter;
public:
@ -37,7 +38,7 @@ private:
void initSupportedI420Impls();
private:
using ConverterBuilder = std::function<Converter*(MKLDNNNode *)>;
using ConverterBuilder = std::function<Converter*(Node *)>;
using SupportedImpls = multidim_map<impl_desc_type, // Implementation type
Algorithm, // Algorithm: ColorConvertXXX
InferenceEngine::Precision::ePrecision, // Precision: FP32/U8
@ -48,7 +49,7 @@ private:
SupportedImpls _supportedImpls;
};
class MKLDNNColorConvertNode::Converter {
class ColorConvert::Converter {
public:
using PrimitiveDescs = std::vector<std::tuple<std::vector<PortConfigurator>, // Input port configurator
std::vector<PortConfigurator>, // Output port configurator
@ -63,7 +64,7 @@ public:
using ColorFormat = std::array<uint8_t, 3>;
Converter(MKLDNNNode *node, const ColorFormat & colorFormat);
Converter(Node *node, const ColorFormat & colorFormat);
virtual ~Converter() = default;
InferenceEngine::Precision inputPrecision(size_t idx) const;
InferenceEngine::Precision outputPrecision(size_t idx) const;
@ -74,9 +75,10 @@ public:
virtual void execute(mkldnn::stream strm) = 0;
protected:
MKLDNNNode *_node;
Node *_node;
ColorFormat _colorFormat; // RGB: {0,1,2}, BGR: {2,1,0}
};
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -6,9 +6,11 @@
#include <numeric>
using namespace InferenceEngine;
using namespace ov::intel_cpu;
namespace ov {
namespace intel_cpu {
namespace {
constexpr size_t channelsPos = 1lu;
class PlainFormatCreator : public BlockedDescCreator {
@ -67,6 +69,7 @@ public:
private:
size_t _blockSize;
};
} // namespace
const BlockedDescCreator::CreatorsMap& BlockedDescCreator::getCommonCreators() {
@ -119,3 +122,6 @@ BlockedDescCreator::makeFilteredRange(const CreatorsMap &map, BlockedDescCreator
auto last = first.end();
return std::make_pair(first, last);
}
} // namespace intel_cpu
} // namespace ov

View File

@ -17,12 +17,13 @@
#include <cmath>
#include "mkldnn/ie_mkldnn.h"
using namespace ov::intel_cpu;
using namespace InferenceEngine;
using namespace mkldnn::impl::utils;
using namespace mkldnn::impl::cpu::x64;
using namespace Xbyak;
namespace ov {
namespace intel_cpu {
namespace {
template <typename src_t, typename dst_t>
@ -470,52 +471,52 @@ bool isConversionTruncatesRange(const Precision & from, const Precision & to) {
} // namespace
#define MKLDNN_CVT(ST, DT) OV_CASE2(Precision::ST, Precision::DT, PrecisionInfo<Precision::ST>::value_type, PrecisionInfo<Precision::DT>::value_type)
#define INTEL_CPU_CVT(ST, DT) OV_CASE2(Precision::ST, Precision::DT, PrecisionInfo<Precision::ST>::value_type, PrecisionInfo<Precision::DT>::value_type)
#define MKLDNN_CVT_LIST \
MKLDNN_CVT(U8, I8), MKLDNN_CVT(U8, U16), MKLDNN_CVT(U8, I16), MKLDNN_CVT(U8, U32), \
MKLDNN_CVT(U8, I32), MKLDNN_CVT(U8, U64), MKLDNN_CVT(U8, I64), MKLDNN_CVT(U8, FP32), \
MKLDNN_CVT(U8, FP16), MKLDNN_CVT(U8, BF16), MKLDNN_CVT(U8, FP64), MKLDNN_CVT(U8, BOOL), \
MKLDNN_CVT(I8, U8), MKLDNN_CVT(I8, U16), MKLDNN_CVT(I8, I16), MKLDNN_CVT(I8, U32), \
MKLDNN_CVT(I8, I32), MKLDNN_CVT(I8, U64), MKLDNN_CVT(I8, I64), MKLDNN_CVT(I8, FP32), \
MKLDNN_CVT(I8, FP16), MKLDNN_CVT(I8, BF16), MKLDNN_CVT(I8, FP64), MKLDNN_CVT(I8, BOOL), \
MKLDNN_CVT(U16, U8), MKLDNN_CVT(U16, I8), MKLDNN_CVT(U16, I16), MKLDNN_CVT(U16, U32), \
MKLDNN_CVT(U16, I32), MKLDNN_CVT(U16, U64), MKLDNN_CVT(U16, I64), MKLDNN_CVT(U16, FP32), \
MKLDNN_CVT(U16, FP16), MKLDNN_CVT(U16, BF16), MKLDNN_CVT(U16, FP64), MKLDNN_CVT(U16, BOOL), \
MKLDNN_CVT(I16, U8), MKLDNN_CVT(I16, I8), MKLDNN_CVT(I16, U16), MKLDNN_CVT(I16, U32), \
MKLDNN_CVT(I16, I32), MKLDNN_CVT(I16, U64), MKLDNN_CVT(I16, I64), MKLDNN_CVT(I16, FP32), \
MKLDNN_CVT(I16, FP16), MKLDNN_CVT(I16, BF16), MKLDNN_CVT(I16, FP64), MKLDNN_CVT(I16, BOOL), \
MKLDNN_CVT(U32, U8), MKLDNN_CVT(U32, I8), MKLDNN_CVT(U32, U16), MKLDNN_CVT(U32, I16), \
MKLDNN_CVT(U32, I32), MKLDNN_CVT(U32, U64), MKLDNN_CVT(U32, I64), MKLDNN_CVT(U32, FP32), \
MKLDNN_CVT(U32, FP16), MKLDNN_CVT(U32, BF16), MKLDNN_CVT(U32, FP64), MKLDNN_CVT(U32, BOOL), \
MKLDNN_CVT(I32, U8), MKLDNN_CVT(I32, I8), MKLDNN_CVT(I32, U16), MKLDNN_CVT(I32, I16), \
MKLDNN_CVT(I32, U32), MKLDNN_CVT(I32, U64), MKLDNN_CVT(I32, I64), MKLDNN_CVT(I32, FP32), \
MKLDNN_CVT(I32, FP16), MKLDNN_CVT(I32, BF16), MKLDNN_CVT(I32, FP64), MKLDNN_CVT(I32, BOOL), \
MKLDNN_CVT(U64, U8), MKLDNN_CVT(U64, I8), MKLDNN_CVT(U64, U16), MKLDNN_CVT(U64, I16), \
MKLDNN_CVT(U64, U32), MKLDNN_CVT(U64, I32), MKLDNN_CVT(U64, I64), MKLDNN_CVT(U64, FP32), \
MKLDNN_CVT(U64, FP16), MKLDNN_CVT(U64, BF16), MKLDNN_CVT(U64, FP64), MKLDNN_CVT(U64, BOOL), \
MKLDNN_CVT(I64, U8), MKLDNN_CVT(I64, I8), MKLDNN_CVT(I64, U16), MKLDNN_CVT(I64, I16), \
MKLDNN_CVT(I64, U32), MKLDNN_CVT(I64, I32), MKLDNN_CVT(I64, U64), MKLDNN_CVT(I64, FP32), \
MKLDNN_CVT(I64, FP16), MKLDNN_CVT(I64, BF16), MKLDNN_CVT(I64, FP64), MKLDNN_CVT(I64, BOOL), \
MKLDNN_CVT(FP32, U8), MKLDNN_CVT(FP32, I8), MKLDNN_CVT(FP32, U16), MKLDNN_CVT(FP32, I16), \
MKLDNN_CVT(FP32, U32), MKLDNN_CVT(FP32, I32), MKLDNN_CVT(FP32, U64), MKLDNN_CVT(FP32, I64), \
MKLDNN_CVT(FP32, FP16), MKLDNN_CVT(FP32, BF16), MKLDNN_CVT(FP32, FP64), MKLDNN_CVT(FP32, BOOL), \
MKLDNN_CVT(FP16, U8), MKLDNN_CVT(FP16, I8), MKLDNN_CVT(FP16, U16), MKLDNN_CVT(FP16, I16), \
MKLDNN_CVT(FP16, U32), MKLDNN_CVT(FP16, I32), MKLDNN_CVT(FP16, U64), MKLDNN_CVT(FP16, I64), \
MKLDNN_CVT(FP16, FP32), MKLDNN_CVT(FP16, BF16), MKLDNN_CVT(FP16, FP64), MKLDNN_CVT(FP16, BOOL), \
MKLDNN_CVT(BF16, U8), MKLDNN_CVT(BF16, I8), MKLDNN_CVT(BF16, U16), MKLDNN_CVT(BF16, I16), \
MKLDNN_CVT(BF16, U32), MKLDNN_CVT(BF16, I32), MKLDNN_CVT(BF16, U64), MKLDNN_CVT(BF16, I64), \
MKLDNN_CVT(BF16, FP32), MKLDNN_CVT(BF16, FP16), MKLDNN_CVT(BF16, FP64), MKLDNN_CVT(BF16, BOOL), \
MKLDNN_CVT(FP64, U8), MKLDNN_CVT(FP64, I8), MKLDNN_CVT(FP64, U16), MKLDNN_CVT(FP64, I16), \
MKLDNN_CVT(FP64, U32), MKLDNN_CVT(FP64, I32), MKLDNN_CVT(FP64, U64), MKLDNN_CVT(FP64, I64), \
MKLDNN_CVT(FP64, FP32), MKLDNN_CVT(FP64, FP16), MKLDNN_CVT(FP64, BF16), MKLDNN_CVT(FP64, BOOL), \
MKLDNN_CVT(BOOL, U8), MKLDNN_CVT(BOOL, I8), MKLDNN_CVT(BOOL, U16), MKLDNN_CVT(BOOL, I16), \
MKLDNN_CVT(BOOL, U32), MKLDNN_CVT(BOOL, I32), MKLDNN_CVT(BOOL, U64), MKLDNN_CVT(BOOL, I64), \
MKLDNN_CVT(BOOL, FP32), MKLDNN_CVT(BOOL, FP16), MKLDNN_CVT(BOOL, BF16), MKLDNN_CVT(BOOL, FP64), \
MKLDNN_CVT(U8, U8), MKLDNN_CVT(I8, I8), MKLDNN_CVT(U16, U16), MKLDNN_CVT(I16, I16), \
MKLDNN_CVT(U32, U32), MKLDNN_CVT(I32, I32), MKLDNN_CVT(U64, U64), MKLDNN_CVT(I64, I64), \
MKLDNN_CVT(FP32, FP32), MKLDNN_CVT(FP16, FP16), MKLDNN_CVT(BF16, BF16), MKLDNN_CVT(FP64, FP64), \
MKLDNN_CVT(BOOL, BOOL)
#define INTEL_CPU_CVT_LIST \
INTEL_CPU_CVT(U8, I8), INTEL_CPU_CVT(U8, U16), INTEL_CPU_CVT(U8, I16), INTEL_CPU_CVT(U8, U32), \
INTEL_CPU_CVT(U8, I32), INTEL_CPU_CVT(U8, U64), INTEL_CPU_CVT(U8, I64), INTEL_CPU_CVT(U8, FP32), \
INTEL_CPU_CVT(U8, FP16), INTEL_CPU_CVT(U8, BF16), INTEL_CPU_CVT(U8, FP64), INTEL_CPU_CVT(U8, BOOL), \
INTEL_CPU_CVT(I8, U8), INTEL_CPU_CVT(I8, U16), INTEL_CPU_CVT(I8, I16), INTEL_CPU_CVT(I8, U32), \
INTEL_CPU_CVT(I8, I32), INTEL_CPU_CVT(I8, U64), INTEL_CPU_CVT(I8, I64), INTEL_CPU_CVT(I8, FP32), \
INTEL_CPU_CVT(I8, FP16), INTEL_CPU_CVT(I8, BF16), INTEL_CPU_CVT(I8, FP64), INTEL_CPU_CVT(I8, BOOL), \
INTEL_CPU_CVT(U16, U8), INTEL_CPU_CVT(U16, I8), INTEL_CPU_CVT(U16, I16), INTEL_CPU_CVT(U16, U32), \
INTEL_CPU_CVT(U16, I32), INTEL_CPU_CVT(U16, U64), INTEL_CPU_CVT(U16, I64), INTEL_CPU_CVT(U16, FP32), \
INTEL_CPU_CVT(U16, FP16), INTEL_CPU_CVT(U16, BF16), INTEL_CPU_CVT(U16, FP64), INTEL_CPU_CVT(U16, BOOL), \
INTEL_CPU_CVT(I16, U8), INTEL_CPU_CVT(I16, I8), INTEL_CPU_CVT(I16, U16), INTEL_CPU_CVT(I16, U32), \
INTEL_CPU_CVT(I16, I32), INTEL_CPU_CVT(I16, U64), INTEL_CPU_CVT(I16, I64), INTEL_CPU_CVT(I16, FP32), \
INTEL_CPU_CVT(I16, FP16), INTEL_CPU_CVT(I16, BF16), INTEL_CPU_CVT(I16, FP64), INTEL_CPU_CVT(I16, BOOL), \
INTEL_CPU_CVT(U32, U8), INTEL_CPU_CVT(U32, I8), INTEL_CPU_CVT(U32, U16), INTEL_CPU_CVT(U32, I16), \
INTEL_CPU_CVT(U32, I32), INTEL_CPU_CVT(U32, U64), INTEL_CPU_CVT(U32, I64), INTEL_CPU_CVT(U32, FP32), \
INTEL_CPU_CVT(U32, FP16), INTEL_CPU_CVT(U32, BF16), INTEL_CPU_CVT(U32, FP64), INTEL_CPU_CVT(U32, BOOL), \
INTEL_CPU_CVT(I32, U8), INTEL_CPU_CVT(I32, I8), INTEL_CPU_CVT(I32, U16), INTEL_CPU_CVT(I32, I16), \
INTEL_CPU_CVT(I32, U32), INTEL_CPU_CVT(I32, U64), INTEL_CPU_CVT(I32, I64), INTEL_CPU_CVT(I32, FP32), \
INTEL_CPU_CVT(I32, FP16), INTEL_CPU_CVT(I32, BF16), INTEL_CPU_CVT(I32, FP64), INTEL_CPU_CVT(I32, BOOL), \
INTEL_CPU_CVT(U64, U8), INTEL_CPU_CVT(U64, I8), INTEL_CPU_CVT(U64, U16), INTEL_CPU_CVT(U64, I16), \
INTEL_CPU_CVT(U64, U32), INTEL_CPU_CVT(U64, I32), INTEL_CPU_CVT(U64, I64), INTEL_CPU_CVT(U64, FP32), \
INTEL_CPU_CVT(U64, FP16), INTEL_CPU_CVT(U64, BF16), INTEL_CPU_CVT(U64, FP64), INTEL_CPU_CVT(U64, BOOL), \
INTEL_CPU_CVT(I64, U8), INTEL_CPU_CVT(I64, I8), INTEL_CPU_CVT(I64, U16), INTEL_CPU_CVT(I64, I16), \
INTEL_CPU_CVT(I64, U32), INTEL_CPU_CVT(I64, I32), INTEL_CPU_CVT(I64, U64), INTEL_CPU_CVT(I64, FP32), \
INTEL_CPU_CVT(I64, FP16), INTEL_CPU_CVT(I64, BF16), INTEL_CPU_CVT(I64, FP64), INTEL_CPU_CVT(I64, BOOL), \
INTEL_CPU_CVT(FP32, U8), INTEL_CPU_CVT(FP32, I8), INTEL_CPU_CVT(FP32, U16), INTEL_CPU_CVT(FP32, I16), \
INTEL_CPU_CVT(FP32, U32), INTEL_CPU_CVT(FP32, I32), INTEL_CPU_CVT(FP32, U64), INTEL_CPU_CVT(FP32, I64), \
INTEL_CPU_CVT(FP32, FP16), INTEL_CPU_CVT(FP32, BF16), INTEL_CPU_CVT(FP32, FP64), INTEL_CPU_CVT(FP32, BOOL), \
INTEL_CPU_CVT(FP16, U8), INTEL_CPU_CVT(FP16, I8), INTEL_CPU_CVT(FP16, U16), INTEL_CPU_CVT(FP16, I16), \
INTEL_CPU_CVT(FP16, U32), INTEL_CPU_CVT(FP16, I32), INTEL_CPU_CVT(FP16, U64), INTEL_CPU_CVT(FP16, I64), \
INTEL_CPU_CVT(FP16, FP32), INTEL_CPU_CVT(FP16, BF16), INTEL_CPU_CVT(FP16, FP64), INTEL_CPU_CVT(FP16, BOOL), \
INTEL_CPU_CVT(BF16, U8), INTEL_CPU_CVT(BF16, I8), INTEL_CPU_CVT(BF16, U16), INTEL_CPU_CVT(BF16, I16), \
INTEL_CPU_CVT(BF16, U32), INTEL_CPU_CVT(BF16, I32), INTEL_CPU_CVT(BF16, U64), INTEL_CPU_CVT(BF16, I64), \
INTEL_CPU_CVT(BF16, FP32), INTEL_CPU_CVT(BF16, FP16), INTEL_CPU_CVT(BF16, FP64), INTEL_CPU_CVT(BF16, BOOL), \
INTEL_CPU_CVT(FP64, U8), INTEL_CPU_CVT(FP64, I8), INTEL_CPU_CVT(FP64, U16), INTEL_CPU_CVT(FP64, I16), \
INTEL_CPU_CVT(FP64, U32), INTEL_CPU_CVT(FP64, I32), INTEL_CPU_CVT(FP64, U64), INTEL_CPU_CVT(FP64, I64), \
INTEL_CPU_CVT(FP64, FP32), INTEL_CPU_CVT(FP64, FP16), INTEL_CPU_CVT(FP64, BF16), INTEL_CPU_CVT(FP64, BOOL), \
INTEL_CPU_CVT(BOOL, U8), INTEL_CPU_CVT(BOOL, I8), INTEL_CPU_CVT(BOOL, U16), INTEL_CPU_CVT(BOOL, I16), \
INTEL_CPU_CVT(BOOL, U32), INTEL_CPU_CVT(BOOL, I32), INTEL_CPU_CVT(BOOL, U64), INTEL_CPU_CVT(BOOL, I64), \
INTEL_CPU_CVT(BOOL, FP32), INTEL_CPU_CVT(BOOL, FP16), INTEL_CPU_CVT(BOOL, BF16), INTEL_CPU_CVT(BOOL, FP64), \
INTEL_CPU_CVT(U8, U8), INTEL_CPU_CVT(I8, I8), INTEL_CPU_CVT(U16, U16), INTEL_CPU_CVT(I16, I16), \
INTEL_CPU_CVT(U32, U32), INTEL_CPU_CVT(I32, I32), INTEL_CPU_CVT(U64, U64), INTEL_CPU_CVT(I64, I64), \
INTEL_CPU_CVT(FP32, FP32), INTEL_CPU_CVT(FP16, FP16), INTEL_CPU_CVT(BF16, BF16), INTEL_CPU_CVT(FP64, FP64), \
INTEL_CPU_CVT(BOOL, BOOL)
void cpu_convert(const void *srcPtr, void *dstPtr, Precision srcPrc, Precision dstPrc, const size_t size) {
cpu_convert(srcPtr, dstPtr, srcPrc, dstPrc, dstPrc, size);
@ -553,11 +554,14 @@ void cpu_convert(const void *srcPtr,
dstPrc,
false
};
OV_SWITCH(intel_cpu, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc), MKLDNN_CVT_LIST);
OV_SWITCH(intel_cpu, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc), INTEL_CPU_CVT_LIST);
if (!ctx.converted)
IE_THROW() << "cpu_convert can't convert from: " << srcPrc << " precision to: " << dstPrc;
}
}
#undef MKLDNN_CVT
#undef MKLDNN_CVT_LIST
#undef INTEL_CPU_CVT
#undef INTEL_CPU_CVT_LIST
} // namespace intel_cpu
} // namespace ov

View File

@ -4,6 +4,9 @@
#include <ie_precision.hpp>
namespace ov {
namespace intel_cpu {
/**
* @brief Copy size elements from buffer specified srcPtr pointer to buffer specified dstPtr.
* If the precisions srcPrc and dstPrc are different, a conversion from srcPrc to dstPrc is performed.
@ -48,3 +51,6 @@ void cpu_convert(const void *srcPtr,
InferenceEngine::Precision interimPrc,
InferenceEngine::Precision dstPrc,
const size_t size);
} // namespace intel_cpu
} // namespace ov
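The declaration above is the element-wise conversion entry point used across the plugin; the matching .cpp earlier in this diff also defines a simpler overload that takes only the source precision, the destination precision and the element count. A minimal, hypothetical call site, assuming BF16 output is stored as raw 16-bit words and that this header is reachable through the plugin's include paths:

#include <cstdint>
#include <vector>
#include <ie_precision.hpp>
// #include "cpu_convert.h"   // assumed include path for the header shown above

void convert_example() {
    std::vector<float> src = {1.0f, 2.5f, -3.0f};
    std::vector<int16_t> dst(src.size());            // BF16 values occupy 16 bits each
    // Simple overload: source precision, destination precision, number of elements.
    ov::intel_cpu::cpu_convert(src.data(), dst.data(),
                               InferenceEngine::Precision::FP32,
                               InferenceEngine::Precision::BF16,
                               src.size());
}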

View File

@ -7,6 +7,9 @@
#include <cstring>
#include "ie_api.h"
namespace ov {
namespace intel_cpu {
/**
* @brief Copies bytes between buffers with security enhancements
* Copies count bytes from src to dest. If the source and destination
@ -47,3 +50,6 @@ inline int cpu_memcpy_s(void* dst, size_t dst_size, const void* src, size_t coun
#endif
return 0;
}
} // namespace intel_cpu
} // namespace ov
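For reference, a hypothetical use of the bounds-checked copy declared above. The visible tail of the function returns 0, so 0 is taken to mean success; a non-zero return is assumed to indicate that count did not fit into dst_size.

#include <cstddef>
#include <cstdint>
#include "common/cpu_memcpy.h"   // include path used elsewhere in this commit

bool copy_example(const uint8_t* src, std::size_t count, uint8_t* dst, std::size_t dst_size) {
    // cpu_memcpy_s never writes more than dst_size bytes into dst.
    return ov::intel_cpu::cpu_memcpy_s(dst, dst_size, src, count) == 0;
}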

View File

@ -5,7 +5,9 @@
#include "dnnl_executor.h"
using namespace mkldnn;
using namespace ov::intel_cpu;
namespace ov {
namespace intel_cpu {
DnnlExecutor::IntermReorder::IntermReorder(const mkldnn::memory::desc& descSrc,
const mkldnn::memory::desc& descDst,
@ -47,3 +49,6 @@ void DnnlExecutor::exec(std::unordered_map<int, mkldnn::memory> primArgs, mkldnn
bool DnnlExecutor::needReordering() const {
return !inputReorders.empty() || !outputReorders.empty();
}
} // namespace intel_cpu
} // namespace ov

View File

@ -32,7 +32,7 @@ class DnnlExecutor {
protected:
DnnlExecutor() = default;
MKLDNNPrimitive execPrim;
Primitive execPrim;
// key is the port number for the primitive that needs memory reordering
std::unordered_map<int, IntermReorder> inputReorders;
std::unordered_map<int, IntermReorder> outputReorders;

View File

@ -4,6 +4,9 @@
#pragma once
namespace ov {
namespace intel_cpu {
typedef short ie_fp16;
// Function to convert F32 into F16
@ -80,3 +83,5 @@ inline float f16tof32(ie_fp16 x) {
return asfloat(u);
}
} // namespace intel_cpu
} // namespace ov
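The helper above decodes a raw ie_fp16 bit pattern into a float. A small sanity check, assuming the header shown above is included: 0x3C00 is the IEEE-754 binary16 encoding of 1.0, which is exactly representable, so the conversion should return exactly 1.0f.

#include <cassert>

void f16_example() {
    const ov::intel_cpu::ie_fp16 one_in_fp16 = static_cast<ov::intel_cpu::ie_fp16>(0x3C00);
    assert(ov::intel_cpu::f16tof32(one_in_fp16) == 1.0f);   // exact, since 1.0 has a lossless fp16 encoding
}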

View File

@ -7,7 +7,7 @@
#include <vector>
#include <mkldnn_types.h>
#include <ie_parallel.hpp>
#include <extension_utils.h>
#include <dnnl_extension_utils.h>
#include "cpu_memcpy.h"
#include "utils/bfloat16.hpp"
@ -15,7 +15,6 @@
#include <common/primitive_hashing_utils.hpp>
using namespace InferenceEngine;
using namespace ov::intel_cpu;
using namespace mkldnn;
using namespace mkldnn::impl;
using namespace mkldnn::impl::cpu::x64;
@ -24,6 +23,9 @@ using namespace Xbyak;
#define GET_OFF(field) offsetof(jit_args_permute, field)
namespace ov {
namespace intel_cpu {
template <cpu_isa_t isa>
struct jit_uni_permute_kernel_f32 : public jit_uni_permute_kernel, public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_permute_kernel_f32)
@ -410,3 +412,6 @@ bool PermuteParams::operator==(const PermuteParams& rhs) const {
(dst_block_order == rhs.dst_block_order) && (order == rhs.order) &&
(data_size == rhs.data_size);
}
} // namespace intel_cpu
} // namespace ov

View File

@ -16,7 +16,6 @@
#include <vector>
using namespace InferenceEngine;
using namespace ov::intel_cpu;
using namespace mkldnn;
using namespace mkldnn::impl::cpu;
using namespace mkldnn::impl::cpu::x64;
@ -24,6 +23,9 @@ using namespace mkldnn::impl::utils;
#define GET_OFF(field) offsetof(jit_args_softmax, field)
namespace ov {
namespace intel_cpu {
struct jit_args_softmax {
const void* src;
void* dst;
@ -325,3 +327,6 @@ void SoftmaxGeneric::execute(const uint8_t *src_data, uint8_t *dst_data, int B,
IE_THROW() << "Unsupported input precision: " << input_prec.name();
}
}
} // namespace intel_cpu
} // namespace ov

View File

@ -10,6 +10,9 @@
#include "defs.h"
#include "ie_parallel.hpp"
namespace ov {
namespace intel_cpu {
struct jit_uni_softmax_kernel;
static inline
@ -51,3 +54,5 @@ private:
std::shared_ptr<jit_uni_softmax_kernel> softmax_kernel;
};
} // namespace intel_cpu
} // namespace ov

View File

@ -10,7 +10,9 @@
#include "memory_desc/dnnl_blocked_memory_desc.h"
using namespace InferenceEngine;
using namespace ov::intel_cpu;
namespace ov {
namespace intel_cpu {
VectorDims TileBroadcastCommon::calculateDenseStrides(const VectorDims &dims) {
VectorDims strides(dims.size(), 1);
@ -87,10 +89,10 @@ bool TileBroadcastCommon::canBeExecutedInNSPCLayout(VectorDims srcBlockedDims, V
return optimizedDims.size() <= maxNDims;
}
std::vector<NodeDesc> TileBroadcastCommon::getSupportedConfigs(const MKLDNNNode *node) {
std::vector<NodeDesc> TileBroadcastCommon::getSupportedConfigs(const Node *node) {
std::vector<NodeDesc> supportedPrimitiveDescriptors;
auto precision = node->getOriginalInputPrecisionAtPort(0);
auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
auto dataType = DnnlExtensionUtils::IEPrecisionToDataType(precision);
const auto& srcDims = node->getInputShapeAtPort(0).getDims();
const auto& inDataShape = node->getInputShapeAtPort(0);
@ -150,8 +152,8 @@ std::vector<NodeDesc> TileBroadcastCommon::getSupportedConfigs(const MKLDNNNode
}
}
auto inFmt = MKLDNNExtensionUtils::GetPlainFormatByRank(inDataShape.getRank());
auto outFmt = MKLDNNExtensionUtils::GetPlainFormatByRank(outDataShapeRank);
auto inFmt = DnnlExtensionUtils::GetPlainFormatByRank(inDataShape.getRank());
auto outFmt = DnnlExtensionUtils::GetPlainFormatByRank(outDataShapeRank);
if (inFmt == mkldnn::memory::format_tag::undef || outFmt == mkldnn::memory::format_tag::undef) {
config.inConfs[0].setMemDesc(std::make_shared<CpuBlockedMemoryDesc>(precision, node->getInputShapeAtPort(0)));
for (int i = 0; i < config.outConfs.size(); i++) {
@ -167,7 +169,7 @@ std::vector<NodeDesc> TileBroadcastCommon::getSupportedConfigs(const MKLDNNNode
return supportedPrimitiveDescriptors;
}
bool TileBroadcastCommon::prepareOptimizedParams(const MKLDNNNode *node, VectorDims& srcBlockedDims, VectorDims& dstBlockedDims) {
bool TileBroadcastCommon::prepareOptimizedParams(const Node *node, VectorDims& srcBlockedDims, VectorDims& dstBlockedDims) {
while (srcBlockedDims.size() < dstBlockedDims.size()) {
srcBlockedDims.insert(srcBlockedDims.begin(), 1);
}
@ -244,7 +246,7 @@ void TileBroadcastCommon::broadcastScalar(const char *srcData, char *dstData, si
}
}
void TileBroadcastCommon::optimizedExecute(const MKLDNNMemoryPtr& srcMemory, const MKLDNNMemoryPtr& dstMemory) {
void TileBroadcastCommon::optimizedExecute(const MemoryPtr& srcMemory, const MemoryPtr& dstMemory) {
auto srcData = reinterpret_cast<const char *>(srcMemory->GetPtr());
auto dstData = reinterpret_cast<char *>(dstMemory->GetPtr());
@ -287,3 +289,6 @@ void TileBroadcastCommon::optimizedExecute(const MKLDNNMemoryPtr& srcMemory, con
});
}
}
} // namespace intel_cpu
} // namespace ov
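The hunks above also show the companion utility rename that runs through the whole commit: the extension_utils.h include becomes dnnl_extension_utils.h and MKLDNNExtensionUtils becomes DnnlExtensionUtils, while the static helper names (IEPrecisionToDataType, GetPlainFormatByRank, sizeOfDataType) stay the same. A hypothetical call site after the rename, assuming the class lives in ov::intel_cpu like the rest of the renamed code:

#include <dnnl_extension_utils.h>   // renamed from <extension_utils.h> in this commit
#include <ie_precision.hpp>

void data_type_example() {
    // Same helper as before the refactoring, reached through the renamed class.
    auto dt = ov::intel_cpu::DnnlExtensionUtils::IEPrecisionToDataType(
        InferenceEngine::Precision::FP32);
    (void)dt;   // e.g. the value assigned to jcp.dst_dt in the binary convolution kernel
}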

View File

@ -16,10 +16,10 @@ namespace intel_cpu {
class TileBroadcastCommon {
protected:
static VectorDims calculateDenseStrides(const VectorDims &dims);
std::vector<NodeDesc> getSupportedConfigs(const MKLDNNNode *node);
bool prepareOptimizedParams(const MKLDNNNode *node, VectorDims& srcBlockedDims, VectorDims& dstBlockedDims);
std::vector<NodeDesc> getSupportedConfigs(const Node *node);
bool prepareOptimizedParams(const Node *node, VectorDims& srcBlockedDims, VectorDims& dstBlockedDims);
void optimizedExecute(const MKLDNNMemoryPtr& srcMemory, const MKLDNNMemoryPtr& dstMemory);
void optimizedExecute(const MemoryPtr& srcMemory, const MemoryPtr& dstMemory);
VectorDims repeats;
bool optimizedCase = false;

View File

@ -7,7 +7,7 @@
#include <map>
#include <utility>
#include <vector>
#include <extension_utils.h>
#include <dnnl_extension_utils.h>
#include "mkldnn.hpp"
#include "mkldnn/iml_type_mapper.h"
@ -24,18 +24,20 @@
#include <memory_desc/cpu_memory_desc_utils.h>
using namespace mkldnn;
using namespace ov::intel_cpu;
using namespace InferenceEngine;
namespace ov {
namespace intel_cpu {
namespace node {
namespace {
constexpr size_t channelAxis = 1lu;
}
bool MKLDNNConcatNode::isExecutable() const {
bool Concat::isExecutable() const {
return !hasEmptyOutputTensors() && !isOptimized();
}
bool MKLDNNConcatNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
bool Concat::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto concatOp = ngraph::as_type_ptr<const ngraph::op::v0::Concat>(op);
if (!concatOp) {
@ -48,8 +50,8 @@ bool MKLDNNConcatNode::isSupportedOperation(const std::shared_ptr<const ngraph::
return true;
}
MKLDNNConcatNode::MKLDNNConcatNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(op, eng, cache) {
Concat::Concat(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache)
: Node(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
@ -67,7 +69,7 @@ MKLDNNConcatNode::MKLDNNConcatNode(const std::shared_ptr<ngraph::Node>& op, cons
this->axis = axis;
}
void MKLDNNConcatNode::getSupportedDescriptors() {
void Concat::getSupportedDescriptors() {
const auto& firstParentDims = getInputShapeAtPort(0).getDims();
for (size_t i = 1; i < getParentEdges().size(); i++) {
const auto& dims = getInputShapeAtPort(i).getDims();
@ -94,7 +96,7 @@ void MKLDNNConcatNode::getSupportedDescriptors() {
}
}
void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() {
void Concat::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -221,7 +223,7 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() {
void Concat::selectOptimalPrimitiveDescriptor() {
std::vector<size_t> canSelectPrimitive;
// The double connection marks that some tensor should
@ -337,22 +339,22 @@ void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() {
selectPrimitiveDescriptorByIndex(0);
}
bool MKLDNNConcatNode::created() const {
return getType() == Concatenation;
bool Concat::created() const {
return getType() == Type::Concatenation;
}
bool MKLDNNConcatNode::isOptimized() const {
bool Concat::isOptimized() const {
return getSelectedPrimitiveDescriptor() && getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].inPlace() >= 0;
}
bool MKLDNNConcatNode::needPrepareParams() const {
bool Concat::needPrepareParams() const {
if (canOptimizeNspc) {
return false;
}
return inputShapesModified();
}
void MKLDNNConcatNode::prepareParams() {
void Concat::prepareParams() {
if (canOptimizeNspc || isOptimized())
return;
@ -395,7 +397,7 @@ void MKLDNNConcatNode::prepareParams() {
prim.reset(new concat(primitive_desc));
}
size_t MKLDNNConcatNode::inverseOrder(const SizeVector& order, size_t axis) {
size_t Concat::inverseOrder(const SizeVector& order, size_t axis) {
for (size_t i = 0; i < order.size(); i++) {
if (axis == order[i]) {
return i;
@ -404,13 +406,13 @@ size_t MKLDNNConcatNode::inverseOrder(const SizeVector& order, size_t axis) {
return -1;
}
void MKLDNNConcatNode::initOptimalPrimitiveDescriptor() {
void Concat::initOptimalPrimitiveDescriptor() {
auto selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
IE_THROW() << "Preferable primitive descriptor is not set.";
if (!isOptimized()) {
MKLDNNNode::initOptimalPrimitiveDescriptor();
Node::initOptimalPrimitiveDescriptor();
auto config = selected_pd->getConfig();
if (!isConfigDefined(config)) {
for (size_t i = 0; i < config.inConfs.size(); i++) {
@ -486,12 +488,12 @@ void MKLDNNConcatNode::initOptimalPrimitiveDescriptor() {
canOptimizeNspc = axis == channelAxis && getSelectedPrimitiveDescriptor()->getConfig().outConfs.front().getMemDesc()->hasLayoutType(LayoutType::nspc);
}
void MKLDNNConcatNode::execute(mkldnn::stream strm) {
void Concat::execute(mkldnn::stream strm) {
if (isOptimized()) {
return;
}
const MKLDNNMemory& dst_memory = getChildEdgeAt(0)->getMemory();
const Memory& dst_memory = getChildEdgeAt(0)->getMemory();
if (canOptimizeNspc) {
execNspcSpecCase();
return;
@ -512,15 +514,15 @@ void MKLDNNConcatNode::execute(mkldnn::stream strm) {
(*prim).execute(strm, mem_ags);
}
InferenceEngine::Precision MKLDNNConcatNode::getRuntimePrecision() const {
InferenceEngine::Precision Concat::getRuntimePrecision() const {
return getMaxPrecision(getInputPrecisions());
}
void MKLDNNConcatNode::execNspcSpecCase() {
const MKLDNNMemory& dst_memory = getChildEdgeAt(0)->getMemory();
void Concat::execNspcSpecCase() {
const Memory& dst_memory = getChildEdgeAt(0)->getMemory();
const size_t num_src = getParentEdges().size();
uint8_t* dst_ptr = reinterpret_cast<uint8_t*>(dst_memory.GetData());
const size_t dataSize = MKLDNNExtensionUtils::sizeOfDataType(dst_memory.GetDataType());
const size_t dataSize = DnnlExtensionUtils::sizeOfDataType(dst_memory.GetDataType());
std::vector<size_t> channelsDataSize;
size_t channels_size = 0;
@ -530,7 +532,7 @@ void MKLDNNConcatNode::execNspcSpecCase() {
size_t nonZeroInShapes = 0;
int firstNonZeroEdge = -1;
for (size_t i = 0; i < num_src; i++) {
const MKLDNNMemory& src_mem = getParentEdgesAtPort(i)[0]->getMemory();
const Memory& src_mem = getParentEdgesAtPort(i)[0]->getMemory();
if (src_mem.GetShape().hasZeroDims()) {
continue;
}
@ -558,4 +560,6 @@ void MKLDNNConcatNode::execNspcSpecCase() {
});
}
REG_MKLDNN_PRIM_FOR(MKLDNNConcatNode, Concatenation);
} // namespace node
} // namespace intel_cpu
} // namespace ov
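
Note on the enum changes above: the hunks repeatedly replace unqualified enumerators such as Concatenation with Type::Concatenation (and, in later files, EltwiseAdd with Algorithm::EltwiseAdd). The stand-alone sketch below uses minimal, made-up Type and Node definitions — assumptions for illustration only, not the plugin's real headers — to show why a scoped (or otherwise qualified) enumeration forces that qualification at every comparison site, which is exactly the pattern followed in the renamed created() methods.

#include <iostream>

// Hypothetical, minimal stand-ins for the plugin's node type enumeration and node base class.
enum class Type { Concatenation, Convolution, Convert };

class Node {
public:
    explicit Node(Type t) : type(t) {}
    Type getType() const { return type; }
private:
    Type type;
};

int main() {
    Node concat(Type::Concatenation);
    // With a scoped enum the enumerator must be written as Type::Concatenation;
    // a bare Concatenation no longer names anything in this scope.
    std::cout << std::boolalpha << (concat.getType() == Type::Concatenation) << std::endl;  // true
    return 0;
}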

View File

@ -11,10 +11,11 @@
namespace ov {
namespace intel_cpu {
namespace node {
class MKLDNNConcatNode : public MKLDNNNode {
class Concat : public Node {
public:
MKLDNNConcatNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
Concat(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
void getSupportedDescriptors() override;
@ -45,5 +46,6 @@ private:
InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::FP32;
};
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -14,7 +14,7 @@
#include <string>
#include <vector>
#include <mkldnn_types.h>
#include <extension_utils.h>
#include <dnnl_extension_utils.h>
#include <utils/general_utils.h>
#include <ngraph/ops.hpp>
#include <cpu/x64/jit_generator.hpp>
@ -25,9 +25,11 @@
#include <common/primitive_hashing_utils.hpp>
using namespace mkldnn;
using namespace ov::intel_cpu;
using namespace InferenceEngine;
namespace ov {
namespace intel_cpu {
namespace node {
namespace {
struct ConvKey {
@ -96,16 +98,16 @@ bool ConvKey::operator==(const ConvKey &rhs) const {
} // namespace
class MKLDNNConvolutionNode::FusedSubgraph {
class Convolution::FusedSubgraph {
public:
FusedSubgraph(const std::vector<MKLDNNNodePtr> &opList, const MKLDNNConvolutionNode &conv, MKLDNNWeightsSharing::Ptr weightCache) {
_graph = std::unique_ptr<MKLDNNGraph>(new MKLDNNGraph());
FusedSubgraph(const std::vector<NodePtr> &opList, const Convolution &conv, WeightsSharing::Ptr weightCache) {
_graph = std::unique_ptr<Graph>(new Graph());
std::unordered_set<MKLDNNNodePtr> nodesSet;
std::vector<MKLDNNEdgePtr> edges;
std::unordered_set<NodePtr> nodesSet;
std::vector<EdgePtr> edges;
auto addEdge = [&](const MKLDNNNodePtr& parent, const MKLDNNNodePtr& child, size_t parentPort, size_t childPort) -> void {
auto edge = std::make_shared<MKLDNNEdge>(parent, child, parentPort, childPort);
auto addEdge = [&](const NodePtr& parent, const NodePtr& child, size_t parentPort, size_t childPort) -> void {
auto edge = std::make_shared<Edge>(parent, child, parentPort, childPort);
child->addEdge(edge);
edges.push_back(edge);
nodesSet.insert(parent);
@ -114,15 +116,15 @@ public:
//Make inputs
const auto &inpMemDesc1 = conv.getBaseMemDescAtOutputPort(0);
auto inp0 = std::make_shared<MKLDNNInputNode>(inpMemDesc1, "inp0", "Parameter", conv.getEngine(), weightCache);
auto inp0 = std::make_shared<Input>(inpMemDesc1, "inp0", "Parameter", conv.getEngine(), weightCache);
inputs.push_back(inp0);
const size_t sumPortNum = conv.getParentEdges().size() - 1;
const auto &inpMemDesc2 = conv.getBaseMemDescAtInputPort(sumPortNum);
auto inp1 = std::make_shared<MKLDNNInputNode>(inpMemDesc2, "inp1", "Parameter", conv.getEngine(), weightCache);
auto inp1 = std::make_shared<Input>(inpMemDesc2, "inp1", "Parameter", conv.getEngine(), weightCache);
inputs.push_back(inp1);
auto itr = std::find_if(opList.begin(), opList.end(), [](const MKLDNNNodePtr &node) {
if (auto eltwise = std::dynamic_pointer_cast<MKLDNNEltwiseNode>(node)) {
auto itr = std::find_if(opList.begin(), opList.end(), [](const NodePtr &node) {
if (auto eltwise = std::dynamic_pointer_cast<Eltwise>(node)) {
return eltwise->isSpecialConvolutionAddFusing();
}
return false;
@ -140,7 +142,7 @@ public:
while (++itr != opList.end()) {
auto parentNode = *parentItr;
auto currentNode = *itr;
if (FakeQuantize == currentNode->getType()) {
if (Type::FakeQuantize == currentNode->getType()) {
parentNode->addFusedNode(currentNode);
} else {
addEdge(parentNode, currentNode, 0, 0);
@ -157,29 +159,29 @@ public:
//Make output
const auto &outMemDesc = conv.getBaseMemDescAtOutputPort(0);
auto out = std::make_shared<MKLDNNInputNode>(outMemDesc, "out", "Result", conv.getEngine(), weightCache);
auto out = std::make_shared<Input>(outMemDesc, "out", "Result", conv.getEngine(), weightCache);
addEdge(*parentItr, out, 0, 0);
outputs.push_back(out);
std::vector<MKLDNNNodePtr> nodes(nodesSet.begin(), nodesSet.end());
std::vector<NodePtr> nodes(nodesSet.begin(), nodesSet.end());
_graph->CreateGraph(nodes, edges, weightCache, "fused_subgraph");
}
std::shared_ptr<MKLDNNInputNode> getInput(size_t idx) const {
std::shared_ptr<Input> getInput(size_t idx) const {
if (idx < inputs.size()) {
return inputs[idx];
} else {
IE_THROW(OutOfBounds) << "Unexpected input index in MKLDNNConvolutionNode::fusedSubgraph::getInput idx=" << idx
IE_THROW(OutOfBounds) << "Unexpected input index in Convolution::fusedSubgraph::getInput idx=" << idx
<< " inputs.size()=" << inputs.size();
}
}
std::shared_ptr<MKLDNNInputNode> getOutput(size_t idx) const {
std::shared_ptr<Input> getOutput(size_t idx) const {
if (idx < outputs.size()) {
return outputs[idx];
} else {
IE_THROW(OutOfBounds) << "Unexpected output index in MKLDNNConvolutionNode::fusedSubgraph::getInput idx=" << idx
IE_THROW(OutOfBounds) << "Unexpected output index in Convolution::fusedSubgraph::getInput idx=" << idx
<< " inputs.size()=" << outputs.size();
}
}
@ -190,12 +192,12 @@ public:
}
private:
std::unique_ptr<MKLDNNGraph> _graph;
std::vector<std::shared_ptr<MKLDNNInputNode>> inputs;
std::vector<std::shared_ptr<MKLDNNInputNode>> outputs;
std::unique_ptr<Graph> _graph;
std::vector<std::shared_ptr<Input>> inputs;
std::vector<std::shared_ptr<Input>> outputs;
};
bool MKLDNNConvolutionNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
bool Convolution::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (!ngraph::is_type<ngraph::op::v1::Convolution>(op) && !ngraph::is_type<ngraph::op::v1::GroupConvolution>(op)) {
errorMessage = "Only opset1 Convolution and GroupConvolution operations are supported";
@ -217,8 +219,8 @@ bool MKLDNNConvolutionNode::isSupportedOperation(const std::shared_ptr<const ngr
return true;
}
MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(op, eng, cache), withBiases(false), withSum(false), withDWConv(false),
Convolution::Convolution(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache)
: Node(op, eng, cache), withBiases(false), withSum(false), withDWConv(false),
isGrouped(false), dw_conv_oc(0), dw_conv_ih(0), dw_conv_iw(0), dw_conv_in_dt(memory::data_type::undef),
groupNum(1lu), IC(1), groupIC(1), groupOC(1), eltwisePrecision(Precision::FP32) {
std::string errorMessage;
@ -230,7 +232,7 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node>
auto groupConvolutionOp = ngraph::as_type_ptr<ngraph::op::v1::GroupConvolution>(op);
if (convolutionOp) {
algorithm = ConvolutionCommon;
algorithm = Algorithm::ConvolutionCommon;
groupNum = 1;
isGrouped = false;
@ -253,7 +255,7 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node>
paddingR = convolutionOp->get_pads_end();
autoPadding = one_of(convolutionOp->get_auto_pad(), ov::op::PadType::SAME_UPPER, ov::op::PadType::SAME_LOWER);
} else if (groupConvolutionOp) {
algorithm = ConvolutionGrouped;
algorithm = Algorithm::ConvolutionGrouped;
groupNum = groupConvolutionOp->input_value(1).get_shape()[0];
isGrouped = true;
@ -278,19 +280,19 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node>
}
}
bool MKLDNNConvolutionNode::canBeExecutedInInt8() const {
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0));
bool Convolution::canBeExecutedInInt8() const {
auto inputDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0));
if (!inputZeroPoints.empty())
inputDataType = memory::data_type::u8;
auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(1));
auto weightsDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(1));
if (!weightsZeroPoints.empty())
weightsDataType = memory::data_type::s8;
return one_of(inputDataType, memory::data_type::u8, memory::data_type::s8) && weightsDataType == memory::data_type::s8;
}
InferenceEngine::Precision MKLDNNConvolutionNode::fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const {
InferenceEngine::Precision Convolution::fusedEltwisePrecision(const NodePtr& fusingNode) const {
InferenceEngine::Precision eltwisePrecision;
int fusingPort = fusingNode->getFusingPort();
@ -305,7 +307,7 @@ InferenceEngine::Precision MKLDNNConvolutionNode::fusedEltwisePrecision(const MK
return eltwisePrecision;
}
void MKLDNNConvolutionNode::getSupportedDescriptors() {
void Convolution::getSupportedDescriptors() {
if (!descs.empty())
return;
@ -316,44 +318,44 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
// winograd support only constant weights and bias
isWino = std::find(implPriorities.begin(), implPriorities.end(), impl_desc_type::jit_avx512_winograd) != implPriorities.end() &&
mkldnn::impl::cpu::x64::mayiuse(mkldnn::impl::cpu::x64::avx512_common) && !canBeExecutedInInt8() &&
getParentEdgeAt(1)->getParent()->isConstant() && getParentEdgeAt(1)->getParent()->getType() == Input &&
(withBiases ? (getParentEdgeAt(2)->getParent()->isConstant() && getParentEdgeAt(2)->getParent()->getType() == Input) : true);
getParentEdgeAt(1)->getParent()->isConstant() && getParentEdgeAt(1)->getParent()->getType() == Type::Input &&
(withBiases ? (getParentEdgeAt(2)->getParent()->isConstant() && getParentEdgeAt(2)->getParent()->getType() == Type::Input) : true);
}
int expectedInputEdgesNum = static_cast<int>(getOriginalInputsNumber());
for (int i = 0; i < fusedWith.size(); i++) {
if (fusedWith[i]->getType() == Convolution) {
if (fusedWith[i]->getType() == Type::Convolution) {
expectedInputEdgesNum += static_cast<int>(fusedWith[i]->getOriginalInputsNumber()) - 1;
}
if (fusedWith[i]->getAlgorithm() == EltwiseAdd) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
if (fusedWith[i]->getAlgorithm() == Algorithm::EltwiseAdd) {
auto* eltwiseNode = dynamic_cast<Eltwise *>(fusedWith[i].get());
if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) {
expectedInputEdgesNum++;
}
}
}
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0));
auto inputDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0));
if (!inputZeroPoints.empty())
inputDataType = memory::data_type::u8;
auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(0));
eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType);
auto outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(0));
eltwisePrecision = DnnlExtensionUtils::DataTypeToIEPrecision(outputDataType);
if (!fusedWith.empty()) {
outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0));
eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType);
outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0));
eltwisePrecision = DnnlExtensionUtils::DataTypeToIEPrecision(outputDataType);
}
// We need to make sure that convolution output and second input of fused Eltwise operation
// have equal precision sizes since they use the same physical memory. In case precisions are different we upscale to FP32.
if (outputDataType != memory::data_type::f32 && outputDataType != memory::data_type::bf16 && withSum) {
for (int i = 0; i < fusedWith.size(); i++) {
if (fusedWith[i]->getAlgorithm() == EltwiseAdd) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
if (fusedWith[i]->getAlgorithm() == Algorithm::EltwiseAdd) {
auto* eltwiseNode = dynamic_cast<Eltwise *>(fusedWith[i].get());
if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) {
eltwisePrecision = fusedEltwisePrecision(fusedWith[i]);
if (MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType).size() != eltwisePrecision.size()) {
if (DnnlExtensionUtils::DataTypeToIEPrecision(outputDataType).size() != eltwisePrecision.size()) {
eltwisePrecision = Precision::FP32;
outputDataType = memory::data_type::f32;
}
@ -371,13 +373,13 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
int ndims = getInputShapeAtPort(0).getRank();
withDWConv = isFusedWith(Convolution);
withDWConv = isFusedWith(Type::Convolution);
if (withDWConv && isDynamicNode()) {
IE_THROW() << "DW convolution is fused into convolution node " << getName() << " with dynamic shape.";
}
for (int i = 0; i < fusedWith.size(); i++) {
auto *convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(fusedWith[i].get());
auto *convolutionNode = dynamic_cast<Convolution *>(fusedWith[i].get());
if (convolutionNode) {
auto& inActivationDims = convolutionNode->inputShapes[0].getStaticDims();
dw_conv_ih = inActivationDims[convolutionNode->inputShapes[0].getRank() - 2];
@ -393,9 +395,9 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
if (canBeExecutedInInt8()) {
if (i == 0) {
dw_conv_in_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(0));
dw_conv_in_dt = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(0));
} else {
dw_conv_in_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[i - 1]->getOriginalOutputPrecisionAtPort(0));
dw_conv_in_dt = DnnlExtensionUtils::IEPrecisionToDataType(fusedWith[i - 1]->getOriginalOutputPrecisionAtPort(0));
}
} else {
dw_conv_in_dt = memory::data_type::f32;
@ -433,8 +435,8 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
&& !(isDepthWise() && ndims == 5)) ? memory::data_type::bf16 : memory::data_type::f32;
eltwisePrecision = Precision::FP32;
for (int i = 0; i < fusedWith.size(); i++) {
if (fusedWith[i]->getAlgorithm() == EltwiseAdd) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
if (fusedWith[i]->getAlgorithm() == Algorithm::EltwiseAdd) {
auto* eltwiseNode = dynamic_cast<Eltwise *>(fusedWith[i].get());
if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) {
eltwisePrecision = fusedEltwisePrecision(fusedWith[i]);
// TODO(amalyshe): there might be a situation when convolution can be executed in BF16,
@ -445,7 +447,7 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
// before the fused convolution. This behaviour might be more correct regarding expected markup
// of the graph but performance of first and second approaches might be different. Need to verify
outputDataType = eltwisePrecision == Precision::BF16 ? memory::data_type::bf16 : memory::data_type::f32;
eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType);
eltwisePrecision = DnnlExtensionUtils::DataTypeToIEPrecision(outputDataType);
}
}
}
@ -497,7 +499,7 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
}
}
void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights = false) {
void Convolution::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights = false) {
mkldnn::post_ops ops;
const bool useLegacyPostOps = true; // @todo remove after issue with performance of binary post ops fixed
@ -511,17 +513,17 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vecto
};
for (auto &node : fusedWith) {
if (node->getType() == Split || node->getType() == Concatenation)
if (node->getType() == Type::Split || node->getType() == Type::Concatenation)
continue;
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
if (auto* eltwiseNode = dynamic_cast<Eltwise *>(node.get())) {
if (eltwiseNode->isSpecialConvolutionAddFusing()) {
if (withSumBroadcast) {
break;
}
ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision));
ops.append_sum(1.0, DnnlExtensionUtils::IEPrecisionToDataType(eltwisePrecision));
} else {
if (useLegacyPostOps || eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
if (useLegacyPostOps || eltwiseNode->getOneDnnAlgorithm() != mkldnn::algorithm::undef) {
eltwiseNode->appendPostOps(ops, dims, postOpsArgs);
} else {
eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), postOpsArgs);
@ -530,7 +532,7 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vecto
continue;
}
if (auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get())) {
if (auto* fakeQuantizeNode = dynamic_cast<FakeQuantize *>(node.get())) {
if (useLegacyPostOps) {
fakeQuantizeNode->appendPostOps(ops, dims, postOpsArgs);
} else {
@ -539,7 +541,7 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vecto
continue;
}
auto* convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(node.get());
auto* convolutionNode = dynamic_cast<Convolution *>(node.get());
if (convolutionNode) {
if (initWeights) {
postOpsArgs.push_back(getParentEdgeAt(getOriginalInputsNumber() + 0)->getMemoryPtr());
@ -564,11 +566,11 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vecto
attr.set_post_ops(ops);
}
void MKLDNNConvolutionNode::selectOptimalPrimitiveDescriptor() {
void Convolution::selectOptimalPrimitiveDescriptor() {
selectPreferPrimitiveDescriptor(getPrimitivesPriority(), true);
}
void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
void Convolution::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -603,7 +605,7 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
}
if (withDWConv) {
auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32);
auto weightsPrc = DnnlExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32);
auto biasPrc = memory::data_type::f32;
std::vector<size_t> dwWeightsDims({dw_conv_oc, 1, 1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]});
@ -653,8 +655,8 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
}
}
bool MKLDNNConvolutionNode::created() const {
return getType() == Convolution;
bool Convolution::created() const {
return getType() == Type::Convolution;
}
namespace {
@ -693,7 +695,7 @@ createDescriptorInternal(const mkldnn::memory::desc& inputDesc,
}
} // namespace
void MKLDNNConvolutionNode::createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
void Convolution::createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
const std::vector<MemoryDescPtr>& outputDesc) {
MemoryDescPtr inpDesc;
if (inputDesc[0]->isDefined()) {
@ -723,12 +725,12 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector<MemoryDescPtr>& i
wdt = memory::data_type::s8;
}
mkldnn::memory::desc weightDnnlDesc(MKLDNNExtensionUtils::convertToDnnlDims(weightDims), wdt, memory::format_tag::any);
mkldnn::memory::desc weightDnnlDesc(DnnlExtensionUtils::convertToDnnlDims(weightDims), wdt, memory::format_tag::any);
mkldnn::memory::desc biasDnnlDesc;
if (withBiases) {
memory::data_type bdt = memory::data_type::f32;
biasDnnlDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(biasesDims), bdt, memory::format_tag::any);
biasDnnlDesc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(biasesDims), bdt, memory::format_tag::any);
}
std::vector<mkldnn::algorithm> algorithms;
@ -744,12 +746,12 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector<MemoryDescPtr>& i
}
}
void MKLDNNConvolutionNode::addZeroPoints(mkldnn::primitive_attr& attr) {
void Convolution::addZeroPoints(mkldnn::primitive_attr& attr) {
if (!inputZeroPoints.empty()) {
attr.set_input_zero_points(inputZeroPoints.size(), 1 << 1 /*through C dim*/);
if (!inputZeroPointsMemPtr) {
inputZeroPointsMemPtr.reset(new MKLDNNMemory(getEngine()));
inputZeroPointsMemPtr.reset(new Memory(getEngine()));
DnnlBlockedMemoryDesc memoryDesc(Precision::U8, {inputZeroPoints.size()});
inputZeroPointsMemPtr->Create(memoryDesc, inputZeroPoints.data());
}
@ -759,7 +761,7 @@ void MKLDNNConvolutionNode::addZeroPoints(mkldnn::primitive_attr& attr) {
attr.set_weights_zero_points(weightsZeroPoints.size(), 1 << 1 /*through C dim*/);
if (!weightsZeroPointsMemPtr) {
weightsZeroPointsMemPtr.reset(new MKLDNNMemory(getEngine()));
weightsZeroPointsMemPtr.reset(new Memory(getEngine()));
DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, {weightsZeroPoints.size()});
weightsZeroPointsMemPtr->Create(memoryDesc, weightsZeroPoints.data());
}
@ -769,14 +771,14 @@ void MKLDNNConvolutionNode::addZeroPoints(mkldnn::primitive_attr& attr) {
attr.set_output_compensations(outputCompensation.size(), 1 << 1 /*through C dim*/);
if (!outputCompensationMemPtr) {
outputCompensationMemPtr.reset(new MKLDNNMemory(getEngine()));
outputCompensationMemPtr.reset(new Memory(getEngine()));
DnnlBlockedMemoryDesc memoryDesc(Precision::I32, {outputCompensation.size()});
outputCompensationMemPtr->Create(memoryDesc, outputCompensation.data());
}
}
}
void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) {
void Convolution::initDescriptor(const NodeConfig& config) {
auto *selectedPD = getSelectedPrimitiveDescriptor();
if (!selectedPD) {
return;
@ -823,7 +825,7 @@ void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) {
}
if (withDWConv) {
auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32);
auto weightsPrc = DnnlExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32);
auto biasPrc = memory::data_type::f32;
std::vector <size_t> dwWeightsDims({dw_conv_oc, 1, 1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]});
@ -877,13 +879,13 @@ void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) {
selectedPD->setConfig(rightConfig);
}
void MKLDNNConvolutionNode::filterSupportedPrimitiveDescriptors() {
MKLDNNNode::filterSupportedPrimitiveDescriptors();
void Convolution::filterSupportedPrimitiveDescriptors() {
Node::filterSupportedPrimitiveDescriptors();
// We also need to filter descs in Convolution node
filterSupportedDescriptors();
}
void MKLDNNConvolutionNode::filterSupportedDescriptors() {
void Convolution::filterSupportedDescriptors() {
if (!inputMemoryFormatsFilter.empty() || !outputMemoryFormatsFilter.empty()) {
if (inputMemoryFormatsFilter.size() > 1 || outputMemoryFormatsFilter.size() > 1) {
IE_THROW() << "Incorrect number of input or output memory formats for Convolution node";
@ -892,11 +894,11 @@ void MKLDNNConvolutionNode::filterSupportedDescriptors() {
while (itd != descs.end()) {
bool isSuitableDesc = true;
if (!inputMemoryFormatsFilter.empty()) {
auto src_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr<mkldnn::convolution_forward::desc>(*itd)->data.src_desc);
auto src_tdesc = DnnlExtensionUtils::makeDescriptor(std::shared_ptr<mkldnn::convolution_forward::desc>(*itd)->data.src_desc);
isSuitableDesc &= src_tdesc->isSame(inputMemoryFormatsFilter[0]);
}
if (!outputMemoryFormatsFilter.empty()) {
auto dst_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr<mkldnn::convolution_forward::desc>(*itd)->data.dst_desc);
auto dst_tdesc = DnnlExtensionUtils::makeDescriptor(std::shared_ptr<mkldnn::convolution_forward::desc>(*itd)->data.dst_desc);
isSuitableDesc &= dst_tdesc->isSame(outputMemoryFormatsFilter[0]);
}
if (!isSuitableDesc) {
@ -908,7 +910,7 @@ void MKLDNNConvolutionNode::filterSupportedDescriptors() {
}
}
bool MKLDNNConvolutionNode::isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) const {
bool Convolution::isPossibleToSkipInitConfig(DnnlDesriptor &desc) const {
// WA: In some cases, we can predict in advance the type of primitive that will be called in the future.
// In particular, isPossibleToSkipInitConfig() checks whether we can skip the creation of primitives with
// gemm implementation, which significantly increase the network load time.
@ -931,8 +933,8 @@ bool MKLDNNConvolutionNode::isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) c
isPossibleJitPlanar = false;
std::shared_ptr<mkldnn::convolution_forward::desc> convDesc(desc);
auto srcMemDesc = MKLDNNExtensionUtils::makeDescriptor(convDesc->data.src_desc);
auto dstMemDesc = MKLDNNExtensionUtils::makeDescriptor(convDesc->data.dst_desc);
auto srcMemDesc = DnnlExtensionUtils::makeDescriptor(convDesc->data.src_desc);
auto dstMemDesc = DnnlExtensionUtils::makeDescriptor(convDesc->data.dst_desc);
auto srcDataType = convDesc->data.src_desc.data_type;
auto dstDataType = convDesc->data.dst_desc.data_type;
bool isPlanarFloatConv = srcMemDesc->hasLayoutType(LayoutType::ncsp)
@ -943,51 +945,51 @@ bool MKLDNNConvolutionNode::isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) c
return !isPossibleJitPlanar && isPlanarFloatConv;
}
std::shared_ptr<MemoryDesc> MKLDNNConvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
std::shared_ptr<MemoryDesc> Convolution::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1) : primitive_desc_it.src_desc(idx);
if (getInputShapeAtPort(idx).isDynamic()) {
return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx));
return DnnlExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx));
}
return MKLDNNExtensionUtils::makeDescriptor(desc);
return DnnlExtensionUtils::makeDescriptor(desc);
}
bool MKLDNNConvolutionNode::canFuse(const MKLDNNNodePtr& node) const {
bool Convolution::canFuse(const NodePtr& node) const {
return canFuseSimpleOperation(node);
}
mkldnn::memory MKLDNNConvolutionNode::getWeights() const {
mkldnn::memory Convolution::getWeights() const {
return getParentEdgeAt(1)->getMemory().GetPrimitive();
}
void MKLDNNConvolutionNode::setDynamicBatchLim(int lim) {
void Convolution::setDynamicBatchLim(int lim) {
if (!execPtr) {
IE_THROW() << "Can't set dynamic batch for Convolution node with name: " << getName() << ", because executor is not compiled";
}
if (execPtr->needReordering()) {
IE_THROW() << "Can't execute Convolution node with dynamic batch via executor with reorders";
}
MKLDNNNode::setDynamicBatchLim(lim);
Node::setDynamicBatchLim(lim);
}
mkldnn::memory MKLDNNConvolutionNode::getBias() const {
mkldnn::memory Convolution::getBias() const {
return getParentEdgeAt(2)->getMemory().GetPrimitive();
}
InferenceEngine::Precision MKLDNNConvolutionNode::getRuntimePrecision() const {
InferenceEngine::Precision Convolution::getRuntimePrecision() const {
std::vector<InferenceEngine::Precision> inputPrecisions;
// Don't take bias precision into account
size_t inputsNumLimit = 2;
for (size_t i = 0; i < std::min(getParentEdges().size(), inputsNumLimit); i++) {
auto parentEdge = getParentEdgeAt(i);
if (parentEdge && parentEdge->getStatus() == MKLDNNEdge::Status::Validated) {
inputPrecisions.emplace_back(MKLDNNExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType())));
if (parentEdge && parentEdge->getStatus() == Edge::Status::Validated) {
inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType())));
}
}
return getMaxPrecision(inputPrecisions);
}
bool MKLDNNConvolutionNode::isNspcAvailable() const {
bool Convolution::isNspcAvailable() const {
using impl::cpu::x64::mayiuse;
// do not use in non-quantized networks until it is enforced externally
@ -1062,8 +1064,8 @@ bool MKLDNNConvolutionNode::isNspcAvailable() const {
return true;
}
InferenceEngine::Blob::Ptr MKLDNNConvolutionNode::createInternalBlob(InferenceEngine::SizeVector dims, size_t edgeNum, bool isGrouped) {
const auto constNode = std::dynamic_pointer_cast<MKLDNNInputNode>(getParentEdgeAt(edgeNum)->getParent());
InferenceEngine::Blob::Ptr Convolution::createInternalBlob(InferenceEngine::SizeVector dims, size_t edgeNum, bool isGrouped) {
const auto constNode = std::dynamic_pointer_cast<Input>(getParentEdgeAt(edgeNum)->getParent());
if (!constNode) {
IE_THROW() << "Cannot cast " << edgeNum << " input to Input node for " << getName() << ".";
}
@ -1084,14 +1086,14 @@ InferenceEngine::Blob::Ptr MKLDNNConvolutionNode::createInternalBlob(InferenceEn
cpu_convert(blb->GetPtr(),
internalBlob->buffer(),
MKLDNNExtensionUtils::DataTypeToIEPrecision(blb->GetDataType()),
DnnlExtensionUtils::DataTypeToIEPrecision(blb->GetDataType()),
internalBlob->getTensorDesc().getPrecision(),
elementsCount);
return internalBlob;
}
void MKLDNNConvolutionNode::prepareParams() {
void Convolution::prepareParams() {
auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
auto wghMemPtr = getParentEdgesAtPort(1)[0]->getMemoryPtr();
auto dstMemPtr = getOutputMemory();
@ -1101,7 +1103,7 @@ void MKLDNNConvolutionNode::prepareParams() {
IE_THROW() << "Input memory was not allocated.";
if (!wghMemPtr || !wghMemPtr->isAllocated())
IE_THROW() << "Weight memory was not allocated.";
MKLDNNMemoryPtr biasMemPtr = nullptr;
MemoryPtr biasMemPtr = nullptr;
if (withBiases) {
biasMemPtr = getParentEdgesAtPort(2)[0]->getMemoryPtr();
if (!biasMemPtr || !biasMemPtr->isAllocated())
@ -1161,27 +1163,27 @@ void MKLDNNConvolutionNode::prepareParams() {
const std::vector<ptrdiff_t>& dilation,
const std::vector<ptrdiff_t>& paddingL,
const std::vector<ptrdiff_t>& paddingR,
mkldnn::algorithm alg) -> std::shared_ptr<MKLDNNDescriptor> {
mkldnn::algorithm alg) -> std::shared_ptr<DnnlDesriptor> {
mkldnn::memory::desc dnnlBiasDesc;
if (biasDescPtr) {
// WA to align IR bias representation (3 to 5 rank tensors) to oneDNN representation (1 rank tensor)
dnnlBiasDesc = biasDescPtr->getDnnlDesc().reshape({dstDesc.dims()[1]});
}
return std::make_shared<MKLDNNDescriptor>(createDescriptorInternal(srcDesc,
wghDesc,
dnnlBiasDesc,
dstDesc,
(biasDescPtr != nullptr),
stride,
dilation,
paddingL,
paddingR,
alg));
return std::make_shared<DnnlDesriptor>(createDescriptorInternal(srcDesc,
wghDesc,
dnnlBiasDesc,
dstDesc,
(biasDescPtr != nullptr),
stride,
dilation,
paddingL,
paddingR,
alg));
};
const auto alg = (key.implType & impl_desc_type::winograd) ? mkldnn::algorithm::convolution_winograd : mkldnn::algorithm::convolution_direct;
std::shared_ptr<MKLDNNDescriptor> desc = createMkldnnConvDesc(key.inp0->getDnnlDesc(),
std::shared_ptr<DnnlDesriptor> desc = createMkldnnConvDesc(key.inp0->getDnnlDesc(),
key.inp1->getDnnlDesc(),
key.out->getDnnlDesc(),
key.bias,
@ -1213,25 +1215,25 @@ void MKLDNNConvolutionNode::prepareParams() {
}
if (!execPtr) {
auto inDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(key.inp0->getShape().getStaticDims()),
auto inDesc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(key.inp0->getShape().getStaticDims()),
key.inp0->getDataType(),
memory::format_tag::any);
auto wghDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(key.inp1->getShape().getStaticDims()),
auto wghDesc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(key.inp1->getShape().getStaticDims()),
key.inp1->getDataType(),
memory::format_tag::any);
auto outDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(key.out->getShape().getStaticDims()),
auto outDesc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(key.out->getShape().getStaticDims()),
key.out->getDataType(),
memory::format_tag::any);
std::shared_ptr<MKLDNNDescriptor> reorderConvDesc = createMkldnnConvDesc(inDesc,
wghDesc,
outDesc,
key.bias,
key.stride,
key.dilation,
key.paddingL,
key.paddingR,
mkldnn::algorithm::convolution_direct);
std::shared_ptr<DnnlDesriptor> reorderConvDesc = createMkldnnConvDesc(inDesc,
wghDesc,
outDesc,
key.bias,
key.stride,
key.dilation,
key.paddingL,
key.paddingR,
mkldnn::algorithm::convolution_direct);
auto reordItpd = reorderConvDesc->createPrimitiveDescriptorIterator(engine, key.attr);
if (static_cast<bool>(reordItpd)) {
@ -1263,13 +1265,13 @@ void MKLDNNConvolutionNode::prepareParams() {
}
appendZeroPointsArgs();
MKLDNNNode::appendPostOpArgs(*pAttrLocal, primArgs, postOpsArgs);
Node::appendPostOpArgs(*pAttrLocal, primArgs, postOpsArgs);
} else {
IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
}
}
MKLDNNConvolutionNode::ConvolutionExecutor::ConvolutionExecutor(const mkldnn::convolution_forward::primitive_desc& pd,
Convolution::ConvolutionExecutor::ConvolutionExecutor(const mkldnn::convolution_forward::primitive_desc& pd,
const mkldnn::memory::desc& inMemDesc,
const mkldnn::memory::desc& weightMemDesc,
const mkldnn::memory::desc& outMemDesc,
@ -1289,14 +1291,14 @@ MKLDNNConvolutionNode::ConvolutionExecutor::ConvolutionExecutor(const mkldnn::co
}
}
void MKLDNNConvolutionNode::execute(mkldnn::stream strm) {
void Convolution::execute(mkldnn::stream strm) {
if (!execPtr) {
IE_THROW() << "Can't execute Convolution node with name: " << getName() << ", because executor is not compiled";
}
execPtr->exec(primArgs, strm);
}
void MKLDNNConvolutionNode::executeDynamicImpl(mkldnn::stream strm) {
void Convolution::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
if (withSumBroadcast) {
if (!subgraph) {
@ -1317,7 +1319,7 @@ void MKLDNNConvolutionNode::executeDynamicImpl(mkldnn::stream strm) {
}
}
void MKLDNNConvolutionNode::updatePadding() {
void Convolution::updatePadding() {
//update padding.
if (isDynamicNode() && autoPadding) {
paddingL = shapeInference->get_pads_begin();
@ -1325,7 +1327,7 @@ void MKLDNNConvolutionNode::updatePadding() {
}
}
void MKLDNNConvolutionNode::redefineOutputMemory(const std::vector<VectorDims> &newOutputShapes) {
void Convolution::redefineOutputMemory(const std::vector<VectorDims> &newOutputShapes) {
if (withSum) {
const size_t sumPortNum = getParentEdges().size() - 1;
const auto& sumInpMem = getParentEdgesAtPort(sumPortNum).front()->getMemory();
@ -1345,17 +1347,17 @@ void MKLDNNConvolutionNode::redefineOutputMemory(const std::vector<VectorDims> &
withSumBroadcast = false;
}
}
MKLDNNNode::redefineOutputMemory(newOutputShapes);
Node::redefineOutputMemory(newOutputShapes);
}
MemoryDescPtr MKLDNNConvolutionNode::getSumMemDesc(primitive_desc_iterator &primitive_desc_it) {
MemoryDescPtr Convolution::getSumMemDesc(primitive_desc_iterator &primitive_desc_it) {
if (getOutputShapeAtPort(0).isDynamic()) {
return MKLDNNExtensionUtils::makeUndefinedDesc(primitive_desc_it.dst_desc(0), getInputShapeAtPort(getParentEdges().size() - 1));
return DnnlExtensionUtils::makeUndefinedDesc(primitive_desc_it.dst_desc(0), getInputShapeAtPort(getParentEdges().size() - 1));
}
return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.dst_desc(0));
return DnnlExtensionUtils::makeDescriptor(primitive_desc_it.dst_desc(0));
}
MKLDNNMemoryPtr MKLDNNConvolutionNode::getOutputMemory() const {
MemoryPtr Convolution::getOutputMemory() const {
if (withSumBroadcast) {
if (!subgraph) {
IE_THROW(Unexpected) << "Fused ops subgraph has not been created in " << getTypeStr() << " with name " << getName();
@ -1367,10 +1369,10 @@ MKLDNNMemoryPtr MKLDNNConvolutionNode::getOutputMemory() const {
}
}
void MKLDNNConvolutionNode::addFusedNode(const MKLDNNNodePtr &fusingNode) {
if (Eltwise == fusingNode->getType()) {
if (fusingNode->getAlgorithm() == EltwiseAdd) {
auto eltwiseNode = std::dynamic_pointer_cast<MKLDNNEltwiseNode>(fusingNode);
void Convolution::addFusedNode(const NodePtr &fusingNode) {
if (Type::Eltwise == fusingNode->getType()) {
if (fusingNode->getAlgorithm() == Algorithm::EltwiseAdd) {
auto eltwiseNode = std::dynamic_pointer_cast<Eltwise>(fusingNode);
if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) {
withSum = true;
}
@ -1385,10 +1387,10 @@ void MKLDNNConvolutionNode::addFusedNode(const MKLDNNNodePtr &fusingNode) {
}
}
}
MKLDNNNode::addFusedNode(fusingNode);
Node::addFusedNode(fusingNode);
}
void MKLDNNConvolutionNode::appendZeroPointsArgs() {
void Convolution::appendZeroPointsArgs() {
if (inputZeroPointsMemPtr != nullptr) {
primArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC] = inputZeroPointsMemPtr->GetPrimitive();
}
@ -1399,4 +1401,7 @@ void MKLDNNConvolutionNode::appendZeroPointsArgs() {
primArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_DST] = outputCompensationMemPtr->GetPrimitive();
}
}
REG_MKLDNN_PRIM_FOR(MKLDNNConvolutionNode, Convolution);
} // namespace node
} // namespace intel_cpu
} // namespace ov
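
Convolution::FusedSubgraph, near the top of this file, builds its helper graph by collecting nodes into an unordered_set and edges into a vector through an addEdge lambda before handing both to Graph::CreateGraph. The sketch below mirrors only that wiring pattern with stand-in Node and Edge types; the names and members are assumptions for illustration, not the plugin's classes.

#include <cstddef>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>

struct Node;  // forward declaration so Edge can hold shared_ptr<Node>
using NodePtr = std::shared_ptr<Node>;

struct Edge {
    Edge(NodePtr p, NodePtr c, size_t parentPort, size_t childPort)
        : parent(std::move(p)), child(std::move(c)), parentPort(parentPort), childPort(childPort) {}
    NodePtr parent;
    NodePtr child;
    size_t parentPort;
    size_t childPort;
};
using EdgePtr = std::shared_ptr<Edge>;

struct Node {
    explicit Node(std::string n) : name(std::move(n)) {}
    // The real node registers the edge on both endpoints; a no-op is enough for the sketch.
    void addEdge(const EdgePtr& /*edge*/) {}
    std::string name;
};

int main() {
    std::unordered_set<NodePtr> nodesSet;
    std::vector<EdgePtr> edges;

    // Same shape as the lambda in FusedSubgraph: create the edge, register it on the
    // child, and remember both endpoints so they can be passed to the graph later.
    auto addEdge = [&](const NodePtr& parent, const NodePtr& child, size_t parentPort, size_t childPort) -> void {
        auto edge = std::make_shared<Edge>(parent, child, parentPort, childPort);
        child->addEdge(edge);
        edges.push_back(edge);
        nodesSet.insert(parent);
        nodesSet.insert(child);
    };

    auto inp0 = std::make_shared<Node>("inp0");
    auto sum = std::make_shared<Node>("sum");
    auto out = std::make_shared<Node>("out");
    addEdge(inp0, sum, 0, 0);
    addEdge(sum, out, 0, 0);

    std::vector<NodePtr> nodes(nodesSet.begin(), nodesSet.end());
    std::cout << "nodes: " << nodes.size() << ", edges: " << edges.size() << std::endl;  // nodes: 3, edges: 2
    return 0;
}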

View File

@ -13,12 +13,13 @@
namespace ov {
namespace intel_cpu {
namespace node {
class MKLDNNEltwiseNode;
class Eltwise;
class MKLDNNConvolutionNode : public MKLDNNNode {
class Convolution : public Node {
public:
MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
Convolution(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
void getSupportedDescriptors() override;
@ -38,7 +39,7 @@ public:
mkldnn::memory getWeights() const;
mkldnn::memory getBias() const;
size_t descInputNumbers(MKLDNNDescriptor desc) override {
size_t descInputNumbers(DnnlDesriptor desc) override {
return getOriginalInputsNumber();
}
@ -55,7 +56,7 @@ public:
const std::vector<ptrdiff_t> &getPaddingL() { return paddingL; }
const std::vector<ptrdiff_t> &getPaddingR() { return paddingR; }
bool canFuse(const MKLDNNNodePtr& node) const override;
bool canFuse(const NodePtr& node) const override;
bool isDepthWise() const {
return isGrouped && 1 == groupOC && 1 == groupIC;
}
@ -65,9 +66,9 @@ public:
void setDynamicBatchLim(int lim) override;
protected:
InferenceEngine::Precision fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const;
InferenceEngine::Precision fusedEltwisePrecision(const NodePtr& fusingNode) const;
void redefineOutputMemory(const std::vector<VectorDims> &newOutputShapes) override;
void addFusedNode(const MKLDNNNodePtr &fusingNode) override;
void addFusedNode(const NodePtr &fusingNode) override;
private:
class FusedSubgraph;
@ -91,13 +92,13 @@ private:
void addZeroPoints(mkldnn::primitive_attr& attr);
void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights);
void filterSupportedDescriptors();
bool isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) const;
bool isPossibleToSkipInitConfig(DnnlDesriptor &desc) const;
bool isNspcAvailable() const;
InferenceEngine::Blob::Ptr createInternalBlob(InferenceEngine::SizeVector dims, size_t edgeNum, bool isGrouped = false);
void updatePadding();
MemoryDescPtr getSumMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it);
MKLDNNMemoryPtr getOutputMemory() const;
MemoryPtr getOutputMemory() const;
void appendZeroPointsArgs();
@ -135,12 +136,13 @@ private:
AttrPtr pAttr;
bool autoPadding = false;
FusedSubgraphPtr subgraph;
std::unordered_map<MKLDNNNodePtr, std::vector<MKLDNNNodePtr>> fusedConstNodes;
std::unordered_map<NodePtr, std::vector<NodePtr>> fusedConstNodes;
MKLDNNMemoryPtr inputZeroPointsMemPtr;
MKLDNNMemoryPtr weightsZeroPointsMemPtr;
MKLDNNMemoryPtr outputCompensationMemPtr;
MemoryPtr inputZeroPointsMemPtr;
MemoryPtr weightsZeroPointsMemPtr;
MemoryPtr outputCompensationMemPtr;
};
} // namespace node
} // namespace intel_cpu
} // namespace ov
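
The conv.cpp hunks also touch the ConvKey struct (its operator== is visible above) and the key object used inside prepareParams. The snippet below is a generic sketch of that cache-key pattern with invented fields (srcDims, wghDims, implType) and an invented Executor type; it only demonstrates how a struct with operator== plus a matching hash functor lets an unordered_map reuse a previously built entry for an equal key, and makes no claim about the plugin's real key contents or caching code.

#include <cstddef>
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

// Hypothetical key: stands in for the descriptors and attributes a real key would bundle.
struct ConvKeySketch {
    std::vector<size_t> srcDims;
    std::vector<size_t> wghDims;
    int implType = 0;

    bool operator==(const ConvKeySketch& rhs) const {
        return srcDims == rhs.srcDims && wghDims == rhs.wghDims && implType == rhs.implType;
    }
};

struct ConvKeySketchHash {
    size_t operator()(const ConvKeySketch& key) const {
        size_t seed = std::hash<int>()(key.implType);
        for (auto d : key.srcDims) seed = seed * 31 + std::hash<size_t>()(d);
        for (auto d : key.wghDims) seed = seed * 31 + std::hash<size_t>()(d);
        return seed;  // equal keys always hash to the same value
    }
};

struct Executor {
    std::string name;  // stands in for a compiled primitive
};

int main() {
    std::unordered_map<ConvKeySketch, std::shared_ptr<Executor>, ConvKeySketchHash> cache;

    ConvKeySketch key{{1, 3, 224, 224}, {64, 3, 3, 3}, /*implType=*/1};
    auto it = cache.find(key);
    if (it == cache.end()) {
        // First call with these parameters: "compile" and remember the executor.
        it = cache.emplace(key, std::make_shared<Executor>(Executor{"direct conv"})).first;
    }
    std::cout << it->second->name << std::endl;  // an equal key on the next call reuses this entry
    return 0;
}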

View File

@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <extension_utils.h>
#include <dnnl_extension_utils.h>
#include "convert.h"
#include "common/cpu_convert.h"
#include "common/blocked_desc_creator.h"
@ -11,10 +11,13 @@
#include <utils/ngraph_utils.hpp>
using namespace mkldnn;
using namespace ov::intel_cpu;
using namespace InferenceEngine;
bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
namespace ov {
namespace intel_cpu {
namespace node {
bool Convert::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto convert = std::dynamic_pointer_cast<const ngraph::opset1::Convert>(op);
if (!convert) {
@ -27,8 +30,8 @@ bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr<const ngraph:
return true;
}
MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(op, eng, cache) {
Convert::Convert(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache)
: Node(op, eng, cache) {
std::string errorMessage;
if (isSupportedOperation(op, errorMessage)) {
errorPrefix = "Convert node with name '" + getName() + "'";
@ -40,13 +43,13 @@ MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, co
origPrc = details::convertPrecision(convert->get_destination_type());
}
std::vector<VectorDims> MKLDNNConvertNode::shapeInfer() const {
std::vector<VectorDims> Convert::shapeInfer() const {
return std::vector<VectorDims>{getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()};
}
MKLDNNConvertNode::MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode("Convert", nodeName, eng, cache)
Convert::Convert(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
const std::string &nodeName, const mkldnn::engine& eng, WeightsSharing::Ptr &cache)
: Node("Convert", nodeName, eng, cache)
, origPrc(outPrc) {
inputShapes.push_back(shape);
addOriginalInputPrecision(inPrc);
@ -58,7 +61,7 @@ MKLDNNConvertNode::MKLDNNConvertNode(const Shape &shape, const InferenceEngine::
errorPrefix = "Convert node with name '" + getName() + "'";
}
void MKLDNNConvertNode::getSupportedDescriptors() {
void Convert::getSupportedDescriptors() {
// if tensor descriptors are set via the setDescs method we need to update the inDims/outDims data
// from the corresponding tensor descriptors.
if (outputShapes.empty())
@ -71,14 +74,14 @@ void MKLDNNConvertNode::getSupportedDescriptors() {
IE_THROW() << errorPrefix << " has incorrect number of output edges";
}
bool MKLDNNConvertNode::isSupportedDesc(const MemoryDesc &desc) {
bool Convert::isSupportedDesc(const MemoryDesc &desc) {
bool isSupported = desc.getType() & MemoryDescType::Blocked;
if (desc.getType() == MemoryDescType::DnnlBlocked)
isSupported &= desc.as<const DnnlMemoryDesc>()->hasEmptyExtraData();
return isSupported;
}
void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() {
void Convert::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -129,11 +132,11 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNConvertNode::executeDynamicImpl(mkldnn::stream strm) {
void Convert::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNConvertNode::execute(mkldnn::stream strm) {
void Convert::execute(mkldnn::stream strm) {
auto& parentMem = getParentEdgeAt(0)->getMemory();
auto& childMem = getChildEdgeAt(0)->getMemory();
@ -154,8 +157,10 @@ void MKLDNNConvertNode::execute(mkldnn::stream strm) {
parentPaddElemCount);
}
bool MKLDNNConvertNode::created() const {
return getType() == Convert;
bool Convert::created() const {
return getType() == Type::Convert;
}
REG_MKLDNN_PRIM_FOR(MKLDNNConvertNode, Convert);
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -11,12 +11,13 @@
namespace ov {
namespace intel_cpu {
namespace node {
class MKLDNNConvertNode : public MKLDNNNode {
class Convert : public Node {
public:
MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
Convert(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
Convert(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
const std::string &nodeName, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
@ -29,7 +30,7 @@ public:
// This is the interface extension designed to provide inp and output tensor descriptors without the CNNLayer.
// In that case the Convert node is instantiated with default CNNLayer and inp/out tensor descriptors are set via this method.
// This is useful if the Convert node is added to the graph as an auxiliary operation at the MKLDNNGraph
// This is useful if the Convert node is added to the graph as an auxiliary operation at the Graph
// initialization stage.
void setDescs(const MemoryDesc& input, const MemoryDesc& output) {
this->input = input.clone();
@ -54,5 +55,6 @@ private:
std::string errorPrefix;
};
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -9,10 +9,13 @@
#include "ie_parallel.hpp"
#include "ctc_greedy_decoder.h"
using namespace ov::intel_cpu;
using namespace InferenceEngine;
bool MKLDNNCTCGreedyDecoderNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
namespace ov {
namespace intel_cpu {
namespace node {
bool CTCGreedyDecoder::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto greedyDecOp = ngraph::as_type_ptr<const ngraph::op::v0::CTCGreedyDecoder>(op);
if (!greedyDecOp) {
@ -25,8 +28,8 @@ bool MKLDNNCTCGreedyDecoderNode::isSupportedOperation(const std::shared_ptr<cons
return true;
}
MKLDNNCTCGreedyDecoderNode::MKLDNNCTCGreedyDecoderNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
CTCGreedyDecoder::CTCGreedyDecoder(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
WeightsSharing::Ptr &cache) : Node(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
@ -48,7 +51,7 @@ MKLDNNCTCGreedyDecoderNode::MKLDNNCTCGreedyDecoderNode(const std::shared_ptr<ngr
mergeRepeated = greedyDecOp->get_ctc_merge_repeated();
}
void MKLDNNCTCGreedyDecoderNode::initSupportedPrimitiveDescriptors() {
void CTCGreedyDecoder::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -66,7 +69,7 @@ void MKLDNNCTCGreedyDecoderNode::initSupportedPrimitiveDescriptors() {
impl_desc_type::ref_any);
}
void MKLDNNCTCGreedyDecoderNode::execute(mkldnn::stream strm) {
void CTCGreedyDecoder::execute(mkldnn::stream strm) {
const float* probabilities = reinterpret_cast<const float *>(getParentEdgeAt(DATA_INDEX)->getMemoryPtr()->GetPtr());
const float* sequenceMask = reinterpret_cast<const float *>(getParentEdgeAt(SEQUENCE_LENGTH_INDEX)->getMemoryPtr()->GetPtr());
float* outputSequences = reinterpret_cast<float *>(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr());
@ -161,16 +164,18 @@ void MKLDNNCTCGreedyDecoderNode::execute(mkldnn::stream strm) {
});
}
bool MKLDNNCTCGreedyDecoderNode::created() const {
return getType() == CTCGreedyDecoder;
bool CTCGreedyDecoder::created() const {
return getType() == Type::CTCGreedyDecoder;
}
void MKLDNNCTCGreedyDecoderNode::executeDynamicImpl(mkldnn::stream strm) {
void CTCGreedyDecoder::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
bool MKLDNNCTCGreedyDecoderNode::needPrepareParams() const {
bool CTCGreedyDecoder::needPrepareParams() const {
return false;
}
REG_MKLDNN_PRIM_FOR(MKLDNNCTCGreedyDecoderNode, CTCGreedyDecoder)
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -9,10 +9,11 @@
namespace ov {
namespace intel_cpu {
namespace node {
class MKLDNNCTCGreedyDecoderNode : public MKLDNNNode {
class CTCGreedyDecoder : public Node {
public:
MKLDNNCTCGreedyDecoderNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
CTCGreedyDecoder(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
@ -30,5 +31,6 @@ private:
std::string errorPrefix;
};
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -9,10 +9,13 @@
#include "ie_parallel.hpp"
#include "ctc_greedy_decoder_seq_len.h"
using namespace ov::intel_cpu;
using namespace InferenceEngine;
bool MKLDNNCTCGreedyDecoderSeqLenNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
namespace ov {
namespace intel_cpu {
namespace node {
bool CTCGreedyDecoderSeqLen::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto greedyDecOp = ngraph::as_type_ptr<const ngraph::op::v6::CTCGreedyDecoderSeqLen>(op);
if (!greedyDecOp) {
@ -25,8 +28,8 @@ bool MKLDNNCTCGreedyDecoderSeqLenNode::isSupportedOperation(const std::shared_pt
return true;
}
MKLDNNCTCGreedyDecoderSeqLenNode::MKLDNNCTCGreedyDecoderSeqLenNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
CTCGreedyDecoderSeqLen::CTCGreedyDecoderSeqLen(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
WeightsSharing::Ptr &cache) : Node(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
@ -47,7 +50,7 @@ MKLDNNCTCGreedyDecoderSeqLenNode::MKLDNNCTCGreedyDecoderSeqLenNode(const std::sh
mergeRepeated = greedyDecOp->get_merge_repeated();
}
void MKLDNNCTCGreedyDecoderSeqLenNode::initSupportedPrimitiveDescriptors() {
void CTCGreedyDecoderSeqLen::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -71,7 +74,7 @@ void MKLDNNCTCGreedyDecoderSeqLenNode::initSupportedPrimitiveDescriptors() {
impl_desc_type::ref_any);
}
void MKLDNNCTCGreedyDecoderSeqLenNode::execute(mkldnn::stream strm) {
void CTCGreedyDecoderSeqLen::execute(mkldnn::stream strm) {
const float* probabilities = reinterpret_cast<const float *>(getParentEdgeAt(DATA_INDEX)->getMemoryPtr()->GetPtr());
const int* sequenceLengths = reinterpret_cast<const int *>(getParentEdgeAt(SEQUENCE_LENGTH_INDEX)->getMemoryPtr()->GetPtr());
int* decodedClasses = reinterpret_cast<int *>(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getMemoryPtr()->GetPtr());
@ -164,16 +167,18 @@ void MKLDNNCTCGreedyDecoderSeqLenNode::execute(mkldnn::stream strm) {
});
}
bool MKLDNNCTCGreedyDecoderSeqLenNode::created() const {
return getType() == CTCGreedyDecoderSeqLen;
bool CTCGreedyDecoderSeqLen::created() const {
return getType() == Type::CTCGreedyDecoderSeqLen;
}
void MKLDNNCTCGreedyDecoderSeqLenNode::executeDynamicImpl(mkldnn::stream strm) {
void CTCGreedyDecoderSeqLen::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
bool MKLDNNCTCGreedyDecoderSeqLenNode::needPrepareParams() const {
bool CTCGreedyDecoderSeqLen::needPrepareParams() const {
return false;
}
REG_MKLDNN_PRIM_FOR(MKLDNNCTCGreedyDecoderSeqLenNode, CTCGreedyDecoderSeqLen)
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -9,10 +9,11 @@
namespace ov {
namespace intel_cpu {
namespace node {
class MKLDNNCTCGreedyDecoderSeqLenNode : public MKLDNNNode {
class CTCGreedyDecoderSeqLen : public Node {
public:
MKLDNNCTCGreedyDecoderSeqLenNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
CTCGreedyDecoderSeqLen(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
@ -34,5 +35,6 @@ private:
std::string errorPrefix;
};
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -8,10 +8,13 @@
#include "ie_parallel.hpp"
#include "ctc_loss.h"
using namespace ov::intel_cpu;
using namespace InferenceEngine;
bool MKLDNNCTCLossNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
namespace ov {
namespace intel_cpu {
namespace node {
bool CTCLoss::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto ctcLossOp = ngraph::as_type_ptr<const ngraph::op::v4::CTCLoss>(op);
if (!ctcLossOp) {
@ -24,8 +27,8 @@ bool MKLDNNCTCLossNode::isSupportedOperation(const std::shared_ptr<const ngraph:
return true;
}
MKLDNNCTCLossNode::MKLDNNCTCLossNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
CTCLoss::CTCLoss(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
WeightsSharing::Ptr &cache) : Node(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
@ -42,7 +45,7 @@ MKLDNNCTCLossNode::MKLDNNCTCLossNode(const std::shared_ptr<ngraph::Node>& op, co
unique = ctcLossOp->get_unique();
}
void MKLDNNCTCLossNode::initSupportedPrimitiveDescriptors() {
void CTCLoss::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -57,11 +60,11 @@ void MKLDNNCTCLossNode::initSupportedPrimitiveDescriptors() {
impl_desc_type::ref_any);
}
void MKLDNNCTCLossNode::executeDynamicImpl(mkldnn::stream strm) {
void CTCLoss::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNCTCLossNode::execute(mkldnn::stream strm) {
void CTCLoss::execute(mkldnn::stream strm) {
StatusCode returnCode = OK;
const float* logits = reinterpret_cast<const float *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
@ -277,8 +280,10 @@ void MKLDNNCTCLossNode::execute(mkldnn::stream strm) {
parallel_nt(0, threadBody_3);
}
bool MKLDNNCTCLossNode::created() const {
return getType() == CTCLoss;
bool CTCLoss::created() const {
return getType() == Type::CTCLoss;
}
REG_MKLDNN_PRIM_FOR(MKLDNNCTCLossNode, CTCLoss)
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -9,10 +9,11 @@
namespace ov {
namespace intel_cpu {
namespace node {
class MKLDNNCTCLossNode : public MKLDNNNode {
class CTCLoss : public Node {
public:
MKLDNNCTCLossNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
CTCLoss(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
@ -32,5 +33,6 @@ private:
std::string errorPrefix;
};
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -1,7 +1,6 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "list.hpp"
#include <string>
#include <vector>
@ -14,10 +13,13 @@
#include "cum_sum.h"
#include "utils/bfloat16.hpp"
using namespace ov::intel_cpu;
using namespace InferenceEngine;
bool MKLDNNCumSumNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
namespace ov {
namespace intel_cpu {
namespace node {
bool CumSum::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto cumsum = std::dynamic_pointer_cast<const ngraph::opset3::CumSum>(op);
if (!cumsum) {
@ -30,8 +32,8 @@ bool MKLDNNCumSumNode::isSupportedOperation(const std::shared_ptr<const ngraph::
return true;
}
MKLDNNCumSumNode::MKLDNNCumSumNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
CumSum::CumSum(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
WeightsSharing::Ptr &cache) : Node(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
@ -66,7 +68,7 @@ MKLDNNCumSumNode::MKLDNNCumSumNode(const std::shared_ptr<ngraph::Node>& op, cons
IE_THROW() << errorPrefix << " has different 'data' input and output dimensions";
}
void MKLDNNCumSumNode::initSupportedPrimitiveDescriptors() {
void CumSum::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -91,7 +93,7 @@ void MKLDNNCumSumNode::initSupportedPrimitiveDescriptors() {
impl_desc_type::ref_any);
}
void MKLDNNCumSumNode::execute(mkldnn::stream strm) {
void CumSum::execute(mkldnn::stream strm) {
if (inputShapes.size() == numOfInputs)
axis = getAxis(getParentEdgeAt(AXIS)->getMemory(), getParentEdgeAt(CUM_SUM_DATA)->getMemory());
@ -107,7 +109,7 @@ void MKLDNNCumSumNode::execute(mkldnn::stream strm) {
}
template <typename dataType>
void MKLDNNCumSumNode::exec() {
void CumSum::exec() {
const auto *input = reinterpret_cast<const dataType *>(getParentEdgeAt(CUM_SUM_DATA)->getMemoryPtr()->GetPtr());
auto *output = reinterpret_cast<dataType *>(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr());
const VectorDims strides = getParentEdgeAt(CUM_SUM_DATA)->getMemory().GetDescWithType<BlockedMemoryDesc>()->getStrides();
@ -128,7 +130,7 @@ void MKLDNNCumSumNode::exec() {
}
template <bool reverse, bool exclusive, typename dataType>
void MKLDNNCumSumNode::cumSum(const dataType *input, dataType *output, const VectorDims &strides) {
void CumSum::cumSum(const dataType *input, dataType *output, const VectorDims &strides) {
SizeVector iterationRange(numOfDims - 1);
size_t j = 0;
const auto &shape = getParentEdgesAtPort(CUM_SUM_DATA)[0]->getMemory().getStaticDims();
@ -192,7 +194,7 @@ void MKLDNNCumSumNode::cumSum(const dataType *input, dataType *output, const Vec
});
}
void MKLDNNCumSumNode::parallelItInit(size_t start, std::vector<size_t>& counters, const std::vector<size_t>& iterationRange) {
void CumSum::parallelItInit(size_t start, std::vector<size_t>& counters, const std::vector<size_t>& iterationRange) {
auto itCounter = counters.rbegin();
auto itWork = iterationRange.rbegin();
while (itCounter != counters.rend() && itWork != iterationRange.rend()) {
@ -203,7 +205,7 @@ void MKLDNNCumSumNode::parallelItInit(size_t start, std::vector<size_t>& counter
}
}
inline void MKLDNNCumSumNode::parallelItStep(std::vector<size_t>& counters, const std::vector<size_t>& iterationRange) {
inline void CumSum::parallelItStep(std::vector<size_t>& counters, const std::vector<size_t>& iterationRange) {
auto itCounter = counters.rbegin();
auto itWork = iterationRange.rbegin();
@ -217,7 +219,7 @@ inline void MKLDNNCumSumNode::parallelItStep(std::vector<size_t>& counters, cons
}
}
inline size_t MKLDNNCumSumNode::getStartOffset(const std::vector<size_t> &forStartOffset, const std::vector<size_t>& strides) const {
inline size_t CumSum::getStartOffset(const std::vector<size_t> &forStartOffset, const std::vector<size_t>& strides) const {
size_t startOffset = 0;
for (size_t idx = 0; idx < forStartOffset.size(); ++idx) {
startOffset += forStartOffset[idx] * strides[idx];
@ -225,7 +227,7 @@ inline size_t MKLDNNCumSumNode::getStartOffset(const std::vector<size_t> &forSta
return startOffset;
}
size_t MKLDNNCumSumNode::getAxis(const MKLDNNMemory& _axis, const MKLDNNMemory& _data) const {
size_t CumSum::getAxis(const Memory& _axis, const Memory& _data) const {
const auto& axisPrecision = _axis.getDesc().getPrecision();
const int64_t dataShapeSize = static_cast<int64_t>(_data.GetShape().getRank());
int64_t axisValueFromBlob = 0;
@ -249,16 +251,18 @@ size_t MKLDNNCumSumNode::getAxis(const MKLDNNMemory& _axis, const MKLDNNMemory&
return axisValueFromBlob >= 0 ? axisValueFromBlob : (axisValueFromBlob + dataShapeSize);
}
bool MKLDNNCumSumNode::created() const {
return getType() == CumSum;
bool CumSum::created() const {
return getType() == Type::CumSum;
}
bool MKLDNNCumSumNode::needPrepareParams() const {
bool CumSum::needPrepareParams() const {
return false;
}
void MKLDNNCumSumNode::executeDynamicImpl(mkldnn::stream strm) {
void CumSum::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
REG_MKLDNN_PRIM_FOR(MKLDNNCumSumNode, CumSum)
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -9,10 +9,11 @@
namespace ov {
namespace intel_cpu {
namespace node {
class MKLDNNCumSumNode : public MKLDNNNode {
class CumSum : public Node {
public:
MKLDNNCumSumNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
CumSum(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
@ -37,7 +38,7 @@ private:
inline size_t getStartOffset(const std::vector<size_t> &forStartOffset, const std::vector<size_t>& strides) const;
size_t getAxis(const MKLDNNMemory& _axis, const MKLDNNMemory& _data) const;
size_t getAxis(const Memory& _axis, const Memory& _data) const;
enum { CUM_SUM_DATA, AXIS, numOfInputs };
bool exclusive;
@ -50,11 +51,12 @@ private:
template<typename T>
struct CumSumExecute {
void operator()(MKLDNNCumSumNode* node) {
void operator()(CumSum* node) {
node->exec<T>();
}
};
};
} // namespace node
} // namespace intel_cpu
} // namespace ov
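The CumSumExecute functor in this header hints at how the node selects a typed kernel at runtime: a templated operator() forwards to exec<T>() once the element type is known. Below is a generic sketch of that dispatch pattern using a plain precision switch; CumSumSketch, CumSumExecuteSketch, and the Precision enum are stand-ins for this sketch only.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

enum class Precision { FP32, I32 };          // simplified stand-in

struct CumSumSketch {
    template <typename T>
    void exec(const std::vector<T>& in, std::vector<T>& out) {
        T acc = T(0);
        out.resize(in.size());
        for (std::size_t i = 0; i < in.size(); ++i) {
            acc += in[i];
            out[i] = acc;                    // inclusive cumulative sum
        }
    }
};

// Analogue of CumSumExecute<T>: a functor that forwards to the typed kernel.
template <typename T>
struct CumSumExecuteSketch {
    template <typename... Args>
    void operator()(CumSumSketch* node, Args&&... args) {
        node->exec<T>(std::forward<Args>(args)...);
    }
};

int main() {
    CumSumSketch node;
    Precision prc = Precision::FP32;         // would come from the input memory descriptor
    std::vector<float> inF{1.f, 2.f, 3.f}, outF;
    std::vector<int32_t> inI{1, 2, 3}, outI;
    switch (prc) {                           // runtime precision -> typed functor instance
    case Precision::FP32: CumSumExecuteSketch<float>{}(&node, inF, outF); break;
    case Precision::I32:  CumSumExecuteSketch<int32_t>{}(&node, inI, outI); break;
    }
    std::cout << outF.back() << "\n";        // 6
    return 0;
}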

View File

@ -10,7 +10,7 @@
#include <string>
#include <vector>
#include <mkldnn_types.h>
#include <extension_utils.h>
#include <dnnl_extension_utils.h>
#include "ie_parallel.hpp"
#include "utils/general_utils.h"
#include <cpu/x64/cpu_isa_traits.hpp>
@ -26,10 +26,13 @@
#include "convolution_shape_inference.hpp"
using namespace mkldnn;
using namespace ov::intel_cpu;
using namespace InferenceEngine;
bool MKLDNNDeconvolutionNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
namespace ov {
namespace intel_cpu {
namespace node {
bool Deconvolution::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (std::dynamic_pointer_cast<const ngraph::opset1::ConvolutionBackpropData>(op) == nullptr &&
std::dynamic_pointer_cast<const ngraph::opset1::GroupConvolutionBackpropData>(op) == nullptr) {
@ -51,10 +54,10 @@ bool MKLDNNDeconvolutionNode::isSupportedOperation(const std::shared_ptr<const n
return true;
}
MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::Node>& op,
const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
Deconvolution::Deconvolution(const std::shared_ptr<ngraph::Node>& op,
const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : Node(op, eng, cache) {
internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> DnnlMemoryDescPtr {
return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.weights_desc(0));
return DnnlExtensionUtils::makeDescriptor(primitive_desc_it.weights_desc(0));
});
std::string errorMessage;
if (isSupportedOperation(op, errorMessage)) {
@ -65,7 +68,7 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::N
const auto& weightDims = getWeightDims();
if (convBackprop) {
algorithm = DeconvolutionCommon;
algorithm = Algorithm::DeconvolutionCommon;
IC = weightDims[0];
OC = weightDims[1];
@ -86,7 +89,7 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::N
autoPad = one_of(convBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER);
} else if (groupConvBackprop) {
algorithm = DeconvolutionGrouped;
algorithm = Algorithm::DeconvolutionGrouped;
groupNum = weightDims[0];
IC = groupNum * weightDims[1];
@ -130,8 +133,8 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::N
attr = std::make_shared<mkldnn::primitive_attr>();
}
InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceEngine::SizeVector dims) {
auto constNode = std::dynamic_pointer_cast<MKLDNNInputNode>(getParentEdgeAt(1)->getParent());
InferenceEngine::Blob::Ptr Deconvolution::createWeiBlobAsIO(InferenceEngine::SizeVector dims) {
auto constNode = std::dynamic_pointer_cast<Input>(getParentEdgeAt(1)->getParent());
if (!constNode)
IE_THROW() << "Cannot cast const input node for node " << getName() << ".";
auto blb = constNode->getMemoryPtr();
@ -154,7 +157,7 @@ InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceE
orderForBlockedDesc.push_back(i);
BlockingDesc blkDesc(dimsForBlockedDesc, orderForBlockedDesc);
InferenceEngine::TensorDesc tensorDesc(MKLDNNExtensionUtils::DataTypeToIEPrecision(blb->GetDataType()), dims, blkDesc);
InferenceEngine::TensorDesc tensorDesc(DnnlExtensionUtils::DataTypeToIEPrecision(blb->GetDataType()), dims, blkDesc);
Blob::Ptr internalBlob = InferenceEngine::make_shared_blob<int8_t>(tensorDesc);
internalBlob->allocate();
@ -172,8 +175,8 @@ InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceE
return internalBlob;
}
bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const {
if (std::dynamic_pointer_cast<MKLDNNInputNode>(getParentEdgeAt(1)->getParent()) == nullptr) {
bool Deconvolution::canBeExecutedInInt8() const {
if (std::dynamic_pointer_cast<Input>(getParentEdgeAt(1)->getParent()) == nullptr) {
return false;
}
@ -208,10 +211,10 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const {
return false;
InferenceEngine::Precision inPrecision = getOriginalInputPrecisionAtPort(0);
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inPrecision);
auto inputDataType = DnnlExtensionUtils::IEPrecisionToDataType(inPrecision);
InferenceEngine::Precision weiPrecision = getOriginalInputPrecisionAtPort(1);
auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(weiPrecision);
auto weightsDataType = DnnlExtensionUtils::IEPrecisionToDataType(weiPrecision);
if (isDW && (inputDataType == dnnl_s8 || dilation.size() == 3))
return false;
@ -219,14 +222,14 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const {
return (inputDataType == dnnl_s8 || inputDataType == dnnl_u8) && weightsDataType == dnnl_s8;
}
bool MKLDNNDeconvolutionNode::canFuse(const MKLDNNNodePtr& node) const {
bool Deconvolution::canFuse(const NodePtr& node) const {
if (canBeExecutedInInt8())
return canFuseSimpleOperation(node);
return (fusedWith.empty() && node->canBePerformedAsScaleShift(this));
}
std::pair<VectorDims, VectorDims> MKLDNNDeconvolutionNode::makeDummyInOutShape() {
std::pair<VectorDims, VectorDims> Deconvolution::makeDummyInOutShape() {
auto inShape = MemoryDescUtils::makeDummyShape(getInputShapeAtPort(0));
auto outShape = getOutputShapeAtPort(0);
@ -252,7 +255,7 @@ std::pair<VectorDims, VectorDims> MKLDNNDeconvolutionNode::makeDummyInOutShape()
const auto& origInDims = getInputShapeAtPort(0).getDims();
const auto& weightDims = getWeightDims();
const size_t wghOffset = getAlgorithm() == DeconvolutionGrouped ? 1 : 0;
const size_t wghOffset = getAlgorithm() == Algorithm::DeconvolutionGrouped ? 1 : 0;
for (size_t i = 0; i < inputDims.size() - 2; i++) {
if (origInDims[2 + i] == Shape::UNDEFINED_DIM) {
inputDims[2 + i] = ((lastOutputSpatialDims[i] - (dilation[i] + 1) *
@ -269,7 +272,7 @@ std::pair<VectorDims, VectorDims> MKLDNNDeconvolutionNode::makeDummyInOutShape()
return {inShape.getStaticDims(), outShape.getStaticDims()};
}
void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
void Deconvolution::getSupportedDescriptors() {
isInt8 = canBeExecutedInInt8();
InferenceEngine::Precision inPrecision = getOriginalInputPrecisionAtPort(0);
@ -286,12 +289,12 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
if (!one_of(outPrecision, InferenceEngine::Precision::FP32, InferenceEngine::Precision::BF16))
outPrecision = InferenceEngine::Precision::FP32;
}
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inPrecision);
auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outPrecision);
auto inputDataType = DnnlExtensionUtils::IEPrecisionToDataType(inPrecision);
auto outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(outPrecision);
if (inputDataType == memory::data_type::bf16 || outputDataType == memory::data_type::bf16)
inputDataType = outputDataType = memory::data_type::bf16;
if (!fusedWith.empty()) {
outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0));
outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0));
}
if (getParentEdges().size() != 2 && getParentEdges().size() != 3)
@ -324,9 +327,9 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
setPostOps(*attr, outShape.getStaticDims());
}
void MKLDNNDeconvolutionNode::initPaddingR(const Shape &inShape, const Shape &outShape) {
void Deconvolution::initPaddingR(const Shape &inShape, const Shape &outShape) {
for (int i = 0; i < paddingR.size(); i++) {
int with_group = getAlgorithm() == DeconvolutionGrouped ? 1 : 0;
int with_group = getAlgorithm() == Algorithm::DeconvolutionGrouped ? 1 : 0;
const auto& weightDims = getWeightDims();
int krn = weightDims[with_group + 2 + i];
int src = outShape.getStaticDims()[2 + i];
@ -338,7 +341,7 @@ void MKLDNNDeconvolutionNode::initPaddingR(const Shape &inShape, const Shape &ou
}
}
void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims) {
void Deconvolution::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims) {
mkldnn::post_ops ops;
auto getBinPostOpShape = [&](){
@ -350,13 +353,13 @@ void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vec
};
for (auto &node : fusedWith) {
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
if (auto* eltwiseNode = dynamic_cast<Eltwise *>(node.get())) {
// TODO [DS]: change to shape from memory
// use legacy depthwise since backprop convolution does not support binary post ops
eltwiseNode->appendPostOps(ops, dims, postOpsArgs);
continue;
}
if (auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get())) {
if (auto* fakeQuantizeNode = dynamic_cast<FakeQuantize *>(node.get())) {
fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), postOpsArgs);
continue;
}
@ -366,12 +369,12 @@ void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vec
attr.set_post_ops(ops);
}
void MKLDNNDeconvolutionNode::filterSupportedPrimitiveDescriptors() {
MKLDNNNode::filterSupportedPrimitiveDescriptors();
void Deconvolution::filterSupportedPrimitiveDescriptors() {
Node::filterSupportedPrimitiveDescriptors();
filterSupportedDescriptors();
}
void MKLDNNDeconvolutionNode::filterSupportedDescriptors() {
void Deconvolution::filterSupportedDescriptors() {
if (!inputMemoryFormatsFilter.empty() || !outputMemoryFormatsFilter.empty()) {
if (inputMemoryFormatsFilter.size() > 1 || outputMemoryFormatsFilter.size() > 1) {
IE_THROW() << "Incorrect number of input or output memory formats for Deconvolution node";
@ -381,19 +384,19 @@ void MKLDNNDeconvolutionNode::filterSupportedDescriptors() {
bool isSuitableDesc = true;
if (!inputMemoryFormatsFilter.empty()) {
if (isInt8) {
auto src_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr<dnnl::deconvolution_forward::desc>(*itd)->data.src_desc);
auto src_tdesc = DnnlExtensionUtils::makeDescriptor(std::shared_ptr<dnnl::deconvolution_forward::desc>(*itd)->data.src_desc);
isSuitableDesc &= src_tdesc->isSame(inputMemoryFormatsFilter[0]);
} else {
auto src_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr<mkldnn::convolution_backward_data::desc>(*itd)->data.diff_src_desc);
auto src_tdesc = DnnlExtensionUtils::makeDescriptor(std::shared_ptr<mkldnn::convolution_backward_data::desc>(*itd)->data.diff_src_desc);
isSuitableDesc &= src_tdesc->isSame(inputMemoryFormatsFilter[0]);
}
}
if (!outputMemoryFormatsFilter.empty()) {
if (isInt8) {
auto dst_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr<mkldnn::deconvolution_forward::desc>(*itd)->data.dst_desc);
auto dst_tdesc = DnnlExtensionUtils::makeDescriptor(std::shared_ptr<mkldnn::deconvolution_forward::desc>(*itd)->data.dst_desc);
isSuitableDesc &= dst_tdesc->isSame(outputMemoryFormatsFilter[0]);
} else {
auto dst_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr<mkldnn::convolution_backward_data::desc>(*itd)->data.diff_dst_desc);
auto dst_tdesc = DnnlExtensionUtils::makeDescriptor(std::shared_ptr<mkldnn::convolution_backward_data::desc>(*itd)->data.diff_dst_desc);
isSuitableDesc &= dst_tdesc->isSame(outputMemoryFormatsFilter[0]);
}
}
@ -406,11 +409,11 @@ void MKLDNNDeconvolutionNode::filterSupportedDescriptors() {
}
}
bool MKLDNNDeconvolutionNode::created() const {
return getType() == Deconvolution;
bool Deconvolution::created() const {
return getType() == Type::Deconvolution;
}
bool MKLDNNDeconvolutionNode::needShapeInfer() const {
bool Deconvolution::needShapeInfer() const {
if (inputShapesModified()) {
return true;
}
@ -423,7 +426,7 @@ bool MKLDNNDeconvolutionNode::needShapeInfer() const {
return false;
}
std::vector<VectorDims> MKLDNNDeconvolutionNode::shapeInfer() const {
std::vector<VectorDims> Deconvolution::shapeInfer() const {
const auto &dataMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
std::vector<int32_t> outSpDims;
if (externOutShape) {
@ -432,8 +435,8 @@ std::vector<VectorDims> MKLDNNDeconvolutionNode::shapeInfer() const {
return {shapeInferInternal(dataMemPtr->getStaticDims(), outSpDims)};
}
VectorDims MKLDNNDeconvolutionNode::shapeInferInternal(const VectorDims &inDims, std::vector<int32_t> outSpDims) const {
std::vector<ov::StaticShape> inputShapes = {
VectorDims Deconvolution::shapeInferInternal(const VectorDims &inDims, std::vector<int32_t> outSpDims) const {
std::vector<StaticShape> inputShapes = {
inDims,
getWeightDims()
};
@ -451,22 +454,22 @@ VectorDims MKLDNNDeconvolutionNode::shapeInferInternal(const VectorDims &inDims,
outSpDims.data())});
}
std::vector<ov::StaticShape> outputShapes = shapeInference->infer(inputShapes, inputValues);
std::vector<StaticShape> outputShapes = shapeInference->infer(inputShapes, inputValues);
return outputShapes.back().to_shape();
}
void MKLDNNDeconvolutionNode::setDynamicBatchLim(int lim) {
void Deconvolution::setDynamicBatchLim(int lim) {
if (!execPtr) {
IE_THROW() << "Can't set dynamic batch for Deconvolution node with name: " << getName() << ", because executor is not compiled";
}
if (execPtr->needReordering()) {
IE_THROW() << "Can't execute Deconvolution node with dynamic batch via executor with reorders";
}
MKLDNNNode::setDynamicBatchLim(lim);
Node::setDynamicBatchLim(lim);
}
void MKLDNNDeconvolutionNode::cleanup() {
void Deconvolution::cleanup() {
if (!isDynamicNode()) {
internalBlobs.clear();
}
@ -480,7 +483,7 @@ void MKLDNNDeconvolutionNode::cleanup() {
}
}
void MKLDNNDeconvolutionNode::execute(mkldnn::stream strm) {
void Deconvolution::execute(mkldnn::stream strm) {
if (!execPtr) {
IE_THROW() << "Can't execute Deconvolution node with name: " << getName() << ", because executor is not compiled";
}
@ -491,10 +494,10 @@ void MKLDNNDeconvolutionNode::execute(mkldnn::stream strm) {
}
}
std::shared_ptr<MKLDNNDescriptor> MKLDNNDeconvolutionNode::createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc,
bool isWinograd) const {
std::shared_ptr<DnnlDesriptor> Deconvolution::createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc,
bool isWinograd) const {
mkldnn::algorithm alg = isWinograd ? mkldnn::algorithm::convolution_winograd : mkldnn::algorithm::convolution_direct;
std::shared_ptr<convolution_backward_data::desc> deconv_desc;
std::shared_ptr<convolution_forward::primitive_desc> fwd_conv_pd;
@ -502,21 +505,21 @@ std::shared_ptr<MKLDNNDescriptor> MKLDNNDeconvolutionNode::createDefaultMkldnnDe
if (fwd_conv_pd->get(true) == nullptr) {
IE_THROW() << "Forward convolution primitive descriptor is nullable for node with name: " << getName();
}
return std::make_shared<MKLDNNDescriptor>(deconv_desc, fwd_conv_pd);
return std::make_shared<DnnlDesriptor>(deconv_desc, fwd_conv_pd);
}
std::shared_ptr<MKLDNNDescriptor> MKLDNNDeconvolutionNode::createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc) const {
return std::make_shared<MKLDNNDescriptor>(createDescriptorInternalInt8(srcDesc, wghDesc, dstDesc));
std::shared_ptr<DnnlDesriptor> Deconvolution::createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc) const {
return std::make_shared<DnnlDesriptor>(createDescriptorInternalInt8(srcDesc, wghDesc, dstDesc));
}
void MKLDNNDeconvolutionNode::createDeconvPrim(std::shared_ptr<MKLDNNDescriptor> desc,
MKLDNNMemoryPtr srcMemPtr,
MKLDNNMemoryPtr wghMemPtr,
MKLDNNMemoryPtr dstMemPtr,
AttrPtr attr,
impl_desc_type selectedImpl) {
void Deconvolution::createDeconvPrim(std::shared_ptr<DnnlDesriptor> desc,
MemoryPtr srcMemPtr,
MemoryPtr wghMemPtr,
MemoryPtr dstMemPtr,
AttrPtr attr,
impl_desc_type selectedImpl) {
auto itpd = desc->createPrimitiveDescriptorIterator(getEngine(), *attr);
while (static_cast<bool>(itpd)) {
@ -545,17 +548,17 @@ void MKLDNNDeconvolutionNode::createDeconvPrim(std::shared_ptr<MKLDNNDescriptor>
}
if (!itpd.next_impl()) {
auto inDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(srcMemPtr->getStaticDims()),
auto inDesc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(srcMemPtr->getStaticDims()),
memory::data_type::f32,
memory::format_tag::any);
auto wghDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(wghMemPtr->getStaticDims()),
auto wghDesc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(wghMemPtr->getStaticDims()),
memory::data_type::f32,
memory::format_tag::any);
auto outDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(dstMemPtr->getStaticDims()),
auto outDesc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(dstMemPtr->getStaticDims()),
memory::data_type::f32,
memory::format_tag::any);
std::shared_ptr<MKLDNNDescriptor> anyDeconvDesc = createDefaultMkldnnDeconvDesc(inDesc, wghDesc, outDesc, false);
std::shared_ptr<DnnlDesriptor> anyDeconvDesc = createDefaultMkldnnDeconvDesc(inDesc, wghDesc, outDesc, false);
auto anyDeconvItpd = anyDeconvDesc->createPrimitiveDescriptorIterator(getEngine(), *attr);
if (static_cast<bool>(anyDeconvItpd)) {
auto prim_desc = convolution_backward_data::primitive_desc(anyDeconvItpd.get());
@ -571,7 +574,7 @@ void MKLDNNDeconvolutionNode::createDeconvPrim(std::shared_ptr<MKLDNNDescriptor>
IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
}
MKLDNNNode::AttrPtr MKLDNNDeconvolutionNode::makePrimitiveAttr(const VectorDims &dims) {
Node::AttrPtr Deconvolution::makePrimitiveAttr(const VectorDims &dims) {
auto attr = std::make_shared<mkldnn::primitive_attr>(mkldnn::primitive_attr());
setPostOps(*attr, dims);
@ -579,11 +582,11 @@ MKLDNNNode::AttrPtr MKLDNNDeconvolutionNode::makePrimitiveAttr(const VectorDims
return attr;
}
MKLDNNNode::AttrPtr MKLDNNDeconvolutionNode::initPrimitiveAttr() {
Node::AttrPtr Deconvolution::initPrimitiveAttr() {
return attr;
}
void MKLDNNDeconvolutionNode::prepareParams() {
void Deconvolution::prepareParams() {
auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
auto wghMemPtr = getParentEdgesAtPort(1)[0]->getMemoryPtr();
auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr();
@ -621,7 +624,7 @@ void MKLDNNDeconvolutionNode::prepareParams() {
mkldnn::memory::desc wgh_candidate;
if (isInt8) {
if (internalBlobMemory.empty()) {
wgh_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any);
wgh_candidate = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any);
} else {
wgh_candidate = internalBlobMemory.front()->GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
}
@ -629,12 +632,12 @@ void MKLDNNDeconvolutionNode::prepareParams() {
wgh_candidate = getParentEdgesAtPort(1).front()->getMemory().GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
}
std::shared_ptr<MKLDNNDescriptor> desc;
std::shared_ptr<DnnlDesriptor> desc;
if (isInt8) {
desc = createInt8MkldnnDeconvDesc(in_candidate, wgh_candidate, out_candidate);
} else {
desc = createDefaultMkldnnDeconvDesc(in_candidate, wgh_candidate, out_candidate,
selected_pd->getImplementationType() == ov::intel_cpu::impl_desc_type::jit_avx512_winograd);
selected_pd->getImplementationType() == impl_desc_type::jit_avx512_winograd);
}
createDeconvPrim(desc, srcMemPtr, wghMemPtr, dstMemPtr, pAttrLocal, selected_pd->getImplementationType());
@ -648,10 +651,10 @@ void MKLDNNDeconvolutionNode::prepareParams() {
{DNNL_ARG_WEIGHTS, wghMemPtr->GetPrimitive()},
{DNNL_ARG_DIFF_SRC, dstMemPtr->GetPrimitive()}};
}
MKLDNNNode::appendPostOpArgs(*pAttrLocal, primArgs, postOpsArgs);
Node::appendPostOpArgs(*pAttrLocal, primArgs, postOpsArgs);
}
void MKLDNNDeconvolutionNode::createPrimitive() {
void Deconvolution::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
@ -659,7 +662,7 @@ void MKLDNNDeconvolutionNode::createPrimitive() {
}
}
MKLDNNDeconvolutionNode::DefaultDeconvDescs MKLDNNDeconvolutionNode::createDescriptorInternalDefault(const mkldnn::memory::desc& in_candidate,
Deconvolution::DefaultDeconvDescs Deconvolution::createDescriptorInternalDefault(const mkldnn::memory::desc& in_candidate,
const mkldnn::memory::desc& wgh_candidate,
const mkldnn::memory::desc& out_candidate,
mkldnn::algorithm alg) const {
@ -688,14 +691,14 @@ MKLDNNDeconvolutionNode::DefaultDeconvDescs MKLDNNDeconvolutionNode::createDescr
return {deconv_desc, fwd_conv_pd};
}
MKLDNNDeconvolutionNode::Int8DeconvDesc MKLDNNDeconvolutionNode::createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate,
Deconvolution::Int8DeconvDesc Deconvolution::createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate,
const mkldnn::memory::desc& wgh_candidate,
const mkldnn::memory::desc& out_candidate) const {
auto convertDims = [] (const std::vector<ptrdiff_t>& orig_dims) {
return memory::dims(orig_dims.begin(), orig_dims.end());
};
MKLDNNDeconvolutionNode::Int8DeconvDesc deconv_desc;
Deconvolution::Int8DeconvDesc deconv_desc;
deconv_desc = std::make_shared<mkldnn::deconvolution_forward::desc>(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct,
in_candidate, wgh_candidate, out_candidate,
convertDims(stride), convertDims(dilation),
@ -703,7 +706,7 @@ MKLDNNDeconvolutionNode::Int8DeconvDesc MKLDNNDeconvolutionNode::createDescripto
return deconv_desc;
}
void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc,
void Deconvolution::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc,
const std::vector<MemoryDescPtr> &outputDesc) {
auto inDesc = inputDesc[0]->isDefined() ? inputDesc[0] : inputDesc[0]->cloneWithNewDims(inShape.getStaticDims());
auto dnnlInDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*inDesc);
@ -722,10 +725,10 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<MemoryDescPtr>
return;
if (isInt8) {
mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any);
mkldnn::memory::desc wgh_candidate(DnnlExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any);
descs.emplace_back(createDescriptorInternalInt8(in_candidate, wgh_candidate, out_candidate));
} else {
mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(getWeightDims()),
mkldnn::memory::desc wgh_candidate(DnnlExtensionUtils::convertToDnnlDims(getWeightDims()),
dnnlInDesc.getDataType(), memory::format_tag::any);
for (auto alg : {mkldnn::algorithm::convolution_winograd, mkldnn::algorithm::convolution_direct}) {
std::shared_ptr<convolution_backward_data::desc> deconv_desc;
@ -738,7 +741,7 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<MemoryDescPtr>
}
}
std::shared_ptr<MemoryDesc> MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
std::shared_ptr<MemoryDesc> Deconvolution::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
if (idx == 2) {
return std::make_shared<CpuBlockedMemoryDesc>(InferenceEngine::Precision::I32, Shape(getInputShapeAtPort(2).getStaticDims()));
} else if (idx > 0 && isInt8) {
@ -749,34 +752,34 @@ std::shared_ptr<MemoryDesc> MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primi
auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1) : isInt8 ? primitive_desc_it.src_desc(idx) : primitive_desc_it.diff_dst_desc(idx);
if (getInputShapeAtPort(idx).isDynamic()) {
return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx));
return DnnlExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx));
}
return MKLDNNExtensionUtils::makeDescriptor(desc);
return DnnlExtensionUtils::makeDescriptor(desc);
}
std::shared_ptr<MemoryDesc> MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
std::shared_ptr<MemoryDesc> Deconvolution::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
auto desc = isInt8 ? primitive_desc_it.dst_desc(idx) : primitive_desc_it.diff_src_desc(idx);
if (getOutputShapeAtPort(idx).isDynamic()) {
return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getOutputShapeAtPort(idx));
return DnnlExtensionUtils::makeUndefinedDesc(desc, getOutputShapeAtPort(idx));
}
return MKLDNNExtensionUtils::makeDescriptor(desc);
return DnnlExtensionUtils::makeDescriptor(desc);
}
InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const {
InferenceEngine::Precision Deconvolution::getRuntimePrecision() const {
std::vector<InferenceEngine::Precision> inputPrecisions;
// Don't take bias precision into account
size_t inputsNumLimit = 2;
for (size_t i = 0; i < std::min(getParentEdges().size(), inputsNumLimit); i++) {
auto parentEdge = getParentEdgeAt(i);
if (parentEdge && parentEdge->getStatus() == MKLDNNEdge::Status::Validated) {
inputPrecisions.emplace_back(MKLDNNExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType())));
if (parentEdge && parentEdge->getStatus() == Edge::Status::Validated) {
inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType())));
}
}
return getMaxPrecision(inputPrecisions);
}
MKLDNNDeconvolutionNode::DeconvExecutorDefault::DeconvExecutorDefault(const mkldnn::convolution_backward_data::primitive_desc& pd,
Deconvolution::DeconvExecutorDefault::DeconvExecutorDefault(const mkldnn::convolution_backward_data::primitive_desc& pd,
const mkldnn::memory::desc& inMemDesc,
const mkldnn::memory::desc& weightMemDesc,
const mkldnn::memory::desc& outMemDesc,
@ -796,7 +799,7 @@ MKLDNNDeconvolutionNode::DeconvExecutorDefault::DeconvExecutorDefault(const mkld
}
}
MKLDNNDeconvolutionNode::DeconvExecutorInt8::DeconvExecutorInt8(const mkldnn::deconvolution_forward::primitive_desc& pd,
Deconvolution::DeconvExecutorInt8::DeconvExecutorInt8(const mkldnn::deconvolution_forward::primitive_desc& pd,
const mkldnn::memory::desc& inMemDesc,
const mkldnn::memory::desc& weightMemDesc,
const mkldnn::memory::desc& outMemDesc,
@ -816,7 +819,7 @@ MKLDNNDeconvolutionNode::DeconvExecutorInt8::DeconvExecutorInt8(const mkldnn::de
}
}
std::vector<int32_t> MKLDNNDeconvolutionNode::readOutputSpatialDims() const {
std::vector<int32_t> Deconvolution::readOutputSpatialDims() const {
if (getParentEdges().size() < 3) {
IE_THROW() << "Can't get output spatial dims. Inputs number = " << getParentEdges().size();
}
@ -833,4 +836,6 @@ std::vector<int32_t> MKLDNNDeconvolutionNode::readOutputSpatialDims() const {
return outSpDims;
}
REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution);
} // namespace node
} // namespace intel_cpu
} // namespace ov
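createDeconvPrim above walks the primitive-descriptor candidates looking for the implementation type chosen earlier, and if nothing matches it rebuilds a plain f32 "any format" descriptor as a fallback. A much-simplified sketch of that try-preferred-then-fall-back control flow; the strings and pickImplementation() below stand in for oneDNN primitive descriptors and are not plugin code.

#include <iostream>
#include <string>
#include <vector>

using ImplType = std::string;   // stand-in for impl_desc_type

ImplType pickImplementation(const std::vector<ImplType>& candidates,
                            const ImplType& selected) {
    for (const auto& impl : candidates) {
        if (impl == selected) {
            return impl;        // the previously selected implementation is available
        }
    }
    // No candidate matches: fall back to a generic implementation, mirroring
    // the f32 any-format descriptor rebuilt inside createDeconvPrim.
    return ImplType{"ref_any"};
}

int main() {
    std::vector<ImplType> candidates{"jit_avx2", "gemm_blas"};
    std::cout << pickImplementation(candidates, "jit_avx512_winograd") << "\n";  // ref_any
    std::cout << pickImplementation(candidates, "gemm_blas") << "\n";            // gemm_blas
    return 0;
}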

View File

@ -13,14 +13,15 @@
namespace ov {
namespace intel_cpu {
namespace node {
class MKLDNNDeconvolutionNode : public MKLDNNNode {
class Deconvolution : public Node {
using DefaultDeconvDescs = std::pair<std::shared_ptr<mkldnn::convolution_backward_data::desc>,
std::shared_ptr<mkldnn::convolution_forward::primitive_desc>>;
using Int8DeconvDesc = std::shared_ptr<mkldnn::deconvolution_forward::desc>;
public:
MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
Deconvolution(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
void getSupportedDescriptors() override;
void createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
@ -33,7 +34,7 @@ public:
return false;
}
size_t descInputNumbers(MKLDNNDescriptor desc) override {
size_t descInputNumbers(DnnlDesriptor desc) override {
return static_cast<size_t>(getParentEdges().size());
}
@ -43,7 +44,7 @@ public:
InferenceEngine::Precision getRuntimePrecision() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
bool canFuse(const MKLDNNNodePtr& node) const override;
bool canFuse(const NodePtr& node) const override;
const VectorDims& getWeightDims() const { return getInputShapeAtPort(1).getStaticDims(); }
const std::vector<ptrdiff_t>& getStride() const { return stride; }
@ -121,18 +122,18 @@ private:
Int8DeconvDesc createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate,
const mkldnn::memory::desc& wgh_candidate,
const mkldnn::memory::desc& out_candidate) const;
std::shared_ptr<MKLDNNDescriptor> createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc,
bool isWinograd) const;
std::shared_ptr<MKLDNNDescriptor> createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
std::shared_ptr<DnnlDesriptor> createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc) const;
const mkldnn::memory::desc& dstDesc,
bool isWinograd) const;
std::shared_ptr<DnnlDesriptor> createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc) const;
void createDeconvPrim(std::shared_ptr<MKLDNNDescriptor> desc,
MKLDNNMemoryPtr srcMemPtr,
MKLDNNMemoryPtr wghMemPtr,
MKLDNNMemoryPtr dstMemPtr,
void createDeconvPrim(std::shared_ptr<DnnlDesriptor> desc,
MemoryPtr srcMemPtr,
MemoryPtr wghMemPtr,
MemoryPtr dstMemPtr,
AttrPtr attr,
impl_desc_type selectedImpl);
@ -142,5 +143,6 @@ private:
InferenceEngine::Blob::Ptr createWeiBlobAsIO(InferenceEngine::SizeVector dims);
};
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -7,13 +7,12 @@
#include <vector>
#include <math.h>
#include <mkldnn_types.h>
#include <extension_utils.h>
#include <dnnl_extension_utils.h>
#include <cpu/x64/jit_generator.hpp>
#include "ie_parallel.hpp"
#include "memory_desc/dnnl_blocked_memory_desc.h"
using namespace mkldnn;
using namespace ov::intel_cpu;
using namespace InferenceEngine;
using namespace mkldnn;
using namespace mkldnn::impl;
@ -21,13 +20,17 @@ using namespace mkldnn::impl::cpu::x64;
using namespace mkldnn::impl::utils;
using namespace Xbyak;
namespace ov {
namespace intel_cpu {
namespace node {
#define GET_OFF(field) offsetof(jit_def_conv_call_args, field)
template <cpu_isa_t isa>
struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_def_conv_kernel_f32)
constexpr static int sampledPointsPerPixel = MKLDNNDeformableConvolutionNode::sampledPointsPerPixel;
constexpr static int sampledPointsPerPixel = DeformableConvolution::sampledPointsPerPixel;
explicit jit_uni_def_conv_kernel_f32(const jit_def_conv_params& jcp) : jit_uni_def_conv_kernel(jcp), jit_generator() {}
@ -665,7 +668,7 @@ private:
}
};
bool MKLDNNDeformableConvolutionNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
bool DeformableConvolution::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (!one_of(op->get_type_info(),
ngraph::op::v1::DeformableConvolution::get_type_info_static(),
@ -679,8 +682,8 @@ bool MKLDNNDeformableConvolutionNode::isSupportedOperation(const std::shared_ptr
return true;
}
MKLDNNDeformableConvolutionNode::MKLDNNDeformableConvolutionNode(const std::shared_ptr<ngraph::Node>& op,
const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
DeformableConvolution::DeformableConvolution(const std::shared_ptr<ngraph::Node>& op,
const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : Node(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
@ -716,7 +719,7 @@ MKLDNNDeformableConvolutionNode::MKLDNNDeformableConvolutionNode(const std::shar
}
}
void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() {
void DeformableConvolution::getSupportedDescriptors() {
if (getParentEdges().size() != 3 && getParentEdges().size() != 4)
IE_THROW() << errorPrefix << " has incorrect number of input edges";
if (getChildEdges().empty())
@ -735,7 +738,7 @@ void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() {
}
}
void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
void DeformableConvolution::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -820,7 +823,7 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNDeformableConvolutionNode::DefConvExecutor::prepareSamplingWeights(
void DeformableConvolution::DefConvExecutor::prepareSamplingWeights(
const float* offsets, const float* modulation, bool enforceRef) {
const int MB = jcp.mb;
const int OH = jcp.oh;
@ -943,7 +946,7 @@ void MKLDNNDeformableConvolutionNode::DefConvExecutor::prepareSamplingWeights(
});
}
MKLDNNDeformableConvolutionNode::DefConvExecutor::DefConvExecutor(const DefConvAttr &defConvAttr,
DeformableConvolution::DefConvExecutor::DefConvExecutor(const DefConvAttr &defConvAttr,
const std::vector<std::shared_ptr<BlockedMemoryDesc>> &descVector) {
if (descVector.size() != 4 && descVector.size() != 5) {
IE_THROW() << "Deformable Convolution executor got incorrect desc's count (" << descVector.size() << ")";
@ -1021,7 +1024,7 @@ MKLDNNDeformableConvolutionNode::DefConvExecutor::DefConvExecutor(const DefConvA
jcp.nthr = dnnl_get_max_threads();
}
MKLDNNDeformableConvolutionNode::DefConvJitExecutor::DefConvJitExecutor(const DefConvAttr &defConvAttr,
DeformableConvolution::DefConvJitExecutor::DefConvJitExecutor(const DefConvAttr &defConvAttr,
const std::vector<std::shared_ptr<BlockedMemoryDesc>> &descVector) :
DefConvExecutor(defConvAttr, descVector) {
if (mayiuse(cpu::x64::avx512_common)) {
@ -1040,7 +1043,7 @@ MKLDNNDeformableConvolutionNode::DefConvJitExecutor::DefConvJitExecutor(const De
}
}
void MKLDNNDeformableConvolutionNode::DefConvRefExecutor::exec(const float* src, const float* offsets,
void DeformableConvolution::DefConvRefExecutor::exec(const float* src, const float* offsets,
const float* weights, const float* modulation, float* dst,
int *pSampledCoordsVector, float *pInterpWeightsVector) {
this->pSampledCoordsVector = pSampledCoordsVector;
@ -1099,7 +1102,7 @@ void MKLDNNDeformableConvolutionNode::DefConvRefExecutor::exec(const float* src,
});
}
void MKLDNNDeformableConvolutionNode::prepareParams() {
void DeformableConvolution::prepareParams() {
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
auto& srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr();
auto& offMemPtr = getParentEdgeAt(OFF_ID)->getMemoryPtr();
@ -1160,11 +1163,11 @@ void MKLDNNDeformableConvolutionNode::prepareParams() {
}
}
void MKLDNNDeformableConvolutionNode::executeDynamicImpl(dnnl::stream strm) {
void DeformableConvolution::executeDynamicImpl(dnnl::stream strm) {
execute(strm);
}
void MKLDNNDeformableConvolutionNode::DefConvJitExecutor::exec(const float* src, const float* offsets,
void DeformableConvolution::DefConvJitExecutor::exec(const float* src, const float* offsets,
const float* weights, const float* modulation, float* dst,
int *pSampledCoordsVector, float *pInterpWeightsVector) {
this->pSampledCoordsVector = pSampledCoordsVector;
@ -1196,7 +1199,7 @@ void MKLDNNDeformableConvolutionNode::DefConvJitExecutor::exec(const float* src,
});
}
void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) {
void DeformableConvolution::execute(mkldnn::stream strm) {
const size_t inputsNumber = getOriginalInputsNumber();
auto &srcMemory0 = getParentEdgeAt(0)->getMemory();
@ -1226,18 +1229,20 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) {
}
}
void MKLDNNDeformableConvolutionNode::updatePadding() {
void DeformableConvolution::updatePadding() {
if (isDynamicNode() && autoPadding) {
defConvAttr.padL = shapeInference->get_pads_begin();
}
}
bool MKLDNNDeformableConvolutionNode::created() const {
return getType() == DeformableConvolution;
bool DeformableConvolution::created() const {
return getType() == Type::DeformableConvolution;
}
InferenceEngine::Precision MKLDNNDeformableConvolutionNode::getRuntimePrecision() const {
InferenceEngine::Precision DeformableConvolution::getRuntimePrecision() const {
return getMaxPrecision(getInputPrecisions());
}
REG_MKLDNN_PRIM_FOR(MKLDNNDeformableConvolutionNode, DeformableConvolution);
} // namespace node
} // namespace intel_cpu
} // namespace ov
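DefConvJitExecutor compiles a jit_uni_def_conv_kernel_f32 specialization for the widest ISA the CPU reports, while DefConvRefExecutor provides the portable path. A rough sketch of that select-by-ISA idea; cpuHasAvx512(), cpuHasAvx2(), and the *Sketch types below are assumptions made for this example, not OpenVINO calls.

#include <iostream>
#include <memory>

// Hypothetical ISA probes, used only in this sketch.
bool cpuHasAvx512() { return false; }
bool cpuHasAvx2()   { return true; }

struct DefConvExecutorSketch {
    virtual ~DefConvExecutorSketch() = default;
    virtual const char* name() const = 0;
};

struct JitExecutorSketch : DefConvExecutorSketch {
    explicit JitExecutorSketch(int vlen) : vectorLength(vlen) {}
    const char* name() const override { return "jit"; }
    int vectorLength;               // e.g. 16 floats for AVX-512, 8 for AVX2
};

struct RefExecutorSketch : DefConvExecutorSketch {
    const char* name() const override { return "reference"; }
};

std::unique_ptr<DefConvExecutorSketch> makeExecutor() {
    if (cpuHasAvx512()) return std::make_unique<JitExecutorSketch>(16);
    if (cpuHasAvx2())   return std::make_unique<JitExecutorSketch>(8);
    return std::make_unique<RefExecutorSketch>();   // portable fallback
}

int main() {
    auto exec = makeExecutor();
    std::cout << "selected executor: " << exec->name() << "\n";
    return 0;
}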

View File

@ -11,6 +11,7 @@
namespace ov {
namespace intel_cpu {
namespace node {
struct jit_def_conv_params {
int ndims;
@ -68,9 +69,9 @@ struct jit_uni_def_conv_kernel {
jit_def_conv_params jcp_;
};
class MKLDNNDeformableConvolutionNode : public MKLDNNNode {
class DeformableConvolution : public Node {
public:
MKLDNNDeformableConvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
DeformableConvolution(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
void getSupportedDescriptors() override;
@ -155,5 +156,6 @@ private:
bool autoPadding = false;
};
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -4,7 +4,7 @@
#include "depth_to_space.h"
#include <extension_utils.h>
#include <dnnl_extension_utils.h>
#include <utils/general_utils.h>
#include <cmath>
@ -17,11 +17,14 @@
#define THROW_ERROR IE_THROW() << "DepthToSpace layer with name '" << getName() << "' "
using namespace ov::intel_cpu;
using namespace InferenceEngine;
using namespace mkldnn::impl;
size_t MKLDNNDepthToSpaceNode::DepthToSpaceAttrs::hash() const {
namespace ov {
namespace intel_cpu {
namespace node {
size_t DepthToSpace::DepthToSpaceAttrs::hash() const {
using namespace dnnl::impl;
using namespace dnnl::impl::primitive_hashing;
@ -37,7 +40,7 @@ size_t MKLDNNDepthToSpaceNode::DepthToSpaceAttrs::hash() const {
return seed;
}
bool MKLDNNDepthToSpaceNode::DepthToSpaceAttrs::operator==(const DepthToSpaceAttrs& rhs) const {
bool DepthToSpace::DepthToSpaceAttrs::operator==(const DepthToSpaceAttrs& rhs) const {
bool result = layoutType == rhs.layoutType && mode == rhs.mode &&
blockSize == rhs.blockSize && blockStep == rhs.blockStep &&
dataSize == rhs.dataSize && nSpatialDims == rhs.nSpatialDims &&
@ -46,7 +49,7 @@ bool MKLDNNDepthToSpaceNode::DepthToSpaceAttrs::operator==(const DepthToSpaceAtt
return result;
}
bool MKLDNNDepthToSpaceNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
bool DepthToSpace::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
auto depthToSpace = ov::as_type_ptr<const ngraph::opset1::DepthToSpace>(op);
if (!depthToSpace) {
@ -64,8 +67,8 @@ bool MKLDNNDepthToSpaceNode::isSupportedOperation(const std::shared_ptr<const ng
return true;
}
MKLDNNDepthToSpaceNode::MKLDNNDepthToSpaceNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(op, eng, cache) {
DepthToSpace::DepthToSpace(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache)
: Node(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
@ -104,9 +107,9 @@ MKLDNNDepthToSpaceNode::MKLDNNDepthToSpaceNode(const std::shared_ptr<ngraph::Nod
attrs.blockStep = static_cast<size_t>(std::pow(attrs.blockSize, nSpatialDims));
}
void MKLDNNDepthToSpaceNode::getSupportedDescriptors() {}
void DepthToSpace::getSupportedDescriptors() {}
void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() {
void DepthToSpace::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -158,7 +161,7 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNDepthToSpaceNode::createPrimitive() {
void DepthToSpace::createPrimitive() {
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->isAllocated())
@ -182,7 +185,7 @@ void MKLDNNDepthToSpaceNode::createPrimitive() {
}
}
void MKLDNNDepthToSpaceNode::prepareParams() {
void DepthToSpace::prepareParams() {
attrs.srcBlockedDims = getParentEdgeAt(0)->getMemoryPtr()->GetDescWithType<BlockedMemoryDesc>()->getBlockDims();
auto builder = [](const DepthToSpaceAttrs& key) -> std::shared_ptr<DepthToSpaceExecutor> {
return std::make_shared<DepthToSpaceExecutor>(key);
@ -197,11 +200,11 @@ void MKLDNNDepthToSpaceNode::prepareParams() {
execPtr = result.first;
}
MKLDNNDepthToSpaceNode::DepthToSpaceExecutor::DepthToSpaceExecutor(const DepthToSpaceAttrs& attrs) {
if (!ov::intel_cpu::one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c, LayoutType::nspc, LayoutType::ncsp))
DepthToSpace::DepthToSpaceExecutor::DepthToSpaceExecutor(const DepthToSpaceAttrs& attrs) {
if (!one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c, LayoutType::nspc, LayoutType::ncsp))
IE_THROW() << "DepthToSpace executor supports only 'nCsp16c', 'nCsp8c', 'nspc' or 'ncsp' layouts.";
const bool isBlocked = ov::intel_cpu::one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c);
const bool isBlocked = one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c);
const bool isChannelsFirst = attrs.layoutType == LayoutType::nspc;
const size_t nDims = attrs.srcBlockedDims.size();
const size_t reshapedRank = nDims + attrs.nSpatialDims + static_cast<int>(isBlocked && attrs.mode == Mode::DEPTH_FIRST);
@ -286,7 +289,7 @@ MKLDNNDepthToSpaceNode::DepthToSpaceExecutor::DepthToSpaceExecutor(const DepthTo
permuteKernel = std::unique_ptr<PermuteKernel>(new PermuteKernel(params));
}
void MKLDNNDepthToSpaceNode::DepthToSpaceExecutor::exec(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB) {
void DepthToSpace::DepthToSpaceExecutor::exec(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const int MB) {
if (!permuteKernel)
IE_THROW() << "Could not execute. Kernel for Transpose node was not compiled.";
@ -296,7 +299,7 @@ void MKLDNNDepthToSpaceNode::DepthToSpaceExecutor::exec(MKLDNNMemoryPtr& srcMemP
permuteKernel->execute(srcData, dstData, MB);
}
void MKLDNNDepthToSpaceNode::execute(mkldnn::stream strm) {
void DepthToSpace::execute(mkldnn::stream strm) {
if (!execPtr) {
THROW_ERROR << "doesn't have a compiled executor.";
}
@ -305,11 +308,14 @@ void MKLDNNDepthToSpaceNode::execute(mkldnn::stream strm) {
execPtr->exec(getParentEdgeAt(0)->getMemoryPtr(), getChildEdgeAt(0)->getMemoryPtr(), MB);
}
void MKLDNNDepthToSpaceNode::executeDynamicImpl(mkldnn::stream strm) {
void DepthToSpace::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
bool MKLDNNDepthToSpaceNode::created() const {
return getType() == DepthToSpace;
bool DepthToSpace::created() const {
return getType() == Type::DepthToSpace;
}
REG_MKLDNN_PRIM_FOR(MKLDNNDepthToSpaceNode, DepthToSpace);
} // namespace node
} // namespace intel_cpu
} // namespace ov
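prepareParams builds a DepthToSpaceAttrs key (equipped with hash() and operator==) and asks a cache for an executor, constructing one through the builder lambda only on a miss. A generic sketch of that keyed-executor cache, using std::unordered_map in place of the plugin's cache class; AttrsKey, AttrsKeyHash, and ExecutorSketch are illustrative names.

#include <cstddef>
#include <functional>
#include <iostream>
#include <memory>
#include <unordered_map>

struct AttrsKey {
    std::size_t blockSize;
    std::size_t nSpatialDims;
    bool operator==(const AttrsKey& rhs) const {
        return blockSize == rhs.blockSize && nSpatialDims == rhs.nSpatialDims;
    }
};

struct AttrsKeyHash {
    std::size_t operator()(const AttrsKey& k) const {
        // Simple combine for the sketch; the real attrs hash uses dnnl's hashing helpers.
        return std::hash<std::size_t>()(k.blockSize) * 31 + std::hash<std::size_t>()(k.nSpatialDims);
    }
};

struct ExecutorSketch {
    explicit ExecutorSketch(const AttrsKey&) { std::cout << "built executor\n"; }
    void exec() const { std::cout << "exec\n"; }
};

int main() {
    std::unordered_map<AttrsKey, std::shared_ptr<ExecutorSketch>, AttrsKeyHash> cache;
    auto builder = [](const AttrsKey& key) { return std::make_shared<ExecutorSketch>(key); };

    AttrsKey key{2, 2};
    auto it = cache.find(key);
    if (it == cache.end()) {                    // cache miss: build once and store
        it = cache.emplace(key, builder(key)).first;
    }
    it->second->exec();
    std::cout << (cache.find(key) != cache.end()) << "\n";  // 1: a second lookup reuses it
    return 0;
}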

View File

@ -11,10 +11,11 @@
namespace ov {
namespace intel_cpu {
namespace node {
class MKLDNNDepthToSpaceNode : public MKLDNNNode {
class DepthToSpace : public Node {
public:
MKLDNNDepthToSpaceNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
DepthToSpace(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
void getSupportedDescriptors() override;
@ -45,7 +46,7 @@ private:
DepthToSpaceAttrs attrs;
struct DepthToSpaceExecutor {
DepthToSpaceExecutor(const DepthToSpaceAttrs& attrs);
void exec(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB);
void exec(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const int MB);
~DepthToSpaceExecutor() = default;
private:
@ -55,5 +56,6 @@ private:
executorPtr execPtr = nullptr;
};
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -12,9 +12,11 @@
#include "detection_output.h"
using namespace mkldnn;
using namespace ov::intel_cpu;
using namespace InferenceEngine;
namespace ov {
namespace intel_cpu {
namespace node {
namespace {
template <typename T>
@ -31,7 +33,7 @@ bool SortScorePairDescend<std::pair<int, int>>(const std::pair<float, std::pair<
} // namespace
bool MKLDNNDetectionOutputNode::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
bool DetectionOutput::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
try {
const auto doOp = ov::as_type_ptr<const ov::op::v8::DetectionOutput>(op);
if (!doOp) {
@ -49,8 +51,8 @@ bool MKLDNNDetectionOutputNode::isSupportedOperation(const std::shared_ptr<const
return true;
}
MKLDNNDetectionOutputNode::MKLDNNDetectionOutputNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
DetectionOutput::DetectionOutput(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
WeightsSharing::Ptr &cache) : Node(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
@ -90,7 +92,7 @@ MKLDNNDetectionOutputNode::MKLDNNDetectionOutputNode(const std::shared_ptr<ngrap
CodeType::CENTER_SIZE : CodeType::CORNER);
}
void MKLDNNDetectionOutputNode::prepareParams() {
void DetectionOutput::prepareParams() {
const auto& idPriorDims = getParentEdgeAt(ID_PRIOR)->getMemory().GetShape().getStaticDims();
const auto &idConfDims = getParentEdgeAt(ID_CONF)->getMemory().GetShape().getStaticDims();
priorsNum = static_cast<int>(idPriorDims.back() / priorSize);
@ -136,7 +138,7 @@ void MKLDNNDetectionOutputNode::prepareParams() {
numPriorsActual.resize(imgNum);
}
void MKLDNNDetectionOutputNode::initSupportedPrimitiveDescriptors() {
void DetectionOutput::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -162,11 +164,11 @@ struct ConfidenceComparatorDO {
const float* confData;
};
void MKLDNNDetectionOutputNode::executeDynamicImpl(mkldnn::stream strm) {
void DetectionOutput::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) {
void DetectionOutput::execute(mkldnn::stream strm) {
float *dstData = reinterpret_cast<float *>(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr());
const float *locData = reinterpret_cast<const float *>(getParentEdgeAt(ID_LOC)->getMemoryPtr()->GetPtr());
@ -341,7 +343,7 @@ void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) {
generateOutput(reorderedConfData, indicesData, detectionsData, decodedBboxesData, dstData);
}
inline void MKLDNNDetectionOutputNode::getActualPriorNum(const float *priorData, int* numPriorsActual, int n) {
inline void DetectionOutput::getActualPriorNum(const float *priorData, int* numPriorsActual, int n) {
numPriorsActual[n] = priorsNum;
if (!normalized) {
int num = 0;
@ -355,7 +357,7 @@ inline void MKLDNNDetectionOutputNode::getActualPriorNum(const float *priorData,
}
}
inline void MKLDNNDetectionOutputNode::confReorderDense(const float *confData, const float *ARMConfData, float *reorderedConfData) {
inline void DetectionOutput::confReorderDense(const float *confData, const float *ARMConfData, float *reorderedConfData) {
if (withAddBoxPred) {
parallel_for2d(imgNum, priorsNum, [&](size_t n, size_t p) {
if (ARMConfData[n * priorsNum * 2 + p * 2 + 1] < objScore) {
@ -380,7 +382,7 @@ inline void MKLDNNDetectionOutputNode::confReorderDense(const float *confData, c
});
}
inline void MKLDNNDetectionOutputNode::confFilterCF(float* reorderedConfData, int* indicesData, int* indicesBufData, int* detectionsData) {
inline void DetectionOutput::confFilterCF(float* reorderedConfData, int* indicesData, int* indicesBufData, int* detectionsData) {
parallel_for2d(imgNum, classesNum, [&](size_t n, size_t c) {
// in: reorderedConf
// out: pindices count
@ -409,7 +411,7 @@ inline void MKLDNNDetectionOutputNode::confFilterCF(float* reorderedConfData, in
// MX filter is a per-image filter; its maximum output size is the prior count (the max over all classes is selected within each prior)
// NMS runs per class, keep-top-k per image, and the final output is per class
inline void MKLDNNDetectionOutputNode::confFilterMX(const float* confData, const float* ARMConfData, float* reorderedConfData,
inline void DetectionOutput::confFilterMX(const float* confData, const float* ARMConfData, float* reorderedConfData,
int* indicesData, int* indicesBufData, int* detectionsData) {
for (int n = 0; n < imgNum; ++n) {
int offB = n * priorsNum * classesNum;
@ -471,7 +473,7 @@ inline void MKLDNNDetectionOutputNode::confFilterMX(const float* confData, const
}
}
inline void MKLDNNDetectionOutputNode::confReorderAndFilterSparsityCF(const float* confData, const float* ARMConfData, float* reorderedConfData,
inline void DetectionOutput::confReorderAndFilterSparsityCF(const float* confData, const float* ARMConfData, float* reorderedConfData,
int* indicesData, int* indicesBufData, int* detectionsData) {
int* reorderedConfDataIndices = reinterpret_cast<int*>(reorderedConfData);
for (int n = 0; n < imgNum; ++n) {
@ -554,7 +556,7 @@ inline void MKLDNNDetectionOutputNode::confReorderAndFilterSparsityCF(const floa
}
}
inline void MKLDNNDetectionOutputNode::confReorderAndFilterSparsityMX(const float* confData, const float* ARMConfData, float* reorderedConfData,
inline void DetectionOutput::confReorderAndFilterSparsityMX(const float* confData, const float* ARMConfData, float* reorderedConfData,
int* indicesData, int* indicesBufData, int* detectionsData) {
for (int n = 0; n < imgNum; ++n) {
int off = n * priorsNum * classesNum;
@ -614,7 +616,7 @@ inline void MKLDNNDetectionOutputNode::confReorderAndFilterSparsityMX(const floa
}
}
inline void MKLDNNDetectionOutputNode::decodeBBoxes(const float *priorData,
inline void DetectionOutput::decodeBBoxes(const float *priorData,
const float *locData,
const float *varianceData,
float *decodedBboxes,
@ -717,7 +719,7 @@ inline void MKLDNNDetectionOutputNode::decodeBBoxes(const float *priorData,
});
}
inline void MKLDNNDetectionOutputNode::topk(const int *indicesIn, int *indicesOut, const float *conf, int n, int k) {
inline void DetectionOutput::topk(const int *indicesIn, int *indicesOut, const float *conf, int n, int k) {
std::partial_sort_copy(indicesIn, indicesIn + n,
indicesOut, indicesOut + k,
ConfidenceComparatorDO(conf));
@ -760,7 +762,7 @@ static inline float JaccardOverlap(const float *decodedBbox,
return intersectSize / (bbox1Size + bbox2Size - intersectSize);
}
inline void MKLDNNDetectionOutputNode::NMSCF(int* indicesIn,
inline void DetectionOutput::NMSCF(int* indicesIn,
int& detections,
int* indicesOut,
const float* bboxes,
@ -787,7 +789,7 @@ inline void MKLDNNDetectionOutputNode::NMSCF(int* indicesIn,
}
}
inline void MKLDNNDetectionOutputNode::NMSMX(int* indicesIn,
inline void DetectionOutput::NMSMX(int* indicesIn,
int* detections,
int* indicesOut,
const float* bboxes,
@ -826,7 +828,7 @@ inline void MKLDNNDetectionOutputNode::NMSMX(int* indicesIn,
}
}
inline void MKLDNNDetectionOutputNode::generateOutput(float* reorderedConfData, int* indicesData, int* detectionsData, float* decodedBboxesData,
inline void DetectionOutput::generateOutput(float* reorderedConfData, int* indicesData, int* detectionsData, float* decodedBboxesData,
float* dstData) {
const auto& outDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims();
const int numResults = outDims[2];
@ -895,8 +897,10 @@ inline void MKLDNNDetectionOutputNode::generateOutput(float* reorderedConfData,
}
}
bool MKLDNNDetectionOutputNode::created() const {
return getType() == DetectionOutput;
bool DetectionOutput::created() const {
return getType() == Type::DetectionOutput;
}
REG_MKLDNN_PRIM_FOR(MKLDNNDetectionOutputNode, DetectionOutput)
} // namespace node
} // namespace intel_cpu
} // namespace ov
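
Aside from the renames above, DetectionOutput::topk selects the k highest-confidence candidates with std::partial_sort_copy and a confidence comparator. The following is a minimal standalone sketch of that selection pattern, not the plugin code itself; ConfidenceComparator and the sample values are illustrative.

#include <algorithm>
#include <cstdio>
#include <vector>

// Illustrative stand-in for ConfidenceComparatorDO: orders candidate indices
// by descending confidence score.
struct ConfidenceComparator {
    const float* conf;
    bool operator()(int lhs, int rhs) const { return conf[lhs] > conf[rhs]; }
};

int main() {
    const std::vector<float> conf = {0.1f, 0.9f, 0.4f, 0.7f};
    const std::vector<int> indicesIn = {0, 1, 2, 3};
    const int k = 2;
    std::vector<int> indicesOut(k);
    // Copies the k best indices (highest confidence first) into indicesOut.
    std::partial_sort_copy(indicesIn.begin(), indicesIn.end(),
                           indicesOut.begin(), indicesOut.end(),
                           ConfidenceComparator{conf.data()});
    for (int idx : indicesOut)
        std::printf("index %d, conf %.1f\n", idx, conf[idx]);
    return 0;
}

With the same kind of comparator, the topk helper above obtains the per-class or per-image top-k candidates before NMS.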

View File

@ -10,10 +10,11 @@
namespace ov {
namespace intel_cpu {
namespace node {
class MKLDNNDetectionOutputNode : public MKLDNNNode {
class DetectionOutput : public Node {
public:
MKLDNNDetectionOutputNode(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
DetectionOutput(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
@ -109,5 +110,6 @@ private:
std::string errorPrefix;
};
} // namespace node
} // namespace intel_cpu
} // namespace ov

View File

@ -5,7 +5,7 @@
#include <string>
#include <vector>
#include <cmath>
#include <extension_utils.h>
#include <dnnl_extension_utils.h>
#include "dft.h"
#include "ie_parallel.hpp"
@ -16,10 +16,13 @@
#include <ngraph/opsets/opset7.hpp>
using namespace mkldnn;
using namespace ov::intel_cpu;
using namespace InferenceEngine;
bool MKLDNNDFTNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
namespace ov {
namespace intel_cpu {
namespace node {
bool DFT::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
@ -38,8 +41,8 @@ bool MKLDNNDFTNode::isSupportedOperation(const std::shared_ptr<const ngraph::Nod
return true;
}
MKLDNNDFTNode::MKLDNNDFTNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
MKLDNNNode(op, eng, cache) {
DFT::DFT(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) :
Node(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
@ -74,9 +77,9 @@ MKLDNNDFTNode::MKLDNNDFTNode(const std::shared_ptr<ngraph::Node>& op, const mkld
inverse = std::dynamic_pointer_cast<ngraph::opset7::DFT>(op) == nullptr;
}
void MKLDNNDFTNode::getSupportedDescriptors() {}
void DFT::getSupportedDescriptors() {}
void MKLDNNDFTNode::initSupportedPrimitiveDescriptors() {
void DFT::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -226,7 +229,7 @@ void copyDataToOutputWithSignalSize(const float* input, const std::vector<size_t
} // namespace
void MKLDNNDFTNode::execute(mkldnn::stream strm) {
void DFT::execute(mkldnn::stream strm) {
auto axesEdge = getParentEdgeAt(AXES_INDEX);
const auto* axesStartPtr = reinterpret_cast<const int32_t*>(axesEdge->getMemoryPtr()->GetPtr());
axes = std::vector<int32_t>(axesStartPtr, axesStartPtr + axesEdge->getMemory().getStaticDims()[0]);
@ -273,7 +276,7 @@ void MKLDNNDFTNode::execute(mkldnn::stream strm) {
}
}
void MKLDNNDFTNode::dftNd(float* output, const std::vector<size_t>& outputStrides) const {
void DFT::dftNd(float* output, const std::vector<size_t>& outputStrides) const {
const std::vector<size_t> iterationRange(outputShape.begin(), outputShape.end() - 1);
const size_t lastDimIndex = iterationRange.size() - 1;
for (size_t axisIndex = 0; axisIndex < axes.size(); ++axisIndex) {
@ -307,7 +310,7 @@ void MKLDNNDFTNode::dftNd(float* output, const std::vector<size_t>& outputStride
}
/* Cooley-Tukey implementation of FFT */
void MKLDNNDFTNode::fft(float* data, int64_t dataLength, bool parallelize) const {
void DFT::fft(float* data, int64_t dataLength, bool parallelize) const {
static int cacheSizeL3 = utils::get_cache_size(3, false);
static int elementsPerCacheLine = cacheSizeL3 / sizeof(float);
std::vector<float> bufferVector(dataLength * 2, 0);
@ -368,7 +371,7 @@ void MKLDNNDFTNode::fft(float* data, int64_t dataLength, bool parallelize) const
}
}
void MKLDNNDFTNode::naiveDFT(float* data, size_t dataLength) const {
void DFT::naiveDFT(float* data, size_t dataLength) const {
std::vector<float> outputBuffer(dataLength);
const size_t nComplex = dataLength / 2;
const auto& twiddles = twiddlesMap.find(nComplex)->second;
@ -401,7 +404,7 @@ void MKLDNNDFTNode::naiveDFT(float* data, size_t dataLength) const {
cpu_memcpy(data, outputBuffer.data(), dataLength * sizeof(float));
}
std::vector<std::pair<float, float>> MKLDNNDFTNode::generateTwiddles(size_t n_complex) const {
std::vector<std::pair<float, float>> DFT::generateTwiddles(size_t n_complex) const {
std::vector<std::pair<float, float>> twiddles(n_complex * n_complex);
parallel_for(n_complex, [&](const size_t k) {
for (size_t n = 0; n < n_complex; ++n) {
@ -414,11 +417,12 @@ std::vector<std::pair<float, float>> MKLDNNDFTNode::generateTwiddles(size_t n_co
return twiddles;
}
bool MKLDNNDFTNode::created() const {
return getType() == DFT;
bool DFT::created() const {
return getType() == Type::DFT;
}
void MKLDNNDFTNode::createPrimitive() {}
void DFT::createPrimitive() {}
REG_MKLDNN_PRIM_FOR(MKLDNNDFTNode, DFT)
} // namespace node
} // namespace intel_cpu
} // namespace ov
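
For context, the DFT node above keeps two code paths: DFT::fft (a Cooley-Tukey FFT) and DFT::naiveDFT, which applies precomputed twiddle factors to interleaved (real, imaginary) float data. The sketch below is a minimal standalone version of the direct O(N^2) transform only, assuming the same interleaved layout; the function name, the inverse flag, and the sample values are illustrative, not the plugin's API.

#include <cmath>
#include <cstdio>
#include <vector>

// Direct O(N^2) DFT over interleaved complex data [re0, im0, re1, im1, ...].
// sign = -1 for the forward transform, +1 (plus 1/N scaling) for the inverse.
static void naiveDFT(const std::vector<float>& in, std::vector<float>& out, bool inverse) {
    const float kPi = 3.14159265358979f;
    const size_t nComplex = in.size() / 2;
    const float sign = inverse ? 1.0f : -1.0f;
    out.assign(in.size(), 0.0f);
    for (size_t k = 0; k < nComplex; ++k) {
        for (size_t n = 0; n < nComplex; ++n) {
            // Twiddle factor e^{sign * 2*pi*i * k*n / N}
            const float angle = sign * 2.0f * kPi * k * n / nComplex;
            const float twRe = std::cos(angle);
            const float twIm = std::sin(angle);
            // Complex multiply-accumulate: out[k] += in[n] * twiddle
            out[2 * k]     += in[2 * n] * twRe - in[2 * n + 1] * twIm;
            out[2 * k + 1] += in[2 * n] * twIm + in[2 * n + 1] * twRe;
        }
    }
    if (inverse)
        for (float& v : out) v /= static_cast<float>(nComplex);
}

int main() {
    // Four complex samples: 1, 0, -1, 0
    const std::vector<float> signal = {1.0f, 0.0f, 0.0f, 0.0f, -1.0f, 0.0f, 0.0f, 0.0f};
    std::vector<float> spectrum;
    naiveDFT(signal, spectrum, /*inverse=*/false);
    for (size_t k = 0; k < spectrum.size() / 2; ++k)
        std::printf("bin %zu: (%.2f, %.2f)\n", k, spectrum[2 * k], spectrum[2 * k + 1]);
    return 0;
}

DFT::generateTwiddles above precomputes exactly these (cos, sin) pairs for a given nComplex so the inner loop does not recompute them, and DFT::fft provides the faster Cooley-Tukey path for the lengths it supports.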

View File

@ -10,11 +10,12 @@
namespace ov {
namespace intel_cpu {
namespace node {
class MKLDNNDFTNode : public MKLDNNNode {
class DFT : public Node {
public:
MKLDNNDFTNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
~MKLDNNDFTNode() override = default;
DFT(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache);
~DFT() override = default;
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
@ -43,5 +44,6 @@ private:
bool inverse;
};
} // namespace node
} // namespace intel_cpu
} // namespace ov

File diff suppressed because it is too large

Some files were not shown because too many files have changed in this diff