diff --git a/src/plugins/intel_cpu/src/exec_network.cpp b/src/plugins/intel_cpu/src/exec_network.cpp
index 3c5aa2167cb..f387918058c 100644
--- a/src/plugins/intel_cpu/src/exec_network.cpp
+++ b/src/plugins/intel_cpu/src/exec_network.cpp
@@ -5,6 +5,7 @@
 #include
 #include
 #include "exec_network.h"
+#include <low_precision/low_precision.hpp>
 #include "async_infer_request.h"
 #include "infer_request.h"
@@ -118,7 +119,6 @@ ExecNetwork::ExecNetwork(const InferenceEngine::CNNNetwork &network,
     } else {
         _callbackExecutor = _taskExecutor;
     }
-    int streams = std::max(1, _cfg.streamExecutorConfig._streams);
     std::vector<Task> tasks;
     tasks.resize(streams);
     _graphs.resize(streams);
@@ -177,12 +177,21 @@ ExecNetwork::GraphGuard::Lock ExecNetwork::GetGraph() const {
             std::exception_ptr exception;
             auto makeGraph = [&] {
                 try {
+                    GraphContext::Ptr ctx;
                     {
                         std::lock_guard<std::mutex> lock{*_mutex.get()};
-                        graphLock._graph.setConfig(_cfg);
+                        // disable weights caching if graph was created only once
+                        auto weightsCache =
+                            _cfg.streamExecutorConfig._streams != 1 ? _numaNodesWeights[numaNodeId] : nullptr;
+
+                        auto isQuantizedFlag =
+                            (_cfg.lpTransformsMode == Config::On) &&
+                            ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(_network.getFunction());
+
+                        ctx = std::make_shared<GraphContext>(_cfg, extensionManager, weightsCache, _mutex, isQuantizedFlag);
                     }
-                    graphLock._graph.CreateGraph(_network, extensionManager, _numaNodesWeights[numaNodeId], _mutex);
-                } catch(...) {
+                    graphLock._graph.CreateGraph(_network, ctx);
+                } catch (...) {
                     exception = std::current_exception();
                 }
             };
@@ -198,19 +207,6 @@ ExecNetwork::GraphGuard::Lock ExecNetwork::GetGraph() const {
     return graphLock;
 }
 
-void ExecNetwork::setProperty(const std::map<std::string, std::string> &properties) {
-    {
-        std::lock_guard<std::mutex> lock{*_mutex.get()};
-        _cfg.readProperties(properties);
-    }
-    for (auto& g : _graphs) {
-        auto graphLock = GraphGuard::Lock(g);
-        if (graphLock._graph.IsReady()) {
-            graphLock._graph.setProperty(properties);
-        }
-    }
-}
-
 InferenceEngine::IInferRequestInternal::Ptr ExecNetwork::CreateInferRequest() {
     return CreateAsyncInferRequestFromSync<AsyncInferRequest>();
 }
@@ -235,7 +231,7 @@ Parameter ExecNetwork::GetConfigLegacy(const std::string &name) const {
         IE_THROW() << "No graph was found";
     /* legacy implementation return all the parameters which is actually not correct
     * since they are not reconfigurable. Fixed for new API */
-    Config engConfig = GetGraph()._graph.getProperty();
+    Config engConfig = GetGraph()._graph.getConfig();
     auto option = engConfig._config.find(name);
     if (option != engConfig._config.end()) {
         return option->second;
@@ -268,12 +264,12 @@ InferenceEngine::Parameter ExecNetwork::GetMetricLegacy(const std::string &name,
         IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
     } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
         std::vector<std::string> configKeys;
-        for (auto && key : graph.getProperty()._config) {
+        for (auto && key : graph.getConfig()._config) {
             configKeys.push_back(key.first);
         }
         IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
     } else if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
-        Config engConfig = graph.getProperty();
+        Config engConfig = graph.getConfig();
         auto option = engConfig._config.find(CONFIG_KEY(CPU_THROUGHPUT_STREAMS));
         IE_ASSERT(option != engConfig._config.end());
         auto streams = std::stoi(option->second);
@@ -290,7 +286,7 @@ InferenceEngine::Parameter ExecNetwork::GetMetric(const std::string &name) const {
     // @todo Can't we just use local copy (_cfg) instead?
     auto graphLock = GetGraph();
     const auto& graph = graphLock._graph;
-    const auto& config = graph.getProperty();
+    const auto& config = graph.getConfig();
 
     if (isLegacyAPI()) {
         return GetMetricLegacy(name, graph);
diff --git a/src/plugins/intel_cpu/src/exec_network.h b/src/plugins/intel_cpu/src/exec_network.h
index 8b05b95e1a1..8b150958480 100644
--- a/src/plugins/intel_cpu/src/exec_network.h
+++ b/src/plugins/intel_cpu/src/exec_network.h
@@ -9,6 +9,7 @@
 #include "graph.h"
 #include "extension_mngr.h"
+#include "graph_context.h"
 #include
 #include
@@ -38,8 +39,6 @@ public:
                 const ExtensionManager::Ptr &extMgr,
                 const std::shared_ptr<InferenceEngine::IInferencePlugin>& plugin);
 
-    void setProperty(const std::map<std::string, std::string> &properties);
-
    InferenceEngine::Parameter GetConfig(const std::string &name) const override;
 
    InferenceEngine::Parameter GetMetric(const std::string &name) const override;
diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp
index faed0446379..16d83247246 100644
--- a/src/plugins/intel_cpu/src/graph.cpp
+++ b/src/plugins/intel_cpu/src/graph.cpp
@@ -67,27 +67,20 @@ namespace intel_cpu {
 
 typedef std::unordered_set<EdgePtr> edge_cluster_t;
 typedef std::vector<edge_cluster_t> edge_clusters_t;
 
-dnnl::engine Graph::eng(dnnl::engine::kind::cpu, 0);
-
 Graph::~Graph() {
     CPU_DEBUG_CAP_ENABLE(summary_perf(*this));
 }
 
 template<typename NET>
-void Graph::CreateGraph(NET &net, const ExtensionManager::Ptr& extMgr,
-                        WeightsSharing::Ptr &w_cache, const std::shared_ptr<std::mutex>& mutex) {
+void Graph::CreateGraph(NET &net, const GraphContext::CPtr ctx) {
     OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "CreateGraph");
 
     if (IsReady())
         ForgetGraphData();
 
-    // disable weights caching if graph was created only once
-    weightsCache = config.streamExecutorConfig._streams != 1 ? w_cache : nullptr;
-    rtParamsCache = std::make_shared<MultiCache>(config.rtCacheCapacity);
-    sharedMutex = mutex;
-    rtScratchPad = std::make_shared<DnnlScratchPad>(getEngine());
+    context = ctx;
 
-    Replicate(net, extMgr);
+    Replicate(net);
 
     InitGraph();
 
@@ -96,15 +89,12 @@ void Graph::CreateGraph(NET &net, const ExtensionManager::Ptr& extMgr,
 
 void Graph::CreateGraph(const std::vector<NodePtr> &graphNodes,
                         const std::vector<EdgePtr> &graphEdges,
-                        WeightsSharing::Ptr &w_cache,
+                        const GraphContext::CPtr ctx,
                         std::string name) {
     if (IsReady())
         ForgetGraphData();
 
-    // disable weights caching if graph was created only once
-    weightsCache = config.streamExecutorConfig._streams != 1 ? w_cache : nullptr;
-    rtParamsCache = std::make_shared<MultiCache>(config.rtCacheCapacity);
-    rtScratchPad = std::make_shared<DnnlScratchPad>(getEngine());
+    context = ctx;
 
     this->_name = std::move(name);
     this->reuse_io_tensors = false;
@@ -125,18 +115,13 @@ void Graph::CreateGraph(const std::vector<NodePtr> &graphNodes,
     CPU_DEBUG_CAP_ENABLE(serialize(*this));
 }
 
-template void Graph::CreateGraph(const std::shared_ptr<const ngraph::Function>&,
-                                 const ExtensionManager::Ptr&, WeightsSharing::Ptr&, const std::shared_ptr<std::mutex>& mutex);
-template void Graph::CreateGraph(const CNNNetwork&,
-                                 const ExtensionManager::Ptr&, WeightsSharing::Ptr&, const std::shared_ptr<std::mutex>& mutex);
+template void Graph::CreateGraph(const std::shared_ptr<const ngraph::Function>&, const GraphContext::CPtr);
+template void Graph::CreateGraph(const CNNNetwork&, const GraphContext::CPtr);
 
-void Graph::Replicate(const std::shared_ptr<const ngraph::Function> &subgraph, const ExtensionManager::Ptr& extMgr) {
+void Graph::Replicate(const std::shared_ptr<const ngraph::Function> &subgraph) {
     this->_name = "subgraph";
     this->reuse_io_tensors = false;
 
-    isQuantizedFlag = (config.lpTransformsMode == Config::On) &&
-                      ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(subgraph);
-
     // Map data object onto producer node
     std::map<std::shared_ptr<ngraph::Node>, NodePtr> op2node;
@@ -156,14 +141,7 @@
     };
 
     for (const auto op : subgraph->get_ordered_ops()) {
-        const NodePtr node {Node::factory().create(op, getEngine(), extMgr, weightsCache)};
-        if (isQuantized()) {
-            node->setQuantizedGraphFlag(true);
-        }
-
-        node->setRuntimeCache(rtParamsCache);
-        node->setSharedMutex(sharedMutex);
-        node->setRuntimeScratchPad(rtScratchPad);
+        const NodePtr node {Node::factory().create(op, context)};
 
         graphNodes.push_back(node);
@@ -208,15 +186,18 @@ void Graph::Replicate(const std::shared_ptr<const ngraph::Function> &subgraph, const ExtensionManager::Ptr& extMgr) {
         const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName();
         const NodePtr outNode = std::make_shared<node::Input>(parentNode->outputShapes[port],
                                                               parentNode->getOriginalOutputPrecisionAtPort(port),
-                                                              nodeName, "Result", getEngine(), weightsCache);
+                                                              nodeName, "Result", context);
         EdgePtr edge(new Edge(parentNode, outNode, port, 0));
         outNode->addEdge(edge);
         graphEdges.push_back(edge);
         graphNodes.push_back(outNode);
     }
+
+    if (getConfig().enforceBF16)
+        EnforceBF16();
 }
 
-void Graph::Replicate(const CNNNetwork &network, const ExtensionManager::Ptr& extMgr) {
+void Graph::Replicate(const CNNNetwork &network) {
     OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::intel_cpu_LT, "Graph::Replicate", "CNNNetwork");
 
     InputsDataMap inputsInfo = network.getInputsInfo();
@@ -228,12 +209,12 @@ void Graph::Replicate(const CNNNetwork &network, const ExtensionManager::Ptr& extMgr) {
     // we perform model cloning and reshaping on Replicate stage to preserve input/output information
     // it help to perform a graph compilation like in static case
     // and handle dynamic batch case in inference stage with minimal code changes
-    if (config.isNewApi && config.batchLimit > 0) {
+    if (getConfig().isNewApi && getConfig().batchLimit > 0) {
         auto upperBoundModel = ngraph::clone_function(*network.getFunction());
         std::map<ov::Output<ov::Node>, ov::PartialShape> newInShape;
         for (const auto& in : upperBoundModel->get_parameters()) {
             auto newShape = in->get_output_partial_shape(0);
-            newShape[0] = config.batchLimit;
+            newShape[0] = getConfig().batchLimit;
             newInShape[in] = newShape;
         }
         upperBoundModel->reshape(newInShape);
@@ -247,9 +228,6 @@
         IE_THROW() << "Function pointer inside CNNNetwork is nullptr";
     }
 
-    isQuantizedFlag = (config.lpTransformsMode == Config::On) &&
-                      ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(func);
-
     auto orderedOps = func->get_ordered_ops();
 
     // TODO [NM]: unordered_map is preferred from performance perspective. Needs hash for ngraph::Node
@@ -271,14 +249,7 @@
     // Replicate All Nodes in topological order
     for (const auto& op : orderedOps) {
-        const NodePtr node(Node::factory().create(op, getEngine(), extMgr, weightsCache));
-        if (isQuantized()) {
-            node->setQuantizedGraphFlag(true);
-        }
-
-        node->setRuntimeCache(rtParamsCache);
-        node->setSharedMutex(sharedMutex);
-        node->setRuntimeScratchPad(rtScratchPad);
+        const NodePtr node(Node::factory().create(op, context));
 
         graphNodes.push_back(node);
@@ -331,19 +302,16 @@
         const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName();
         const NodePtr outNode = std::make_shared<node::Input>(parentNode->outputShapes[port],
                                                               parentNode->getOriginalOutputPrecisionAtPort(port),
-                                                              nodeName, "Result", getEngine(), weightsCache);
+                                                              nodeName, "Result", context);
         EdgePtr edge(new Edge(parentNode, outNode, port, 0));
         outNode->addEdge(edge);
         graphEdges.push_back(edge);
         graphNodes.push_back(outNode);
     }
 
-    if (config.enforceBF16)
+    if (getConfig().enforceBF16)
         EnforceBF16();
 
-    if (config.fcSparseWeiDecompressionRate < 1.0f)
-        setMinSparseRate(config.fcSparseWeiDecompressionRate);
-
     auto hasSubgraphConsumers = [] (const NodePtr& node) -> bool {
         const auto & childEdges = node->getChildEdges();
         return std::any_of(childEdges.begin(), childEdges.end(),
@@ -469,6 +437,7 @@ void Graph::InitDescriptors() {
             if (inputNode)
                 inputNode->withMeanImage();
         }
+
         OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.getSupportedDescriptors);
 
         node->getSupportedDescriptors();
@@ -524,7 +493,7 @@ void Graph::ExtractConstantAndExecutableNodes() {
 void Graph::ExecuteConstantNodesOnly() const {
     OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::ExecuteConstantNodesOnly");
 
-    dnnl::stream stream(eng);
+    dnnl::stream stream(getEngine());
 
     using shared_memory_ptr = WeightsSharing::SharedMemory::Ptr;
@@ -537,7 +506,7 @@ void Graph::ExecuteConstantNodesOnly() const {
             auto edgePtr = node->getChildEdgeAt(i);
             if (edgePtr) {
                 if (edgePtr->isUseExternalMemory()) {
-                    auto ptr = weightsCache->get(edgePtr->name());
+                    auto ptr = context->getWeightsCache()->get(edgePtr->name());
                     outputs.emplace_back(ptr);
                     if (!ptr->isValid())
                         hasExternalInvalidEdges = true;
@@ -551,7 +520,7 @@ void Graph::ExecuteConstantNodesOnly() const {
     };
 
     for (const auto &node : constantGraphNodes) {
-        if (weightsCache) {
+        if (context->getWeightsCache()) {
             auto sharedOutputs = acquireSharedOutputs(node);
 
             if (std::get<0>(sharedOutputs) || std::get<1>(sharedOutputs)) {
@@ -636,7 +605,7 @@ void Graph::InitEdges() {
                                           inDesc.getPrecision().name() + "_" + outDesc.getPrecision().name();
 
         auto convertNode = std::make_shared<node::Convert>(inDesc.getShape(), inDesc.getPrecision(), outDesc.getPrecision(),
-                                                           convertName, this->getEngine(), this->weightsCache);
+                                                           convertName, context);
         convertNode->setDescs(inDesc, outDesc);
         InsertNode(edge, convertNode, true);
@@ -720,7 +689,7 @@ void Graph::AllocateWithReuse() {
                 auto constNode = std::static_pointer_cast<node::Input>(edge->getParent());
                 edge->reuse(std::const_pointer_cast<Memory>(constNode->getMemoryPtr()));
             } else {
-                edge->externalAllocate(weightsCache);
+                edge->externalAllocate(context->getWeightsCache());
             }
             erase = true;
         }
@@ -790,7 +759,7 @@ void Graph::AllocateWithReuse() {
     MemorySolver staticMemSolver(definedBoxes);
     size_t total_size = static_cast<size_t>(staticMemSolver.solve()) * alignment;
 
-    memWorkspace = std::make_shared<Memory>(eng);
+    memWorkspace = std::make_shared<Memory>(getEngine());
     memWorkspace->Create(DnnlBlockedMemoryDesc(InferenceEngine::Precision::I8, Shape(InferenceEngine::SizeVector{total_size})));
 
     if (edge_clusters.empty())
@@ -935,15 +904,15 @@ void Graph::PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in) {
     if (ext_data_ptr != inter_data_ptr) {
         auto ext_tdesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(in->getTensorDesc());
 
-        Memory ext_mem(eng);
+        Memory ext_mem(getEngine());
         ext_mem.Create(ext_tdesc, ext_data_ptr, false);
 
         // branch for handling dynamic batch feature in new API
-        if (getProperty().isNewApi && getProperty().batchLimit > 0 && ext_mem.getStaticDims()[0] != childEdge->getMemory().getStaticDims()[0]) {
+        if (getConfig().isNewApi && getConfig().batchLimit > 0 && ext_mem.getStaticDims()[0] != childEdge->getMemory().getStaticDims()[0]) {
             auto newDims = childEdge->getMemory().getStaticDims();
             newDims[0] = ext_mem.getStaticDims()[0];
 
-            Memory tmpMem(eng);
+            Memory tmpMem(getEngine());
             auto newDesc = childEdge->getMemory().getDesc().cloneWithNewDims(newDims, true);
             tmpMem.Create(newDesc, childEdge->getMemory().GetData(), false);
@@ -1006,7 +975,7 @@ void Graph::PullOutputData(BlobMap &out) {
         if (expectedDesc.getLayout() == InferenceEngine::Layout::BLOCKED) {
             expectedDesc = TensorDesc(expectedDesc.getPrecision(), expectedDesc.getLayout());
         }
-        if (getProperty().isNewApi && getProperty().batchLimit > 0) {
+        if (getConfig().isNewApi && getConfig().batchLimit > 0) {
            outDims[0] = node->batchToProcess();
        }
        out[name]->setShape(outDims);
@@ -1020,7 +989,7 @@
         auto srcPrec = actualDesc.getPrecision();
         auto dstPrec = expectedDesc.getPrecision();
 
-        if ((getProperty().isNewApi && !getProperty().batchLimit) && srcPrec == dstPrec && ext_blob->byteSize() != intr_blob.GetSize())
+        if ((getConfig().isNewApi && !getConfig().batchLimit) && srcPrec == dstPrec && ext_blob->byteSize() != intr_blob.GetSize())
             IE_THROW() << "Output blob byte size is not equal network output byte size ("
                        << ext_blob->byteSize() << "!=" << intr_blob.GetSize() << ").";
@@ -1036,15 +1005,15 @@
             auto outBlobDesc = expectedDesc.getLayout() == InferenceEngine::Layout::ANY ?
                     DnnlBlockedMemoryDesc(expectedDesc.getPrecision(), Shape(expectedDesc.getDims())) :
                     MemoryDescUtils::convertToDnnlBlockedMemoryDesc(expectedDesc);
-            Memory outBloMem(eng);
+            Memory outBloMem(getEngine());
             outBloMem.Create(outBlobDesc, ext_blob_ptr, false);
 
             // branch for handling dynamic batch feature in new API
-            if (getProperty().isNewApi && getProperty().batchLimit > 0 && outBloMem.getStaticDims()[0] != intr_blob.getStaticDims()[0]) {
+            if (getConfig().isNewApi && getConfig().batchLimit > 0 && outBloMem.getStaticDims()[0] != intr_blob.getStaticDims()[0]) {
                 auto newDims = intr_blob.getStaticDims();
                 newDims[0] = outBloMem.getStaticDims()[0];
 
-                Memory tmpMem(eng);
+                Memory tmpMem(getEngine());
                 auto newDesc = intr_blob.getDesc().cloneWithNewDims(newDims, true);
                 tmpMem.Create(newDesc, intr_blob.GetData(), false);
 
@@ -1056,8 +1025,8 @@
             size_t size_to_copy = intr_blob.GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
             // TODO: Should we support InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT???
             // TODO [DS]: phase 2: should we support this behaviour? Looks obsolete in the dynamic shapes paradigm
-            if (getProperty().batchLimit) {
-                if (node->isDynamicNode() && !getProperty().isNewApi) {
+            if (getConfig().batchLimit) {
+                if (node->isDynamicNode() && !getConfig().isNewApi) {
                     IE_THROW(NotImplemented) << "[DS] not implemented dynamic batch for node with dynamic shape";
                 }
                 int MB_to_process = node->batchToProcess();
@@ -1070,11 +1039,11 @@ }
 
 void Graph::InferStatic(InferRequestBase* request) {
-    dnnl::stream stream(eng);
+    dnnl::stream stream(getEngine());
 
     for (const auto& node : executableGraphNodes) {
-        VERBOSE(node, config.debugCaps.verbose);
-        PERF(node, config.collectPerfCounters);
+        VERBOSE(node, getConfig().debugCaps.verbose);
+        PERF(node, getConfig().collectPerfCounters);
 
         if (request)
             request->ThrowIfCanceled();
@@ -1083,7 +1052,7 @@ }
 
 void Graph::InferDynamic(InferRequestBase* request) {
-    dnnl::stream stream(eng);
+    dnnl::stream stream(getEngine());
 
     std::set<size_t> syncIndsWorkSet;
     for (const auto& nodeIndx : syncNodesInds) {
@@ -1160,8 +1129,8 @@
         updateNodes(stopIndx);
         for (; inferCounter < stopIndx; ++inferCounter) {
             auto& node = executableGraphNodes[inferCounter];
-            VERBOSE(node, config.debugCaps.verbose);
-            PERF(node, config.collectPerfCounters);
+            VERBOSE(node, getConfig().debugCaps.verbose);
+            PERF(node, getConfig().collectPerfCounters);
 
             if (request)
                 request->ThrowIfCanceled();
@@ -1171,7 +1140,8 @@ }
 
 inline void Graph::ExecuteNode(const NodePtr& node, const dnnl::stream& stream) const {
-    DUMP(node, config.debugCaps, infer_count);
+    DUMP(node, getConfig().debugCaps, infer_count);
+
     OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, node->profiling.execute);
 
     if (node->isDynamicNode()) {
@@ -1316,22 +1286,6 @@ void Graph::GetPerfData(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) const {
 }
 
-void Graph::setConfig(const Config &cfg) {
-    config = cfg;
-}
-
-const Config& Graph::getConfig() const {
-    return config;
-}
-
-void Graph::setProperty(const std::map<std::string, std::string>& properties) {
-    config.readProperties(properties);
-}
-
-Config Graph::getProperty() const {
-    return config;
-}
-
 void Graph::RemoveEdge(EdgePtr& edge) {
     for (auto it = graphEdges.begin(); it != graphEdges.end(); it++) {
         if ((*it) == edge) {
@@ -1479,7 +1433,7 @@ void Graph::RemoveDroppedEdges() {
 NodePtr Graph::InsertReorder(EdgePtr edge, std::string layerName, const MemoryDesc& inDesc, const MemoryDesc& outDesc,
                              bool isOptimized, const std::vector<int> & src_perm) {
-    NodePtr newReorder(new node::Reorder(layerName, getEngine(), weightsCache));
+    NodePtr newReorder(new node::Reorder(layerName, context));
     auto *reorderPtr = dynamic_cast<node::Reorder *>(newReorder.get());
     if (reorderPtr == nullptr) {
         IE_THROW() << "Graph::InsertReorder: Cannot cast to Reorder";
     }
@@ -1529,12 +1483,6 @@ bool Graph::InsertNode(NodePtr parent, NodePtr child, NodePtr node, int parentPort, int childPort, bool initNode) {
     afterNode->getParent()->childEdges.push_back(afterNode);
     child->parentEdges.push_back(afterNode);
 
-    if (isQuantized()) {
-        node->setQuantizedGraphFlag(true);
-    }
-    node->setRuntimeCache(rtParamsCache);
-    node->setRuntimeScratchPad(rtScratchPad);
-
     if (initNode) {
         node->getSupportedDescriptors();
         node->initSupportedPrimitiveDescriptors();
@@ -1553,7 +1501,7 @@ bool Graph::InsertNode(NodePtr parent, NodePtr child, NodePtr node, int parentPort, int childPort, bool initNode) {
 void Graph::EnforceBF16() {
     // Floating point parts of FP32 + INT8 or FP32 + BIN mixed precision models will be executed in BF16 precision
     // only if enforceBF16 flag was set manually because current performance is not good enough to enable it by default
-    if (!implication(isQuantized(), config.manualEnforceBF16))
+    if (!implication(context->isGraphQuantized(), getConfig().manualEnforceBF16))
         return;
 
     std::function<void(const NodePtr&, std::unordered_set<NodePtr>& skipNodes)> searchForNodesToSkip;
@@ -1594,6 +1542,9 @@ void Graph::EnforceBF16() {
             continue;
 
         if (node->getType() != Type::Input && node->getType() != Type::Output) {
+            DEBUG_LOG("#", node->getExecIndex(),
+                      " ", node->getName(),
+                      " is enforced to use BF16\n");
             for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) {
                 const auto &parent = node->getParentEdgesAtPort(i)[0]->getParent();
                 /* Skip BF16 enforcement for nodes after Constant Inputs for maintaining precision for fusing.
@@ -1616,14 +1567,6 @@ void Graph::EnforceBF16() {
     }
 }
 
-void Graph::setMinSparseRate(float minSparseRate) {
-    for (const auto &node : graphNodes) {
-        if (auto fcNodePtr = std::dynamic_pointer_cast<node::FullyConnected>(node)) {
-            fcNodePtr->setMinSparseRate(minSparseRate);
-        }
-    }
-}
-
 std::shared_ptr<ngraph::Function> Graph::dump() const {
     return dump_graph_as_ie_ngraph_net(*this);
 }
diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h
index f8ab799196b..ba4f4b0821e 100644
--- a/src/plugins/intel_cpu/src/graph.h
+++ b/src/plugins/intel_cpu/src/graph.h
@@ -12,6 +12,7 @@
 #include "edge.h"
 #include "cache/multi_cache.h"
 #include "dnnl_scratch_pad.h"
+#include "graph_context.h"
 #include
 #include
 #include
@@ -27,7 +28,6 @@ class InferRequest;
 class Graph {
 public:
     typedef std::shared_ptr<Graph> Ptr;
-    WeightsSharing::Ptr weightsCache;
 
     enum class Status {
         NotReady = 0,
@@ -42,21 +42,16 @@ public:
         return (status != Status::NotReady);
     }
 
-    void setConfig(const Config &cfg);
-    const Config& getConfig() const;
-
-    void setProperty(const std::map<std::string, std::string> &properties);
-    Config getProperty() const;
+    const Config & getConfig() const {
+        return context->getConfig();
+    }
 
     template<typename NET>
-    void CreateGraph(NET &network,
-                     const ExtensionManager::Ptr& extMgr,
-                     WeightsSharing::Ptr &w_cache,
-                     const std::shared_ptr<std::mutex>& mutex);
+    void CreateGraph(NET &network, const GraphContext::CPtr ctx);
 
     void CreateGraph(const std::vector<NodePtr> &graphNodes,
                      const std::vector<EdgePtr> &graphEdges,
-                     WeightsSharing::Ptr &w_cache,
+                     const GraphContext::CPtr ctx,
                      std::string name);
 
     bool hasMeanImageFor(const std::string& name) {
@@ -111,7 +106,11 @@ public:
     }
 
     dnnl::engine getEngine() const {
-        return eng;
+        return context->getEngine();
+    }
+
+    GraphContext::CPtr getGraphContext() const {
+        return context;
     }
 
     void GetPerfData(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) const;
@@ -187,10 +186,6 @@ public:
 
     void SortTopologically();
 
-    bool isQuantized() const {
-        return isQuantizedFlag;
-    }
-
     bool hasDynamicInput() const {
         return graphHasDynamicInput;
     }
@@ -200,7 +195,6 @@ protected:
     void ForgetGraphData() {
         status = Status::NotReady;
-        eng = dnnl::engine(dnnl::engine::kind::cpu, 0);
 
         inputNodesMap.clear();
         outputNodesMap.clear();
@@ -210,7 +204,6 @@ protected:
     }
     Status status { Status::NotReady };
-    Config config;
 
     // For dumping purposes. -1 - no counting, all other positive
     // values mean increment it within each Infer() call
@@ -226,13 +219,10 @@ protected:
 
     std::map<std::string, NormalizePreprocess> _normalizePreprocMap;
     std::string _name;
 
-    bool isQuantizedFlag = false;
     bool graphHasDynamicInput = false;
 
-    static dnnl::engine eng;
-
-    void Replicate(const InferenceEngine::CNNNetwork &network, const ExtensionManager::Ptr& extMgr);
-    void Replicate(const std::shared_ptr<const ngraph::Function> &subgraph, const ExtensionManager::Ptr& extMgr);
+    void Replicate(const InferenceEngine::CNNNetwork &network);
+    void Replicate(const std::shared_ptr<const ngraph::Function> &subgraph);
     void InitGraph();
     void InitNodes();
     void InitDescriptors();
@@ -263,13 +253,11 @@ private:
     std::vector<NodePtr> constantGraphNodes;
     std::vector<NodePtr> executableGraphNodes;
 
-    MultiCachePtr rtParamsCache;
-    std::shared_ptr<std::mutex> sharedMutex = nullptr;
-    DnnlScratchPadPtr rtScratchPad;
-
     std::unordered_map syncNodesInds;
 
+    GraphContext::CPtr context;
+
     void EnforceBF16();
-    void setMinSparseRate(float minSparseRate);
 };
 
 }   // namespace intel_cpu
diff --git a/src/plugins/intel_cpu/src/graph_context.cpp b/src/plugins/intel_cpu/src/graph_context.cpp
new file mode 100644
index 00000000000..e082f4e0028
--- /dev/null
+++ b/src/plugins/intel_cpu/src/graph_context.cpp
@@ -0,0 +1,13 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include
+#include "graph_context.h"
+
+namespace ov {
+namespace intel_cpu {
+
+dnnl::engine GraphContext::eng(dnnl::engine::kind::cpu, 0);
+
+}   // namespace intel_cpu
+}   // namespace ov
diff --git a/src/plugins/intel_cpu/src/graph_context.h b/src/plugins/intel_cpu/src/graph_context.h
new file mode 100644
index 00000000000..81dbeb8e1a4
--- /dev/null
+++ b/src/plugins/intel_cpu/src/graph_context.h
@@ -0,0 +1,82 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "cache/multi_cache.h"
+#include "config.h"
+#include "dnnl_scratch_pad.h"
+#include "extension_mngr.h"
+#include "weights_cache.hpp"
+
+namespace ov {
+namespace intel_cpu {
+
+class GraphContext {
+public:
+    typedef std::shared_ptr<GraphContext> Ptr;
+    typedef std::shared_ptr<const GraphContext> CPtr;
+
+    GraphContext(const Config& config,
+                 ExtensionManager::Ptr extensionManager,
+                 WeightsSharing::Ptr w_cache,
+                 std::shared_ptr<std::mutex> sharedMutex,
+                 bool isGraphQuantized)
+        : config(config),
+          extensionManager(extensionManager),
+          weightsCache(w_cache),
+          sharedMutex(sharedMutex),
+          isGraphQuantizedFlag(isGraphQuantized) {
+        rtParamsCache = std::make_shared<MultiCache>(config.rtCacheCapacity);
+        rtScratchPad = std::make_shared<DnnlScratchPad>(eng);
+    }
+
+    const Config& getConfig() const {
+        return config;
+    }
+
+    ExtensionManager::Ptr getExtensionManager() const {
+        return extensionManager;
+    }
+
+    WeightsSharing::Ptr getWeightsCache() const {
+        return weightsCache;
+    }
+
+    std::shared_ptr<std::mutex> getSharedMutex() const {
+        return sharedMutex;
+    }
+
+    MultiCachePtr getParamsCache() const {
+        return rtParamsCache;
+    }
+
+    DnnlScratchPadPtr getScratchPad() const {
+        return rtScratchPad;
+    }
+
+    dnnl::engine getEngine() const {
+        return eng;
+    }
+
+    bool isGraphQuantized() const {
+        return isGraphQuantizedFlag;
+    }
+
+private:
+    Config config;  // network-level config
+
+    ExtensionManager::Ptr extensionManager;
+    WeightsSharing::Ptr weightsCache;         // per NUMA node caches for sharing weights data
+    std::shared_ptr<std::mutex> sharedMutex;  // mutex for protection of type-relaxed Op in clone_model()
+
+    MultiCachePtr rtParamsCache;     // primitive cache
+    DnnlScratchPadPtr rtScratchPad;  // scratch pad
+
+    bool isGraphQuantizedFlag = false;
+    static dnnl::engine eng;  // onednn engine (singleton)
+};
+
+}  // namespace intel_cpu
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp
index 886f0d81b08..728fdaa752d 100644
--- a/src/plugins/intel_cpu/src/graph_optimizer.cpp
+++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp
@@ -2206,10 +2206,10 @@ void GraphOptimizer::reshapeRnnSeq(Graph &graph) {
                                                                    parentNode->getOutputShapeAtPort(0).toPartialShape()), secondInput);
             unsqueeze->set_friendly_name(parentNode->getName() + "_abc_a1bc_" + std::to_string(j));
 
-            const auto cpuUnsqueeze = std::make_shared<Reshape>(unsqueeze, graph.getEngine(), graph.weightsCache);
+            const auto cpuUnsqueeze = std::make_shared<Reshape>(unsqueeze, graph.getGraphContext());
             graph.InsertNode(parentNode, childNode, cpuUnsqueeze, edge->getInputNum(), edge->getOutputNum(), false);
 
-            const auto cpuConstant = std::make_shared<Input>(secondInput, graph.getEngine(), graph.weightsCache);
+            const auto cpuConstant = std::make_shared<Input>(secondInput, graph.getGraphContext());
             EdgePtr newEdge(new Edge(cpuConstant, cpuUnsqueeze, 0, 1));
             cpuUnsqueeze->addEdge(newEdge);
             auto &graphEdges = graph.GetEdges();
diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp
index 81efed34942..c90054f3331 100644
--- a/src/plugins/intel_cpu/src/infer_request.cpp
+++ b/src/plugins/intel_cpu/src/infer_request.cpp
@@ -160,7 +160,7 @@ void InferRequestBase::InferImpl() {
     if (graph->hasDynamicInput()) {
         redefineMemoryForInputNodes();
-    } else if (graph->getProperty().isNewApi && graph->getProperty().batchLimit > 0) {
+    } else if (graph->getConfig().isNewApi && graph->getConfig().batchLimit > 0) {
         const auto batch = _inputs.begin()->second->getTensorDesc().getDims()[0];
         SetBatch(batch);
     }
@@ -358,10 +358,10 @@ void LegacyInferRequest::initBlobs() {
 }
 
 void LegacyInferRequest::SetBatch(int new_batch) {
-    if (!graph->getProperty().enableDynamicBatch)
+    if (!graph->getConfig().enableDynamicBatch)
         IE_THROW() << "Dynamic batch is not enabled.";
 
-    if (new_batch < 1 || new_batch > graph->getProperty().batchLimit) {
+    if (new_batch < 1 || new_batch > graph->getConfig().batchLimit) {
         IE_THROW() << "Invalid dynamic batch size " << new_batch << " for this request.";
     }
 
@@ -433,7 +433,7 @@ void LegacyInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) {
     auto pBlobDesc = MemoryDescUtils::interpretAsBlobDesc(graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory());
     if (data->getTensorDesc() == pBlobDesc &&
-        graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) {
+        graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getConfig().batchLimit) {
         externalPtr[name] = data->buffer();
     } else if (externalPtr.find(name) != externalPtr.end()) {
         externalPtr.erase(name);
@@ -467,7 +467,7 @@ void LegacyInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) {
     auto pBlobDesc = MemoryDescUtils::interpretAsBlobDesc(graph->getOutputNodeByName(name)->getParentEdgesAtPort(0)[0]->getMemory());
     if (data->getTensorDesc() == pBlobDesc &&
-        !graph->getProperty().batchLimit) {
+        !graph->getConfig().batchLimit) {
         externalPtr[name] = data->buffer();
     } else if (externalPtr.find(name) != externalPtr.end()) {
         externalPtr.erase(name);
@@ -509,7 +509,7 @@ InferenceEngine::Blob::Ptr LegacyInferRequest::GetBlob(const std::string& name) {
             _inputs[name] = make_blob_with_precision(desc);
             _inputs[name]->allocate();
             if (pBlobDesc == desc &&
-                graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) {
+                graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getConfig().batchLimit) {
                 externalPtr[name] = _inputs[name]->buffer();
             }
         }
@@ -571,7 +571,7 @@ InferenceEngine::Blob::Ptr LegacyInferRequest::GetBlob(const std::string& name) {
         }
         _outputs[name] = data;
-        if (!externalPtr.count(name) && data->getTensorDesc() == pBlobDesc && !graph->getProperty().batchLimit) {
+        if (!externalPtr.count(name) && data->getTensorDesc() == pBlobDesc && !graph->getConfig().batchLimit) {
             externalPtr[name] = data->buffer();
         }
     }
@@ -627,11 +627,11 @@ void InferRequest::initBlobs() {
 }
 
 void InferRequest::SetBatch(int new_batch) {
-    if (!graph->getProperty().batchLimit || modelInputsMap.begin()->second->get_output_partial_shape(0).is_static()) {
+    if (!graph->getConfig().batchLimit || modelInputsMap.begin()->second->get_output_partial_shape(0).is_static()) {
         IE_THROW() << "Can't set batch for model that can't be executed via legacy dynamic batch or for static model";
     }
 
-    if (new_batch < 1 || new_batch > graph->getProperty().batchLimit) {
+    if (new_batch < 1 || new_batch > graph->getConfig().batchLimit) {
         IE_THROW() << "Can't set batch that is bigger than upper bound";
     }
@@ -704,7 +704,7 @@ void InferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) {
                                            blobDesc.getDims());
         }
         if (actualDesc->isCompatible(MemoryDescUtils::convertToCpuBlockedMemoryDesc(blobDesc)) &&
-            graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) {
+            graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getConfig().batchLimit) {
             externalPtr[name] = data->buffer();
         } else if (externalPtr.find(name) != externalPtr.end()) {
             externalPtr.erase(name);
@@ -736,7 +736,7 @@ void InferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) {
         }
 
         const auto &desc = graph->getOutputNodeByName(name)->getParentEdgesAtPort(0)[0]->getMemory().getDesc();
-        if (!isDynamic && blobDesc == MemoryDescUtils::convertToTensorDesc(desc) && !graph->getProperty().batchLimit) {
+        if (!isDynamic && blobDesc == MemoryDescUtils::convertToTensorDesc(desc) && !graph->getConfig().batchLimit) {
             externalPtr[name] = data->buffer();
         } else if (externalPtr.find(name) != externalPtr.end()) {
             externalPtr.erase(name);
@@ -784,7 +784,7 @@ InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) {
                 if (!isDynamic &&
                     desc == MemoryDescUtils::convertToTensorDesc(graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory().getDesc()) &&
-                    graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) {
+                    graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getConfig().batchLimit) {
                     externalPtr[name] = _inputs[name]->buffer();
                 }
             } else {
@@ -841,7 +841,7 @@ InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) {
             _outputs[name] = data;
             if (!isDynamic &&
                 !externalPtr.count(name) &&
                 data->getTensorDesc() == MemoryDescUtils::convertToTensorDesc(output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc()) &&
-                !graph->getProperty().batchLimit) {
+                !graph->getConfig().batchLimit) {
                 externalPtr[name] = data->buffer();
             }
         } else {
diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp
index d689ffd8852..62b714bbbb9 100644
--- a/src/plugins/intel_cpu/src/node.cpp
+++ b/src/plugins/intel_cpu/src/node.cpp
@@ -77,10 +77,19 @@ Node::NodesFactory & Node::factory() {
     return factoryInstance;
 }
 
-Node::Node(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &w_cache, const ShapeInferFactory& shapeInferFactory)
-    : selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown),
-      weightCache(w_cache), engine(eng), name(op->get_friendly_name()), typeStr(op->get_type_name()),
-      type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) {
+Node::Node(const std::shared_ptr<ngraph::Node>& op,
+           const GraphContext::CPtr ctx,
+           const ShapeInferFactory& shapeInferFactory)
+    : selectedPrimitiveDescriptorIndex(-1),
+      permanent(false),
+      temporary(false),
+      constant(ConstantType::Unknown),
+      context(ctx),
+      engine(ctx->getEngine()),
+      name(op->get_friendly_name()),
+      typeStr(op->get_type_name()),
+      type(TypeFromName(op->get_type_name())),
+      profiling(op->get_friendly_name()) {
     algorithm = Algorithm::Default;
     fusingPort = -1;
     const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name();
@@ -170,10 +179,18 @@ Node::Node(const std::shared_ptr<ngraph::Node>& op,
     }
 }
 
-Node::Node(const std::string& type, const std::string& name, const dnnl::engine& eng, WeightsSharing::Ptr &w_cache)
-    : selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown),
-      weightCache(w_cache), engine(eng), fusingPort(-1), name(name), typeStr(type),
-      type(TypeFromName(type)), profiling(name) {
+Node::Node(const std::string& type, const std::string& name, const GraphContext::CPtr ctx)
+    : selectedPrimitiveDescriptorIndex(-1),
+      permanent(false),
+      temporary(false),
+      constant(ConstantType::Unknown),
+      context(ctx),
+      engine(ctx->getEngine()),
+      fusingPort(-1),
+      name(name),
+      typeStr(type),
+      type(TypeFromName(type)),
+      profiling(name) {
     // TODO [NM]: What about filling inDims and outDims?
 }
@@ -795,6 +812,7 @@ void Node::prepareMemory(const std::vector<DnnlMemoryDescPtr>& intDescs) {
         };
 
         MemoryPtr ptr;
+        auto weightCache = context->getWeightsCache();
         if (weightCache != nullptr) {
             const uint64_t data_hash = weightCache->GetHashFunc().hash(
                 internalBlob->buffer(), internalBlob->byteSize());
@@ -1206,8 +1224,7 @@ InferenceEngine::Precision Node::getRuntimePrecision() const {
     return runtimePrecision;
 }
 
-Node* Node::NodesFactory::create(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng,
-                                 const ExtensionManager::Ptr& extMgr, WeightsSharing::Ptr &w_cache) {
+Node* Node::NodesFactory::create(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context) {
     // getExceptionDescWithoutStatus removes redundant information from the exception message. For instance, the NotImplemented
     // exception is generated in the form: full_path_to_src_file:line_number [ NOT_IMPLEMENTED ] reason.
    // An example for gather node:
@@ -1229,15 +1246,15 @@
     Node *newNode = nullptr;
     std::string errorMessage;
     {
-        std::unique_ptr<Node> ol(createNodeIfRegistered(intel_cpu, Type::Generic, op, eng, w_cache));
-        if (ol != nullptr && ol->created(extMgr))
+        std::unique_ptr<Node> ol(createNodeIfRegistered(intel_cpu, Type::Generic, op, context));
+        if (ol != nullptr && ol->created(context->getExtensionManager()))
             newNode = ol.release();
     }
 
     if (newNode == nullptr) {
         try {
-            std::unique_ptr<Node> ol(createNodeIfRegistered(intel_cpu, TypeFromName(op->get_type_name()), op, eng, w_cache));
-            if (ol != nullptr && ol->created(extMgr))
+            std::unique_ptr<Node> ol(createNodeIfRegistered(intel_cpu, TypeFromName(op->get_type_name()), op, context));
+            if (ol != nullptr && ol->created(context->getExtensionManager()))
                 newNode = ol.release();
         } catch (const InferenceEngine::Exception& ex) {
             if (dynamic_cast<const InferenceEngine::NotImplemented*>(&ex) != nullptr) {
@@ -1250,8 +1267,8 @@
 
     if (newNode == nullptr) {
         try {
-            std::unique_ptr<Node> ol(new Reference(op, eng, w_cache, errorMessage));
-            if (ol != nullptr && ol->created(extMgr))
+            std::unique_ptr<Node> ol(new Reference(op, context, errorMessage));
+            if (ol != nullptr && ol->created(context->getExtensionManager()))
                 newNode = ol.release();
         } catch (const InferenceEngine::Exception& ex) {
             if (dynamic_cast<const InferenceEngine::NotImplemented*>(&ex) != nullptr) {
@@ -1264,19 +1281,6 @@
         }
     }
 
-    // WA-start : TI node requires all attributes to construct internal subgpath
-    // including extManager, socket and dnnl::eng.
-    if (newNode) {
-        if (newNode->getType() == Type::TensorIterator) {
-            if (auto ti = dynamic_cast<TensorIterator*>(newNode))
-                ti->setExtManager(extMgr);
-        } else if (newNode->getType() == Type::If) {
-            if (auto ifNode = dynamic_cast<If*>(newNode))
-                ifNode->setExtManager(extMgr);
-        }
-    }
-//    // WA-end
-
     if (!newNode) {
         std::string errorDetails;
         if (!errorMessage.empty()) {
diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h
index f41289cc525..05ca9886305 100644
--- a/src/plugins/intel_cpu/src/node.h
+++ b/src/plugins/intel_cpu/src/node.h
@@ -37,6 +37,7 @@
 #include "utils/debug_capabilities.h"
 #include "dnnl_postops_composer.h"
+#include "graph_context.h"
 
 namespace ov {
 namespace intel_cpu {
@@ -527,10 +528,6 @@ public:
         return false;
     }
 
-    void setQuantizedGraphFlag(bool flag) {
-        isInQuantizedGraph = flag;
-    }
-
     bool canBePerformedAsScaleShift(const Node *parentNode = nullptr) const;
 
     bool isDynamicNode() const {
@@ -573,18 +570,6 @@ public:
     virtual void appendPostOps(dnnl::post_ops& ops, const VectorDims& postOpDims, std::unordered_map<int, MemoryPtr>& postOpsMem, const int channelAxis = 1);
     virtual void appendPostOps(dnnl::post_ops& ops, const VectorDims& postOpDims, std::vector<const void*>& postOpsMem, const int channelAxis = 1);
 
-    void setRuntimeCache(MultiCachePtr cache) {
-        rtParamsCache = cache;
-    }
-
-    void setRuntimeScratchPad(DnnlScratchPadPtr scratchPad) {
-        rtScratchPad = scratchPad;
-    }
-
-    void setSharedMutex(const std::shared_ptr<std::mutex>& mutex) {
-        sharedMutex = mutex;
-    }
-
 protected:
     bool canFuseSimpleOperation(const NodePtr& node) const;
@@ -618,8 +603,8 @@ protected:
 
     std::string originalLayers;  // contains names of the original layers separated by comma
 
-    Node(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &w_cache, const ShapeInferFactory& shapeInferFactory);
-    Node(const std::string& type, const std::string& name, const dnnl::engine& eng, WeightsSharing::Ptr &w_cache);
+    Node(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr ctx, const ShapeInferFactory& shapeInferFactory);
+    Node(const std::string& type, const std::string& name, const GraphContext::CPtr ctx);
 
     int selectedPrimitiveDescriptorIndex = -1;
     bool permanent = false;
@@ -645,12 +630,10 @@ protected:
     Primitive prim;
     std::vector descs;
 
-    WeightsSharing::Ptr weightCache;
+    const GraphContext::CPtr context;
 
     Algorithm algorithm = Algorithm::Default;
 
-    bool isInQuantizedGraph = false;
-
     friend class Edge;
     friend class Graph;
     friend class GraphOptimizer;
@@ -715,17 +698,9 @@ protected:
         IE_THROW(NotImplemented) << "[DS] prapareParams not implemented for node with type " << NameFromType(getType());
     }
 
-    MultiCachePtr getRuntimeCache() const {
-        return rtParamsCache;
-    }
-
-    DnnlScratchPadPtr getRuntimeScratchPad() const {
-        return rtScratchPad;
-    }
-
     MemoryPtr getScratchPadMem(const const_dnnl_primitive_desc_t& pd) {
         auto scratchpadMemoryDesc = DnnlExtensionUtils::query_md(pd, dnnl::query::scratchpad_md);
-        scratchpadMem = getRuntimeScratchPad()->createScratchPadMem(scratchpadMemoryDesc);
+        scratchpadMem = context->getScratchPad()->createScratchPadMem(scratchpadMemoryDesc);
         return scratchpadMem;
     }
 
@@ -733,8 +708,6 @@ protected:
 
     std::shared_ptr shapeInference;
 
-    std::shared_ptr<std::mutex> sharedMutex = nullptr;
-
 private:
     std::vector<EdgeWeakPtr> parentEdges;
     std::vector<EdgeWeakPtr> childEdges;
@@ -744,7 +717,7 @@ private:
 
     int fusingPort;
 
-    dnnl::engine engine;
+    const dnnl::engine engine;
 
     std::string name;
     std::string typeStr;
@@ -756,8 +729,6 @@ private:
     PerfCount perfCounter;
     PerfCounters profiling;
 
-    MultiCachePtr rtParamsCache;
-    DnnlScratchPadPtr rtScratchPad;
     MemoryPtr scratchpadMem;
 
     bool isEdgesEmpty(const std::vector<EdgeWeakPtr>& edges) const;
@@ -796,19 +767,17 @@ constexpr uint64_t PortMask(int n, T... rest) {
 
 class Node::NodesFactory : public openvino::cc::Factory<Type, Node*(const std::shared_ptr<ngraph::Node>& op,
-                                                                    const dnnl::engine &,
-                                                                    WeightsSharing::Ptr &)> {
+                                                                    const GraphContext::CPtr)> {
 public:
     NodesFactory();
 
-    Node* create(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng,
-                 const ExtensionManager::Ptr& extMgr, WeightsSharing::Ptr &w_cache);
+    Node* create(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
 };
 
 template<typename NodeType>
 struct NodeImpl : public NodeType {
-    NodeImpl(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache)
-        : NodeType(op, eng, cache) {
+    NodeImpl(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
+        : NodeType(op, context) {
         NodeType::perfCounters().template buildClassCounters<NodeType>(NameFromType(NodeType::getType()));
     }
 };
diff --git a/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp b/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp
index 2775cbf9ce5..8ddbd234825 100644
--- a/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp
+++ b/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp
@@ -98,8 +98,8 @@ bool AdaptivePooling::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     }
     return true;
 }
 
-AdaptivePooling::AdaptivePooling(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng,
-                                 WeightsSharing::Ptr &cache) : Node(op, eng, cache, AdaptivePoolingShapeInferFactory(op)) {
+AdaptivePooling::AdaptivePooling(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
+    : Node(op, context, AdaptivePoolingShapeInferFactory(op)) {
     std::string errorMessage;
     if (isSupportedOperation(op, errorMessage)) {
         errorPrefix = "Adaptive Pooling layer with name '" + getName() + "' ";
diff --git a/src/plugins/intel_cpu/src/nodes/adaptive_pooling.h b/src/plugins/intel_cpu/src/nodes/adaptive_pooling.h
index 7345e37ffd0..d143796da63 100644
--- a/src/plugins/intel_cpu/src/nodes/adaptive_pooling.h
+++ b/src/plugins/intel_cpu/src/nodes/adaptive_pooling.h
@@ -16,7 +16,7 @@ namespace node {
 
 class AdaptivePooling : public Node {
 public:
-    AdaptivePooling(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
+    AdaptivePooling(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
 
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
diff --git a/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp b/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp
index 657d7dbdda1..3d02f322cdb 100644
--- a/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp
+++ b/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp
@@ -36,8 +36,8 @@ bool BatchToSpace::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     return true;
 }
 
-BatchToSpace::BatchToSpace(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng,
-                           WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, PortMask(1, 2, 3))) {
+BatchToSpace::BatchToSpace(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
+    : Node(op, context, NgraphShapeInferFactory(op, PortMask(1, 2, 3))) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
diff --git a/src/plugins/intel_cpu/src/nodes/batch_to_space.h b/src/plugins/intel_cpu/src/nodes/batch_to_space.h
index e8b22ffce4d..a0afe87edb1 100644
--- a/src/plugins/intel_cpu/src/nodes/batch_to_space.h
+++ b/src/plugins/intel_cpu/src/nodes/batch_to_space.h
@@ -16,7 +16,7 @@ namespace node {
 
 class BatchToSpace : public Node {
 public:
-    BatchToSpace(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
+    BatchToSpace(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
 
     void getSupportedDescriptors() override {};
     void initSupportedPrimitiveDescriptors() override;
diff --git
a/src/plugins/intel_cpu/src/nodes/bin_conv.cpp b/src/plugins/intel_cpu/src/nodes/bin_conv.cpp index 1ae7a332980..53100f5da2c 100644 --- a/src/plugins/intel_cpu/src/nodes/bin_conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/bin_conv.cpp @@ -897,9 +897,8 @@ bool BinaryConvolution::isSupportedOperation(const std::shared_ptr& op, - const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +BinaryConvolution::BinaryConvolution(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "BinaryConvolution node with name '" + getName() + "' "; diff --git a/src/plugins/intel_cpu/src/nodes/bin_conv.h b/src/plugins/intel_cpu/src/nodes/bin_conv.h index 18fa1a6a88f..2c6a909b0b6 100644 --- a/src/plugins/intel_cpu/src/nodes/bin_conv.h +++ b/src/plugins/intel_cpu/src/nodes/bin_conv.h @@ -77,7 +77,7 @@ struct jit_uni_bin_conv_kernel { class BinaryConvolution : public Node { public: - BinaryConvolution(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + BinaryConvolution(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override; void createPrimitive() override; diff --git a/src/plugins/intel_cpu/src/nodes/broadcast.cpp b/src/plugins/intel_cpu/src/nodes/broadcast.cpp index 0d4840b1882..1a716e6ffa8 100644 --- a/src/plugins/intel_cpu/src/nodes/broadcast.cpp +++ b/src/plugins/intel_cpu/src/nodes/broadcast.cpp @@ -49,8 +49,8 @@ bool Broadcast::isSupportedOperation(const std::shared_ptr& op, return true; } -Broadcast::Broadcast(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, PortMask(TARGET_SHAPE_IDX, AXES_MAPPING_IDX))) { +Broadcast::Broadcast(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, PortMask(TARGET_SHAPE_IDX, AXES_MAPPING_IDX))) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/broadcast.h b/src/plugins/intel_cpu/src/nodes/broadcast.h index 9f93ff3f788..5ece5d089bd 100644 --- a/src/plugins/intel_cpu/src/nodes/broadcast.h +++ b/src/plugins/intel_cpu/src/nodes/broadcast.h @@ -16,7 +16,7 @@ namespace node { class Broadcast : public Node, public TileBroadcastCommon { public: - Broadcast(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Broadcast(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/bucketize.cpp b/src/plugins/intel_cpu/src/nodes/bucketize.cpp index bd2456c978d..d39d574b0be 100644 --- a/src/plugins/intel_cpu/src/nodes/bucketize.cpp +++ b/src/plugins/intel_cpu/src/nodes/bucketize.cpp @@ -30,8 +30,8 @@ bool Bucketize::isSupportedOperation(const std::shared_ptr& return true; } -Bucketize::Bucketize(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, PassThroughShapeInferFactory()) { +Bucketize::Bucketize(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, PassThroughShapeInferFactory()) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << 
errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/bucketize.h b/src/plugins/intel_cpu/src/nodes/bucketize.h index bf17706e988..119eb2d9ba0 100644 --- a/src/plugins/intel_cpu/src/nodes/bucketize.h +++ b/src/plugins/intel_cpu/src/nodes/bucketize.h @@ -13,7 +13,7 @@ namespace node { class Bucketize : public Node { public: - Bucketize(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Bucketize(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/color_convert.cpp b/src/plugins/intel_cpu/src/nodes/color_convert.cpp index 9a452a134a3..846941eecaa 100644 --- a/src/plugins/intel_cpu/src/nodes/color_convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/color_convert.cpp @@ -1037,10 +1037,8 @@ bool ColorConvert::isSupportedOperation(const std::shared_ptr& op, - const dnnl::engine& eng, - WeightsSharing::Ptr &cache) - : Node(op, eng, cache, ColorConvertShapeInferFactory(op)) { +ColorConvert::ColorConvert(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, ColorConvertShapeInferFactory(op)) { std::string errorMessage; std::tie(algorithm, errorMessage) = getAlgorithmFor(op); if (algorithm == Algorithm::Default) diff --git a/src/plugins/intel_cpu/src/nodes/color_convert.h b/src/plugins/intel_cpu/src/nodes/color_convert.h index 65d911532c1..21695b675f9 100644 --- a/src/plugins/intel_cpu/src/nodes/color_convert.h +++ b/src/plugins/intel_cpu/src/nodes/color_convert.h @@ -16,9 +16,7 @@ namespace node { class ColorConvert : public Node { public: - ColorConvert(const std::shared_ptr& op, - const dnnl::engine& eng, - WeightsSharing::Ptr &cache); + ColorConvert(const std::shared_ptr& op, const GraphContext::CPtr context); class Converter; public: diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index 5eefad2c096..81dbe12ac74 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -49,8 +49,8 @@ bool Concat::isSupportedOperation(const std::shared_ptr& op, return true; } -Concat::Concat(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +Concat::Concat(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -282,7 +282,7 @@ void Concat::selectOptimalPrimitiveDescriptor() { maxCount = it.second; convertTo = it.first; } else if (it.second == maxCount) { - if (isInQuantizedGraph && it.first == LayoutType::nspc) { + if (context->isGraphQuantized() && it.first == LayoutType::nspc) { convertTo = it.first; } else if (it.first == LayoutType::nCsp8c || it.first == LayoutType::nCsp16c) { convertTo = it.first; diff --git a/src/plugins/intel_cpu/src/nodes/concat.h b/src/plugins/intel_cpu/src/nodes/concat.h index cf9db6079fe..f776bd0bbd1 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.h +++ b/src/plugins/intel_cpu/src/nodes/concat.h @@ -15,7 +15,7 @@ namespace node { class Concat : public Node { public: - Concat(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Concat(const std::shared_ptr& op, const GraphContext::CPtr context); static bool isSupportedOperation(const std::shared_ptr& 
op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index 6da41b63676..8343544f8cf 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -103,7 +103,7 @@ bool ConvKey::operator==(const ConvKey &rhs) const { class Convolution::FusedSubgraph { public: - FusedSubgraph(const std::vector &opList, const Convolution &conv, WeightsSharing::Ptr weightCache) { + FusedSubgraph(const std::vector &opList, const Convolution &conv, const GraphContext::CPtr context) { _graph = std::unique_ptr(new Graph()); std::unordered_set nodesSet; @@ -119,11 +119,11 @@ public: //Make inputs const auto &inpMemDesc1 = conv.getBaseMemDescAtOutputPort(0); - auto inp0 = std::make_shared(inpMemDesc1, "inp0", "Parameter", conv.getEngine(), weightCache); + auto inp0 = std::make_shared(inpMemDesc1, "inp0", "Parameter", context); inputs.push_back(inp0); const size_t sumPortNum = conv.getParentEdges().size() - 1; const auto &inpMemDesc2 = conv.getBaseMemDescAtInputPort(sumPortNum); - auto inp1 = std::make_shared(inpMemDesc2, "inp1", "Parameter", conv.getEngine(), weightCache); + auto inp1 = std::make_shared(inpMemDesc2, "inp1", "Parameter", context); inputs.push_back(inp1); auto itr = std::find_if(opList.begin(), opList.end(), [](const NodePtr &node) { @@ -162,13 +162,13 @@ public: //Make output const auto &outMemDesc = conv.getBaseMemDescAtOutputPort(0); - auto out = std::make_shared(outMemDesc, "out", "Result", conv.getEngine(), weightCache); + auto out = std::make_shared(outMemDesc, "out", "Result", context); addEdge(*parentItr, out, 0, 0); outputs.push_back(out); std::vector nodes(nodesSet.begin(), nodesSet.end()); - _graph->CreateGraph(nodes, edges, weightCache, "fused_subgraph"); + _graph->CreateGraph(nodes, edges, context, "fused_subgraph"); } std::shared_ptr getInput(size_t idx) const { @@ -222,8 +222,8 @@ bool Convolution::isSupportedOperation(const std::shared_ptr return true; } -Convolution::Convolution(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)), withBiases(false), withSum(false), withDWConv(false), +Convolution::Convolution(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)), withBiases(false), withSum(false), withDWConv(false), isGrouped(false), dw_conv_oc(0), dw_conv_ih(0), dw_conv_iw(0), dw_conv_in_dt(memory::data_type::undef), groupNum(1lu), IC(1), groupIC(1), groupOC(1), eltwisePrecision(Precision::FP32) { std::string errorMessage; @@ -1165,7 +1165,7 @@ bool Convolution::isNspcAvailable() const { using impl::cpu::x64::mayiuse; // do not use in non-quantized networks until it is enforced externally - if (!isInQuantizedGraph) { + if (!context->isGraphQuantized()) { auto predicate = [](memory::format_tag tag) { return one_of(tag, memory::format_tag::nwc, memory::format_tag::nhwc, memory::format_tag::ndhwc); }; @@ -1426,7 +1426,7 @@ void Convolution::prepareParams() { }; execPtr = nullptr; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); execPtr = result.first; @@ -1524,7 +1524,7 @@ void Convolution::redefineOutputMemory(const std::vector &newOutputS if (newOutputShapes.front() != sumInpMem.getStaticDims()) { withSumBroadcast = true; if (!subgraph) { - subgraph = 
std::make_shared(fusedWith, *this, weightCache); + subgraph = std::make_shared(fusedWith, *this, context); } auto inp0 = subgraph->getInput(0); inp0->redefineOutputMemory(newOutputShapes); diff --git a/src/plugins/intel_cpu/src/nodes/conv.h b/src/plugins/intel_cpu/src/nodes/conv.h index 63cea4ed6b8..963c15dda22 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.h +++ b/src/plugins/intel_cpu/src/nodes/conv.h @@ -19,7 +19,7 @@ class Eltwise; class Convolution : public Node { public: - Convolution(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Convolution(const std::shared_ptr& op, const GraphContext::CPtr context); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/convert.cpp b/src/plugins/intel_cpu/src/nodes/convert.cpp index 9d7a0637b5b..b1aa9442ff1 100644 --- a/src/plugins/intel_cpu/src/nodes/convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/convert.cpp @@ -31,8 +31,8 @@ bool Convert::isSupportedOperation(const std::shared_ptr& op return true; } -Convert::Convert(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, PassThroughShapeInferFactory()) { +Convert::Convert(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, PassThroughShapeInferFactory()) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "Convert node with name '" + getName() + "'"; @@ -45,8 +45,8 @@ Convert::Convert(const std::shared_ptr& op, const dnnl::engine& en } Convert::Convert(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc, - const std::string &nodeName, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node("Convert", nodeName, eng, cache) + const std::string &nodeName, const GraphContext::CPtr context) + : Node("Convert", nodeName, context) , origPrc(outPrc) { inputShapes.push_back(shape); addOriginalInputPrecision(inPrc); diff --git a/src/plugins/intel_cpu/src/nodes/convert.h b/src/plugins/intel_cpu/src/nodes/convert.h index 0fa8958d1a4..a74f5da7ffa 100644 --- a/src/plugins/intel_cpu/src/nodes/convert.h +++ b/src/plugins/intel_cpu/src/nodes/convert.h @@ -15,9 +15,9 @@ namespace node { class Convert : public Node { public: - Convert(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Convert(const std::shared_ptr& op, const GraphContext::CPtr context); Convert(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc, - const std::string &nodeName, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + const std::string &nodeName, const GraphContext::CPtr context); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp index ee02c11f6d3..b539c20869c 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp @@ -28,8 +28,8 @@ bool CTCGreedyDecoder::isSupportedOperation(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +CTCGreedyDecoder::CTCGreedyDecoder(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, 
EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.h b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.h index 8b51568fa36..eec5b6828d6 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.h +++ b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.h @@ -13,7 +13,7 @@ namespace node { class CTCGreedyDecoder : public Node { public: - CTCGreedyDecoder(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + CTCGreedyDecoder(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp index 29b3c4c151b..04f3acc0bc8 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp @@ -28,8 +28,8 @@ bool CTCGreedyDecoderSeqLen::isSupportedOperation(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +CTCGreedyDecoderSeqLen::CTCGreedyDecoderSeqLen(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.h b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.h index f463e7b5972..60e772cffb7 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.h +++ b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.h @@ -13,7 +13,7 @@ namespace node { class CTCGreedyDecoderSeqLen : public Node { public: - CTCGreedyDecoderSeqLen(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + CTCGreedyDecoderSeqLen(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp b/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp index 17a83a4014f..fcf9edf671d 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp @@ -27,8 +27,8 @@ bool CTCLoss::isSupportedOperation(const std::shared_ptr& op return true; } -CTCLoss::CTCLoss(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +CTCLoss::CTCLoss(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/ctc_loss.h b/src/plugins/intel_cpu/src/nodes/ctc_loss.h index 934a9f4245d..6360594a7c0 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_loss.h +++ b/src/plugins/intel_cpu/src/nodes/ctc_loss.h @@ -13,7 +13,7 @@ namespace node { class CTCLoss : public Node { public: - CTCLoss(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + CTCLoss(const std::shared_ptr& op, const GraphContext::CPtr context); void 
getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/cum_sum.cpp b/src/plugins/intel_cpu/src/nodes/cum_sum.cpp index 985d2fc3d6d..8adc4aa133b 100644 --- a/src/plugins/intel_cpu/src/nodes/cum_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/cum_sum.cpp @@ -32,8 +32,7 @@ bool CumSum::isSupportedOperation(const std::shared_ptr& op, return true; } -CumSum::CumSum(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +CumSum::CumSum(const std::shared_ptr& op, const GraphContext::CPtr context) : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/cum_sum.h b/src/plugins/intel_cpu/src/nodes/cum_sum.h index dee5dbba939..f38704d0340 100644 --- a/src/plugins/intel_cpu/src/nodes/cum_sum.h +++ b/src/plugins/intel_cpu/src/nodes/cum_sum.h @@ -13,7 +13,7 @@ namespace node { class CumSum : public Node { public: - CumSum(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + CumSum(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp index 070d0ca5dc4..d431a8ffd3c 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.cpp +++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp @@ -151,7 +151,7 @@ bool Deconvolution::isSupportedOperation(const std::shared_ptr& op, - const dnnl::engine& eng, WeightsSharing::Ptr &cache) : Node(op, eng, cache, DeconfolutionShapeInferFactory(op)) { + const GraphContext::CPtr context) : Node(op, context, DeconfolutionShapeInferFactory(op)) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "Deconvolution node with name '" + getName() + "'"; @@ -928,7 +928,7 @@ void Deconvolution::prepareParams() { }; execPtr = nullptr; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); execPtr = result.first; diff --git a/src/plugins/intel_cpu/src/nodes/deconv.h b/src/plugins/intel_cpu/src/nodes/deconv.h index 738704c7c35..7b6408e6b86 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.h +++ b/src/plugins/intel_cpu/src/nodes/deconv.h @@ -17,7 +17,7 @@ namespace node { class Deconvolution : public Node { public: - Deconvolution(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Deconvolution(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override; void createDescriptor(const std::vector& inputDesc, diff --git a/src/plugins/intel_cpu/src/nodes/def_conv.cpp b/src/plugins/intel_cpu/src/nodes/def_conv.cpp index bd7523126ec..05b92a24cca 100644 --- a/src/plugins/intel_cpu/src/nodes/def_conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/def_conv.cpp @@ -740,8 +740,8 @@ bool DefConvKey::operator==(const DefConvKey &rhs) const { } // namespace -DeformableConvolution::DeformableConvolution(const std::shared_ptr& op, - const dnnl::engine& eng, WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +DeformableConvolution::DeformableConvolution(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, 
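Note: every prepareParams() touched by this patch converges on the same lookup-or-build idiom against the shared parameter cache, as in the Deconvolution hunk above. A condensed sketch, with MyKey and MyExecutor as stand-ins for the node-specific key and executor types:

    // Shape of the caching idiom; MyKey must be hashable and
    // equality-comparable, MyExecutor is whatever the node executes.
    std::shared_ptr<MyExecutor> lookupOrBuild(const GraphContext::CPtr& context,
                                              const MyKey& key) {
        auto builder = [](const MyKey& key) {
            // Runs only on a cache miss; executor construction is the
            // expensive part that streams now share via the context.
            return std::make_shared<MyExecutor>(key);
        };
        auto cache = context->getParamsCache();
        auto result = cache->getOrCreate(key, builder);  // pair: {value, hit/miss info}
        return result.first;
    }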
context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -1221,7 +1221,7 @@ void DeformableConvolution::prepareParams() { execPtr = nullptr; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, [] (const DefConvKey& key) -> std::shared_ptr { if (key.implType == impl_desc_type::ref) { return std::make_shared(key.defConvAttr, key.descVector); diff --git a/src/plugins/intel_cpu/src/nodes/def_conv.h b/src/plugins/intel_cpu/src/nodes/def_conv.h index 414f8778379..dd2fc4cc1ce 100644 --- a/src/plugins/intel_cpu/src/nodes/def_conv.h +++ b/src/plugins/intel_cpu/src/nodes/def_conv.h @@ -71,7 +71,7 @@ struct jit_uni_def_conv_kernel { class DeformableConvolution : public Node { public: - DeformableConvolution(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + DeformableConvolution(const std::shared_ptr& op, const GraphContext::CPtr context); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp index ad9e8b59b8c..824c30ab260 100644 --- a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp +++ b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp @@ -67,8 +67,8 @@ bool DepthToSpace::isSupportedOperation(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +DepthToSpace::DepthToSpace(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -191,7 +191,7 @@ void DepthToSpace::prepareParams() { return std::make_shared(key); }; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(attrs, builder); if (!result.first) { IE_THROW() << "DepthToSpaceExecutor was not found for node " << getName() << "."; diff --git a/src/plugins/intel_cpu/src/nodes/depth_to_space.h b/src/plugins/intel_cpu/src/nodes/depth_to_space.h index c441ee56eb3..0642173adb0 100644 --- a/src/plugins/intel_cpu/src/nodes/depth_to_space.h +++ b/src/plugins/intel_cpu/src/nodes/depth_to_space.h @@ -15,7 +15,7 @@ namespace node { class DepthToSpace : public Node { public: - DepthToSpace(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + DepthToSpace(const std::shared_ptr& op, const GraphContext::CPtr context); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/detection_output.cpp b/src/plugins/intel_cpu/src/nodes/detection_output.cpp index 7aaccc5ef19..3f8c27e29cb 100644 --- a/src/plugins/intel_cpu/src/nodes/detection_output.cpp +++ b/src/plugins/intel_cpu/src/nodes/detection_output.cpp @@ -51,8 +51,8 @@ bool DetectionOutput::isSupportedOperation(const std::shared_ptr return true; } -DetectionOutput::DetectionOutput(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +DetectionOutput::DetectionOutput(const std::shared_ptr& op, const 
GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/detection_output.h b/src/plugins/intel_cpu/src/nodes/detection_output.h index a0539ea3607..9105bc06f34 100644 --- a/src/plugins/intel_cpu/src/nodes/detection_output.h +++ b/src/plugins/intel_cpu/src/nodes/detection_output.h @@ -14,7 +14,7 @@ namespace node { class DetectionOutput : public Node { public: - DetectionOutput(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + DetectionOutput(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/dft.cpp b/src/plugins/intel_cpu/src/nodes/dft.cpp index a0ca28253ca..d6e7d0f1394 100644 --- a/src/plugins/intel_cpu/src/nodes/dft.cpp +++ b/src/plugins/intel_cpu/src/nodes/dft.cpp @@ -44,8 +44,8 @@ bool DFT::isSupportedOperation(const std::shared_ptr& op, st return true; } -DFT::DFT(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) : - Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +DFT::DFT(const std::shared_ptr& op, const GraphContext::CPtr context) : + Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/dft.h b/src/plugins/intel_cpu/src/nodes/dft.h index 6186392dcab..9f3b017eebc 100644 --- a/src/plugins/intel_cpu/src/nodes/dft.h +++ b/src/plugins/intel_cpu/src/nodes/dft.h @@ -16,7 +16,7 @@ namespace node { class DFT : public Node { public: - DFT(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + DFT(const std::shared_ptr& op, const GraphContext::CPtr context); ~DFT() override = default; void getSupportedDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index 49a5b34face..c26985de8a7 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -1666,8 +1666,8 @@ bool Eltwise::isSupportedOperation(const std::shared_ptr& op return true; } -Eltwise::Eltwise(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) : - Node(op, eng, cache, EltwiseShapeInferFactory()), broadcastingPolicy(Undefined) { +Eltwise::Eltwise(const std::shared_ptr& op, const GraphContext::CPtr context) : + Node(op, context, EltwiseShapeInferFactory()), broadcastingPolicy(Undefined) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -2050,7 +2050,7 @@ void Eltwise::prepareParams() { } } - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, buildExecutor); execPtr = result.first; } diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.h b/src/plugins/intel_cpu/src/nodes/eltwise.h index 7d243f714ef..640c40cf92c 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.h +++ b/src/plugins/intel_cpu/src/nodes/eltwise.h @@ -90,7 +90,7 @@ public: using executorPtr = std::shared_ptr; public: - Eltwise(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Eltwise(const std::shared_ptr& op, const GraphContext::CPtr 
context); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.cpp b/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.cpp index 0d2dbb095b6..648599f61e4 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.cpp @@ -27,8 +27,9 @@ bool EmbeddingBagOffsetSum::isSupportedOperation(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)), EmbeddingBagSum(op, 3lu, 1lu, 4lu, 3lu) { +EmbeddingBagOffsetSum::EmbeddingBagOffsetSum(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)), + EmbeddingBagSum(op, 3lu, 1lu, 4lu, 3lu) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.h b/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.h index 92f12cca5f3..e5553ae727b 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.h +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.h @@ -17,7 +17,7 @@ namespace node { class EmbeddingBagOffsetSum : public Node, public EmbeddingBagSum { public: - EmbeddingBagOffsetSum(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + EmbeddingBagOffsetSum(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.cpp b/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.cpp index 6c877db591b..295dbd8df28 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.cpp @@ -27,8 +27,9 @@ bool EmbeddingBagPackedSum::isSupportedOperation(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)), EmbeddingBagSum(op, 2lu, 1lu, 2lu, 3lu) { +EmbeddingBagPackedSum::EmbeddingBagPackedSum(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)), + EmbeddingBagSum(op, 2lu, 1lu, 2lu, 3lu) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.h b/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.h index a979e14030a..42dac2b425c 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.h +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.h @@ -17,7 +17,7 @@ namespace node { class EmbeddingBagPackedSum : public Node, public EmbeddingBagSum { public: - EmbeddingBagPackedSum(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + EmbeddingBagPackedSum(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp b/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp index 6fe7e9060a3..93416be676e 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp +++ 
b/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp @@ -27,8 +27,9 @@ bool EmbeddingSegmentsSum::isSupportedOperation(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, PortMask(NUM_SEGMENTS_IDX))), EmbeddingBagSum(op, 4lu, 1lu, 5lu, 4lu) { +EmbeddingSegmentsSum::EmbeddingSegmentsSum(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, PortMask(NUM_SEGMENTS_IDX))), + EmbeddingBagSum(op, 4lu, 1lu, 5lu, 4lu) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.h b/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.h index 7909401948e..9e753067235 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.h +++ b/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.h @@ -17,7 +17,7 @@ namespace node { class EmbeddingSegmentsSum : public Node, public EmbeddingBagSum { public: - EmbeddingSegmentsSum(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + EmbeddingSegmentsSum(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp index bfc2db2d999..b5dd51c2034 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp @@ -236,9 +236,9 @@ bool ExperimentalDetectronDetectionOutput::isSupportedOperation(const std::share return true; } -ExperimentalDetectronDetectionOutput::ExperimentalDetectronDetectionOutput - (const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +ExperimentalDetectronDetectionOutput::ExperimentalDetectronDetectionOutput(const std::shared_ptr& op, + const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.h b/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.h index 4478298da39..62b71527576 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.h +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.h @@ -13,7 +13,7 @@ namespace node { class ExperimentalDetectronDetectionOutput : public Node { public: - ExperimentalDetectronDetectionOutput(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + ExperimentalDetectronDetectionOutput(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp index c71e038953e..941d3a8c3d7 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp +++ 
b/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp @@ -285,9 +285,10 @@ bool ExperimentalDetectronGenerateProposalsSingleImage::isSupportedOperation return true; } -ExperimentalDetectronGenerateProposalsSingleImage::ExperimentalDetectronGenerateProposalsSingleImage - (const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +ExperimentalDetectronGenerateProposalsSingleImage::ExperimentalDetectronGenerateProposalsSingleImage( + const std::shared_ptr& op, + const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.h b/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.h index d94b28ae6fa..31b47e22a1c 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.h +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.h @@ -14,7 +14,7 @@ namespace node { class ExperimentalDetectronGenerateProposalsSingleImage : public Node { public: ExperimentalDetectronGenerateProposalsSingleImage(const std::shared_ptr& op, - const dnnl::engine& eng, WeightsSharing::Ptr &cache); + const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp index f7342a7f0eb..6d26f1228df 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp @@ -28,9 +28,10 @@ bool ExperimentalDetectronPriorGridGenerator::isSupportedOperation(const std::sh return true; } -ExperimentalDetectronPriorGridGenerator::ExperimentalDetectronPriorGridGenerator - (const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +ExperimentalDetectronPriorGridGenerator::ExperimentalDetectronPriorGridGenerator( + const std::shared_ptr& op, + const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.h b/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.h index 37dafbbbb1a..52a3a988109 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.h +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.h @@ -13,7 +13,7 @@ namespace node { class ExperimentalDetectronPriorGridGenerator : public Node { public: - ExperimentalDetectronPriorGridGenerator(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + ExperimentalDetectronPriorGridGenerator(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git 
a/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp index 7a4329e5073..d095adcd8c4 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp @@ -324,9 +324,10 @@ bool ExperimentalDetectronROIFeatureExtractor::isSupportedOperation(const std::s return true; } -ExperimentalDetectronROIFeatureExtractor::ExperimentalDetectronROIFeatureExtractor - (const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +ExperimentalDetectronROIFeatureExtractor::ExperimentalDetectronROIFeatureExtractor( + const std::shared_ptr& op, + const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.h b/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.h index 011bd60e9a6..8aadae8fe8c 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.h +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.h @@ -13,7 +13,7 @@ namespace node { class ExperimentalDetectronROIFeatureExtractor : public Node { public: - ExperimentalDetectronROIFeatureExtractor(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + ExperimentalDetectronROIFeatureExtractor(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp index 33b354936a1..c51f988cb72 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp @@ -30,8 +30,9 @@ bool ExperimentalDetectronTopKROIs::isSupportedOperation(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +ExperimentalDetectronTopKROIs::ExperimentalDetectronTopKROIs(const std::shared_ptr& op, + const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.h b/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.h index 17c7d401c10..bdbbd518850 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.h +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.h @@ -13,7 +13,7 @@ namespace node { class ExperimentalDetectronTopKROIs : public Node { public: - ExperimentalDetectronTopKROIs(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + ExperimentalDetectronTopKROIs(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp 
b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp index e4ccc7416d5..59a869a07fb 100644 --- a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp +++ b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp @@ -327,8 +327,8 @@ bool ExtractImagePatchesKey::operator==(const ExtractImagePatchesKey& rhs) const } } // namespace -ExtractImagePatches::ExtractImagePatches(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +ExtractImagePatches::ExtractImagePatches(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -398,7 +398,7 @@ void ExtractImagePatches::prepareParams() { key.prcSize); } }; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, buildExecutor); execPtr = result.first; } diff --git a/src/plugins/intel_cpu/src/nodes/extract_image_patches.h b/src/plugins/intel_cpu/src/nodes/extract_image_patches.h index 506d283d521..bdbb8d36962 100644 --- a/src/plugins/intel_cpu/src/nodes/extract_image_patches.h +++ b/src/plugins/intel_cpu/src/nodes/extract_image_patches.h @@ -44,7 +44,7 @@ struct jit_uni_extract_image_patches_kernel { class ExtractImagePatches : public Node { public: - ExtractImagePatches(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + ExtractImagePatches(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/eye.cpp b/src/plugins/intel_cpu/src/nodes/eye.cpp index 949d2fdc8da..b9776b12bd9 100644 --- a/src/plugins/intel_cpu/src/nodes/eye.cpp +++ b/src/plugins/intel_cpu/src/nodes/eye.cpp @@ -48,8 +48,7 @@ private: }; } // namespace -Eye::Eye(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, EyeShapeInferFactory(op)) { +Eye::Eye(const std::shared_ptr& op, const GraphContext::CPtr context) : Node(op, context, EyeShapeInferFactory(op)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/eye.h b/src/plugins/intel_cpu/src/nodes/eye.h index b0ed937d7fd..0bf22bf8a1a 100644 --- a/src/plugins/intel_cpu/src/nodes/eye.h +++ b/src/plugins/intel_cpu/src/nodes/eye.h @@ -22,7 +22,7 @@ public: static constexpr size_t BATCH_SHAPE = 3lu; public: - Eye(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Eye(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp index af2f0b8a1f0..6b740abc872 100644 --- a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp +++ b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp @@ -914,8 +914,8 @@ struct FakeQuantKey { }; } // namespace -FakeQuantize::FakeQuantize(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) : - Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +FakeQuantize::FakeQuantize(const std::shared_ptr& op, const GraphContext::CPtr context) : + Node(op, 
context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { algorithm = Algorithm::FQCommon; @@ -1433,7 +1433,7 @@ void FakeQuantize::prepareParams() { key.jqp.is_planar = srcDesc->hasLayoutType(LayoutType::ncsp) && one_of(srcDesc->getShape().getRank(), 3, 4, 5); key.jqp.op_type = getAlgorithm(); - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto buildExecutor = [](const FakeQuantKey& key) { return std::make_shared(key.jqp); }; diff --git a/src/plugins/intel_cpu/src/nodes/fake_quantize.h b/src/plugins/intel_cpu/src/nodes/fake_quantize.h index a7aa85738d3..a44f17139a7 100644 --- a/src/plugins/intel_cpu/src/nodes/fake_quantize.h +++ b/src/plugins/intel_cpu/src/nodes/fake_quantize.h @@ -68,7 +68,7 @@ struct jit_uni_quantize_kernel { class FakeQuantize : public Node { public: - FakeQuantize(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + FakeQuantize(const std::shared_ptr& op, const GraphContext::CPtr context); void initSupportedPrimitiveDescriptors() override; void getSupportedDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index 62d483b0536..05ec09bec13 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -113,13 +113,16 @@ bool FullyConnected::isSupportedOperation(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)), withBiases(false) { +FullyConnected::FullyConnected(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)), withBiases(false) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "FullyConnected node with name '" + getName() + "'"; withBiases = inputShapes.size() == 3; + + if (context->getConfig().fcSparseWeiDecompressionRate < 1.0f) + minSparseRate = context->getConfig().fcSparseWeiDecompressionRate; } else { IE_THROW(NotImplemented) << errorMessage; } @@ -330,7 +333,7 @@ void FullyConnected::prepareParams() { return execPtr; }; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); if (!result.first) { @@ -867,7 +870,7 @@ MemoryPtr FullyConnected::prepareWeightMemory(DnnlMemoryDescPtr weightDesc) { MemoryPtr _ptr = std::make_shared(getEngine()); _ptr->Create(weightDesc); - node::Reorder::reorderData(srcMemory, *_ptr, getRuntimeCache()); + node::Reorder::reorderData(srcMemory, *_ptr, context->getParamsCache()); return _ptr; }; @@ -878,6 +881,7 @@ MemoryPtr FullyConnected::prepareWeightMemory(DnnlMemoryDescPtr weightDesc) { if (privateWeightCache.end() != itr) { ptr = itr->second; } else { + auto weightCache = context->getWeightsCache(); if (weightCache != nullptr) { const std::string string_hash = getName() + "_" + format + "_" + std::to_string(blob->GetSize()) diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.h b/src/plugins/intel_cpu/src/nodes/fullyconnected.h index 91431365d59..c17948cecbf 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.h +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.h @@ -17,7 +17,7 @@ namespace node { class FullyConnected : public Node { public: - FullyConnected(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + FullyConnected(const 
std::shared_ptr& op, const GraphContext::CPtr context); std::vector getAvailableFormatsForDims(const Shape &dims) const override; void getSupportedDescriptors() override; @@ -59,8 +59,6 @@ public: void setDynamicBatchLim(int lim) override; - void setMinSparseRate(float sparseRate) { minSparseRate = sparseRate; } - private: void createDescriptorInternal(const dnnl::memory::desc &inputDesc, const dnnl::memory::desc &outputDesc); diff --git a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp index a9dcbd161b8..dd9318a7454 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather.cpp @@ -41,8 +41,9 @@ bool Gather::isSupportedOperation(const std::shared_ptr& op, std return true; } -Gather::Gather(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, PortMask(GATHER_AXIS))), batchDims(0) { +Gather::Gather(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, PortMask(GATHER_AXIS))), + batchDims(0) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/gather.h b/src/plugins/intel_cpu/src/nodes/gather.h index 378012b21a9..48b52f54cea 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.h +++ b/src/plugins/intel_cpu/src/nodes/gather.h @@ -17,7 +17,7 @@ namespace node { class Gather : public Node { public: - Gather(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Gather(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/gather_elements.cpp b/src/plugins/intel_cpu/src/nodes/gather_elements.cpp index d9ff55deec6..5b9ca097495 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_elements.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_elements.cpp @@ -32,8 +32,8 @@ bool GatherElements::isSupportedOperation(const std::shared_ptr& return true; } -GatherElements::GatherElements(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +GatherElements::GatherElements(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/gather_elements.h b/src/plugins/intel_cpu/src/nodes/gather_elements.h index f17f3d8d311..383fd67f3ec 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_elements.h +++ b/src/plugins/intel_cpu/src/nodes/gather_elements.h @@ -16,7 +16,7 @@ namespace node { class GatherElements : public Node { public: - GatherElements(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + GatherElements(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp index 290da63d10d..9e02a7e79a9 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp @@ -34,8 +34,8 @@ bool 
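Note: dropping setMinSparseRate() is the same consolidation seen elsewhere in this patch: nodes read tuning knobs from the context-owned config at construction time instead of exposing post-construction setters. The "before" line below is a hypothetical plugin-side call shown only for contrast; the "after" lines are taken from the FullyConnected constructor hunk above.

    // Before (hypothetical caller): the plugin mutated the node after creation.
    //     fcNode->setMinSparseRate(engConfig.fcSparseWeiDecompressionRate);
    // After: the node pulls the knob itself, once, in its constructor.
    if (context->getConfig().fcSparseWeiDecompressionRate < 1.0f)
        minSparseRate = context->getConfig().fcSparseWeiDecompressionRate;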
GatherND::isSupportedOperation(const std::shared_ptr& o return true; } -GatherND::GatherND(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +GatherND::GatherND(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/gather_nd.h b/src/plugins/intel_cpu/src/nodes/gather_nd.h index 361ad9da2a9..14438e6ca4c 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_nd.h +++ b/src/plugins/intel_cpu/src/nodes/gather_nd.h @@ -16,7 +16,7 @@ namespace node { class GatherND : public Node { public: - GatherND(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + GatherND(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/gather_tree.cpp b/src/plugins/intel_cpu/src/nodes/gather_tree.cpp index d8d3b27ab09..195a1e4824b 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_tree.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_tree.cpp @@ -30,8 +30,8 @@ bool GatherTree::isSupportedOperation(const std::shared_ptr& return true; } -GatherTree::GatherTree(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +GatherTree::GatherTree(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/gather_tree.h b/src/plugins/intel_cpu/src/nodes/gather_tree.h index a817fd7e162..ae55e399f9d 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_tree.h +++ b/src/plugins/intel_cpu/src/nodes/gather_tree.h @@ -13,7 +13,7 @@ namespace node { class GatherTree : public Node { public: - GatherTree(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + GatherTree(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/generate_proposals.cpp b/src/plugins/intel_cpu/src/nodes/generate_proposals.cpp index abbdcdd3ce5..ae8ed924e3e 100644 --- a/src/plugins/intel_cpu/src/nodes/generate_proposals.cpp +++ b/src/plugins/intel_cpu/src/nodes/generate_proposals.cpp @@ -289,8 +289,8 @@ bool GenerateProposals::isSupportedOperation return true; } -GenerateProposals::GenerateProposals(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, InternalDynShapeInferFactory()) { +GenerateProposals::GenerateProposals(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, InternalDynShapeInferFactory()) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/generate_proposals.h b/src/plugins/intel_cpu/src/nodes/generate_proposals.h index f422cade297..8fa912f8626 100644 --- a/src/plugins/intel_cpu/src/nodes/generate_proposals.h +++ 
b/src/plugins/intel_cpu/src/nodes/generate_proposals.h @@ -13,8 +13,7 @@ namespace node { class GenerateProposals : public Node { public: - GenerateProposals(const std::shared_ptr& op, - const dnnl::engine& eng, WeightsSharing::Ptr &cache); + GenerateProposals(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/generic.cpp b/src/plugins/intel_cpu/src/nodes/generic.cpp index 338762f52a9..9801d3136b4 100644 --- a/src/plugins/intel_cpu/src/nodes/generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/generic.cpp @@ -43,8 +43,8 @@ public: }; } // namespace -Generic::Generic(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, GenericShapeInferFactory()), ngraphOp(op) { +Generic::Generic(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, GenericShapeInferFactory()), ngraphOp(op) { } void Generic::getSupportedDescriptors() { diff --git a/src/plugins/intel_cpu/src/nodes/generic.h b/src/plugins/intel_cpu/src/nodes/generic.h index 024a1bd516f..422e5869889 100644 --- a/src/plugins/intel_cpu/src/nodes/generic.h +++ b/src/plugins/intel_cpu/src/nodes/generic.h @@ -18,7 +18,7 @@ namespace node { class Generic : public Node { public: - Generic(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Generic(const std::shared_ptr& op, const GraphContext::CPtr context); ~Generic() = default; void getSupportedDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp index a97a8590e9c..32f4b5ef431 100644 --- a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp +++ b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp @@ -34,8 +34,8 @@ bool GridSample::isSupportedOperation(const std::shared_ptr& op, return true; } -GridSample::GridSample(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, PortMask(1))) { +GridSample::GridSample(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, PortMask(1))) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.hpp b/src/plugins/intel_cpu/src/nodes/grid_sample.hpp index a35673b0482..041a09d7c71 100644 --- a/src/plugins/intel_cpu/src/nodes/grid_sample.hpp +++ b/src/plugins/intel_cpu/src/nodes/grid_sample.hpp @@ -17,7 +17,7 @@ namespace node { class GridSample : public Node { public: - GridSample(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + GridSample(const std::shared_ptr& op, const GraphContext::CPtr context); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override {}; diff --git a/src/plugins/intel_cpu/src/nodes/grn.cpp b/src/plugins/intel_cpu/src/nodes/grn.cpp index 37d9503b391..3ed0be77169 100644 --- a/src/plugins/intel_cpu/src/nodes/grn.cpp +++ b/src/plugins/intel_cpu/src/nodes/grn.cpp @@ -27,8 +27,8 @@ bool GRN::isSupportedOperation(const std::shared_ptr& op, st return true; } -GRN::GRN(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +GRN::GRN(const std::shared_ptr& op, const 
GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/grn.h b/src/plugins/intel_cpu/src/nodes/grn.h index 1f15423889a..97616b40af2 100644 --- a/src/plugins/intel_cpu/src/nodes/grn.h +++ b/src/plugins/intel_cpu/src/nodes/grn.h @@ -13,7 +13,7 @@ namespace node { class GRN : public Node { public: - GRN(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + GRN(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/if.cpp b/src/plugins/intel_cpu/src/nodes/if.cpp index 55130d59137..a827bd2f140 100644 --- a/src/plugins/intel_cpu/src/nodes/if.cpp +++ b/src/plugins/intel_cpu/src/nodes/if.cpp @@ -58,8 +58,8 @@ bool If::isSupportedOperation(const std::shared_ptr& op, std::st return true; } -If::If(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) : - Node(op, eng, cache, InternalDynShapeInferFactory()), ovOp(op) { +If::If(const std::shared_ptr& op, const GraphContext::CPtr context) : + Node(op, context, InternalDynShapeInferFactory()), ovOp(op) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -71,8 +71,8 @@ void If::getSupportedDescriptors() { const std::shared_ptr& thenBody = ifOp->get_then_body(); const std::shared_ptr& elseBody = ifOp->get_else_body(); - subGraphThen.CreateGraph(thenBody, ext_mng, weightCache, sharedMutex); - subGraphElse.CreateGraph(elseBody, ext_mng, weightCache, sharedMutex); + subGraphThen.CreateGraph(thenBody, context); + subGraphElse.CreateGraph(elseBody, context); const auto &inMapThen = subGraphThen.GetInputNodesMap(); for (const auto ¶m : ifOp->get_then_body()->get_parameters()) { diff --git a/src/plugins/intel_cpu/src/nodes/if.h b/src/plugins/intel_cpu/src/nodes/if.h index 1926747c190..0c39f261535 100644 --- a/src/plugins/intel_cpu/src/nodes/if.h +++ b/src/plugins/intel_cpu/src/nodes/if.h @@ -17,7 +17,7 @@ namespace node { class If : public Node { public: - If(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + If(const std::shared_ptr& op, const GraphContext::CPtr context); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void initSupportedPrimitiveDescriptors() override; @@ -27,8 +27,6 @@ public: void execute(dnnl::stream strm) override; bool isExecutable() const override { return true; } - void inline setExtManager(const ExtensionManager::Ptr& extMgr) { ext_mng = extMgr; } - protected: void executeDynamicImpl(dnnl::stream strm) override; bool needPrepareParams() const override { return false; }; diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index b1039589ac7..82c15a36f07 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -230,8 +230,8 @@ jit_has_subnormals_base::fn_t jit_has_subnormals_function() { } // namespace -Input::Input(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, PassThroughShapeInferFactory()) { +Input::Input(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, PassThroughShapeInferFactory()) { if 
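Note: the If changes show why the setExtManager() hook could be removed: the nested then/else bodies are now compiled against the parent's context, which already carries the extension manager along with the caches, config and shared mutex. Before/after call shape, with the removed argument list taken from the hunk above:

    // Before: shared resources threaded through by hand.
    //     subGraphThen.CreateGraph(thenBody, ext_mng, weightCache, sharedMutex);
    //     subGraphElse.CreateGraph(elseBody, ext_mng, weightCache, sharedMutex);
    // After: one handle carries all of them.
    subGraphThen.CreateGraph(thenBody, context);
    subGraphElse.CreateGraph(elseBody, context);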
(!one_of(op->get_type_info(), v0::Parameter::get_type_info_static(), v0::Constant::get_type_info_static(), @@ -351,6 +351,7 @@ void Input::cloneBlobIfRequired() { + "_" + ptr; }; + auto weightCache = context->getWeightsCache(); if (weightCache) { MemoryPtr ptr = *weightCache->findOrCreate(blobKey(), cloneBlob); memoryPtr = std::const_pointer_cast(ptr); @@ -363,9 +364,12 @@ void Input::cloneBlobIfRequired() { } } -Input::Input(const Shape& shape, const InferenceEngine::Precision &prc, const std::string &name, - const std::string &type, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(type, name, eng, cache) { +Input::Input(const Shape& shape, + const InferenceEngine::Precision& prc, + const std::string& name, + const std::string& type, + const GraphContext::CPtr context) + : Node(type, name, context) { constant = ConstantType::NoConst; if (getType() == Type::Input) { outputShapes.emplace_back(shape); @@ -376,9 +380,8 @@ Input::Input(const Shape& shape, const InferenceEngine::Precision &prc, const st } } -Input::Input(MemoryDescPtr memDesc, const std::string &name, const std::string &type, - const dnnl::engine &eng, WeightsSharing::Ptr &cache) : - Input(memDesc->getShape(), memDesc->getPrecision(), name, type, eng, cache) { +Input::Input(MemoryDescPtr memDesc, const std::string& name, const std::string& type, const GraphContext::CPtr context) + : Input(memDesc->getShape(), memDesc->getPrecision(), name, type, context) { extMemDesc = memDesc; } diff --git a/src/plugins/intel_cpu/src/nodes/input.h b/src/plugins/intel_cpu/src/nodes/input.h index f62012befda..2b328af5df9 100644 --- a/src/plugins/intel_cpu/src/nodes/input.h +++ b/src/plugins/intel_cpu/src/nodes/input.h @@ -15,11 +15,13 @@ namespace node { class Input : public Node { public: - Input(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); - Input(const Shape& shape, const InferenceEngine::Precision &prc, const std::string &name, - const std::string &type, const dnnl::engine& eng, WeightsSharing::Ptr &cache); - Input(MemoryDescPtr memDesc, const std::string &name, const std::string &type, const dnnl::engine& eng, - WeightsSharing::Ptr &cache); + Input(const std::shared_ptr& op, const GraphContext::CPtr context); + Input(const Shape& shape, + const InferenceEngine::Precision& prc, + const std::string& name, + const std::string& type, + const GraphContext::CPtr context); + Input(MemoryDescPtr memDesc, const std::string& name, const std::string& type, const GraphContext::CPtr context); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/interaction.cpp b/src/plugins/intel_cpu/src/nodes/interaction.cpp index 682b7a69279..80558c87feb 100644 --- a/src/plugins/intel_cpu/src/nodes/interaction.cpp +++ b/src/plugins/intel_cpu/src/nodes/interaction.cpp @@ -155,8 +155,8 @@ private: std::unordered_map> emitters; }; -Interaction::Interaction(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +Interaction::Interaction(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/interaction.h b/src/plugins/intel_cpu/src/nodes/interaction.h index bf942826b3f..2b62faf4fa3 100644 --- 
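Note: the weights cache is now fetched on demand instead of being stored per node. The findOrCreate() idiom visible in Input::cloneBlobIfRequired() above deduplicates constant blobs across streams; a condensed, illustrative wrapper (getOrShareBlob is an invented name, and per the hunk above the real key ends with the raw data pointer):

    // Deduplicate a constant blob across graphs sharing one GraphContext.
    MemoryPtr getOrShareBlob(const GraphContext::CPtr& context,
                             const std::function<std::string()>& blobKey,
                             const std::function<MemoryPtr()>& cloneBlob) {
        if (auto weightCache = context->getWeightsCache()) {
            // Another stream may already have materialized this constant;
            // findOrCreate returns the shared copy or runs cloneBlob once.
            return *weightCache->findOrCreate(blobKey(), cloneBlob);
        }
        return cloneBlob();  // single-stream case: the cache is a nullptr
    }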
a/src/plugins/intel_cpu/src/nodes/interaction.h +++ b/src/plugins/intel_cpu/src/nodes/interaction.h @@ -46,7 +46,7 @@ struct jit_uni_move_scale_kernel { class Interaction : public Node { public: - Interaction(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Interaction(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; void execute(dnnl::stream strm) override; diff --git a/src/plugins/intel_cpu/src/nodes/interpolate.cpp b/src/plugins/intel_cpu/src/nodes/interpolate.cpp index 59f6c630c36..5a37d60313b 100644 --- a/src/plugins/intel_cpu/src/nodes/interpolate.cpp +++ b/src/plugins/intel_cpu/src/nodes/interpolate.cpp @@ -1576,8 +1576,8 @@ private: }; } // namespace -Interpolate::Interpolate(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, InterpolateShapeInferFactory(op)) { +Interpolate::Interpolate(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, InterpolateShapeInferFactory(op)) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "Interpolate node with name '" + getName() + "'"; @@ -1920,7 +1920,7 @@ void Interpolate::prepareParams() { return executor; }; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, buildExecutor); execPtr = result.first; diff --git a/src/plugins/intel_cpu/src/nodes/interpolate.h b/src/plugins/intel_cpu/src/nodes/interpolate.h index 31495cfdd7a..b7d3c25be96 100644 --- a/src/plugins/intel_cpu/src/nodes/interpolate.h +++ b/src/plugins/intel_cpu/src/nodes/interpolate.h @@ -102,7 +102,7 @@ public: static constexpr int CUBIC_GRID_LEN = 4; public: - Interpolate(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Interpolate(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/log_softmax.cpp b/src/plugins/intel_cpu/src/nodes/log_softmax.cpp index 5b9ed1ef642..dd204edc83d 100644 --- a/src/plugins/intel_cpu/src/nodes/log_softmax.cpp +++ b/src/plugins/intel_cpu/src/nodes/log_softmax.cpp @@ -27,8 +27,8 @@ bool LogSoftmax::isSupportedOperation(const std::shared_ptr& return true; } -LogSoftmax::LogSoftmax(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +LogSoftmax::LogSoftmax(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/log_softmax.h b/src/plugins/intel_cpu/src/nodes/log_softmax.h index e945c9e3366..f2c02b21610 100644 --- a/src/plugins/intel_cpu/src/nodes/log_softmax.h +++ b/src/plugins/intel_cpu/src/nodes/log_softmax.h @@ -13,8 +13,7 @@ namespace node { class LogSoftmax : public Node { public: - LogSoftmax(const std::shared_ptr& op, - const dnnl::engine& eng, WeightsSharing::Ptr &cache); + LogSoftmax(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/lrn.cpp 
b/src/plugins/intel_cpu/src/nodes/lrn.cpp index 0cc11582516..94e21315236 100644 --- a/src/plugins/intel_cpu/src/nodes/lrn.cpp +++ b/src/plugins/intel_cpu/src/nodes/lrn.cpp @@ -106,8 +106,8 @@ bool Lrn::isSupportedOperation(const std::shared_ptr& op, st return true; } -Lrn::Lrn(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) : - Node(op, eng, cache, PassThroughShapeInferFactory()) { +Lrn::Lrn(const std::shared_ptr& op, const GraphContext::CPtr context) : + Node(op, context, PassThroughShapeInferFactory()) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "LRN node with name '" + getName() + "'"; @@ -195,7 +195,7 @@ void Lrn::prepareParams() { return std::make_shared(prim_desc); }; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); if (!result.first) { IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; diff --git a/src/plugins/intel_cpu/src/nodes/lrn.h b/src/plugins/intel_cpu/src/nodes/lrn.h index 5eb731a980c..1d5b047315c 100644 --- a/src/plugins/intel_cpu/src/nodes/lrn.h +++ b/src/plugins/intel_cpu/src/nodes/lrn.h @@ -16,7 +16,7 @@ namespace node { class Lrn : public Node { public: - Lrn(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Lrn(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override; void createDescriptor(const std::vector& inputDesc, diff --git a/src/plugins/intel_cpu/src/nodes/mathematics.cpp b/src/plugins/intel_cpu/src/nodes/mathematics.cpp index 24fd1b524a7..2b42fb706d2 100644 --- a/src/plugins/intel_cpu/src/nodes/mathematics.cpp +++ b/src/plugins/intel_cpu/src/nodes/mathematics.cpp @@ -39,8 +39,11 @@ bool Math::isSupportedOperation(const std::shared_ptr& op, s return true; } -Math::Math(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, PassThroughShapeInferFactory()), alpha(0.f), beta(0.f), gamma(0.f) { +Math::Math(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, PassThroughShapeInferFactory()), + alpha(0.f), + beta(0.f), + gamma(0.f) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/mathematics.h b/src/plugins/intel_cpu/src/nodes/mathematics.h index d748b0619b8..bf2cb14d859 100644 --- a/src/plugins/intel_cpu/src/nodes/mathematics.h +++ b/src/plugins/intel_cpu/src/nodes/mathematics.h @@ -13,7 +13,7 @@ namespace node { class Math : public Node { public: - Math(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Math(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/matmul.cpp b/src/plugins/intel_cpu/src/nodes/matmul.cpp index 4446253f8c6..6aa4d6392c7 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.cpp +++ b/src/plugins/intel_cpu/src/nodes/matmul.cpp @@ -110,8 +110,8 @@ bool MatMul::isSupportedOperation(const std::shared_ptr& op, return true; } -MatMul::MatMul(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) : - Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)), withBiases(false) { +MatMul::MatMul(const std::shared_ptr& op, const GraphContext::CPtr context) : + Node(op, context, 
NgraphShapeInferFactory(op, EMPTY_PORT_MASK)), withBiases(false) { std::string errorMessage; errorPrefix = "MatMul node with name '" + getName() + "'"; @@ -565,7 +565,7 @@ void MatMul::prepareParams() { return std::make_shared(prim_desc); }; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); if (!result.first) { diff --git a/src/plugins/intel_cpu/src/nodes/matmul.h b/src/plugins/intel_cpu/src/nodes/matmul.h index cbf0481acde..e687cf37595 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.h +++ b/src/plugins/intel_cpu/src/nodes/matmul.h @@ -17,7 +17,7 @@ namespace node { class MatMul : public Node { public: - MatMul(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + MatMul(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override; void createDescriptor(const std::vector& inputDesc, diff --git a/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp b/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp index 933a7c7702e..44cc5211a79 100644 --- a/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp +++ b/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp @@ -48,8 +48,8 @@ bool MatrixNms::isSupportedOperation(const std::shared_ptr& return true; } -MatrixNms::MatrixNms(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr& cache) - : Node(op, eng, cache, InternalDynShapeInferFactory()) { +MatrixNms::MatrixNms(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, InternalDynShapeInferFactory()) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/matrix_nms.h b/src/plugins/intel_cpu/src/nodes/matrix_nms.h index 4fa811b44dd..2a31e72375e 100644 --- a/src/plugins/intel_cpu/src/nodes/matrix_nms.h +++ b/src/plugins/intel_cpu/src/nodes/matrix_nms.h @@ -25,7 +25,7 @@ enum MatrixNmsDecayFunction { GAUSSIAN, LINEAR }; class MatrixNms : public Node { public: - MatrixNms(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr& cache); + MatrixNms(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp index 81ff1448cfe..f6f9942ef29 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.cpp +++ b/src/plugins/intel_cpu/src/nodes/memory.cpp @@ -48,8 +48,8 @@ bool MemoryOutput::isSupportedOperation(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) , MemoryNode(op) { +MemoryOutput::MemoryOutput(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) , MemoryNode(op) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -106,8 +106,8 @@ bool MemoryInput::isSupportedOperation(const std::shared_ptr return true; } -MemoryInput::MemoryInput(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Input(op, eng, cache), MemoryNode(op), dataStore(new Memory{eng}) { +MemoryInput::MemoryInput(const std::shared_ptr& op, const GraphContext::CPtr ctx) + : Input(op, ctx), MemoryNode(op), dataStore(new Memory{ctx->getEngine()}) { std::string errorMessage; if 
(!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/memory.hpp b/src/plugins/intel_cpu/src/nodes/memory.hpp index e433106cee4..ad53f344ccb 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.hpp +++ b/src/plugins/intel_cpu/src/nodes/memory.hpp @@ -61,7 +61,7 @@ public: class MemoryOutput : public Node, public MemoryNode { public: - MemoryOutput(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + MemoryOutput(const std::shared_ptr& op, const GraphContext::CPtr context); ~MemoryOutput() override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; @@ -86,7 +86,7 @@ public: class MemoryInput : public Input, public MemoryNode { public: - MemoryInput(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + MemoryInput(const std::shared_ptr& op, const GraphContext::CPtr context); ~MemoryInput() override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; diff --git a/src/plugins/intel_cpu/src/nodes/mha.cpp b/src/plugins/intel_cpu/src/nodes/mha.cpp index e0473e5f7d3..c0956d6db61 100644 --- a/src/plugins/intel_cpu/src/nodes/mha.cpp +++ b/src/plugins/intel_cpu/src/nodes/mha.cpp @@ -738,8 +738,8 @@ bool MHA::isSupportedOperation(const std::shared_ptr& op, std::s return true; } -MHA::MHA(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +MHA::MHA(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/mha.h b/src/plugins/intel_cpu/src/nodes/mha.h index b442728f777..a2487f440a4 100644 --- a/src/plugins/intel_cpu/src/nodes/mha.h +++ b/src/plugins/intel_cpu/src/nodes/mha.h @@ -127,7 +127,7 @@ struct jit_uni_convert_transpose_kernel { class MHA : public Node { public: - MHA(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + MHA(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp b/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp index 216be4cee56..392c6bc4615 100644 --- a/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp +++ b/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp @@ -42,8 +42,8 @@ bool MultiClassNms::isSupportedOperation(const std::shared_ptr& return true; } -MultiClassNms::MultiClassNms(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr& cache) - : Node(op, eng, cache, InternalDynShapeInferFactory()) { +MultiClassNms::MultiClassNms(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, InternalDynShapeInferFactory()) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/multiclass_nms.hpp b/src/plugins/intel_cpu/src/nodes/multiclass_nms.hpp index f5a06967248..20046ae4fd0 100644 --- a/src/plugins/intel_cpu/src/nodes/multiclass_nms.hpp +++ b/src/plugins/intel_cpu/src/nodes/multiclass_nms.hpp @@ -21,7 +21,7 @@ enum class 
MulticlassNmsSortResultType { class MultiClassNms : public Node { public: - MultiClassNms(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr& cache); + MultiClassNms(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/mvn.cpp b/src/plugins/intel_cpu/src/nodes/mvn.cpp index 41a6e3d17ee..41aec6c915a 100644 --- a/src/plugins/intel_cpu/src/nodes/mvn.cpp +++ b/src/plugins/intel_cpu/src/nodes/mvn.cpp @@ -1134,8 +1134,8 @@ bool MVN::isSupportedOperation(const std::shared_ptr& op, st return true; } -MVN::MVN(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +MVN::MVN(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -1353,7 +1353,7 @@ void MVN::prepareParams() { return executor; }; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); execPtr = result.first; } diff --git a/src/plugins/intel_cpu/src/nodes/mvn.h b/src/plugins/intel_cpu/src/nodes/mvn.h index ebd4feef42e..dc8bae9aa09 100644 --- a/src/plugins/intel_cpu/src/nodes/mvn.h +++ b/src/plugins/intel_cpu/src/nodes/mvn.h @@ -80,7 +80,7 @@ struct jit_uni_mvn_kernel { class MVN : public Node { public: - MVN(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + MVN(const std::shared_ptr& op, const GraphContext::CPtr context); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp index ad3f6f4e4b5..f9f500bbe29 100644 --- a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp +++ b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp @@ -575,24 +575,25 @@ bool NonMaxSuppression::isSupportedOperation(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, InternalDynShapeInferFactory()), isSoftSuppressedByIOU(false) { - std::string errorMessage; - if (!isSupportedOperation(op, errorMessage)) { - IE_THROW(NotImplemented) << errorMessage; - } +NonMaxSuppression::NonMaxSuppression(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, InternalDynShapeInferFactory()), + isSoftSuppressedByIOU(false) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } - errorPrefix = "NMS layer with name '" + op->get_friendly_name() + "' "; + errorPrefix = "NMS layer with name '" + op->get_friendly_name() + "' "; - if (getOriginalInputsNumber() < 2 || getOriginalInputsNumber() > 6) - IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber(); + if (getOriginalInputsNumber() < 2 || getOriginalInputsNumber() > 6) + IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber(); - if (getOriginalOutputsNumber() != 3) - IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber(); + if (getOriginalOutputsNumber() != 3) + IE_THROW() << 
errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber(); - if (const auto nms9 = std::dynamic_pointer_cast(op)) { - boxEncodingType = static_cast(nms9->get_box_encoding()); - sortResultDescending = nms9->get_sort_result_descending(); + if (const auto nms9 = std::dynamic_pointer_cast(op)) { + boxEncodingType = static_cast(nms9->get_box_encoding()); + sortResultDescending = nms9->get_sort_result_descending(); // TODO [DS NMS]: remove when nodes from models where nms is not last node in model supports DS } else if (const auto nmsIe = std::dynamic_pointer_cast(op)) { boxEncodingType = nmsIe->m_center_point_box ? NMSBoxEncodeType::CENTER : NMSBoxEncodeType::CORNER; diff --git a/src/plugins/intel_cpu/src/nodes/non_max_suppression.h b/src/plugins/intel_cpu/src/nodes/non_max_suppression.h index f83140749aa..a67cf092c17 100644 --- a/src/plugins/intel_cpu/src/nodes/non_max_suppression.h +++ b/src/plugins/intel_cpu/src/nodes/non_max_suppression.h @@ -64,7 +64,7 @@ struct jit_uni_nms_kernel { class NonMaxSuppression : public Node { public: - NonMaxSuppression(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + NonMaxSuppression(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/non_zero.cpp b/src/plugins/intel_cpu/src/nodes/non_zero.cpp index 68c191fbd00..d87e1e5845d 100644 --- a/src/plugins/intel_cpu/src/nodes/non_zero.cpp +++ b/src/plugins/intel_cpu/src/nodes/non_zero.cpp @@ -32,8 +32,8 @@ bool NonZero::isSupportedOperation(const std::shared_ptr& op return true; } -NonZero::NonZero(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, InternalDynShapeInferFactory()) { +NonZero::NonZero(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, InternalDynShapeInferFactory()) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "NonZero layer with name '" + getName() + "' "; diff --git a/src/plugins/intel_cpu/src/nodes/non_zero.h b/src/plugins/intel_cpu/src/nodes/non_zero.h index 7086f95da79..448bc637be0 100644 --- a/src/plugins/intel_cpu/src/nodes/non_zero.h +++ b/src/plugins/intel_cpu/src/nodes/non_zero.h @@ -18,7 +18,7 @@ namespace node { class NonZero : public Node { public: - NonZero(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + NonZero(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/normalize.cpp b/src/plugins/intel_cpu/src/nodes/normalize.cpp index 4bc883a96fc..c6cf4c240cf 100644 --- a/src/plugins/intel_cpu/src/nodes/normalize.cpp +++ b/src/plugins/intel_cpu/src/nodes/normalize.cpp @@ -752,8 +752,8 @@ bool NormalizeL2::isSupportedOperation(const std::shared_ptr return true; } -NormalizeL2::NormalizeL2(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) : - Node(op, eng, cache, PassThroughShapeInferFactory()) { +NormalizeL2::NormalizeL2(const std::shared_ptr& op, const GraphContext::CPtr context) : + Node(op, context, PassThroughShapeInferFactory()) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -918,7 +918,7 @@ void NormalizeL2::prepareParams() { return 
NormalizeL2Executor::getNormalizeL2Executor(key.attrs, key.kernel_attrs, key.dims); }; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); if (!result.first) { diff --git a/src/plugins/intel_cpu/src/nodes/normalize.h b/src/plugins/intel_cpu/src/nodes/normalize.h index d5d07285d6b..7eb80e17086 100644 --- a/src/plugins/intel_cpu/src/nodes/normalize.h +++ b/src/plugins/intel_cpu/src/nodes/normalize.h @@ -78,7 +78,7 @@ struct jit_uni_normalize_kernel { class NormalizeL2 : public Node { public: - NormalizeL2(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + NormalizeL2(const std::shared_ptr& op, const GraphContext::CPtr context); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override {}; diff --git a/src/plugins/intel_cpu/src/nodes/one_hot.cpp b/src/plugins/intel_cpu/src/nodes/one_hot.cpp index babae942763..b4cea4c4416 100644 --- a/src/plugins/intel_cpu/src/nodes/one_hot.cpp +++ b/src/plugins/intel_cpu/src/nodes/one_hot.cpp @@ -94,8 +94,8 @@ bool OneHot::isSupportedOperation(const std::shared_ptr& op, return true; } -OneHot::OneHot(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, OneHotShapeInferFactory(op)) { +OneHot::OneHot(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, OneHotShapeInferFactory(op)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/one_hot.h b/src/plugins/intel_cpu/src/nodes/one_hot.h index da0f5d751de..093fa66bfe0 100644 --- a/src/plugins/intel_cpu/src/nodes/one_hot.h +++ b/src/plugins/intel_cpu/src/nodes/one_hot.h @@ -17,7 +17,7 @@ namespace node { class OneHot : public Node { public: - OneHot(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + OneHot(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/pad.cpp b/src/plugins/intel_cpu/src/nodes/pad.cpp index 7389f67ee4e..0ed917b1db6 100644 --- a/src/plugins/intel_cpu/src/nodes/pad.cpp +++ b/src/plugins/intel_cpu/src/nodes/pad.cpp @@ -60,8 +60,8 @@ bool Pad::isSupportedOperation(const std::shared_ptr& op, st return true; } -Pad::Pad(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, NgraphShapeInferFactory(op, PortMask(PADS_BEGIN_ID, PADS_END_ID))) { +Pad::Pad(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, PortMask(PADS_BEGIN_ID, PADS_END_ID))) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/pad.h b/src/plugins/intel_cpu/src/nodes/pad.h index b481983f16b..4c662e3910d 100644 --- a/src/plugins/intel_cpu/src/nodes/pad.h +++ b/src/plugins/intel_cpu/src/nodes/pad.h @@ -14,7 +14,7 @@ namespace node { class Pad : public Node { public: - Pad(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Pad(const std::shared_ptr& op, const GraphContext::CPtr context); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; diff --git 
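
Taken together, the call sites in this diff pin down the surface of the new GraphContext: getParamsCache() in nearly every prepareParams(), getEngine() in MemoryInput, getSharedMutex() in Snippet, and isGraphQuantized() in ShuffleChannels (the latter two appear in later hunks). A sketch of that inferred interface follows; the accessor names come from the diff, while member types and everything else are assumptions (EngineSketch and ParamsCacheStub stand in for dnnl::engine and the params cache):

#include <memory>
#include <mutex>

struct EngineSketch {};     // stand-in for dnnl::engine
struct ParamsCacheStub {};  // stand-in for the runtime params cache

// Inferred from call sites only; not the actual class definition.
class GraphContextSketch {
public:
    using CPtr = std::shared_ptr<const GraphContextSketch>;

    const EngineSketch& getEngine() const { return m_engine; }
    std::shared_ptr<ParamsCacheStub> getParamsCache() const { return m_paramsCache; }
    std::shared_ptr<std::mutex> getSharedMutex() const { return m_sharedMutex; }
    bool isGraphQuantized() const { return m_isGraphQuantized; }

private:
    EngineSketch m_engine;
    std::shared_ptr<ParamsCacheStub> m_paramsCache;
    std::shared_ptr<std::mutex> m_sharedMutex;
    bool m_isGraphQuantized = false;
};

Since nodes receive a CPtr (a shared_ptr to const), they can read shared state but cannot reconfigure it, which is presumably what lets the same context be handed to every node and every nested graph without extra locking.
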
a/src/plugins/intel_cpu/src/nodes/pooling.cpp b/src/plugins/intel_cpu/src/nodes/pooling.cpp index 00a6f4aeaee..c8000c68d00 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/pooling.cpp @@ -139,8 +139,8 @@ bool Pooling::isSupportedOperation(const std::shared_ptr& op, st return true; } -Pooling::Pooling(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +Pooling::Pooling(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -378,7 +378,7 @@ void Pooling::prepareParams() { return std::make_shared(prim_desc); }; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); if (!result.first) { diff --git a/src/plugins/intel_cpu/src/nodes/pooling.h b/src/plugins/intel_cpu/src/nodes/pooling.h index 108c7407728..51bdeff023a 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.h +++ b/src/plugins/intel_cpu/src/nodes/pooling.h @@ -16,7 +16,7 @@ namespace node { class Pooling : public Node { public: - Pooling(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Pooling(const std::shared_ptr& op, const GraphContext::CPtr context); void createDescriptor(const std::vector& inputDesc, const std::vector& outputDesc) override; diff --git a/src/plugins/intel_cpu/src/nodes/priorbox.cpp b/src/plugins/intel_cpu/src/nodes/priorbox.cpp index 9f80ab2265d..85231381dec 100644 --- a/src/plugins/intel_cpu/src/nodes/priorbox.cpp +++ b/src/plugins/intel_cpu/src/nodes/priorbox.cpp @@ -88,10 +88,8 @@ bool PriorBox::isSupportedOperation(const std::shared_ptr& o return true; } -PriorBox::PriorBox( - const std::shared_ptr& op, - const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, PriorBoxShapeInferFactory(op)) { +PriorBox::PriorBox(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, PriorBoxShapeInferFactory(op)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/priorbox.h b/src/plugins/intel_cpu/src/nodes/priorbox.h index 83e1d34e441..d4cae93e0d1 100644 --- a/src/plugins/intel_cpu/src/nodes/priorbox.h +++ b/src/plugins/intel_cpu/src/nodes/priorbox.h @@ -16,7 +16,7 @@ namespace node { class PriorBox : public Node { public: - PriorBox(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + PriorBox(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp b/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp index 34c6546a3d0..a02c4a4aa20 100644 --- a/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp +++ b/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp @@ -79,10 +79,8 @@ bool PriorBoxClustered::isSupportedOperation(const std::shared_ptr& op, - const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, PriorBoxClusteredShapeInferFactory(op)) { +PriorBoxClustered::PriorBoxClustered(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, PriorBoxClusteredShapeInferFactory(op)) { 
std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/priorbox_clustered.h b/src/plugins/intel_cpu/src/nodes/priorbox_clustered.h index a94479d3533..7cfa059f540 100644 --- a/src/plugins/intel_cpu/src/nodes/priorbox_clustered.h +++ b/src/plugins/intel_cpu/src/nodes/priorbox_clustered.h @@ -16,7 +16,7 @@ namespace node { class PriorBoxClustered : public Node { public: - PriorBoxClustered(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + PriorBoxClustered(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/proposal.cpp b/src/plugins/intel_cpu/src/nodes/proposal.cpp index 5fd2943b0ce..ff46c61fd6c 100644 --- a/src/plugins/intel_cpu/src/nodes/proposal.cpp +++ b/src/plugins/intel_cpu/src/nodes/proposal.cpp @@ -93,8 +93,8 @@ bool Proposal::isSupportedOperation(const std::shared_ptr& o return true; } -Proposal::Proposal(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +Proposal::Proposal(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/proposal.h b/src/plugins/intel_cpu/src/nodes/proposal.h index 83f87926a92..47778acb9d5 100644 --- a/src/plugins/intel_cpu/src/nodes/proposal.h +++ b/src/plugins/intel_cpu/src/nodes/proposal.h @@ -16,7 +16,7 @@ namespace node { class Proposal : public Node { public: - Proposal(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Proposal(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp b/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp index 56d9234092a..9347e4b05d4 100644 --- a/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp @@ -57,8 +57,8 @@ bool PSROIPooling::isSupportedOperation(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +PSROIPooling::PSROIPooling(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/psroi_pooling.h b/src/plugins/intel_cpu/src/nodes/psroi_pooling.h index 59cf9227047..7187cf51dc4 100644 --- a/src/plugins/intel_cpu/src/nodes/psroi_pooling.h +++ b/src/plugins/intel_cpu/src/nodes/psroi_pooling.h @@ -16,7 +16,7 @@ namespace node { class PSROIPooling : public Node { public: - PSROIPooling(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + PSROIPooling(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/range.cpp b/src/plugins/intel_cpu/src/nodes/range.cpp 
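
Every constructor in this patch follows the same two-argument rewrite, so one worked example covers them all. Note that this diff's rendering has stripped the contents of angle brackets throughout; in the sketch below the template arguments are restored to their presumed form (std::shared_ptr<ngraph::Node>), an inference from the plugin's conventions rather than text recovered from the patch:

// Sketch of the signature migration, using Range (whose diff follows) as the
// example. Only the two-argument pattern is verbatim from the patch; the
// bracket contents are presumed, as noted above.
//
// Before: engine and weights cache passed piecewise.
//   Range::Range(const std::shared_ptr<ngraph::Node>& op,
//                const dnnl::engine& eng, WeightsSharing::Ptr& cache)
//       : Node(op, eng, cache, InternalDynShapeInferFactory()) { /* ... */ }
//
// After: one immutable context carries engine, caches and mutex.
//   Range::Range(const std::shared_ptr<ngraph::Node>& op,
//                const GraphContext::CPtr context)
//       : Node(op, context, InternalDynShapeInferFactory()) { /* ... */ }
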
index 0d1050632c7..8fe9e3ff21d 100644 --- a/src/plugins/intel_cpu/src/nodes/range.cpp +++ b/src/plugins/intel_cpu/src/nodes/range.cpp @@ -27,8 +27,8 @@ bool Range::isSupportedOperation(const std::shared_ptr& op, return true; } -Range::Range(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, InternalDynShapeInferFactory()) { +Range::Range(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, InternalDynShapeInferFactory()) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/range.h b/src/plugins/intel_cpu/src/nodes/range.h index f5060183667..91ecbc4495c 100644 --- a/src/plugins/intel_cpu/src/nodes/range.h +++ b/src/plugins/intel_cpu/src/nodes/range.h @@ -13,7 +13,7 @@ namespace node { class Range : public Node { public: - Range(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Range(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/rdft.cpp b/src/plugins/intel_cpu/src/nodes/rdft.cpp index 0ee3026a483..60118d70877 100644 --- a/src/plugins/intel_cpu/src/nodes/rdft.cpp +++ b/src/plugins/intel_cpu/src/nodes/rdft.cpp @@ -76,8 +76,8 @@ static std::vector getDefaultSignalSizes(const VectorDims& inputShape, cons return signalSizes; } -RDFT::RDFT(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) : - Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +RDFT::RDFT(const std::shared_ptr& op, const GraphContext::CPtr context) : + Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -915,7 +915,7 @@ void RDFT::prepareParams() { return executor; }; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, buildExecutor); executor = result.first; if (axes.size() > 0 && signalSizes.size() > 0 && outputShapes[0].isStatic()) { diff --git a/src/plugins/intel_cpu/src/nodes/rdft.h b/src/plugins/intel_cpu/src/nodes/rdft.h index da177dec66e..78ca19464cd 100644 --- a/src/plugins/intel_cpu/src/nodes/rdft.h +++ b/src/plugins/intel_cpu/src/nodes/rdft.h @@ -75,7 +75,7 @@ struct RDFTExecutor { class RDFT : public Node { public: - RDFT(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + RDFT(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/reduce.cpp b/src/plugins/intel_cpu/src/nodes/reduce.cpp index b04411cd59d..b42690b4f77 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/reduce.cpp @@ -1738,8 +1738,8 @@ bool Reduce::isSupportedOperation(const std::shared_ptr& op, return true; } -Reduce::Reduce(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, NgraphShapeInferFactory(op, PortMask(REDUCE_INDEXES))) { +Reduce::Reduce(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, PortMask(REDUCE_INDEXES))) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "Reduce 
node with name '" + getName() + "'"; @@ -1918,7 +1918,7 @@ void Reduce::prepareParams() { setPostOps(attr, dst_dims, true); ReduceKey key = {jcp, attr.get_post_ops()}; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); if (!result.first) { IE_THROW() << errorPrefix << " has not found jit_uni_reduce_post_kernel_f32."; diff --git a/src/plugins/intel_cpu/src/nodes/reduce.h b/src/plugins/intel_cpu/src/nodes/reduce.h index a8adb54cfc8..4c59e6f7cc1 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.h +++ b/src/plugins/intel_cpu/src/nodes/reduce.h @@ -85,7 +85,7 @@ struct jit_uni_reduce_post_kernel { class Reduce : public Node { public: - Reduce(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Reduce(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/reference.cpp b/src/plugins/intel_cpu/src/nodes/reference.cpp index ad4426b970f..807f561e2d7 100644 --- a/src/plugins/intel_cpu/src/nodes/reference.cpp +++ b/src/plugins/intel_cpu/src/nodes/reference.cpp @@ -17,9 +17,9 @@ namespace ov { namespace intel_cpu { namespace node { -Reference::Reference(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache, +Reference::Reference(const std::shared_ptr& op, const GraphContext::CPtr context, const std::string& errorMessage) : - Node(op, eng, cache, NgraphShapeInferFactory(op, FULL_PORT_MASK)), ngraphOp(op), additionalErrorMessage(errorMessage) { + Node(op, context, NgraphShapeInferFactory(op, FULL_PORT_MASK)), ngraphOp(op), additionalErrorMessage(errorMessage) { if (!op->has_evaluate()) { IE_THROW(NotImplemented) << "Cannot fallback on ngraph reference implementation (Ngraph::Node::evaluate() is not implemented)"; } diff --git a/src/plugins/intel_cpu/src/nodes/reference.h b/src/plugins/intel_cpu/src/nodes/reference.h index eb8b968f20a..976f65eeb0d 100644 --- a/src/plugins/intel_cpu/src/nodes/reference.h +++ b/src/plugins/intel_cpu/src/nodes/reference.h @@ -12,7 +12,7 @@ namespace node { class Reference : public Node { public: - Reference(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache, const std::string& errorMessage); + Reference(const std::shared_ptr& op, const GraphContext::CPtr context, const std::string& errorMessage); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/region_yolo.cpp b/src/plugins/intel_cpu/src/nodes/region_yolo.cpp index dfdbaa6f292..36a9b399036 100644 --- a/src/plugins/intel_cpu/src/nodes/region_yolo.cpp +++ b/src/plugins/intel_cpu/src/nodes/region_yolo.cpp @@ -244,8 +244,8 @@ bool RegionYolo::needPrepareParams() const { return false; } -RegionYolo::RegionYolo(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +RegionYolo::RegionYolo(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/region_yolo.h b/src/plugins/intel_cpu/src/nodes/region_yolo.h index 036854f13f7..5174cd4f856 100644 --- a/src/plugins/intel_cpu/src/nodes/region_yolo.h +++ 
b/src/plugins/intel_cpu/src/nodes/region_yolo.h @@ -41,7 +41,7 @@ struct jit_uni_logistic_kernel { class RegionYolo : public Node { public: - RegionYolo(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + RegionYolo(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp index 9fc31ad5235..9fabf6e11e0 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp @@ -55,13 +55,13 @@ bool Reorder::isExecutable() const { return Node::isExecutable() && !isOptimized; } -Reorder::Reorder(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &w_cache) : - Node(op, eng, w_cache, PassThroughShapeInferFactory()) { +Reorder::Reorder(const std::shared_ptr& op, const GraphContext::CPtr context) : + Node(op, context, PassThroughShapeInferFactory()) { IE_THROW() << "Can't create reorder node from ngraph node"; } -Reorder::Reorder(const std::string& name, const dnnl::engine& eng, WeightsSharing::Ptr &w_cache) : - Node("Reorder", name, eng, w_cache) {} +Reorder::Reorder(const std::string& name, const GraphContext::CPtr context) : + Node("Reorder", name, context) {} void Reorder::getSupportedDescriptors() { if (getParentEdges().size() != 1) @@ -247,7 +247,7 @@ void Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc, return std::make_shared(pd); }; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); std::pair, CacheEntryBase::LookUpStatus> result{ nullptr, CacheEntryBase::LookUpStatus::Miss}; diff --git a/src/plugins/intel_cpu/src/nodes/reorder.h b/src/plugins/intel_cpu/src/nodes/reorder.h index 31128453737..25981902de0 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.h +++ b/src/plugins/intel_cpu/src/nodes/reorder.h @@ -17,8 +17,8 @@ namespace node { class Reorder : public Node { public: - Reorder(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); - Reorder(const std::string& name, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Reorder(const std::shared_ptr& op, const GraphContext::CPtr context); + Reorder(const std::string& name, const GraphContext::CPtr context); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp b/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp index 2e0f6a0f416..b2d80f9fd67 100644 --- a/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp @@ -27,8 +27,8 @@ bool ReorgYolo::isSupportedOperation(const std::shared_ptr& return true; } -ReorgYolo::ReorgYolo(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +ReorgYolo::ReorgYolo(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/reorg_yolo.h b/src/plugins/intel_cpu/src/nodes/reorg_yolo.h index 567ccd5ec55..9bc2663764f 100644 --- a/src/plugins/intel_cpu/src/nodes/reorg_yolo.h +++ b/src/plugins/intel_cpu/src/nodes/reorg_yolo.h @@ -13,7 +13,7 @@ namespace node { class ReorgYolo : public Node { 
public: - ReorgYolo(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + ReorgYolo(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/reshape.cpp b/src/plugins/intel_cpu/src/nodes/reshape.cpp index 62a6e755e82..94819195027 100644 --- a/src/plugins/intel_cpu/src/nodes/reshape.cpp +++ b/src/plugins/intel_cpu/src/nodes/reshape.cpp @@ -34,8 +34,8 @@ bool Reshape::isSupportedOperation(const std::shared_ptr& op return true; } -Reshape::Reshape(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) : - Node(op, eng, cache, NgraphShapeInferFactory(op, PortMask(1))) { +Reshape::Reshape(const std::shared_ptr& op, const GraphContext::CPtr context) : + Node(op, context, NgraphShapeInferFactory(op, PortMask(1))) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/reshape.h b/src/plugins/intel_cpu/src/nodes/reshape.h index a05dcccb94d..3ac59e7c37c 100644 --- a/src/plugins/intel_cpu/src/nodes/reshape.h +++ b/src/plugins/intel_cpu/src/nodes/reshape.h @@ -17,7 +17,7 @@ namespace node { class Reshape : public Node { public: - Reshape(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Reshape(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp b/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp index 6583e1e0ec0..18c2e48c677 100644 --- a/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp +++ b/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp @@ -28,8 +28,8 @@ bool ReverseSequence::isSupportedOperation(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +ReverseSequence::ReverseSequence(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/reverse_sequence.h b/src/plugins/intel_cpu/src/nodes/reverse_sequence.h index d650a8c3003..af06caea5e5 100644 --- a/src/plugins/intel_cpu/src/nodes/reverse_sequence.h +++ b/src/plugins/intel_cpu/src/nodes/reverse_sequence.h @@ -13,7 +13,7 @@ namespace node { class ReverseSequence : public Node { public: - ReverseSequence(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + ReverseSequence(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp index 4084a173839..8c6e0d1e36c 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.cpp +++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp @@ -342,8 +342,8 @@ private: } // namespace -RNN::RNN(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) : - Node(op, eng, cache, RnnShapeInferFactory(op)) { +RNN::RNN(const std::shared_ptr& op, const GraphContext::CPtr context) : + Node(op, context, RnnShapeInferFactory(op)) { std::string errorMessage; if 
(!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -1061,7 +1061,7 @@ void RNN::prepareParams() { } }; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); if (!result.first) { diff --git a/src/plugins/intel_cpu/src/nodes/rnn.h b/src/plugins/intel_cpu/src/nodes/rnn.h index 7f02755d8d2..c2b95b3aef3 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.h +++ b/src/plugins/intel_cpu/src/nodes/rnn.h @@ -17,7 +17,7 @@ namespace node { class RNN : public Node { public: - RNN(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + RNN(const std::shared_ptr& op, const GraphContext::CPtr context); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; static bool isCell(const std::shared_ptr& op); diff --git a/src/plugins/intel_cpu/src/nodes/roi_align.cpp b/src/plugins/intel_cpu/src/nodes/roi_align.cpp index 5f8e039bcac..7aaf292108e 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_align.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_align.cpp @@ -674,8 +674,8 @@ bool ROIAlign::isSupportedOperation(const std::shared_ptr& o return true; } -ROIAlign::ROIAlign(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +ROIAlign::ROIAlign(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "ROIPooling layer with name '" + getName() + "' "; diff --git a/src/plugins/intel_cpu/src/nodes/roi_align.h b/src/plugins/intel_cpu/src/nodes/roi_align.h index 8aec9482908..ae4c81af724 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_align.h +++ b/src/plugins/intel_cpu/src/nodes/roi_align.h @@ -67,7 +67,7 @@ struct jit_uni_roi_align_kernel { class ROIAlign : public Node { public: - ROIAlign(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + ROIAlign(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp index 826c9f42a5f..9cee1b9c583 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp @@ -383,8 +383,8 @@ bool ROIPooling::isSupportedOperation(const std::shared_ptr& return true; } -ROIPooling::ROIPooling(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +ROIPooling::ROIPooling(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -523,7 +523,7 @@ void ROIPooling::prepareParams() { auto builder = [](const RoiPoolingKey& key) { return ROIPoolingExecutor::createROIPoolingNewExecutor(key.refParams); }; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); execPtr = result.first; } diff --git a/src/plugins/intel_cpu/src/nodes/roi_pooling.h b/src/plugins/intel_cpu/src/nodes/roi_pooling.h index af4f731a285..4845a254f4c 100644 --- 
a/src/plugins/intel_cpu/src/nodes/roi_pooling.h +++ b/src/plugins/intel_cpu/src/nodes/roi_pooling.h @@ -68,7 +68,7 @@ struct jit_uni_roi_pooling_kernel { class ROIPooling : public Node { public: - ROIPooling(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + ROIPooling(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/roll.cpp b/src/plugins/intel_cpu/src/nodes/roll.cpp index 9f76d2f3e76..01db7c2ea2c 100644 --- a/src/plugins/intel_cpu/src/nodes/roll.cpp +++ b/src/plugins/intel_cpu/src/nodes/roll.cpp @@ -35,8 +35,8 @@ bool Roll::isSupportedOperation(const std::shared_ptr& op, s return true; } -Roll::Roll(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) : - Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +Roll::Roll(const std::shared_ptr& op, const GraphContext::CPtr context) : + Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { layerErrorPrefix = "Roll layer with name '" + getName() + "'"; diff --git a/src/plugins/intel_cpu/src/nodes/roll.h b/src/plugins/intel_cpu/src/nodes/roll.h index a03a289c906..b57811d19c7 100644 --- a/src/plugins/intel_cpu/src/nodes/roll.h +++ b/src/plugins/intel_cpu/src/nodes/roll.h @@ -14,7 +14,7 @@ namespace node { class Roll : public Node { public: - Roll(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Roll(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/scatter_update.cpp b/src/plugins/intel_cpu/src/nodes/scatter_update.cpp index d1779b56403..0a8f4fd12fa 100644 --- a/src/plugins/intel_cpu/src/nodes/scatter_update.cpp +++ b/src/plugins/intel_cpu/src/nodes/scatter_update.cpp @@ -41,8 +41,8 @@ bool ScatterUpdate::isExecutable() const { return !isInputTensorAtPortEmpty(DATA_ID); } -ScatterUpdate::ScatterUpdate(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)), +ScatterUpdate::ScatterUpdate(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)), dataSize(0lu), indicesSize(0lu), axisSize(0lu), dataPrec(Precision::UNSPECIFIED), indicesPrec(Precision::UNSPECIFIED), diff --git a/src/plugins/intel_cpu/src/nodes/scatter_update.h b/src/plugins/intel_cpu/src/nodes/scatter_update.h index 619149ead19..36b7a25bdc5 100644 --- a/src/plugins/intel_cpu/src/nodes/scatter_update.h +++ b/src/plugins/intel_cpu/src/nodes/scatter_update.h @@ -22,7 +22,7 @@ enum class ScatterUpdateMode { class ScatterUpdate : public Node { public: - ScatterUpdate(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + ScatterUpdate(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/select.cpp b/src/plugins/intel_cpu/src/nodes/select.cpp index 95226aecb66..93367b2a75d 100644 --- a/src/plugins/intel_cpu/src/nodes/select.cpp +++ b/src/plugins/intel_cpu/src/nodes/select.cpp @@ -36,8 +36,8 @@ bool Select::isSupportedOperation(const std::shared_ptr& op, return true; } 
-Select::Select(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +Select::Select(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git a/src/plugins/intel_cpu/src/nodes/select.h b/src/plugins/intel_cpu/src/nodes/select.h index d58ec1217e8..25eb7f2c68c 100644 --- a/src/plugins/intel_cpu/src/nodes/select.h +++ b/src/plugins/intel_cpu/src/nodes/select.h @@ -16,7 +16,7 @@ namespace node { class Select : public Node { public: - Select(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + Select(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/shapeof.cpp b/src/plugins/intel_cpu/src/nodes/shapeof.cpp index 130fbd0b43a..1677c24339f 100644 --- a/src/plugins/intel_cpu/src/nodes/shapeof.cpp +++ b/src/plugins/intel_cpu/src/nodes/shapeof.cpp @@ -55,8 +55,8 @@ bool ShapeOf::isSupportedOperation(const std::shared_ptr& op return true; } -ShapeOf::ShapeOf(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, ShapeOfShapeInferFactory()) { +ShapeOf::ShapeOf(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, ShapeOfShapeInferFactory()) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "ShapeOf layer with name '" + getName() + "' "; diff --git a/src/plugins/intel_cpu/src/nodes/shapeof.h b/src/plugins/intel_cpu/src/nodes/shapeof.h index 06d652c4b30..045716e3cbb 100644 --- a/src/plugins/intel_cpu/src/nodes/shapeof.h +++ b/src/plugins/intel_cpu/src/nodes/shapeof.h @@ -16,7 +16,7 @@ namespace node { class ShapeOf : public Node { public: - ShapeOf(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + ShapeOf(const std::shared_ptr& op, const GraphContext::CPtr context); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; diff --git a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp index 1114c38d8ac..d946d5508a0 100644 --- a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp +++ b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp @@ -65,8 +65,8 @@ bool ShuffleChannels::isSupportedOperation(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) - : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { +ShuffleChannels::ShuffleChannels(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -106,8 +106,8 @@ void ShuffleChannels::initSupportedPrimitiveDescriptors() { } // use ncsp as default for non-quantized networks and nspc for quantized - auto firstCreatorType = isInQuantizedGraph ? LayoutType::nspc : LayoutType::ncsp; - auto secondCreatorType = isInQuantizedGraph ? LayoutType::ncsp : LayoutType::nspc; + auto firstCreatorType = context->isGraphQuantized() ? LayoutType::nspc : LayoutType::ncsp; + auto secondCreatorType = context->isGraphQuantized() ? 
LayoutType::ncsp : LayoutType::nspc; addSupportedPrimDesc({{firstCreatorType, precision}}, {{firstCreatorType, precision}}, @@ -158,7 +158,7 @@ void ShuffleChannels::prepareParams() { attrs.srcDims = srcMemPtr->getStaticDims(); attrs.srcBlockedDims = srcMemPtr->GetDescWithType()->getBlockDims(); - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(attrs, builder); if (!result.first) { IE_THROW() << "ShuffleChannelsExecutor was not found for node " << getName() << "."; diff --git a/src/plugins/intel_cpu/src/nodes/shuffle_channels.h b/src/plugins/intel_cpu/src/nodes/shuffle_channels.h index 027aa6e1a19..6dbfab1d836 100644 --- a/src/plugins/intel_cpu/src/nodes/shuffle_channels.h +++ b/src/plugins/intel_cpu/src/nodes/shuffle_channels.h @@ -17,7 +17,7 @@ namespace node { class ShuffleChannels : public Node { public: - ShuffleChannels(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + ShuffleChannels(const std::shared_ptr& op, const GraphContext::CPtr context); ~ShuffleChannels() override = default; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; diff --git a/src/plugins/intel_cpu/src/nodes/softmax.cpp b/src/plugins/intel_cpu/src/nodes/softmax.cpp index 7aa22649243..6ffff101aa1 100644 --- a/src/plugins/intel_cpu/src/nodes/softmax.cpp +++ b/src/plugins/intel_cpu/src/nodes/softmax.cpp @@ -66,8 +66,8 @@ bool SoftMax::isSupportedOperation(const std::shared_ptr& op return true; } -SoftMax::SoftMax(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) : - Node(op, eng, cache, PassThroughShapeInferFactory()) { +SoftMax::SoftMax(const std::shared_ptr& op, const GraphContext::CPtr context) : + Node(op, context, PassThroughShapeInferFactory()) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -174,7 +174,7 @@ void SoftMax::prepareParams() { return std::make_shared(prim_desc); }; - auto cache = getRuntimeCache(); + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); if (!result.first) { diff --git a/src/plugins/intel_cpu/src/nodes/softmax.h b/src/plugins/intel_cpu/src/nodes/softmax.h index 23d65f2560a..b0e846de5f7 100644 --- a/src/plugins/intel_cpu/src/nodes/softmax.h +++ b/src/plugins/intel_cpu/src/nodes/softmax.h @@ -16,7 +16,7 @@ namespace node { class SoftMax : public Node { public: - SoftMax(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + SoftMax(const std::shared_ptr& op, const GraphContext::CPtr context); void initOptimalPrimitiveDescriptor() override; void createDescriptor(const std::vector& inputDesc, diff --git a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp index 84c1d7fe3db..07d8d85f34e 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp +++ b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp @@ -38,8 +38,8 @@ bool SpaceToBatch::isSupportedOperation(const std::shared_ptr& op, const dnnl::engine& eng, - WeightsSharing::Ptr &cache) : Node(op, eng, cache, NgraphShapeInferFactory(op, PortMask(1, 2, 3))) { +SpaceToBatch::SpaceToBatch(const std::shared_ptr& op, const GraphContext::CPtr context) + : Node(op, context, NgraphShapeInferFactory(op, PortMask(1, 2, 3))) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; diff --git 
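
The ShuffleChannels hunk above also retires the node's per-instance isInQuantizedGraph flag: the node now asks context->isGraphQuantized() while initializing primitive descriptors. A small self-contained sketch of the layout preference that hunk encodes (LayoutTypeSketch mirrors the plugin's LayoutType values):

// Quantized graphs default to channels-last (nspc) as the primary layout,
// non-quantized graphs to planar (ncsp); the secondary choice is the inverse.
enum class LayoutTypeSketch { ncsp, nspc };

LayoutTypeSketch firstCreatorType(bool graphQuantized) {
    return graphQuantized ? LayoutTypeSketch::nspc : LayoutTypeSketch::ncsp;
}

LayoutTypeSketch secondCreatorType(bool graphQuantized) {
    return graphQuantized ? LayoutTypeSketch::ncsp : LayoutTypeSketch::nspc;
}
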
a/src/plugins/intel_cpu/src/nodes/space_to_batch.h b/src/plugins/intel_cpu/src/nodes/space_to_batch.h
index 5e328de98a1..8e10ffa1a6e 100644
--- a/src/plugins/intel_cpu/src/nodes/space_to_batch.h
+++ b/src/plugins/intel_cpu/src/nodes/space_to_batch.h
@@ -16,7 +16,7 @@ namespace node {
 
 class SpaceToBatch : public Node {
 public:
-    SpaceToBatch(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
+    SpaceToBatch(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
 
     void getSupportedDescriptors() override {};
     void initSupportedPrimitiveDescriptors() override;
diff --git a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp
index f0d1ee796d0..afa720df902 100644
--- a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp
+++ b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp
@@ -72,10 +72,8 @@ bool SpaceToDepth::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     return true;
 }
 
-SpaceToDepth::SpaceToDepth(const std::shared_ptr<ngraph::Node>& op,
-                           const dnnl::engine& eng,
-                           WeightsSharing::Ptr& cache)
-    : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) {
+SpaceToDepth::SpaceToDepth(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
+    : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
@@ -200,7 +198,7 @@ void SpaceToDepth::prepareParams() {
         return std::make_shared<SpaceToDepthExecutor>(key);
     };
 
-    auto cache = getRuntimeCache();
+    auto cache = context->getParamsCache();
     auto result = cache->getOrCreate(attrs, builder);
     if (!result.first) {
         IE_THROW() << "SpaceToDepthExecutor was not found for node " << getName() << ".";
diff --git a/src/plugins/intel_cpu/src/nodes/space_to_depth.h b/src/plugins/intel_cpu/src/nodes/space_to_depth.h
index 8c4e68c4221..530938601c1 100644
--- a/src/plugins/intel_cpu/src/nodes/space_to_depth.h
+++ b/src/plugins/intel_cpu/src/nodes/space_to_depth.h
@@ -15,7 +15,7 @@ namespace node {
 
 class SpaceToDepth : public Node {
 public:
-    SpaceToDepth(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
+    SpaceToDepth(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
 
     static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
     void getSupportedDescriptors() override;
diff --git a/src/plugins/intel_cpu/src/nodes/split.cpp b/src/plugins/intel_cpu/src/nodes/split.cpp
index a1174c698ae..bae3a568907 100644
--- a/src/plugins/intel_cpu/src/nodes/split.cpp
+++ b/src/plugins/intel_cpu/src/nodes/split.cpp
@@ -46,8 +46,8 @@ bool Split::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     return true;
 }
 
-Split::Split(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) :
-        Node(op, eng, cache, NgraphShapeInferFactory(op, PortMask(1, 2))) {
+Split::Split(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context) :
+        Node(op, context, NgraphShapeInferFactory(op, PortMask(1, 2))) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
diff --git a/src/plugins/intel_cpu/src/nodes/split.h b/src/plugins/intel_cpu/src/nodes/split.h
index 68b24934728..18080c71fc9 100644
--- a/src/plugins/intel_cpu/src/nodes/split.h
+++ b/src/plugins/intel_cpu/src/nodes/split.h
@@ -14,7 +14,7 @@ namespace node {
 
 class Split : public Node {
 public:
-    Split(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
+    Split(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
 
     static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
     void getSupportedDescriptors() override;
diff --git a/src/plugins/intel_cpu/src/nodes/strided_slice.cpp b/src/plugins/intel_cpu/src/nodes/strided_slice.cpp
index 3402b069b38..690a39f564c 100644
--- a/src/plugins/intel_cpu/src/nodes/strided_slice.cpp
+++ b/src/plugins/intel_cpu/src/nodes/strided_slice.cpp
@@ -38,8 +38,8 @@ bool StridedSlice::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     return true;
 }
 
-StridedSlice::StridedSlice(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) :
-        Node(op, eng, cache, NgraphShapeInferFactory(op, PortMask(1, 2, 3, 4))) {
+StridedSlice::StridedSlice(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context) :
+        Node(op, context, NgraphShapeInferFactory(op, PortMask(1, 2, 3, 4))) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
diff --git a/src/plugins/intel_cpu/src/nodes/strided_slice.h b/src/plugins/intel_cpu/src/nodes/strided_slice.h
index 6ae6210d1cc..73996e9431e 100644
--- a/src/plugins/intel_cpu/src/nodes/strided_slice.h
+++ b/src/plugins/intel_cpu/src/nodes/strided_slice.h
@@ -14,7 +14,7 @@ namespace node {
 
 class StridedSlice : public Node {
 public:
-    StridedSlice(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
+    StridedSlice(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
 
     static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
     void getSupportedDescriptors() override;
diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp
index afe66545c66..2612d9aef1b 100644
--- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp
+++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp
@@ -36,8 +36,8 @@ namespace intel_cpu {
 namespace node {
 
-Snippet::Snippet(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache)
-    : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) {
+Snippet::Snippet(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
+    : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) {
     host_isa = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) ?
         dnnl::impl::cpu::x64::avx512_core : dnnl::impl::cpu::x64::avx2;
     original_snippet = ov::as_type_ptr<ngraph::snippets::op::Subgraph>(op);
@@ -55,10 +55,7 @@ void Snippet::copy_snippet() {
     std::shared_ptr<ov::Model> new_body = nullptr;
     // Ticket[79554]: TypeRelaxed ops aren't thread safe so we use mutex to avoid collision in throughput mode
     if (original_snippet->has_type_relaxed_ops()) {
-        if (!sharedMutex) {
-            IE_THROW() << "Subgraph doesn't have shared mutex";
-        }
-        std::lock_guard<std::mutex> lock(*sharedMutex);
+        std::lock_guard<std::mutex> lock(*context->getSharedMutex());
         new_body = ov::clone_model(*original_snippet->body_ptr());
     } else {
         new_body = ov::clone_model(*original_snippet->body_ptr());
diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.h b/src/plugins/intel_cpu/src/nodes/subgraph.h
index 9c302555fb6..9b9b06036fb 100644
--- a/src/plugins/intel_cpu/src/nodes/subgraph.h
+++ b/src/plugins/intel_cpu/src/nodes/subgraph.h
@@ -24,7 +24,7 @@ namespace node {
 /// precision: fp32
 class Snippet : public Node {
 public:
-    Snippet(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
+    Snippet(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
     ~Snippet() override = default;
 
     void getSupportedDescriptors() override {};
diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp
index 5e6c2603d12..c925653266b 100644
--- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp
+++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp
@@ -350,8 +350,8 @@ bool TensorIterator::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     return true;
 }
 
-TensorIterator::TensorIterator(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) :
-        Node(op, eng, cache, InternalDynShapeInferFactory()), ngraphOp(op) {
+TensorIterator::TensorIterator(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context) :
+        Node(op, context, InternalDynShapeInferFactory()), ngraphOp(op) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
@@ -364,7 +364,7 @@ void TensorIterator::getSupportedDescriptors() {
         THROW_ERROR << "cannot be cast to ov::op::util::SubGraphOp";
     }
     const std::shared_ptr<const ov::Model> body = tiOp->get_function();
-    sub_graph.CreateGraph(body, ext_mng, weightCache, sharedMutex);
+    sub_graph.CreateGraph(body, context);
 
     const auto &inMap = sub_graph.GetInputNodesMap();
     for (const auto &param : tiOp->get_function()->get_parameters()) {
diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.h b/src/plugins/intel_cpu/src/nodes/tensoriterator.h
index 659fcdce852..51180740803 100644
--- a/src/plugins/intel_cpu/src/nodes/tensoriterator.h
+++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.h
@@ -96,7 +96,7 @@ private:
 
 class TensorIterator : public Node {
 public:
-    TensorIterator(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
+    TensorIterator(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
 
     static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
     void initSupportedPrimitiveDescriptors() override;
@@ -106,8 +106,6 @@ public:
     void execute(dnnl::stream strm) override;
     bool isExecutable() const override { return true; }
 
-    void setExtManager(const ExtensionManager::Ptr& extMgr) { ext_mng = extMgr; }
-
 protected:
     //  needShapeInfer() should return false
     //  because we cannot resolve the output dimensions before the inference is completed
diff --git a/src/plugins/intel_cpu/src/nodes/tile.cpp b/src/plugins/intel_cpu/src/nodes/tile.cpp
index 0fcbb53a7e7..146cb3d7279 100644
--- a/src/plugins/intel_cpu/src/nodes/tile.cpp
+++ b/src/plugins/intel_cpu/src/nodes/tile.cpp
@@ -32,8 +32,8 @@ bool Tile::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     return true;
 }
 
-Tile::Tile(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) :
-        Node(op, eng, cache, NgraphShapeInferFactory(op, PortMask(TILE_REPEATS))) {
+Tile::Tile(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context) :
+        Node(op, context, NgraphShapeInferFactory(op, PortMask(TILE_REPEATS))) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
diff --git a/src/plugins/intel_cpu/src/nodes/tile.h b/src/plugins/intel_cpu/src/nodes/tile.h
index 172ecf6adab..a1e47493bcd 100644
--- a/src/plugins/intel_cpu/src/nodes/tile.h
+++ b/src/plugins/intel_cpu/src/nodes/tile.h
@@ -14,7 +14,7 @@ namespace node {
 
 class Tile : public Node, public TileBroadcastCommon {
 public:
-    Tile(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
+    Tile(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
 
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
diff --git a/src/plugins/intel_cpu/src/nodes/topk.cpp b/src/plugins/intel_cpu/src/nodes/topk.cpp
index f9229f5fb77..691f260996c 100644
--- a/src/plugins/intel_cpu/src/nodes/topk.cpp
+++ b/src/plugins/intel_cpu/src/nodes/topk.cpp
@@ -1821,8 +1821,8 @@ bool TopK::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     return true;
 }
 
-TopK::TopK(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache)
-    : Node(op, eng, cache, NgraphShapeInferFactory(op, PortMask(TOPK_K))) {
+TopK::TopK(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
+    : Node(op, context, NgraphShapeInferFactory(op, PortMask(TOPK_K))) {
     std::string errorMessage;
     if (isSupportedOperation(op, errorMessage)) {
         errorPrefix = "TopK layer with name '" + getName() + "'";
diff --git a/src/plugins/intel_cpu/src/nodes/topk.h b/src/plugins/intel_cpu/src/nodes/topk.h
index cc26e6cdd95..8091bf8732e 100644
--- a/src/plugins/intel_cpu/src/nodes/topk.h
+++ b/src/plugins/intel_cpu/src/nodes/topk.h
@@ -78,7 +78,7 @@ struct jit_uni_topk_kernel {
 
 class TopK : public Node {
 public:
-    TopK(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
+    TopK(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
     ~TopK() override = default;
 
     void getSupportedDescriptors() override;
diff --git a/src/plugins/intel_cpu/src/nodes/transpose.cpp b/src/plugins/intel_cpu/src/nodes/transpose.cpp
index 5e16e08d1c5..9c49e372606 100644
--- a/src/plugins/intel_cpu/src/nodes/transpose.cpp
+++ b/src/plugins/intel_cpu/src/nodes/transpose.cpp
@@ -61,8 +61,8 @@ bool Transpose::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     return true;
 }
 
-Transpose::Transpose(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache)
    : Node(op, eng, cache, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) {
+Transpose::Transpose(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
+    : Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
@@ -177,7 +177,7 @@ void Transpose::prepareParams() {
         return std::make_shared(pd);
    };
 
-    auto cache = getRuntimeCache();
+    auto cache = context->getParamsCache();
     auto result = cache->getOrCreate(key, builder);
 
     if (!result.first) {
@@ -208,7 +208,7 @@ void Transpose::prepareParams() {
         return std::make_shared(key);
     };
 
-    auto cache = getRuntimeCache();
+    auto cache = context->getParamsCache();
     auto result = cache->getOrCreate(params, builder);
 
     if (!result.first) {
diff --git a/src/plugins/intel_cpu/src/nodes/transpose.h b/src/plugins/intel_cpu/src/nodes/transpose.h
index 2ba726326c1..290a9280d1d 100644
--- a/src/plugins/intel_cpu/src/nodes/transpose.h
+++ b/src/plugins/intel_cpu/src/nodes/transpose.h
@@ -17,7 +17,7 @@ namespace node {
 
 class Transpose : public Node {
 public:
-    Transpose(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
+    Transpose(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
 
     static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
     void getSupportedDescriptors() override;
diff --git a/src/plugins/intel_cpu/src/nodes/unique.cpp b/src/plugins/intel_cpu/src/nodes/unique.cpp
index 62463cd6643..e9e9c2409ac 100644
--- a/src/plugins/intel_cpu/src/nodes/unique.cpp
+++ b/src/plugins/intel_cpu/src/nodes/unique.cpp
@@ -32,8 +32,8 @@ bool Unique::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     return true;
 }
 
-Unique::Unique(const std::shared_ptr<ov::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) :
-        Node(op, eng, cache, InternalDynShapeInferFactory()) {
+Unique::Unique(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context) :
+        Node(op, context, InternalDynShapeInferFactory()) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
diff --git a/src/plugins/intel_cpu/src/nodes/unique.hpp b/src/plugins/intel_cpu/src/nodes/unique.hpp
index c96c8d9d8a6..b4d451dd523 100644
--- a/src/plugins/intel_cpu/src/nodes/unique.hpp
+++ b/src/plugins/intel_cpu/src/nodes/unique.hpp
@@ -16,7 +16,7 @@ namespace node {
 
 class Unique : public Node {
 public:
-    Unique(const std::shared_ptr<ov::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
+    Unique(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context);
 
     static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
     void getSupportedDescriptors() override {};
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index 920a9042248..33e76c413ba 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -656,6 +656,9 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::map<std::string, std::string>& config) const {
         IE_THROW() << "Only ngraph-based models are supported!";
     }
 
+    auto context =
+        std::make_shared<GraphContext>(conf, extensionManager, fake_w_cache, std::make_shared<std::mutex>(), false);
+
     auto supported = GetSupportedNodes(model,
         [&](std::shared_ptr<ov::Model>& model) {
             Transformations transformation(model, enableLPT, enableSnippets, conf.enforceBF16, isLegacyAPI(), engConfig);
@@ -665,7 +668,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::map<std::string, std::string>& config) const {
         [&](const std::shared_ptr<ngraph::Node>& op) {
             std::unique_ptr<Node> ptr;
             try {
-                ptr.reset(Node::factory().create(op, {dnnl::engine::kind::cpu, 0}, extensionManager, fake_w_cache));
+                ptr.reset(Node::factory().create(op, context));
             } catch (const InferenceEngine::Exception&) {
                 return false;
             }
diff --git a/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp b/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp
index cecb75c615b..2561ca7eb68 100644
--- a/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp
+++ b/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp
@@ -108,20 +108,24 @@ class ReorderCPUTestGraph {
 public:
     void buildReorderGraph(const ov::intel_cpu::CpuBlockedMemoryDesc& inputDesc,
                            const ov::intel_cpu::CpuBlockedMemoryDesc& outputDesc) {
-        const dnnl::engine cpuEngine = {dnnl::engine::kind::cpu, 0};
-        ov::intel_cpu::WeightsSharing::Ptr weightsCache;
+        Config conf;
+        conf.rtCacheCapacity = 100;
+        auto context = std::make_shared<GraphContext>(conf,
+                                                      nullptr,
+                                                      std::make_shared<WeightsSharing>(),
+                                                      std::make_shared<std::mutex>(),
+                                                      false);
+        const dnnl::engine cpuEngine = context->getEngine();
 
         inputNode = std::make_shared<node::Input>(inputDesc.clone(),
                                                   "Reorder_Input",
                                                   "Parameter",
-                                                  cpuEngine,
-                                                  weightsCache);
-        reorderNode = std::make_shared<node::Reorder>("Reorder", cpuEngine, weightsCache);
+                                                  context);
+        reorderNode = std::make_shared<node::Reorder>("Reorder", context);
         outputNode = std::make_shared<node::Input>(outputDesc.clone(),
                                                    "Reorder_Output",
                                                    "Result",
-                                                   cpuEngine,
-                                                   weightsCache);
+                                                   context);
 
         parentEdge = std::make_shared<Edge>(inputNode, reorderNode, 0, 0);
         childEdge = std::make_shared<Edge>(reorderNode, outputNode, 0, 0);
@@ -130,8 +134,6 @@ public:
         reorderNode->addEdge(parentEdge);
         reorderNode->addEdge(childEdge);
 
-        auto rtParamsCache = std::make_shared<MultiCache>(100);
-
         auto parentMemory = std::make_shared<Memory>(cpuEngine);
         auto childMemory = std::make_shared<Memory>(cpuEngine);
         parentMemory->Create(inputDesc, nullptr);
@@ -141,7 +143,6 @@ public:
         childEdge->reuse(childMemory);
 
         reorderNode->setDescs(inputDesc, outputDesc);
-        reorderNode->setRuntimeCache(rtParamsCache);
 
         std::array<std::shared_ptr<Node>, 3> nodes{inputNode, reorderNode, outputNode};
        for (auto& n : nodes) {
            n->init();
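
The pattern across these hunks is uniform: every node constructor drops the (op, dnnl::engine, WeightsSharing::Ptr) parameter list in favor of a single immutable GraphContext::CPtr, and per-node members such as getRuntimeCache() give way to accessors on the shared context. The sketch below is a minimal, self-contained illustration of that parameter-object shape; the accessor names mirror the ones visible in the diff (getEngine, getSharedMutex), but Engine, WeightsSharing, and Config here are simplified stand-ins rather than the real CPU-plugin types.

// Minimal sketch of the GraphContext parameter-object pattern (stand-in types,
// not the real OpenVINO classes): shared resources live in one immutable,
// reference-counted context instead of being threaded through every constructor.
#include <memory>
#include <mutex>
#include <utility>

namespace sketch {

struct Engine {};                       // stand-in for dnnl::engine
struct WeightsSharing {};               // stand-in for the weights cache
struct Config { int rtCacheCapacity = 5000; };

class GraphContext {
public:
    using CPtr = std::shared_ptr<const GraphContext>;

    GraphContext(Config cfg, std::shared_ptr<WeightsSharing> wCache)
        : cfg_(std::move(cfg)),
          weightsCache_(std::move(wCache)),
          sharedMutex_(std::make_shared<std::mutex>()) {}

    // Accessor names mirror the ones used in the hunks above.
    const Engine& getEngine() const { return engine_; }
    std::shared_ptr<WeightsSharing> getWeightsCache() const { return weightsCache_; }
    std::shared_ptr<std::mutex> getSharedMutex() const { return sharedMutex_; }

private:
    Config cfg_;
    Engine engine_;
    std::shared_ptr<WeightsSharing> weightsCache_;
    std::shared_ptr<std::mutex> sharedMutex_;
};

// Before the refactor: Node(op, engine, weightsCache). After: Node(op, context).
class Node {
public:
    explicit Node(GraphContext::CPtr context) : context_(std::move(context)) {}

protected:
    GraphContext::CPtr context_;        // every shared resource is one hop away
};

}  // namespace sketch

int main() {
    auto ctx = std::make_shared<const sketch::GraphContext>(
        sketch::Config{}, std::make_shared<sketch::WeightsSharing>());
    sketch::Node node(ctx);             // one argument, stable against context growth
    (void)node;
    return 0;
}

The design payoff is what the diff itself demonstrates: when a new shared resource is introduced (a scratchpad, a runtime cache), only GraphContext grows, and no constructor signature in the node hierarchy has to change.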
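The prepareParams() hunks in space_to_depth.cpp and transpose.cpp all land on the same idiom: build a hashable key, then ask context->getParamsCache() to getOrCreate(key, builder) so the executor is constructed once and reused afterwards. The sketch below illustrates that build-once keyed cache under stated assumptions: ParamsCache, Key, and Executor are invented stand-ins for the plugin's cache and executor types, and the real cache's return convention (result.first is null when the builder fails) is simplified to a (pointer, was-cached) pair.

// Sketch of the getOrCreate idiom behind context->getParamsCache() (invented
// stand-ins, simplified return convention): a keyed cache that builds an
// executor on first use and returns the shared instance afterwards.
#include <functional>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <utility>

struct Key {
    int rank;
    bool operator==(const Key& other) const { return rank == other.rank; }
};

struct KeyHash {
    std::size_t operator()(const Key& key) const { return std::hash<int>()(key.rank); }
};

struct Executor {
    explicit Executor(const Key&) {}    // a real executor would compile its kernel here
};

class ParamsCache {
public:
    using Builder = std::function<std::shared_ptr<Executor>(const Key&)>;

    // Returns {executor, wasCached}; the builder runs only on a miss.
    std::pair<std::shared_ptr<Executor>, bool> getOrCreate(const Key& key, const Builder& build) {
        std::lock_guard<std::mutex> lock(mutex_);
        auto it = map_.find(key);
        if (it != map_.end())
            return {it->second, true};
        auto exec = build(key);
        map_.emplace(key, exec);
        return {exec, false};
    }

private:
    std::mutex mutex_;                  // the cache is shared across stream threads
    std::unordered_map<Key, std::shared_ptr<Executor>, KeyHash> map_;
};

int main() {
    ParamsCache cache;
    auto builder = [](const Key& key) { return std::make_shared<Executor>(key); };
    auto first = cache.getOrCreate(Key{4}, builder);    // miss: builder constructs
    auto second = cache.getOrCreate(Key{4}, builder);   // hit: same executor reused
    return (first.first == second.first && second.second) ? 0 : 1;
}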