[CPU] Execute constants in order with the create primitives calls (#16795)

This commit is contained in:
Maksim Kutakov
2023-04-20 12:22:57 +02:00
committed by GitHub
parent 0f7e6de346
commit 70c3979602
4 changed files with 27 additions and 36 deletions

View File

@@ -386,7 +386,7 @@ void Graph::InitGraph() {
if (node->isDynamicNode()) {
haveDynNodes = true;
if (node->outputShapeDataDependency() ||
// WA: for convolution plus summ(broadcast). Due to the fact that a convolution with sum use the same memory for second sum term and the output
// WA: for convolution plus sum(broadcast). Due to the fact that a convolution with sum use the same memory for second sum term and the output
// tensors (inPlace) resizing the output tensor, may lead to reallocation of this second term memory and possible data loss. The reallocation
// may happen when the second term shape is broadcasted to the output tensor shape. To avoid the data loss, we have a special processing for
// such cases inside the convolution node, but it works properly only when dynamic shapes inference, preparation and execution are called
@@ -398,25 +398,25 @@ void Graph::InitGraph() {
}
// In case of dynamic shapes, tensors may be resized due to the shapes variations.
// If the input tensor is included to memory reuse, that means its memory manager is shared with other tensors in the graph, which in turn may cause data
// loss when one of the tensor dow the graph requested mem resize, while the input data have not been yet read by the consumers. To avoid such situations
// we disalbe io mem reuse for the case of dynamic shapes.
// If the input tensor is included to memory reuse, it means that its memory manager is shared with other tensors in the graph, which in turn may cause data
// loss when one of the tensors down the graph requests mem resize, while the input data have not been yet read by the consumers. To avoid such situations
// we disable io mem reuse for the case of dynamic shapes.
if (haveDynNodes) {
this->reuse_io_tensors = false;
}
Allocate();
CreatePrimitives();
CreatePrimitivesAndExecConstants();
#ifndef CPU_DEBUG_CAPS
for (auto &graphNode : graphNodes) {
graphNode->cleanup();
}
#endif
ExtractConstantAndExecutableNodes();
ExecuteConstantNodesOnly();
ExtractExecutableNodes();
status = haveDynNodes ? Status::ReadyDynamic : Status::ReadyStatic;
}
@@ -483,12 +483,10 @@ void Graph::InitOptimalPrimitiveDescriptors() {
}
}
void Graph::ExtractConstantAndExecutableNodes() {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::ExtractConstantAndExecutableNodes");
void Graph::ExtractExecutableNodes() {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::ExtractExecutableNodes");
for (const auto& graphNode : graphNodes) {
if (graphNode->isConstant()) {
constantGraphNodes.emplace_back(graphNode);
} else if (CPU_DEBUG_CAPS_ALWAYS_TRUE(graphNode->isExecutable()) || graphNode->isDynamicNode()) {
if ((!graphNode->isConstant() && CPU_DEBUG_CAPS_ALWAYS_TRUE(graphNode->isExecutable())) || graphNode->isDynamicNode()) {
/* @todo
* Revise implementation.
* With current way it is possible that with debug_caps enabled
@@ -503,8 +501,8 @@ void Graph::ExtractConstantAndExecutableNodes() {
}
}
void Graph::ExecuteConstantNodesOnly() const {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::ExecuteConstantNodesOnly");
void Graph::CreatePrimitivesAndExecConstants() const {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::CreatePrimitivesAndExecConstants");
dnnl::stream stream(getEngine());
using shared_memory_ptr = WeightsSharing::SharedMemory::Ptr;
@@ -531,7 +529,17 @@ void Graph::ExecuteConstantNodesOnly() const {
return std::make_tuple(hasExternalInvalidEdges, hasLocalAllocatedEdges, outputs);
};
for (const auto &node : constantGraphNodes) {
for (const auto &node : graphNodes) {
{
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, node->profiling.createPrimitive);
DEBUG_LOG(*node);
node->createPrimitive();
}
if (!node->isConstant()) {
continue;
}
if (context->getWeightsCache()) {
auto sharedOutputs = acquireSharedOutputs(node);
@@ -884,15 +892,6 @@ void Graph::Allocate() {
for (auto& edge : graphEdges) edge->validate();
}
// Calls createPrimitive() on every node in graphNodes, in container order.
// NOTE(review): this page is a commit view; the surrounding hunk header (-884,15 +892,6)
// suggests this definition is on the removed side of the change, and the same three
// per-node steps appear inside the loop of Graph::CreatePrimitivesAndExecConstants().
void Graph::CreatePrimitives() {
// Function-level instrumentation labelled "Graph::CreatePrimitives" (presumably an ITT profiling task; macro body not visible here).
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Graph::CreatePrimitives");
for (auto& node : graphNodes) {
// Per-node instrumentation keyed by the node's own profiling.createPrimitive handle.
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, node->profiling.createPrimitive);
// Debug logging of the node; presumably only active in debug-capable builds -- TODO confirm.
DEBUG_LOG(*node);
node->createPrimitive();
}
}
void Graph::PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in) {
if (!IsReady()) IE_THROW()<< "Wrong state. Topology not ready.";

View File

@@ -230,10 +230,9 @@ protected:
void InitEdges();
void Allocate();
void AllocateWithReuse();
void CreatePrimitives();
void ExtractConstantAndExecutableNodes();
void ExtractExecutableNodes();
void ExecuteNode(const NodePtr& node, const dnnl::stream& stream) const;
void ExecuteConstantNodesOnly() const;
void CreatePrimitivesAndExecConstants() const;
void InferStatic(InferRequestBase* request);
void InferDynamic(InferRequestBase* request);
@@ -248,9 +247,8 @@ private:
std::map<std::string, NodePtr> outputNodesMap;
// these node pointers (from graphNodes) are to avoid regular checking for
// constantness of nodes in ExecuteConstantNodesOnly, Infer methods and calls of
// constantness of nodes in Infer methods and calls of
// non-executable (optimized out) nodes, such as Input, Reshape, etc.
std::vector<NodePtr> constantGraphNodes;
std::vector<NodePtr> executableGraphNodes;
std::unordered_map<Node*, size_t> syncNodesInds;

View File

@@ -1501,7 +1501,7 @@ void Convolution::prepareParams() {
auto it = primArgs.find(DNNL_ARG_WEIGHTS);
if (it == primArgs.end() || !prevExecPtr ||
!execPtr->getWeightDesc()->isCompatible(*(prevExecPtr->getWeightDesc()))) {
pendingConstWeightReorder = true;
primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(execPtr->getWeightDesc())->GetPrimitive();
}
} else {
// non-const weight will be reordered by executor on every exec
@@ -1558,11 +1558,6 @@ void Convolution::execute(dnnl::stream strm) {
IE_THROW() << "Can't execute Convolution node with name: " << getName() << ", because executor is not compiled";
}
if (pendingConstWeightReorder) {
primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(execPtr->getWeightDesc())->GetPrimitive();
pendingConstWeightReorder = false;
}
execPtr->exec(primArgs, strm);
}

View File

@@ -98,7 +98,6 @@ private:
const dnnl::engine& engine,
bool constWeight);
};
bool pendingConstWeightReorder = false;
void prepareParams() override;
void execute(dnnl::stream strm) override;