Share constant node outputs between streams (#4561)

This commit is contained in:
Vladislav Volkov 2021-03-03 16:57:15 +03:00 committed by GitHub
parent 430adbc191
commit e0573e7e7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 253 additions and 86 deletions

View File

@ -27,6 +27,10 @@ const MKLDNNNodePtr MKLDNNEdge::getChild() const {
return childPtr;
}
// Returns true when this edge's memory was obtained from the weights cache
// (set in externalAllocate) rather than allocated by the edge itself.
bool MKLDNNEdge::isUseExternalMemory() const {
return externalMemoryPtr;
}
bool MKLDNNEdge::isDropped() {
bool not_in_parent = true;
bool not_in_child = true;
@ -164,6 +168,31 @@ void MKLDNNEdge::allocate(const void* mem_ptr) {
status = Status::Allocated;
}
std::string MKLDNNEdge::name() const {
auto childPtr = getChild();
auto parentPtr = getParent();
return childPtr->getName() + "<->" + parentPtr->getName();
}
// Allocates this edge's memory through the weights cache so the buffer can
// be shared between streams; falls back to a regular allocation when no
// cache is available. Only edges in the NeedAllocation state are processed.
void MKLDNNEdge::externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache) {
    if (status != Status::NeedAllocation)
        return;

    if (!weightsCache) {
        allocate();
        return;
    }

    // The cache entry is created with valid == false so that the first
    // stream to execute the producing node fills the shared buffer.
    auto create = [this] {
        allocate();
        return memoryPtr;
    };

    auto shared = weightsCache->findOrCreate(name(), create, false);
    memoryPtr = *shared;
    externalMemoryPtr = true;
    status = Status::Allocated;
}
void MKLDNNEdge::changeStatus(MKLDNNEdge::Status state) {
if (state == Status::NotAllocated) {
THROW_IE_EXCEPTION << "Incorrect behaviour! Use method sharedMemFrom()";
@ -570,6 +599,7 @@ void MKLDNNEdge::validate() {
getParent();
getChild();
getDims();
if (status != Status::Allocated) {
THROW_IE_EXCEPTION << "Error memory is not allocated!";
}
@ -585,6 +615,10 @@ MKLDNNEdgePtr MKLDNNEdge::getSharedEdge() const {
return memoryFromEdgePtr;
}
// Non-throwing variant: returns the edge this one shares memory with, or an
// empty pointer when no shared edge was set or it has already expired.
MKLDNNEdgePtr MKLDNNEdge::getSharedEdge(std::nothrow_t) const {
return memoryFromEdge.lock();
}
void MKLDNNEdge::init() {
if (status != Status::NeedAllocation && status != Status::Uninitialized)
return;

View File

@ -8,6 +8,7 @@
#include "mkldnn_memory.h"
#include "mkldnn_dims.h"
#include "mkldnn_weights_cache.hpp"
#include "mkldnn/ie_mkldnn.h"
#include <map>
@ -42,9 +43,10 @@ public:
void changeStatus(Status state);
virtual void init();
virtual void allocate(const void* mem_ptr = nullptr);
virtual void validate();
void init();
void allocate(const void* mem_ptr = nullptr);
void externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache);
void validate();
void drop();
const std::shared_ptr<MKLDNNNode> getParent() const;
@ -59,12 +61,17 @@ public:
bool needReorder();
bool isDropped();
bool isUseExternalMemory() const;
int getInputNum();
int getOutputNum();
void sharedMemFrom(const MKLDNNEdgePtr& edge);
MKLDNNEdgePtr getSharedEdge() const;
MKLDNNEdgePtr getSharedEdge(std::nothrow_t) const;
private:
std::string name() const;
private:
std::weak_ptr<MKLDNNNode> parent;
@ -72,6 +79,7 @@ private:
int parent_port;
int child_port;
bool externalMemoryPtr = false;
MKLDNNEdgeWeakPtr memoryFromEdge;
MKLDNNDims dims;
MKLDNNMemoryPtr memoryPtr;

View File

@ -64,6 +64,9 @@ using namespace MKLDNNPlugin;
using namespace InferenceEngine;
using namespace InferenceEngine::details;
typedef std::unordered_set<MKLDNNEdgePtr> edge_cluster_t;
typedef std::vector<edge_cluster_t> edge_clusters_t;
template<typename NET>
void MKLDNNGraph::ApplyUnrollPasses(NET &net) {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::ApplyUnrollPasses");
@ -445,10 +448,40 @@ void MKLDNNGraph::InitOptimalPrimitiveDescriptors() {
void MKLDNNGraph::ExecuteConstantNodesOnly() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::ExecuteConstantNodesOnly");
mkldnn::stream stream(eng);
using shared_memory_ptr = MKLDNNWeightsSharing::MKLDNNSharedMemory::Ptr;
auto acquireSharedOutputs = [this](MKLDNNNodePtr & graphNode) {
std::vector<shared_memory_ptr> outputs;
for (size_t i = 0; i < graphNode->getChildEdges().size(); ++i) {
auto edgePtr = graphNode->getChildEdgeAt(i);
if (edgePtr && edgePtr->isUseExternalMemory()) {
outputs.emplace_back(weightsCache->get(edgePtr->name()));
}
}
return outputs;
};
for (auto &graphNode : graphNodes) {
if (!graphNode->isConstant())
continue;
graphNode->execute(stream);
if (weightsCache) {
auto sharedOutputs = acquireSharedOutputs(graphNode);
if (std::find_if(sharedOutputs.begin(), sharedOutputs.end(),
[](const shared_memory_ptr & ptr) {
return !ptr->isValid();
}) != sharedOutputs.end()) {
graphNode->execute(stream);
for (auto & output : sharedOutputs)
output->valid(true);
}
} else {
graphNode->execute(stream);
}
}
}
@ -526,81 +559,85 @@ static inline bool isConstOutput(MKLDNNEdgePtr edge) {
return edge->getParent()->isConstant() && !edge->getChild()->isConstant();
}
// Groups edges that transitively share memory (via getSharedEdge chains)
// into clusters; every edge in a cluster must be backed by the same buffer.
static edge_clusters_t findEdgeClusters(const std::vector<MKLDNNEdgePtr> & graphEdges) {
    using cluster_index_map_t = std::unordered_map<MKLDNNEdgePtr, size_t>;

    edge_clusters_t clusters;
    cluster_index_map_t clusterIndexOf;

    for (auto &edge : graphEdges) {
        if (clusterIndexOf.find(edge) != clusterIndexOf.end())
            continue;  // already assigned to a cluster

        // Walk the shared-edge chain looking for an edge that already
        // belongs to a cluster; remember where the walk stopped so the
        // second pass below only registers the new part of the chain.
        size_t clusterIdx = clusters.size();
        MKLDNNEdgePtr stopEdge = nullptr;
        auto chained = edge->getSharedEdge(std::nothrow);
        while (chained) {
            auto it = clusterIndexOf.find(chained);
            if (it != clusterIndexOf.end()) {
                clusterIdx = it->second;
                stopEdge = chained;
                break;
            }
            chained = chained->getSharedEdge(std::nothrow);
        }

        // Register the edge itself, opening a new cluster when the chain
        // did not reach a known one.
        clusterIndexOf.emplace(edge, clusterIdx);
        if (clusterIdx == clusters.size())
            clusters.emplace_back(edge_cluster_t { edge });
        else
            clusters[clusterIdx].emplace(edge);

        // Register every edge on the chain up to (excluding) the stop point.
        for (auto link = edge->getSharedEdge(std::nothrow);
             link != stopEdge;
             link = link->getSharedEdge(std::nothrow)) {
            clusterIndexOf.emplace(link, clusterIdx);
            clusters[clusterIdx].emplace(link);
        }
    }

    return clusters;
}
void MKLDNNGraph::AllocateWithReuse() {
std::vector<std::vector<MKLDNNEdgePtr>> edge_clasters;
edge_clusters_t edge_clusters = findEdgeClusters(graphEdges);
// detect edge clusters which are view on one.
for (auto &edge : graphEdges) {
MKLDNNEdgePtr par = (edge->getStatus() == MKLDNNEdge::Status::NotAllocated)
? edge->getSharedEdge()
: nullptr;
if (par) {
bool found = false;
for (auto &claster : edge_clasters) {
for (auto &element : claster) {
if (element == par) {
if (std::find(claster.begin(), claster.end(), edge) == claster.end())
claster.push_back(edge);
found = true;
break;
}
}
size_t edge_clusters_count = edge_clusters.size();
for (size_t i = 0; i < edge_clusters_count;) {
auto &cluster = edge_clusters[i];
bool erase = false;
for (auto &edge : cluster) {
if (edge->getStatus() == MKLDNNEdge::Status::NeedAllocation
&& edge->getParent()->isConstant()) {
edge->externalAllocate(weightsCache);
erase = true;
}
if (!found)
edge_clasters.push_back({par, edge});
}
if (erase) {
std::swap(edge_clusters[i], edge_clusters[edge_clusters_count - 1]);
--edge_clusters_count;
} else {
bool found = false;
for (auto &claster : edge_clasters) {
for (auto &element : claster) {
if (element == edge) {
found = true;
break;
}
}
}
if (!found)
edge_clasters.push_back({edge});
++i;
}
}
//======= WA. getSharedEdge() returns not identical edges ============
// Will try to merge clasters with matched edges
for (auto &edge : graphEdges) {
std::vector<decltype(&edge_clasters[0])> to_merge;
for (auto &claster : edge_clasters)
if (std::find(claster.begin(), claster.end(), edge) != claster.end())
to_merge.push_back(&claster);
if (to_merge.size() > 1) {
// Merge clasters
auto base_classter = to_merge[0];
for (int i = 1; i < to_merge.size(); i++) {
base_classter->insert(base_classter->end(),
to_merge[i]->begin(), to_merge[i]->end());
to_merge[i]->clear();
}
// remove duplicates in merged claster
std::sort(base_classter->begin(), base_classter->end());
base_classter->erase(std::unique(base_classter->begin(), base_classter->end()),
base_classter->end() );
// remove empty clasters
edge_clasters.erase(std::remove_if(edge_clasters.begin(), edge_clasters.end(),
[] ( std::vector<MKLDNNEdgePtr> &cls) { return cls.empty(); }),
edge_clasters.end());
}
}
//======= End of WA ============
edge_clusters.resize(edge_clusters_count);
const int64_t alignment = 32; // 32 bytes
std::vector<MemorySolver::Box> boxes(edge_clasters.size());
for (int i = 0; i < edge_clasters.size(); i++) {
std::vector<MemorySolver::Box> boxes(edge_clusters.size());
for (int i = 0; i < edge_clusters.size(); i++) {
MemorySolver::Box &box = boxes[i];
box = { std::numeric_limits<int>::max(), 0, 0, i };
for (auto &edge : edge_clasters[i]) {
for (auto &edge : edge_clusters[i]) {
int e_start = edge->getParent()->execIndex;
int e_finish = edge->getChild()->execIndex;
@ -630,7 +667,7 @@ void MKLDNNGraph::AllocateWithReuse() {
// So we need it untouchable during all execution time
// -1 is a place holder for a max timestamp.
bool isConst = false, isOutput = false, isInput = false;
for (auto &edge : edge_clasters[i]) {
for (auto &edge : edge_clusters[i]) {
isConst |= isConstOutput(edge);
isOutput |= edge->getChild()->getType() == Output;
isInput |= edge->getParent()->getType() == Input;
@ -654,11 +691,15 @@ void MKLDNNGraph::AllocateWithReuse() {
memWorkspace = std::make_shared<MKLDNNMemory>(eng);
memWorkspace->Create(MKLDNNMemoryDesc(TensorDesc(Precision::I8, {total_size}, Layout::C)));
if (edge_clusters.empty())
return;
auto* workspace_ptr = static_cast<int8_t*>(memWorkspace->GetData());
for (int i = 0; i < edge_clasters.size(); i++) {
for (int i = 0; i < edge_clusters.size(); i++) {
int count = 0;
for (auto &edge : edge_clasters[i]) {
for (auto &edge : edge_clusters[i]) {
if (edge->getStatus() == MKLDNNEdge::Status::NeedAllocation) {
int64_t offset = memSolver.getOffset(i);
// !! Fallback to individual memory allocation !!

View File

@ -766,7 +766,7 @@ void MKLDNNNode::prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::pri
+ "_" + std::to_string(internalBlob->byteSize())
+ "_" + std::to_string(data_hash);
ptr = weightCache->findOrCreate(string_hash, create);
ptr = *weightCache->findOrCreate(string_hash, create);
} else {
ptr = create();
}

View File

@ -11,6 +11,66 @@ namespace MKLDNNPlugin {
const SimpleDataHash MKLDNNWeightsSharing::simpleCRC;
// Wraps a cache entry: takes ownership of the (possibly locked) per-entry
// lock for this object's lifetime, and keeps a strong reference to freshly
// created memory (newPtr) so it is not destroyed while the entry is filled —
// the entry itself only holds a weak_ptr to the memory.
MKLDNNWeightsSharing::MKLDNNSharedMemory::MKLDNNSharedMemory(
std::unique_lock<std::mutex> && lock,
const MKLDNNMemoryInfo::Ptr & memory,
MKLDNNMemoryPtr newPtr)
: lock(std::move(lock))
, memory(memory)
, newPtr(newPtr)
{}
// Implicit conversion to the underlying memory; yields an empty pointer if
// the weakly-referenced memory has already been released.
MKLDNNWeightsSharing::MKLDNNSharedMemory::operator MKLDNNMemoryPtr() const {
return memory->sharedMemory.lock();
}
// Reports the entry's valid flag — set once the producing node has filled
// the shared buffer (see MKLDNNGraph::ExecuteConstantNodesOnly).
bool MKLDNNWeightsSharing::MKLDNNSharedMemory::isValid() const {
return memory->valid;
}
// Sets the entry's valid flag; passed true after the producing node has
// executed and the shared buffer holds real data.
void MKLDNNWeightsSharing::MKLDNNSharedMemory::valid(bool b) {
memory->valid = b;
}
// Looks up the cache entry for `key`, creating the memory via `create` when
// the entry is absent or its weakly-held memory has expired. The entry's
// valid flag is initialized from `valid` only on creation.
//
// The returned MKLDNNSharedMemory holds the entry's per-entry mutex LOCKED
// while the entry is still invalid (not yet filled), so concurrent users
// block until the holder releases it; for already-valid entries the lock is
// constructed deferred (not taken).
MKLDNNWeightsSharing::MKLDNNSharedMemory::Ptr MKLDNNWeightsSharing::findOrCreate(
const std::string& key,
std::function<MKLDNNMemoryPtr(void)> create,
bool valid) {
std::unique_lock<std::mutex> lock(guard);
auto found = sharedWeights.find(key);
MKLDNNMemoryInfo::Ptr ptr;
MKLDNNMemoryPtr newPtr;
// (Re)create the memory when the key is unknown, the stored entry is null,
// or the previously cached memory has been destroyed (weak_ptr expired).
if (found == sharedWeights.end()
|| !(ptr = found->second)
|| ptr->sharedMemory.expired()) {
newPtr = create();
ptr = std::make_shared<MKLDNNMemoryInfo>(newPtr, valid);
sharedWeights[key] = ptr;
}
return std::make_shared<MKLDNNSharedMemory>(ptr->valid
? std::unique_lock<std::mutex>(ptr->guard, std::defer_lock)
: std::unique_lock<std::mutex>(ptr->guard), ptr, newPtr);
}
// Returns the cache entry registered under `key`; throws when the key is
// unknown or the cached memory has already expired. As in findOrCreate,
// the entry's mutex is acquired when the entry is not yet valid, so the
// caller waits for the producer to finish.
MKLDNNWeightsSharing::MKLDNNSharedMemory::Ptr MKLDNNWeightsSharing::get(const std::string& key) const {
    std::unique_lock<std::mutex> lock(guard);

    auto found = sharedWeights.find(key);
    MKLDNNMemoryInfo::Ptr info;

    if (found != sharedWeights.end())
        info = found->second;

    if (!info || info->sharedMemory.expired())
        THROW_IE_EXCEPTION << "Unknown shared memory with key " << key;

    std::unique_lock<std::mutex> memoryLock = info->valid
        ? std::unique_lock<std::mutex>(info->guard, std::defer_lock)
        : std::unique_lock<std::mutex>(info->guard);

    return std::make_shared<MKLDNNSharedMemory>(std::move(memoryLock), info);
}
NumaNodesWeights::NumaNodesWeights() {
for (auto numa_id : InferenceEngine::getAvailableNUMANodes())
_cache_map[numa_id] = std::make_shared<MKLDNNWeightsSharing>();

View File

@ -52,25 +52,51 @@ protected:
* Is a thread safe
*/
class MKLDNNWeightsSharing {
struct MKLDNNMemoryInfo {
typedef std::shared_ptr<MKLDNNMemoryInfo> Ptr;
MKLDNNMemoryInfo(MKLDNNMemoryPtr memoryPtr, bool valid)
: sharedMemory(memoryPtr)
, valid(valid)
{}
std::mutex guard;
std::weak_ptr<MKLDNNMemory> sharedMemory;
bool valid;
};
public:
typedef std::shared_ptr<MKLDNNWeightsSharing> Ptr;
MKLDNNMemoryPtr findOrCreate(const std::string& name_hash,
std::function<MKLDNNMemoryPtr(void)> create) {
std::unique_lock<std::mutex> lock(guard);
auto found = sharedWeights.find(name_hash);
MKLDNNMemoryPtr ptr;
if (found == sharedWeights.end() || !(ptr = found->second.lock())) {
ptr = create();
sharedWeights[name_hash] = ptr;
}
return ptr;
}
class MKLDNNSharedMemory {
public:
typedef std::shared_ptr<MKLDNNSharedMemory> Ptr;
MKLDNNSharedMemory(std::unique_lock<std::mutex> && lock,
const MKLDNNMemoryInfo::Ptr & memory,
MKLDNNMemoryPtr newPtr = nullptr);
operator MKLDNNMemoryPtr() const;
bool isValid() const;
void valid(bool b);
private:
std::unique_lock<std::mutex> lock;
MKLDNNMemoryInfo::Ptr memory;
MKLDNNMemoryPtr newPtr;
};
MKLDNNSharedMemory::Ptr findOrCreate(const std::string& key,
std::function<MKLDNNMemoryPtr(void)> create,
bool valid = true);
MKLDNNSharedMemory::Ptr get(const std::string& key) const;
static const SimpleDataHash& GetHashFunc () { return simpleCRC; }
protected:
std::unordered_map<std::string, std::weak_ptr<MKLDNNMemory>> sharedWeights;
std::mutex guard;
mutable std::mutex guard;
std::unordered_map<std::string, MKLDNNMemoryInfo::Ptr> sharedWeights;
static const SimpleDataHash simpleCRC;
};

View File

@ -33,4 +33,3 @@ private:
};
} // namespace MKLDNNPlugin

View File

@ -64,4 +64,3 @@ private:
};
} // namespace MKLDNNPlugin