Constant nodes outputs sharing between streams (#4561)
This commit is contained in:
parent
430adbc191
commit
e0573e7e7b
@ -27,6 +27,10 @@ const MKLDNNNodePtr MKLDNNEdge::getChild() const {
|
||||
return childPtr;
|
||||
}
|
||||
|
||||
bool MKLDNNEdge::isUseExternalMemory() const {
|
||||
return externalMemoryPtr;
|
||||
}
|
||||
|
||||
bool MKLDNNEdge::isDropped() {
|
||||
bool not_in_parent = true;
|
||||
bool not_in_child = true;
|
||||
@ -164,6 +168,31 @@ void MKLDNNEdge::allocate(const void* mem_ptr) {
|
||||
status = Status::Allocated;
|
||||
}
|
||||
|
||||
std::string MKLDNNEdge::name() const {
|
||||
auto childPtr = getChild();
|
||||
auto parentPtr = getParent();
|
||||
return childPtr->getName() + "<->" + parentPtr->getName();
|
||||
}
|
||||
|
||||
void MKLDNNEdge::externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache) {
|
||||
if (status != Status::NeedAllocation)
|
||||
return;
|
||||
|
||||
if (weightsCache) {
|
||||
auto alloc = [this] () {
|
||||
allocate();
|
||||
return memoryPtr;
|
||||
};
|
||||
|
||||
auto ptr = weightsCache->findOrCreate(name(), alloc, false);
|
||||
memoryPtr = *ptr;
|
||||
externalMemoryPtr = true;
|
||||
status = Status::Allocated;
|
||||
} else {
|
||||
allocate();
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNEdge::changeStatus(MKLDNNEdge::Status state) {
|
||||
if (state == Status::NotAllocated) {
|
||||
THROW_IE_EXCEPTION << "Incorrect behaviour! Use method sharedMemFrom()";
|
||||
@ -570,6 +599,7 @@ void MKLDNNEdge::validate() {
|
||||
getParent();
|
||||
getChild();
|
||||
getDims();
|
||||
|
||||
if (status != Status::Allocated) {
|
||||
THROW_IE_EXCEPTION << "Error memory is not allocated!";
|
||||
}
|
||||
@ -585,6 +615,10 @@ MKLDNNEdgePtr MKLDNNEdge::getSharedEdge() const {
|
||||
return memoryFromEdgePtr;
|
||||
}
|
||||
|
||||
MKLDNNEdgePtr MKLDNNEdge::getSharedEdge(std::nothrow_t) const {
|
||||
return memoryFromEdge.lock();
|
||||
}
|
||||
|
||||
void MKLDNNEdge::init() {
|
||||
if (status != Status::NeedAllocation && status != Status::Uninitialized)
|
||||
return;
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
#include "mkldnn_memory.h"
|
||||
#include "mkldnn_dims.h"
|
||||
#include "mkldnn_weights_cache.hpp"
|
||||
#include "mkldnn/ie_mkldnn.h"
|
||||
|
||||
#include <map>
|
||||
@ -42,9 +43,10 @@ public:
|
||||
|
||||
void changeStatus(Status state);
|
||||
|
||||
virtual void init();
|
||||
virtual void allocate(const void* mem_ptr = nullptr);
|
||||
virtual void validate();
|
||||
void init();
|
||||
void allocate(const void* mem_ptr = nullptr);
|
||||
void externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache);
|
||||
void validate();
|
||||
void drop();
|
||||
|
||||
const std::shared_ptr<MKLDNNNode> getParent() const;
|
||||
@ -59,12 +61,17 @@ public:
|
||||
|
||||
bool needReorder();
|
||||
bool isDropped();
|
||||
bool isUseExternalMemory() const;
|
||||
|
||||
int getInputNum();
|
||||
int getOutputNum();
|
||||
|
||||
void sharedMemFrom(const MKLDNNEdgePtr& edge);
|
||||
MKLDNNEdgePtr getSharedEdge() const;
|
||||
MKLDNNEdgePtr getSharedEdge(std::nothrow_t) const;
|
||||
|
||||
private:
|
||||
std::string name() const;
|
||||
|
||||
private:
|
||||
std::weak_ptr<MKLDNNNode> parent;
|
||||
@ -72,6 +79,7 @@ private:
|
||||
int parent_port;
|
||||
int child_port;
|
||||
|
||||
bool externalMemoryPtr = false;
|
||||
MKLDNNEdgeWeakPtr memoryFromEdge;
|
||||
MKLDNNDims dims;
|
||||
MKLDNNMemoryPtr memoryPtr;
|
||||
|
@ -64,6 +64,9 @@ using namespace MKLDNNPlugin;
|
||||
using namespace InferenceEngine;
|
||||
using namespace InferenceEngine::details;
|
||||
|
||||
typedef std::unordered_set<MKLDNNEdgePtr> edge_cluster_t;
|
||||
typedef std::vector<edge_cluster_t> edge_clusters_t;
|
||||
|
||||
template<typename NET>
|
||||
void MKLDNNGraph::ApplyUnrollPasses(NET &net) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::ApplyUnrollPasses");
|
||||
@ -445,10 +448,40 @@ void MKLDNNGraph::InitOptimalPrimitiveDescriptors() {
|
||||
void MKLDNNGraph::ExecuteConstantNodesOnly() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::ExecuteConstantNodesOnly");
|
||||
mkldnn::stream stream(eng);
|
||||
|
||||
using shared_memory_ptr = MKLDNNWeightsSharing::MKLDNNSharedMemory::Ptr;
|
||||
|
||||
auto acquireSharedOutputs = [this](MKLDNNNodePtr & graphNode) {
|
||||
std::vector<shared_memory_ptr> outputs;
|
||||
|
||||
for (size_t i = 0; i < graphNode->getChildEdges().size(); ++i) {
|
||||
auto edgePtr = graphNode->getChildEdgeAt(i);
|
||||
if (edgePtr && edgePtr->isUseExternalMemory()) {
|
||||
outputs.emplace_back(weightsCache->get(edgePtr->name()));
|
||||
}
|
||||
}
|
||||
|
||||
return outputs;
|
||||
};
|
||||
|
||||
for (auto &graphNode : graphNodes) {
|
||||
if (!graphNode->isConstant())
|
||||
continue;
|
||||
graphNode->execute(stream);
|
||||
|
||||
if (weightsCache) {
|
||||
auto sharedOutputs = acquireSharedOutputs(graphNode);
|
||||
|
||||
if (std::find_if(sharedOutputs.begin(), sharedOutputs.end(),
|
||||
[](const shared_memory_ptr & ptr) {
|
||||
return !ptr->isValid();
|
||||
}) != sharedOutputs.end()) {
|
||||
graphNode->execute(stream);
|
||||
for (auto & output : sharedOutputs)
|
||||
output->valid(true);
|
||||
}
|
||||
} else {
|
||||
graphNode->execute(stream);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -526,81 +559,85 @@ static inline bool isConstOutput(MKLDNNEdgePtr edge) {
|
||||
return edge->getParent()->isConstant() && !edge->getChild()->isConstant();
|
||||
}
|
||||
|
||||
static edge_clusters_t findEdgeClusters(const std::vector<MKLDNNEdgePtr> & graphEdges) {
|
||||
typedef std::unordered_map<MKLDNNEdgePtr, size_t> edge_cluster_idx_map_t;
|
||||
|
||||
edge_clusters_t edge_clusters;
|
||||
edge_cluster_idx_map_t edge_cluster_indices;
|
||||
|
||||
for (auto &edge : graphEdges) {
|
||||
auto edge_it = edge_cluster_indices.find(edge);
|
||||
|
||||
if (edge_it != edge_cluster_indices.end())
|
||||
continue; // edge is visited
|
||||
|
||||
size_t cluster_idx = edge_clusters.size();
|
||||
MKLDNNEdgePtr last_shared_edge = nullptr;
|
||||
|
||||
// find cluster index
|
||||
for (auto shared_edge = edge->getSharedEdge(std::nothrow);
|
||||
shared_edge;
|
||||
shared_edge = shared_edge->getSharedEdge(std::nothrow)) {
|
||||
auto shared_edge_it = edge_cluster_indices.find(shared_edge);
|
||||
if (shared_edge_it != edge_cluster_indices.end()) {
|
||||
cluster_idx = shared_edge_it->second;
|
||||
last_shared_edge = shared_edge;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// add shared edges to cluster
|
||||
edge_cluster_indices.emplace(edge, cluster_idx);
|
||||
|
||||
if (cluster_idx == edge_clusters.size())
|
||||
edge_clusters.emplace_back(edge_cluster_t { edge });
|
||||
else
|
||||
edge_clusters[cluster_idx].emplace(edge);
|
||||
|
||||
for (auto shared_edge = edge->getSharedEdge(std::nothrow);
|
||||
shared_edge != last_shared_edge;
|
||||
shared_edge = shared_edge->getSharedEdge(std::nothrow)) {
|
||||
edge_cluster_indices.emplace(shared_edge, cluster_idx);
|
||||
edge_clusters[cluster_idx].emplace(shared_edge);
|
||||
}
|
||||
}
|
||||
|
||||
return edge_clusters;
|
||||
}
|
||||
|
||||
void MKLDNNGraph::AllocateWithReuse() {
|
||||
std::vector<std::vector<MKLDNNEdgePtr>> edge_clasters;
|
||||
edge_clusters_t edge_clusters = findEdgeClusters(graphEdges);
|
||||
|
||||
// detect edge clusters which are view on one.
|
||||
for (auto &edge : graphEdges) {
|
||||
MKLDNNEdgePtr par = (edge->getStatus() == MKLDNNEdge::Status::NotAllocated)
|
||||
? edge->getSharedEdge()
|
||||
: nullptr;
|
||||
if (par) {
|
||||
bool found = false;
|
||||
for (auto &claster : edge_clasters) {
|
||||
for (auto &element : claster) {
|
||||
if (element == par) {
|
||||
if (std::find(claster.begin(), claster.end(), edge) == claster.end())
|
||||
claster.push_back(edge);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
size_t edge_clusters_count = edge_clusters.size();
|
||||
|
||||
for (size_t i = 0; i < edge_clusters_count;) {
|
||||
auto &cluster = edge_clusters[i];
|
||||
bool erase = false;
|
||||
for (auto &edge : cluster) {
|
||||
if (edge->getStatus() == MKLDNNEdge::Status::NeedAllocation
|
||||
&& edge->getParent()->isConstant()) {
|
||||
edge->externalAllocate(weightsCache);
|
||||
erase = true;
|
||||
}
|
||||
if (!found)
|
||||
edge_clasters.push_back({par, edge});
|
||||
}
|
||||
|
||||
if (erase) {
|
||||
std::swap(edge_clusters[i], edge_clusters[edge_clusters_count - 1]);
|
||||
--edge_clusters_count;
|
||||
} else {
|
||||
bool found = false;
|
||||
for (auto &claster : edge_clasters) {
|
||||
for (auto &element : claster) {
|
||||
if (element == edge) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
edge_clasters.push_back({edge});
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
//======= WA. getSharedEdge() returns not identical edges ============
|
||||
// Will try to merge clasters with matched edges
|
||||
for (auto &edge : graphEdges) {
|
||||
std::vector<decltype(&edge_clasters[0])> to_merge;
|
||||
|
||||
for (auto &claster : edge_clasters)
|
||||
if (std::find(claster.begin(), claster.end(), edge) != claster.end())
|
||||
to_merge.push_back(&claster);
|
||||
|
||||
if (to_merge.size() > 1) {
|
||||
// Merge clasters
|
||||
auto base_classter = to_merge[0];
|
||||
for (int i = 1; i < to_merge.size(); i++) {
|
||||
base_classter->insert(base_classter->end(),
|
||||
to_merge[i]->begin(), to_merge[i]->end());
|
||||
to_merge[i]->clear();
|
||||
}
|
||||
|
||||
// remove duplicates in merged claster
|
||||
std::sort(base_classter->begin(), base_classter->end());
|
||||
base_classter->erase(std::unique(base_classter->begin(), base_classter->end()),
|
||||
base_classter->end() );
|
||||
|
||||
// remove empty clasters
|
||||
edge_clasters.erase(std::remove_if(edge_clasters.begin(), edge_clasters.end(),
|
||||
[] ( std::vector<MKLDNNEdgePtr> &cls) { return cls.empty(); }),
|
||||
edge_clasters.end());
|
||||
}
|
||||
}
|
||||
//======= End of WA ============
|
||||
edge_clusters.resize(edge_clusters_count);
|
||||
|
||||
const int64_t alignment = 32; // 32 bytes
|
||||
|
||||
std::vector<MemorySolver::Box> boxes(edge_clasters.size());
|
||||
for (int i = 0; i < edge_clasters.size(); i++) {
|
||||
std::vector<MemorySolver::Box> boxes(edge_clusters.size());
|
||||
for (int i = 0; i < edge_clusters.size(); i++) {
|
||||
MemorySolver::Box &box = boxes[i];
|
||||
box = { std::numeric_limits<int>::max(), 0, 0, i };
|
||||
for (auto &edge : edge_clasters[i]) {
|
||||
for (auto &edge : edge_clusters[i]) {
|
||||
int e_start = edge->getParent()->execIndex;
|
||||
int e_finish = edge->getChild()->execIndex;
|
||||
|
||||
@ -630,7 +667,7 @@ void MKLDNNGraph::AllocateWithReuse() {
|
||||
// So we need it untouchable during all execution time
|
||||
// -1 is a place holder for a max timestamp.
|
||||
bool isConst = false, isOutput = false, isInput = false;
|
||||
for (auto &edge : edge_clasters[i]) {
|
||||
for (auto &edge : edge_clusters[i]) {
|
||||
isConst |= isConstOutput(edge);
|
||||
isOutput |= edge->getChild()->getType() == Output;
|
||||
isInput |= edge->getParent()->getType() == Input;
|
||||
@ -654,11 +691,15 @@ void MKLDNNGraph::AllocateWithReuse() {
|
||||
|
||||
memWorkspace = std::make_shared<MKLDNNMemory>(eng);
|
||||
memWorkspace->Create(MKLDNNMemoryDesc(TensorDesc(Precision::I8, {total_size}, Layout::C)));
|
||||
|
||||
if (edge_clusters.empty())
|
||||
return;
|
||||
|
||||
auto* workspace_ptr = static_cast<int8_t*>(memWorkspace->GetData());
|
||||
|
||||
for (int i = 0; i < edge_clasters.size(); i++) {
|
||||
for (int i = 0; i < edge_clusters.size(); i++) {
|
||||
int count = 0;
|
||||
for (auto &edge : edge_clasters[i]) {
|
||||
for (auto &edge : edge_clusters[i]) {
|
||||
if (edge->getStatus() == MKLDNNEdge::Status::NeedAllocation) {
|
||||
int64_t offset = memSolver.getOffset(i);
|
||||
// !! Fallback to individual memory allocation !!
|
||||
|
@ -766,7 +766,7 @@ void MKLDNNNode::prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::pri
|
||||
+ "_" + std::to_string(internalBlob->byteSize())
|
||||
+ "_" + std::to_string(data_hash);
|
||||
|
||||
ptr = weightCache->findOrCreate(string_hash, create);
|
||||
ptr = *weightCache->findOrCreate(string_hash, create);
|
||||
} else {
|
||||
ptr = create();
|
||||
}
|
||||
|
@ -11,6 +11,66 @@ namespace MKLDNNPlugin {
|
||||
|
||||
const SimpleDataHash MKLDNNWeightsSharing::simpleCRC;
|
||||
|
||||
MKLDNNWeightsSharing::MKLDNNSharedMemory::MKLDNNSharedMemory(
|
||||
std::unique_lock<std::mutex> && lock,
|
||||
const MKLDNNMemoryInfo::Ptr & memory,
|
||||
MKLDNNMemoryPtr newPtr)
|
||||
: lock(std::move(lock))
|
||||
, memory(memory)
|
||||
, newPtr(newPtr)
|
||||
{}
|
||||
|
||||
MKLDNNWeightsSharing::MKLDNNSharedMemory::operator MKLDNNMemoryPtr() const {
|
||||
return memory->sharedMemory.lock();
|
||||
}
|
||||
|
||||
bool MKLDNNWeightsSharing::MKLDNNSharedMemory::isValid() const {
|
||||
return memory->valid;
|
||||
}
|
||||
|
||||
void MKLDNNWeightsSharing::MKLDNNSharedMemory::valid(bool b) {
|
||||
memory->valid = b;
|
||||
}
|
||||
|
||||
MKLDNNWeightsSharing::MKLDNNSharedMemory::Ptr MKLDNNWeightsSharing::findOrCreate(
|
||||
const std::string& key,
|
||||
std::function<MKLDNNMemoryPtr(void)> create,
|
||||
bool valid) {
|
||||
std::unique_lock<std::mutex> lock(guard);
|
||||
auto found = sharedWeights.find(key);
|
||||
|
||||
MKLDNNMemoryInfo::Ptr ptr;
|
||||
MKLDNNMemoryPtr newPtr;
|
||||
|
||||
if (found == sharedWeights.end()
|
||||
|| !(ptr = found->second)
|
||||
|| ptr->sharedMemory.expired()) {
|
||||
newPtr = create();
|
||||
ptr = std::make_shared<MKLDNNMemoryInfo>(newPtr, valid);
|
||||
sharedWeights[key] = ptr;
|
||||
}
|
||||
|
||||
return std::make_shared<MKLDNNSharedMemory>(ptr->valid
|
||||
? std::unique_lock<std::mutex>(ptr->guard, std::defer_lock)
|
||||
: std::unique_lock<std::mutex>(ptr->guard), ptr, newPtr);
|
||||
}
|
||||
|
||||
MKLDNNWeightsSharing::MKLDNNSharedMemory::Ptr MKLDNNWeightsSharing::get(const std::string& key) const {
|
||||
std::unique_lock<std::mutex> lock(guard);
|
||||
auto found = sharedWeights.find(key);
|
||||
|
||||
MKLDNNMemoryInfo::Ptr ptr;
|
||||
|
||||
if (found == sharedWeights.end()
|
||||
|| !(ptr = found->second)
|
||||
|| ptr->sharedMemory.expired())
|
||||
THROW_IE_EXCEPTION << "Unknown shared memory with key " << key;
|
||||
|
||||
return std::make_shared<MKLDNNSharedMemory>(ptr->valid
|
||||
? std::unique_lock<std::mutex>(ptr->guard, std::defer_lock)
|
||||
: std::unique_lock<std::mutex>(ptr->guard), ptr);
|
||||
}
|
||||
|
||||
NumaNodesWeights::NumaNodesWeights() {
|
||||
for (auto numa_id : InferenceEngine::getAvailableNUMANodes())
|
||||
_cache_map[numa_id] = std::make_shared<MKLDNNWeightsSharing>();
|
||||
|
@ -52,25 +52,51 @@ protected:
|
||||
* Is a thread safe
|
||||
*/
|
||||
class MKLDNNWeightsSharing {
|
||||
struct MKLDNNMemoryInfo {
|
||||
typedef std::shared_ptr<MKLDNNMemoryInfo> Ptr;
|
||||
|
||||
MKLDNNMemoryInfo(MKLDNNMemoryPtr memoryPtr, bool valid)
|
||||
: sharedMemory(memoryPtr)
|
||||
, valid(valid)
|
||||
{}
|
||||
|
||||
std::mutex guard;
|
||||
std::weak_ptr<MKLDNNMemory> sharedMemory;
|
||||
bool valid;
|
||||
};
|
||||
|
||||
public:
|
||||
typedef std::shared_ptr<MKLDNNWeightsSharing> Ptr;
|
||||
MKLDNNMemoryPtr findOrCreate(const std::string& name_hash,
|
||||
std::function<MKLDNNMemoryPtr(void)> create) {
|
||||
std::unique_lock<std::mutex> lock(guard);
|
||||
auto found = sharedWeights.find(name_hash);
|
||||
|
||||
MKLDNNMemoryPtr ptr;
|
||||
if (found == sharedWeights.end() || !(ptr = found->second.lock())) {
|
||||
ptr = create();
|
||||
sharedWeights[name_hash] = ptr;
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
class MKLDNNSharedMemory {
|
||||
public:
|
||||
typedef std::shared_ptr<MKLDNNSharedMemory> Ptr;
|
||||
|
||||
MKLDNNSharedMemory(std::unique_lock<std::mutex> && lock,
|
||||
const MKLDNNMemoryInfo::Ptr & memory,
|
||||
MKLDNNMemoryPtr newPtr = nullptr);
|
||||
|
||||
operator MKLDNNMemoryPtr() const;
|
||||
bool isValid() const;
|
||||
void valid(bool b);
|
||||
|
||||
private:
|
||||
std::unique_lock<std::mutex> lock;
|
||||
MKLDNNMemoryInfo::Ptr memory;
|
||||
MKLDNNMemoryPtr newPtr;
|
||||
};
|
||||
|
||||
MKLDNNSharedMemory::Ptr findOrCreate(const std::string& key,
|
||||
std::function<MKLDNNMemoryPtr(void)> create,
|
||||
bool valid = true);
|
||||
|
||||
MKLDNNSharedMemory::Ptr get(const std::string& key) const;
|
||||
|
||||
static const SimpleDataHash& GetHashFunc () { return simpleCRC; }
|
||||
|
||||
protected:
|
||||
std::unordered_map<std::string, std::weak_ptr<MKLDNNMemory>> sharedWeights;
|
||||
std::mutex guard;
|
||||
mutable std::mutex guard;
|
||||
std::unordered_map<std::string, MKLDNNMemoryInfo::Ptr> sharedWeights;
|
||||
static const SimpleDataHash simpleCRC;
|
||||
};
|
||||
|
||||
|
@ -33,4 +33,3 @@ private:
|
||||
};
|
||||
|
||||
} // namespace MKLDNNPlugin
|
||||
|
||||
|
@ -64,4 +64,3 @@ private:
|
||||
};
|
||||
|
||||
} // namespace MKLDNNPlugin
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user