Shared memory_solver class (#7918)

* Shared memory_solver class

* fixed clang format
This commit is contained in:
Mikhail Ryzhov 2021-10-15 09:58:55 +03:00 committed by GitHub
parent 345033f3e1
commit a4cc31c0b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 240 additions and 259 deletions

View File

@ -19,7 +19,7 @@
#include "mkldnn_graph_optimizer.h"
#include "mkldnn_extension_utils.h"
#include "mkldnn_extension_mngr.h"
#include "mkldnn_memory_solver.hpp"
#include "memory_solver.hpp"
#include "mkldnn_itt.h"
#include "mkldnn_infer_request.h"
#include <nodes/mkldnn_input_node.h>

View File

@ -1,142 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <ie_common.h>
#include "mkldnn_memory_solver.hpp"
#include <algorithm>
#include <vector>
#include <map>
namespace MKLDNNPlugin {
/**
 * Copies the boxes and normalizes their live times onto a dense timeline:
 *  - finish == -1 ("till the end") is replaced with the largest timestamp found in the input;
 *  - boxes are ordered by (start, finish);
 *  - timestamps at which no box starts are squeezed out and start/finish are shifted down
 *    accordingly.
 * The number of remaining time slots is stored in _time_duration.
 *
 * No input validation is done yet (see the TODO below); in particular a negative Box.start
 * is used as an index into ts_exist and would be out of range.
 */
MemorySolver::MemorySolver(const std::vector<Box>& boxes) : _boxes(boxes) {
int max_ts = 0;
// TODO: add validation of data correctness:
// 1. Box.start >= 0 and Box.finish >= -1
// 2. Box.finish >= Box.start (except Box.finish == -1)
// 3. Box.size > 0 (or == 0 ?)
// 4. Box.id == any unique value
// Largest timestamp mentioned by any box (never below 0); it becomes the "end of execution"
for (const Box &box : _boxes) max_ts = std::max(std::max(max_ts, box.start), box.finish);
// Resolve the reserved "till the end" marker into that concrete timestamp
for (Box &box : _boxes) if (box.finish == -1) box.finish = max_ts;
// sort by start and finish ts
std::sort(_boxes.begin(), _boxes.end(), [](const Box& l, const Box& r) -> bool
{ return l.start < r.start || (l.start == r.start && l.finish < r.finish); });
// remove unused timestamps (not a begin of some box)
// each ts should start a box
std::vector<bool> ts_exist(max_ts+1);
for (const Box &b : _boxes) ts_exist[b.start] = true;
// ts_s / ts_f are scan cursors over the original timeline for starts / finishes;
// rm_ts_s / rm_ts_f count the box-less timestamps each cursor has stepped over so far.
int rm_ts_s = 0, rm_ts_f = 0;
int ts_s = 0, ts_f = 0;
for (Box &b : _boxes) {
// Move the start cursor up to b.start, counting timestamps at which no box starts
while (ts_s < b.start) if (!ts_exist[ts_s++]) rm_ts_s++;
// Finishes are not monotonic in this order: if the finish cursor already ran past this
// box's finish, rewind it (and its counter) to the start cursor and rescan from there
if (ts_f > b.finish + 1) { ts_f = ts_s; rm_ts_f = rm_ts_s; }
// Move the finish cursor just past b.finish, counting box-less timestamps up to and including it
while (ts_f <= b.finish) if (!ts_exist[ts_f++]) rm_ts_f++;
// Shift both ends down by the number of removed timestamps that precede them
b.start -= rm_ts_s;
b.finish -= rm_ts_f;
}
// Timestamps left on the compressed timeline, as seen by the finish cursor after the last box
_time_duration = ts_f - rm_ts_f;
}
/**
 * Lifts box_new on top of box_old when the two intersect on the memory axis.
 *
 * Both boxes are treated as the half-open ranges [id, id + size), i.e. the id field is
 * interpreted as a memory offset here.
 *
 * @return true if box_new was moved (its id now equals the upper bound of box_old),
 *         false if the ranges do not intersect and box_new is left untouched.
 */
inline bool popupTogetherWith(MemorySolver::Box &box_new, const MemorySolver::Box &box_old) {
    const bool ranges_intersect = box_new.id + box_new.size > box_old.id &&
                                  box_old.id + box_old.size > box_new.id;
    if (!ranges_intersect)
        return false;

    // There is an intersection: put the new box right above the old one.
    box_new.id = box_old.id + box_old.size;
    return true;
}
int64_t MemorySolver::solve() {
maxTopDepth(); // at first make sure that we no need more for boxes sorted by box.start
std::vector<std::vector<const Box*>> time_slots(_time_duration);
for (auto & slot : time_slots) slot.reserve(_top_depth); // 2D array [_time_duration][_top_depth]
// Sort be box size. First is biggest
// Comment this line to check other order of box putting
std::sort(_boxes.begin(), _boxes.end(), [](const Box& l, const Box& r)
{ return l.size > r.size; });
int64_t _min_required = 0;
for (Box& box : _boxes) {
// start from bottom and will lift it up if intersect with other present
int64_t id = box.id;
box.id = 0; // id will be used as a temp offset storage
bool popped_up;
do {
popped_up = false;
for (int i_slot = box.start; i_slot <= box.finish; i_slot++) {
for (auto *box_in_slot : time_slots[i_slot]) {
// intersect with already stored boxes for all covered time slots
// and move up the new one if needed
popped_up |= popupTogetherWith(box, *box_in_slot);
}
}
} while (popped_up);
// add current box to covered time slot
for (int i_slot = box.start; i_slot <= box.finish; i_slot++)
time_slots[i_slot].push_back(&box);
// store the max top bound for each box
_min_required = std::max(_min_required, box.id + box.size);
_offsets[id] = box.id; // TODO: move to constructor (use .insert instead of [])
}
return _min_required;
}
/** Returns the cached _depth value, running calcDepth() first while it still holds -1. */
int64_t MemorySolver::maxDepth() {
    const bool not_calculated_yet = (_depth == -1);
    if (not_calculated_yet) {
        calcDepth();
    }
    return _depth;
}
/** Returns the cached _top_depth value, running calcDepth() first while it still holds -1. */
int64_t MemorySolver::maxTopDepth() {
    const bool not_calculated_yet = (_top_depth == -1);
    if (not_calculated_yet) {
        calcDepth();
    }
    return _top_depth;
}
/**
 * Looks up the offset recorded in _offsets for the given box id.
 * Raises an error through IE_THROW() when no entry exists for that id.
 */
int64_t MemorySolver::getOffset(int id) const {
    const auto entry = _offsets.find(id);
    const bool is_missing = (entry == _offsets.end());
    if (is_missing) {
        IE_THROW() << "There are no box for provided ID";
    }
    return entry->second;
}
//======== Private =============//
/**
 * Sweeps over _boxes in their current order and updates _top_depth (max number of boxes
 * alive at once) and _depth (max sum of sizes alive at once).
 *
 * Each visited box is counted as alive and scheduled for release at finish + 1; boxes
 * scheduled for the start time of the visited box are released before the maxima are updated.
 */
void MemorySolver::calcDepth() {
    int64_t top_depth = 0;  // number of boxes alive right now
    int64_t depth = 0;      // sum of sizes of boxes alive right now

    // release time -> boxes that stop being alive at that time
    std::map<int64_t, std::vector<const Box*>> pending_release;

    for (const Box& current : _boxes) {
        const int64_t now = current.start;

        depth += current.size;
        ++top_depth;
        pending_release[current.finish + 1].push_back(&current);

        const auto expired = pending_release.find(now);
        if (expired != pending_release.end()) {
            for (const Box* released : expired->second) {
                depth -= released->size;
                --top_depth;
            }
            pending_release.erase(expired);
        }

        IE_ASSERT(top_depth > 0);

        _top_depth = std::max(_top_depth, top_depth);
        _depth = std::max(_depth, depth);
    }
}
} // namespace MKLDNNPlugin

View File

@ -1,94 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief The header provides a declaration of MemorySolver utility class
* @file
*/
#pragma once
#include "ie_api.h"
#include <stdint.h>
#include <vector>
#include <map>
namespace MKLDNNPlugin {
/**
* @brief Helps to solve issue of optimal memory allocation only for particular
* execution order.
*
* It works with abstract data description where
* - Node is index in execution order
* - Edge is Box object with size and start-finish indexes (live time)
*
* Example:
*
* Mem(offset)
* | |____| Box {4, 5}
* | |_____________| Box {2, 6}
* | |____| Box {3, 4}
* | |____| Box {2, 3}
* | |____| Box {6, 7}
* |_____________________________________
* 1 2 3 4 5 6 7 8 9 ExecOrder
*
* Boxes that intersect on the ExecOrder axis must not intersect on the Mem axis.
* The goal is to define a minimal required memory blob to store all boxes with such
* constraints and specify all corresponding position on Mem axis(through offset field).
*
* NOTE!
* Exec order is predefined.
*/
/**
 * Memory placement solver: takes a set of boxes (size + live time) and assigns each of
 * them an offset inside one common memory blob.
 */
class MemorySolver {
public:
/** @brief Representation of an edge: its size and live time. */
struct Box {
/** Execution order index of first use. The data will be produced here. */
int start;
/**
* The execution order index of last use. After that the data will be released.
* -1 is a reserved value for "till the end": the data stays alive until the very
* end of execution.
*/
int finish;
/** Size of data. In abstract unit of measure (byte, simd, cache line, ...) */
int64_t size;
/** Box identifier, unique for each box. Used to query the calculated offset. */
int64_t id;
};
/** @brief Creates a solver for the given boxes; the vector is copied into the solver. */
explicit MemorySolver(const std::vector<Box>& boxes);
/**
* @brief Solve memory location with maximal reuse.
* @return Size of the common memory blob required for storing all boxes
*/
int64_t solve();
/**
* Provides the calculated offset for the specified box id.
* NOTE(review): the parameter is int while Box::id is int64_t, so wider ids are
* narrowed at the call site - confirm whether that is intended.
*/
int64_t getOffset(int id) const;
/** Additional info. Max sum of box sizes required for any time stamp. */
int64_t maxDepth();
/** Additional info. Max num of boxes required for any time stamp. */
int64_t maxTopDepth();
private:
/** Private copy of the input boxes. */
std::vector<Box> _boxes;
/** Box id -> offset lookup table served by getOffset(). */
std::map<int64_t, int64_t> _offsets;
/** Max number of simultaneously alive boxes; initialized to -1. */
int64_t _top_depth = -1;
/** Max sum of sizes of simultaneously alive boxes; initialized to -1. */
int64_t _depth = -1;
/** Number of time slots; initialized to -1. */
int _time_duration = -1;
/** Helper that computes _top_depth and _depth. */
void calcDepth();
};
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,217 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief The header provides a declaration of MemorySolver utility class
* @file
*/
#pragma once
#include <stdint.h>
#include <algorithm>
#include <map>
#include <vector>
/**
* @brief Helps to solve issue of optimal memory allocation only for particular
* execution order.
*
* It works with abstract data description where
* - Node is index in execution order
* - Edge is Box object with size and start-finish indexes (live time)
*
* Example:
*
* Mem(offset)
* | |____| Box {4, 5}
* | |_____________| Box {2, 6}
* | |____| Box {3, 4}
* | |____| Box {2, 3}
* | |____| Box {6, 7}
* |_____________________________________
* 1 2 3 4 5 6 7 8 9 ExecOrder
*
* Boxes that intersect on the ExecOrder axis must not intersect on the Mem axis.
* The goal is to define a minimal required memory blob to store all boxes with such
* constraints and specify all corresponding position on Mem axis(through offset field).
*
* NOTE!
* Exec order is predefined.
*/
class MemorySolver {
public:
    /** @brief Representation of an edge: its size and live time. */
    struct Box {
        /** Execution order index of first use. The data will be produced here. */
        int start;

        /**
         * The execution order index of last use. After that the data will be released.
         * -1 is a reserved value for "till the end": the data stays alive until the very
         * end of execution.
         */
        int finish;

        /** Size of data. In abstract unit of measure (byte, simd, cache line, ...) */
        int64_t size;

        /** Box identifier, unique for each box. Used to query the calculated offset. */
        int64_t id;
    };

    /**
     * @brief Copies the boxes and normalizes their live times onto a dense timeline.
     *
     * finish == -1 is replaced with the largest timestamp found in the input, boxes are
     * ordered by (start, finish) and timestamps at which no box starts are squeezed out.
     *
     * @param boxes Boxes to place. Each box must have start >= 0 and either finish == -1
     *              or finish >= start.
     * @throws Error raised through IE_THROW() when a box violates the live time constraints.
     */
    explicit MemorySolver(const std::vector<Box>& boxes) : _boxes(boxes) {
        // Live times are used as indexes below (ts_exist here, time slots in solve()),
        // so malformed ones must be rejected instead of indexing out of range.
        // TODO: add validation of the remaining constraints:
        //   - Box.size > 0 (or == 0 ?)
        //   - Box.id == any unique value
        for (const Box& box : _boxes) {
            const bool till_the_end = (box.finish == -1);
            if (box.start < 0 || (!till_the_end && box.finish < box.start))
                IE_THROW() << "Incorrect box live time: start = " << box.start << ", finish = " << box.finish;
        }

        // Largest timestamp mentioned by any box; it plays the role of "end of execution".
        int max_ts = 0;
        for (const Box& box : _boxes)
            max_ts = std::max(std::max(max_ts, box.start), box.finish);
        for (Box& box : _boxes)
            if (box.finish == -1)
                box.finish = max_ts;

        // sort by start and finish ts
        std::sort(_boxes.begin(), _boxes.end(), [](const Box& l, const Box& r) -> bool {
            return l.start < r.start || (l.start == r.start && l.finish < r.finish);
        });

        // remove unused timestamps (not a begin of some box)
        // each ts should start a box
        std::vector<bool> ts_exist(max_ts + 1);
        for (const Box& b : _boxes)
            ts_exist[b.start] = true;

        // ts_s / ts_f are scan cursors over the original timeline for starts / finishes;
        // rm_ts_s / rm_ts_f count the box-less timestamps each cursor has stepped over.
        int rm_ts_s = 0, rm_ts_f = 0;
        int ts_s = 0, ts_f = 0;
        for (Box& b : _boxes) {
            while (ts_s < b.start)
                if (!ts_exist[ts_s++])
                    rm_ts_s++;

            // Finishes are not monotonic in this order: rewind the finish cursor when it
            // has already run past the finish of the current box.
            if (ts_f > b.finish + 1) {
                ts_f = ts_s;
                rm_ts_f = rm_ts_s;
            }
            while (ts_f <= b.finish)
                if (!ts_exist[ts_f++])
                    rm_ts_f++;

            b.start -= rm_ts_s;
            b.finish -= rm_ts_f;
        }
        _time_duration = ts_f - rm_ts_f;
    }

    /**
     * @brief Lifts box_new on top of box_old when the two intersect on the memory axis.
     *
     * Both boxes are treated as the ranges [id, id + size), i.e. the id field is
     * interpreted as a memory offset here.
     *
     * @return true if box_new was moved, false if there is no intersection.
     */
    inline bool popupTogetherWith(MemorySolver::Box& box_new, const MemorySolver::Box& box_old) {
        if (box_new.id + box_new.size > box_old.id && box_old.id + box_old.size > box_new.id) {
            // Move the new one up. There is an intersection
            box_new.id = box_old.id + box_old.size;
            return true;
        } else {
            return false;
        }
    }

    /**
     * @brief Solve memory location with maximal reuse.
     *
     * Boxes are placed from the biggest to the smallest; each one starts at offset 0 and
     * is lifted above every already placed box it intersects with in the time slots it
     * covers. While a box is being placed its id field temporarily holds the offset; the
     * real ids are restored before returning, so the method can be called repeatedly.
     *
     * @return Size of the common memory blob required for storing all boxes
     */
    int64_t solve() {
        // The depth statistics require boxes ordered by start; cache them before re-sorting.
        maxTopDepth();

        std::vector<std::vector<const Box*>> time_slots(_time_duration);
        for (auto& slot : time_slots)
            slot.reserve(_top_depth);  // 2D array [_time_duration][_top_depth]

        // Sort by box size. First is biggest.
        // Comment this line to check other order of box putting
        std::sort(_boxes.begin(), _boxes.end(), [](const Box& l, const Box& r) {
            return l.size > r.size;
        });

        // Remember the real identifiers (in the sorted order) before id is reused as an offset.
        std::vector<int64_t> ids;
        ids.reserve(_boxes.size());
        for (const Box& box : _boxes)
            ids.push_back(box.id);

        int64_t min_required = 0;
        auto id_it = ids.cbegin();
        for (Box& box : _boxes) {
            const int64_t id = *id_it++;

            // start from bottom and lift the box up while it intersects with a placed one
            box.id = 0;  // id is used as a temporary offset storage
            bool popped_up;
            do {
                popped_up = false;
                for (int i_slot = box.start; i_slot <= box.finish; i_slot++) {
                    for (auto* box_in_slot : time_slots[i_slot]) {
                        // intersect with already stored boxes for all covered time slots
                        // and move up the new one if needed
                        popped_up |= popupTogetherWith(box, *box_in_slot);
                    }
                }
            } while (popped_up);

            // add current box to every covered time slot
            for (int i_slot = box.start; i_slot <= box.finish; i_slot++)
                time_slots[i_slot].push_back(&box);

            // store the max top bound over all boxes
            min_required = std::max(min_required, box.id + box.size);
            _offsets[id] = box.id;  // TODO: move to constructor (use .insert instead of [])
        }

        // Give the boxes their identifiers back; time_slots is not used past this point.
        id_it = ids.cbegin();
        for (Box& box : _boxes)
            box.id = *id_it++;

        return min_required;
    }

    /**
     * Provides calculated offset for specified box id.
     * The id is taken as int64_t (the type of Box::id) so that wide ids are not truncated.
     * Raises an error through IE_THROW() when no offset is recorded for the id.
     */
    int64_t getOffset(int64_t id) const {
        auto res = _offsets.find(id);
        if (res == _offsets.end())
            IE_THROW() << "There are no box for provided ID";
        return res->second;
    }

    /** Additional info. Max sum of box sizes required for any time stamp (0 without boxes). */
    int64_t maxDepth() {
        if (_depth == -1)
            calcDepth();
        return _depth;
    }

    /** Additional info. Max num of boxes required for any time stamp (0 without boxes). */
    int64_t maxTopDepth() {
        if (_top_depth == -1)
            calcDepth();
        return _top_depth;
    }

private:
    std::vector<Box> _boxes;
    std::map<int64_t, int64_t> _offsets;
    int64_t _top_depth = -1;  // -1 means "not calculated yet"
    int64_t _depth = -1;      // -1 means "not calculated yet"
    int _time_duration = -1;

    /**
     * Sweeps over _boxes (expected to be ordered by start) and computes the max number of
     * boxes alive at once (_top_depth) and the max sum of their sizes (_depth).
     */
    void calcDepth() {
        // Leave the "not calculated" state even when there are no boxes at all.
        _top_depth = 0;
        _depth = 0;

        int64_t top_depth = 0;
        int64_t depth = 0;
        // release time -> boxes that stop being alive at that time
        std::map<int64_t, std::vector<const Box*>> release_at;

        for (const Box& box : _boxes) {
            const int64_t time = box.start;
            depth += box.size;
            top_depth++;
            release_at[box.finish + 1].push_back(&box);

            // Release everything scheduled for the current time; find() avoids creating
            // an empty map entry only to erase it again.
            const auto expired = release_at.find(time);
            if (expired != release_at.end()) {
                for (const Box* b : expired->second) {
                    depth -= b->size;
                    top_depth--;
                }
                release_at.erase(expired);
            }

            IE_ASSERT(top_depth > 0);

            _top_depth = std::max(_top_depth, top_depth);
            _depth = std::max(_depth, depth);
        }
    }
};

View File

@ -6,35 +6,35 @@
#include <gtest/gtest.h>
#include <ie_common.h>
#include "mkldnn_memory_solver.hpp"
#include "memory_solver.hpp"
using Box = MKLDNNPlugin::MemorySolver::Box;
using Box = MemorySolver::Box;
// Smoke test: a MemorySolver is constructed from several kinds of box vectors.
// Nothing is asserted beyond the construction itself completing.
TEST(MemSolverTest, CanConstruct) {
{ // Empty vector<Box>
MKLDNNPlugin::MemorySolver ms(std::vector<Box>{});
MemorySolver ms(std::vector<Box>{});
}
{ // vector with default Box
MKLDNNPlugin::MemorySolver ms(std::vector<Box>{{}});
MemorySolver ms(std::vector<Box>{{}});
}
{ // vector with Box with non-default Box
MKLDNNPlugin::MemorySolver ms(std::vector<Box>{{1, 3, 3}});
MemorySolver ms(std::vector<Box>{{1, 3, 3}});
}
{ // vector with Box with size == 0
MKLDNNPlugin::MemorySolver ms(std::vector<Box>{{0, 0, 0}});
MemorySolver ms(std::vector<Box>{{0, 0, 0}});
}
{ // vector with Box with finish == -1
MKLDNNPlugin::MemorySolver ms(std::vector<Box>{{3, -1, 6}});
MemorySolver ms(std::vector<Box>{{3, -1, 6}});
}
// TODO: enable after implement TODO from src/mkldnn_plugin/mkldnn_memory_solver.cpp#L17
// TODO: enable after implement TODO from memory_solver.hpp#L66
// { // vector with Box with negative values
// MKLDNNPlugin::MemorySolver ms(std::vector<Box> {{-5, -5, -5, -5}});
// MemorySolver ms(std::vector<Box> {{-5, -5, -5, -5}});
// }
}
@ -47,7 +47,7 @@ TEST(MemSolverTest, GetOffset) {
{n, ++n, 2, 3}, // 0 1 2 3 4
};
MKLDNNPlugin::MemorySolver ms(boxes);
MemorySolver ms(boxes);
ms.solve();
// The correct answer is [0, 2, 0, 2] or [2, 0, 2, 0].
@ -65,7 +65,7 @@ TEST(MemSolverTest, GetOffsetThrowException) {
{n, ++n, 2, id++}, // 0 1 2 3 4
};
MKLDNNPlugin::MemorySolver ms(boxes);
MemorySolver ms(boxes);
ms.solve();
EXPECT_THROW(ms.getOffset(100), InferenceEngine::Exception);
@ -79,7 +79,7 @@ TEST(MemSolverTest, LinearAndEven) {
{n, ++n, 2}, // |__|____||____|__
}; // 0 1 2 3
MKLDNNPlugin::MemorySolver ms(boxes);
MemorySolver ms(boxes);
EXPECT_EQ(ms.solve(), 4);
EXPECT_EQ(ms.maxDepth(), 4);
EXPECT_EQ(ms.maxTopDepth(), 2);
@ -93,7 +93,7 @@ TEST(MemSolverTest, LinearAndNotEven) {
{n, ++n, 3}, // |__|____||____|__
}; // 0 1 2 3
MKLDNNPlugin::MemorySolver ms(boxes);
MemorySolver ms(boxes);
EXPECT_EQ(ms.solve(), 5);
EXPECT_EQ(ms.maxDepth(), 5);
EXPECT_EQ(ms.maxTopDepth(), 2);
@ -108,7 +108,7 @@ TEST(MemSolverTest, LinearWithEmptyExecIndexes) {
{n, n += 2, 3}, // |__|_______|___|_______|__
}; // 2 3 4 5 6 7 8
MKLDNNPlugin::MemorySolver ms(boxes);
MemorySolver ms(boxes);
EXPECT_EQ(ms.solve(), 5);
EXPECT_EQ(ms.maxDepth(), 5);
EXPECT_EQ(ms.maxTopDepth(), 2);
@ -122,7 +122,7 @@ TEST(MemSolverTest, DISABLED_Unefficiency) {
{2, 3, 2}, // 2 3 4 5 6 7 8
};
MKLDNNPlugin::MemorySolver ms(boxes);
MemorySolver ms(boxes);
EXPECT_EQ(ms.solve(), 5); // currently we have answer 6
EXPECT_EQ(ms.maxDepth(), 5);
EXPECT_EQ(ms.maxTopDepth(), 2);
@ -136,7 +136,7 @@ TEST(MemSolverTest, OverlappingBoxes) {
{2, 3, 2}, // 2 3 4 5 6 7 8
};
MKLDNNPlugin::MemorySolver ms(boxes);
MemorySolver ms(boxes);
EXPECT_EQ(ms.solve(), 6);
EXPECT_EQ(ms.maxDepth(), 6);
EXPECT_EQ(ms.maxTopDepth(), 2);
@ -151,7 +151,7 @@ TEST(MemSolverTest, EndOnSeveralBegins) {
{3, 4, 2}, // 0 1 2 3 4 5 6
};
MKLDNNPlugin::MemorySolver ms(boxes);
MemorySolver ms(boxes);
EXPECT_EQ(ms.solve(), 6);
EXPECT_EQ(ms.maxDepth(), 6);
EXPECT_EQ(ms.maxTopDepth(), 3);
@ -166,7 +166,7 @@ TEST(MemSolverTest, ToEndBoxes) {
{3, 4, 2}, // 0 1 2 3 4 5 6
};
MKLDNNPlugin::MemorySolver ms(boxes);
MemorySolver ms(boxes);
EXPECT_EQ(ms.solve(), 8);
EXPECT_EQ(ms.maxDepth(), 8);
EXPECT_EQ(ms.maxTopDepth(), 4);
@ -181,7 +181,7 @@ TEST(MemSolverTest, LastAndToEndBox) {
{3, 4, 2}, // 0 1 2 3 4 5 6
};
MKLDNNPlugin::MemorySolver ms(boxes);
MemorySolver ms(boxes);
EXPECT_EQ(ms.solve(), 6);
EXPECT_EQ(ms.maxDepth(), 6);
EXPECT_EQ(ms.maxTopDepth(), 3);
@ -218,7 +218,7 @@ TEST(MemSolverTest, OptimalAlexnet) {
for (const auto &sh : shapes) boxes.push_back({n, ++n, sh[0] * sh[1] * sh[2]});
// For linear topology bottom score is reachable minRequired == maxDepth
MKLDNNPlugin::MemorySolver ms(boxes);
MemorySolver ms(boxes);
EXPECT_EQ(ms.solve(), ms.maxDepth());
EXPECT_EQ(ms.maxTopDepth(), 2);
}
@ -232,7 +232,7 @@ TEST(MemSolverTest, NoOverlapping) {
{2, 4, 2, n++}, // 2 3 4 5 6 7 8
};
MKLDNNPlugin::MemorySolver ms(boxes);
MemorySolver ms(boxes);
ms.solve();
// TODO: Current algorithm doesn't solve that case. Uncomment check to see inefficiency
// EXPECT_EQ(ms.solve(), 5);
@ -258,7 +258,7 @@ TEST(MemSolverTest, BestSolution1) {
{6, 7, 3, n++}, // 2 3 4 5 6 7 8
};
MKLDNNPlugin::MemorySolver ms(boxes);
MemorySolver ms(boxes);
EXPECT_EQ(ms.solve(), 5);
auto no_overlap = [&](Box box1, Box box2) -> bool {