diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index 1d94f9f50a5..fac6830cd73 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -19,7 +19,7 @@ #include "mkldnn_graph_optimizer.h" #include "mkldnn_extension_utils.h" #include "mkldnn_extension_mngr.h" -#include "mkldnn_memory_solver.hpp" +#include "memory_solver.hpp" #include "mkldnn_itt.h" #include "mkldnn_infer_request.h" #include diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory_solver.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_memory_solver.cpp deleted file mode 100644 index 42cbfa20e5e..00000000000 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory_solver.cpp +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "mkldnn_memory_solver.hpp" - - -#include -#include -#include - -namespace MKLDNNPlugin { - -MemorySolver::MemorySolver(const std::vector& boxes) : _boxes(boxes) { - int max_ts = 0; - // TODO: add validation of data correctness: - // 1. Box.start >= 0 and Box.finish >= -1 - // 2. Box.finish >= Box.start (except Box.finish == -1) - // 3. Box.size > 0 (or == 0 ?) - // 4. Box.id == any unique value - for (const Box &box : _boxes) max_ts = std::max(std::max(max_ts, box.start), box.finish); - for (Box &box : _boxes) if (box.finish == -1) box.finish = max_ts; - - // sort by start and finish ts - std::sort(_boxes.begin(), _boxes.end(), [](const Box& l, const Box& r) -> bool - { return l.start < r.start || (l.start == r.start && l.finish < r.finish); }); - - // remove unused timestamps (not a begin of some box) - // each ts should start a box - std::vector ts_exist(max_ts+1); - for (const Box &b : _boxes) ts_exist[b.start] = true; - - int rm_ts_s = 0, rm_ts_f = 0; - int ts_s = 0, ts_f = 0; - for (Box &b : _boxes) { - while (ts_s < b.start) if (!ts_exist[ts_s++]) rm_ts_s++; - - if (ts_f > b.finish + 1) { ts_f = ts_s; rm_ts_f = rm_ts_s; } - while (ts_f <= b.finish) if (!ts_exist[ts_f++]) rm_ts_f++; - - b.start -= rm_ts_s; - b.finish -= rm_ts_f; - } - _time_duration = ts_f - rm_ts_f; -} - -inline bool popupTogetherWith(MemorySolver::Box &box_new, const MemorySolver::Box &box_old) { - if (box_new.id+box_new.size > box_old.id && - box_old.id+box_old.size > box_new.id) { - // Move the new one up. There is an intersection - box_new.id = box_old.id + box_old.size; - return true; - } else { - return false; - } -} - -int64_t MemorySolver::solve() { - maxTopDepth(); // at first make sure that we no need more for boxes sorted by box.start - std::vector> time_slots(_time_duration); - for (auto & slot : time_slots) slot.reserve(_top_depth); // 2D array [_time_duration][_top_depth] - - // Sort be box size. First is biggest - // Comment this line to check other order of box putting - std::sort(_boxes.begin(), _boxes.end(), [](const Box& l, const Box& r) - { return l.size > r.size; }); - - int64_t _min_required = 0; - - for (Box& box : _boxes) { - // start from bottom and will lift it up if intersect with other present - int64_t id = box.id; - box.id = 0; // id will be used as a temp offset storage - bool popped_up; - do { - popped_up = false; - for (int i_slot = box.start; i_slot <= box.finish; i_slot++) { - for (auto *box_in_slot : time_slots[i_slot]) { - // intersect with already stored boxes for all covered time slots - // and move up the new one if needed - popped_up |= popupTogetherWith(box, *box_in_slot); - } - } - } while (popped_up); - - // add current box to covered time slot - for (int i_slot = box.start; i_slot <= box.finish; i_slot++) - time_slots[i_slot].push_back(&box); - - // store the max top bound for each box - _min_required = std::max(_min_required, box.id + box.size); - _offsets[id] = box.id; // TODO: move to constructor (use .insert instead of []) - } - - return _min_required; -} - -int64_t MemorySolver::maxDepth() { - if (_depth == -1) calcDepth(); - return _depth; -} - -int64_t MemorySolver::maxTopDepth() { - if (_top_depth == -1) calcDepth(); - return _top_depth; -} - -int64_t MemorySolver::getOffset(int id) const { - auto res = _offsets.find(id); - if (res == _offsets.end()) IE_THROW() << "There are no box for provided ID"; - return res->second; -} - -//======== Private =============// - -void MemorySolver::calcDepth() { - int64_t top_depth = 0; - int64_t depth = 0; - std::map> release_at; - - for (const Box& box : _boxes) { - int64_t time = box.start; - depth += box.size; - top_depth++; - - release_at[box.finish+1].push_back(&box); - - for (const Box *b : release_at[time]) { - depth -= b->size; - top_depth--; - } - release_at.erase(time); - IE_ASSERT(top_depth > 0); - - _top_depth = std::max(_top_depth, top_depth); - _depth = std::max(_depth, depth); - } -} - -} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory_solver.hpp b/inference-engine/src/mkldnn_plugin/mkldnn_memory_solver.hpp deleted file mode 100644 index 91219a4c633..00000000000 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory_solver.hpp +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/** - * @brief The header provides a declaration of MemorySolver utility class - * @file - */ -#pragma once - -#include "ie_api.h" - -#include - -#include -#include - -namespace MKLDNNPlugin { - -/** - * @brief Helps to solve issue of optimal memory allocation only for particular - * execution order. - * - * It works with abstract data description where - * - Node is index in execution order - * - Edge is Box object with size and start-finish indexes (live time) - * - * Example: - * - * Mem(offset) - * | |____| Box {4, 5} - * | |_____________| Box {2, 6} - * | |____| Box {3, 4} - * | |____| Box {2, 3} - * | |____| Box {6, 7} - * |_____________________________________ - * 1 2 3 4 5 6 7 8 9 ExecOrder - * - * Boxes which has an ExecOrder-axis intersection should have no Mem-axis intersections. - * The goal is to define a minimal required memory blob to store all boxes with such - * constraints and specify all corresponding position on Mem axis(through offset field). - * - * NOTE! - * Exec order is predefined. - */ - -class MemorySolver { -public: - /** @brief Representation of edge (size and live time)*/ - struct Box { - /** Execution order index of first use. The data will be produced here. */ - int start; - - /** - * The execution order index of last use. After that data will be released. - * -1 is a reserved value for "till to end". The data will be alive to very - * end of execution. - */ - int finish; - - /** Size of data. In abstract unit of measure (byte, simd, cache line, ...) */ - int64_t size; - - /** Box identifier, unique for each box. Will be used to querying calculated offset. */ - int64_t id; - }; - - explicit MemorySolver(const std::vector& boxes); - - /** - * @brief Solve memory location with maximal reuse. - * @return Size of common memory blob required for storing all - */ - int64_t solve(); - - /** Provides calculated offset for specified box id */ - int64_t getOffset(int id) const; - - /** Additional info. Max sum of box sizes required for any time stamp. */ - int64_t maxDepth(); - /** Additional info. Max num of boxes required for any time stamp. */ - int64_t maxTopDepth(); - -private: - std::vector _boxes; - std::map _offsets; - int64_t _top_depth = -1; - int64_t _depth = -1; - int _time_duration = -1; - - void calcDepth(); -}; - -} // namespace MKLDNNPlugin diff --git a/inference-engine/src/plugin_api/memory_solver.hpp b/inference-engine/src/plugin_api/memory_solver.hpp new file mode 100644 index 00000000000..a1261c0497a --- /dev/null +++ b/inference-engine/src/plugin_api/memory_solver.hpp @@ -0,0 +1,217 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief The header provides a declaration of MemorySolver utility class + * @file + */ +#pragma once + +#include + +#include +#include +#include + +/** + * @brief Helps to solve issue of optimal memory allocation only for particular + * execution order. + * + * It works with abstract data description where + * - Node is index in execution order + * - Edge is Box object with size and start-finish indexes (live time) + * + * Example: + * + * Mem(offset) + * | |____| Box {4, 5} + * | |_____________| Box {2, 6} + * | |____| Box {3, 4} + * | |____| Box {2, 3} + * | |____| Box {6, 7} + * |_____________________________________ + * 1 2 3 4 5 6 7 8 9 ExecOrder + * + * Boxes which has an ExecOrder-axis intersection should have no Mem-axis intersections. + * The goal is to define a minimal required memory blob to store all boxes with such + * constraints and specify all corresponding position on Mem axis(through offset field). + * + * NOTE! + * Exec order is predefined. + */ + +class MemorySolver { +public: + /** @brief Representation of edge (size and live time)*/ + struct Box { + /** Execution order index of first use. The data will be produced here. */ + int start; + + /** + * The execution order index of last use. After that data will be released. + * -1 is a reserved value for "till to end". The data will be alive to very + * end of execution. + */ + int finish; + + /** Size of data. In abstract unit of measure (byte, simd, cache line, ...) */ + int64_t size; + + /** Box identifier, unique for each box. Will be used to querying calculated offset. */ + int64_t id; + }; + + explicit MemorySolver(const std::vector& boxes) : _boxes(boxes) { + int max_ts = 0; + // TODO: add validation of data correctness: + // 1. Box.start >= 0 and Box.finish >= -1 + // 2. Box.finish >= Box.start (except Box.finish == -1) + // 3. Box.size > 0 (or == 0 ?) + // 4. Box.id == any unique value + for (const Box& box : _boxes) + max_ts = std::max(std::max(max_ts, box.start), box.finish); + for (Box& box : _boxes) + if (box.finish == -1) + box.finish = max_ts; + + // sort by start and finish ts + std::sort(_boxes.begin(), _boxes.end(), [](const Box& l, const Box& r) -> bool { + return l.start < r.start || (l.start == r.start && l.finish < r.finish); + }); + + // remove unused timestamps (not a begin of some box) + // each ts should start a box + std::vector ts_exist(max_ts + 1); + for (const Box& b : _boxes) + ts_exist[b.start] = true; + + int rm_ts_s = 0, rm_ts_f = 0; + int ts_s = 0, ts_f = 0; + for (Box& b : _boxes) { + while (ts_s < b.start) + if (!ts_exist[ts_s++]) + rm_ts_s++; + + if (ts_f > b.finish + 1) { + ts_f = ts_s; + rm_ts_f = rm_ts_s; + } + while (ts_f <= b.finish) + if (!ts_exist[ts_f++]) + rm_ts_f++; + + b.start -= rm_ts_s; + b.finish -= rm_ts_f; + } + _time_duration = ts_f - rm_ts_f; + } + + inline bool popupTogetherWith(MemorySolver::Box& box_new, const MemorySolver::Box& box_old) { + if (box_new.id + box_new.size > box_old.id && box_old.id + box_old.size > box_new.id) { + // Move the new one up. There is an intersection + box_new.id = box_old.id + box_old.size; + return true; + } else { + return false; + } + } + + /** + * @brief Solve memory location with maximal reuse. + * @return Size of common memory blob required for storing all + */ + int64_t solve() { + maxTopDepth(); // at first make sure that we no need more for boxes sorted by box.start + std::vector> time_slots(_time_duration); + for (auto& slot : time_slots) + slot.reserve(_top_depth); // 2D array [_time_duration][_top_depth] + + // Sort be box size. First is biggest + // Comment this line to check other order of box putting + std::sort(_boxes.begin(), _boxes.end(), [](const Box& l, const Box& r) { + return l.size > r.size; + }); + + int64_t _min_required = 0; + + for (Box& box : _boxes) { + // start from bottom and will lift it up if intersect with other present + int64_t id = box.id; + box.id = 0; // id will be used as a temp offset storage + bool popped_up; + do { + popped_up = false; + for (int i_slot = box.start; i_slot <= box.finish; i_slot++) { + for (auto* box_in_slot : time_slots[i_slot]) { + // intersect with already stored boxes for all covered time slots + // and move up the new one if needed + popped_up |= popupTogetherWith(box, *box_in_slot); + } + } + } while (popped_up); + + // add current box to covered time slot + for (int i_slot = box.start; i_slot <= box.finish; i_slot++) + time_slots[i_slot].push_back(&box); + + // store the max top bound for each box + _min_required = std::max(_min_required, box.id + box.size); + _offsets[id] = box.id; // TODO: move to constructor (use .insert instead of []) + } + + return _min_required; + } + + /** Provides calculated offset for specified box id */ + int64_t getOffset(int id) const { + auto res = _offsets.find(id); + if (res == _offsets.end()) + IE_THROW() << "There are no box for provided ID"; + return res->second; + } + + /** Additional info. Max sum of box sizes required for any time stamp. */ + int64_t maxDepth() { + if (_depth == -1) + calcDepth(); + return _depth; + } + /** Additional info. Max num of boxes required for any time stamp. */ + int64_t maxTopDepth() { + if (_top_depth == -1) + calcDepth(); + return _top_depth; + } + +private: + std::vector _boxes; + std::map _offsets; + int64_t _top_depth = -1; + int64_t _depth = -1; + int _time_duration = -1; + + void calcDepth() { + int64_t top_depth = 0; + int64_t depth = 0; + std::map> release_at; + + for (const Box& box : _boxes) { + int64_t time = box.start; + depth += box.size; + top_depth++; + + release_at[box.finish + 1].push_back(&box); + + for (const Box* b : release_at[time]) { + depth -= b->size; + top_depth--; + } + release_at.erase(time); + IE_ASSERT(top_depth > 0); + + _top_depth = std::max(_top_depth, top_depth); + _depth = std::max(_depth, depth); + } + } +}; diff --git a/inference-engine/tests/unit/cpu/mkldnn_memory_solver_test.cpp b/inference-engine/tests/unit/inference_engine/memory_solver_test.cpp similarity index 87% rename from inference-engine/tests/unit/cpu/mkldnn_memory_solver_test.cpp rename to inference-engine/tests/unit/inference_engine/memory_solver_test.cpp index 0debe313e98..c0327acbde6 100644 --- a/inference-engine/tests/unit/cpu/mkldnn_memory_solver_test.cpp +++ b/inference-engine/tests/unit/inference_engine/memory_solver_test.cpp @@ -6,35 +6,35 @@ #include #include -#include "mkldnn_memory_solver.hpp" +#include "memory_solver.hpp" -using Box = MKLDNNPlugin::MemorySolver::Box; +using Box = MemorySolver::Box; TEST(MemSolverTest, CanConstruct) { { // Empty vector - MKLDNNPlugin::MemorySolver ms(std::vector{}); + MemorySolver ms(std::vector{}); } { // vector with default Box - MKLDNNPlugin::MemorySolver ms(std::vector{{}}); + MemorySolver ms(std::vector{{}}); } { // vector with Box with non-default Box - MKLDNNPlugin::MemorySolver ms(std::vector{{1, 3, 3}}); + MemorySolver ms(std::vector{{1, 3, 3}}); } { // vector with Box with size == 0 - MKLDNNPlugin::MemorySolver ms(std::vector{{0, 0, 0}}); + MemorySolver ms(std::vector{{0, 0, 0}}); } { // vector with Box with finish == -1 - MKLDNNPlugin::MemorySolver ms(std::vector{{3, -1, 6}}); + MemorySolver ms(std::vector{{3, -1, 6}}); } - // TODO: enable after implement TODO from src/mkldnn_plugin/mkldnn_memory_solver.cpp#L17 + // TODO: enable after implement TODO from memory_solver.hpp#L66 // { // vector with Box with negative values -// MKLDNNPlugin::MemorySolver ms(std::vector {{-5, -5, -5, -5}}); +// MemorySolver ms(std::vector {{-5, -5, -5, -5}}); // } } @@ -47,7 +47,7 @@ TEST(MemSolverTest, GetOffset) { {n, ++n, 2, 3}, // 0 1 2 3 4 }; - MKLDNNPlugin::MemorySolver ms(boxes); + MemorySolver ms(boxes); ms.solve(); // The correct answer is [0, 2, 0, 2] or [2, 0, 2, 0]. @@ -65,7 +65,7 @@ TEST(MemSolverTest, GetOffsetThrowException) { {n, ++n, 2, id++}, // 0 1 2 3 4 }; - MKLDNNPlugin::MemorySolver ms(boxes); + MemorySolver ms(boxes); ms.solve(); EXPECT_THROW(ms.getOffset(100), InferenceEngine::Exception); @@ -79,7 +79,7 @@ TEST(MemSolverTest, LinearAndEven) { {n, ++n, 2}, // |__|____||____|__ }; // 0 1 2 3 - MKLDNNPlugin::MemorySolver ms(boxes); + MemorySolver ms(boxes); EXPECT_EQ(ms.solve(), 4); EXPECT_EQ(ms.maxDepth(), 4); EXPECT_EQ(ms.maxTopDepth(), 2); @@ -93,7 +93,7 @@ TEST(MemSolverTest, LinearAndNotEven) { {n, ++n, 3}, // |__|____||____|__ }; // 0 1 2 3 - MKLDNNPlugin::MemorySolver ms(boxes); + MemorySolver ms(boxes); EXPECT_EQ(ms.solve(), 5); EXPECT_EQ(ms.maxDepth(), 5); EXPECT_EQ(ms.maxTopDepth(), 2); @@ -108,7 +108,7 @@ TEST(MemSolverTest, LinearWithEmptyExecIndexes) { {n, n += 2, 3}, // |__|_______|___|_______|__ }; // 2 3 4 5 6 7 8 - MKLDNNPlugin::MemorySolver ms(boxes); + MemorySolver ms(boxes); EXPECT_EQ(ms.solve(), 5); EXPECT_EQ(ms.maxDepth(), 5); EXPECT_EQ(ms.maxTopDepth(), 2); @@ -122,7 +122,7 @@ TEST(MemSolverTest, DISABLED_Unefficiency) { {2, 3, 2}, // 2 3 4 5 6 7 8 }; - MKLDNNPlugin::MemorySolver ms(boxes); + MemorySolver ms(boxes); EXPECT_EQ(ms.solve(), 5); // currently we have answer 6 EXPECT_EQ(ms.maxDepth(), 5); EXPECT_EQ(ms.maxTopDepth(), 2); @@ -136,7 +136,7 @@ TEST(MemSolverTest, OverlappingBoxes) { {2, 3, 2}, // 2 3 4 5 6 7 8 }; - MKLDNNPlugin::MemorySolver ms(boxes); + MemorySolver ms(boxes); EXPECT_EQ(ms.solve(), 6); EXPECT_EQ(ms.maxDepth(), 6); EXPECT_EQ(ms.maxTopDepth(), 2); @@ -151,7 +151,7 @@ TEST(MemSolverTest, EndOnSeveralBegins) { {3, 4, 2}, // 0 1 2 3 4 5 6 }; - MKLDNNPlugin::MemorySolver ms(boxes); + MemorySolver ms(boxes); EXPECT_EQ(ms.solve(), 6); EXPECT_EQ(ms.maxDepth(), 6); EXPECT_EQ(ms.maxTopDepth(), 3); @@ -166,7 +166,7 @@ TEST(MemSolverTest, ToEndBoxes) { {3, 4, 2}, // 0 1 2 3 4 5 6 }; - MKLDNNPlugin::MemorySolver ms(boxes); + MemorySolver ms(boxes); EXPECT_EQ(ms.solve(), 8); EXPECT_EQ(ms.maxDepth(), 8); EXPECT_EQ(ms.maxTopDepth(), 4); @@ -181,7 +181,7 @@ TEST(MemSolverTest, LastAndToEndBox) { {3, 4, 2}, // 0 1 2 3 4 5 6 }; - MKLDNNPlugin::MemorySolver ms(boxes); + MemorySolver ms(boxes); EXPECT_EQ(ms.solve(), 6); EXPECT_EQ(ms.maxDepth(), 6); EXPECT_EQ(ms.maxTopDepth(), 3); @@ -218,7 +218,7 @@ TEST(MemSolverTest, OptimalAlexnet) { for (const auto &sh : shapes) boxes.push_back({n, ++n, sh[0] * sh[1] * sh[2]}); // For linear topology bottom score is reachable minRequired == maxDepth - MKLDNNPlugin::MemorySolver ms(boxes); + MemorySolver ms(boxes); EXPECT_EQ(ms.solve(), ms.maxDepth()); EXPECT_EQ(ms.maxTopDepth(), 2); } @@ -232,7 +232,7 @@ TEST(MemSolverTest, NoOverlapping) { {2, 4, 2, n++}, // 2 3 4 5 6 7 8 }; - MKLDNNPlugin::MemorySolver ms(boxes); + MemorySolver ms(boxes); ms.solve(); // TODO: Current algorithm doesn't solve that case. Uncomment check to see inefficiency // EXPECT_EQ(ms.solve(), 5); @@ -258,7 +258,7 @@ TEST(MemSolverTest, BestSolution1) { {6, 7, 3, n++}, // 2 3 4 5 6 7 8 }; - MKLDNNPlugin::MemorySolver ms(boxes); + MemorySolver ms(boxes); EXPECT_EQ(ms.solve(), 5); auto no_overlap = [&](Box box1, Box box2) -> bool {