From 7cad047f53c6052654e38262768931d8dd48cac4 Mon Sep 17 00:00:00 2001 From: Ivan Kochin Date: Wed, 28 Apr 2021 21:27:11 +0300 Subject: [PATCH] Improve Hybrid CPUs support implementation (#5433) * Make find_package(TBBBIND_2_4) silent + Remove inheritance from tbb::task_arena for custom::task_arena * Make my_binding_observer unique_ptr Co-authored-by: Kochin, Ivan --- .../src/inference_engine/CMakeLists.txt | 5 +- .../threading/ie_cpu_streams_executor.cpp | 4 +- .../threading/ie_parallel_custom_arena.cpp | 96 ++++++++----------- .../threading/ie_parallel_custom_arena.hpp | 38 ++++++-- 4 files changed, 78 insertions(+), 65 deletions(-) diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt index 9d1b9009867..7c9200b3ded 100644 --- a/inference-engine/src/inference_engine/CMakeLists.txt +++ b/inference-engine/src/inference_engine/CMakeLists.txt @@ -5,7 +5,10 @@ set (TARGET_NAME "inference_engine") if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") - find_package(TBBBIND_2_4) + find_package(TBBBIND_2_4 QUIET) + if (TBBBIND_2_4_FOUND) + message(STATUS "Static tbbbind_2_4 package was found") + endif() endif() file (GLOB LIBRARY_SRC diff --git a/inference-engine/src/inference_engine/threading/ie_cpu_streams_executor.cpp b/inference-engine/src/inference_engine/threading/ie_cpu_streams_executor.cpp index ddd4f9cb2c4..8a49cc70b0b 100644 --- a/inference-engine/src/inference_engine/threading/ie_cpu_streams_executor.cpp +++ b/inference-engine/src/inference_engine/threading/ie_cpu_streams_executor.cpp @@ -32,14 +32,14 @@ struct CPUStreamsExecutor::Impl { int _ncpus = 0; int _threadBindingStep = 0; int _offset = 0; - Observer(tbb::task_arena& arena, + Observer(custom::task_arena& arena, CpuSet mask, int ncpus, const int streamId, const int threadsPerStream, const int threadBindingStep, const int threadBindingOffset) : - tbb::task_scheduler_observer(arena), + tbb::task_scheduler_observer(static_cast(arena)), _mask{std::move(mask)}, _ncpus(ncpus), _threadBindingStep(threadBindingStep), diff --git a/inference-engine/src/inference_engine/threading/ie_parallel_custom_arena.cpp b/inference-engine/src/inference_engine/threading/ie_parallel_custom_arena.cpp index 4030ea83430..e607300fea5 100755 --- a/inference-engine/src/inference_engine/threading/ie_parallel_custom_arena.cpp +++ b/inference-engine/src/inference_engine/threading/ie_parallel_custom_arena.cpp @@ -22,8 +22,6 @@ namespace custom { namespace detail { #if USE_TBBBIND_2_4 -class binding_handler; - extern "C" { void __TBB_internal_initialize_system_topology( std::size_t groups_num, @@ -98,40 +96,34 @@ void initialize_system_topology() { }); } -class binding_observer : public tbb::task_scheduler_observer { - binding_handler* my_binding_handler; -public: - binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c) - : task_scheduler_observer(ta) { - detail::initialize_system_topology(); - my_binding_handler = detail::__TBB_internal_allocate_binding_handler(num_slots, c.numa_id, c.core_type, c.max_threads_per_core); - } - ~binding_observer() { - detail::__TBB_internal_deallocate_binding_handler(my_binding_handler); - } +binding_observer::binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c) + : task_scheduler_observer(ta) { + detail::initialize_system_topology(); + my_binding_handler = detail::__TBB_internal_allocate_binding_handler(num_slots, c.numa_id, c.core_type, c.max_threads_per_core); +} - void on_scheduler_entry(bool) override { - detail::__TBB_internal_apply_affinity(my_binding_handler, tbb::this_task_arena::current_thread_index()); - } - void on_scheduler_exit(bool) override { - detail::__TBB_internal_restore_affinity(my_binding_handler, tbb::this_task_arena::current_thread_index()); - } -}; +binding_observer::~binding_observer() { + detail::__TBB_internal_deallocate_binding_handler(my_binding_handler); +} -binding_observer* construct_binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c) { - binding_observer* observer = nullptr; +void binding_observer::on_scheduler_entry(bool) { + detail::__TBB_internal_apply_affinity(my_binding_handler, tbb::this_task_arena::current_thread_index()); +} + +void binding_observer::on_scheduler_exit(bool) { + detail::__TBB_internal_restore_affinity(my_binding_handler, tbb::this_task_arena::current_thread_index()); +} + +binding_oberver_ptr construct_binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c) { + binding_oberver_ptr observer{}; if (detail::is_binding_environment_valid() && ((c.core_type >= 0 && info::core_types().size() > 1) || (c.numa_id >= 0 && info::numa_nodes().size() > 1) || c.max_threads_per_core > 0)) { - observer = new binding_observer(ta, num_slots, c); + observer.reset(new binding_observer{ta, num_slots, c}); observer->observe(true); } return observer; } -void destroy_binding_observer(binding_observer* observer) { - observer->observe(false); - delete observer; -} #endif /*USE_TBBBIND_2_4*/ #if TBB_NUMA_SUPPORT_PRESENT @@ -149,52 +141,52 @@ tbb::task_arena::constraints convert_constraints(custom::task_arena::constraints } // namespace detail task_arena::task_arena(int max_concurrency_, unsigned reserved_for_masters) - : tbb::task_arena{max_concurrency_, reserved_for_masters} + : my_task_arena{max_concurrency_, reserved_for_masters} , my_initialization_state{} , my_constraints{} - , my_binding_observer{nullptr} + , my_binding_observer{} {} task_arena::task_arena(const constraints& constraints_, unsigned reserved_for_masters) - : tbb::task_arena{info::default_concurrency(constraints_), reserved_for_masters} + : my_task_arena{info::default_concurrency(constraints_), reserved_for_masters} , my_initialization_state{} , my_constraints{constraints_} - , my_binding_observer{nullptr} + , my_binding_observer{} {} task_arena::task_arena(const task_arena &s) - : tbb::task_arena{s} + : my_task_arena{s.my_task_arena} , my_initialization_state{} , my_constraints{s.my_constraints} - , my_binding_observer{nullptr} + , my_binding_observer{} {} void task_arena::initialize() { #if USE_TBBBIND_2_4 std::call_once(my_initialization_state, [this] { - tbb::task_arena::initialize(); + my_task_arena.initialize(); my_binding_observer = detail::construct_binding_observer( - *this, tbb::task_arena::max_concurrency(), my_constraints); + my_task_arena, my_task_arena.max_concurrency(), my_constraints); }); #elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT - tbb::task_arena::initialize(convert_constraints(my_constraints)); + my_task_arena.initialize(convert_constraints(my_constraints)); #else - tbb::task_arena::initialize(); + my_task_arena.initialize(); #endif } void task_arena::initialize(int max_concurrency_, unsigned reserved_for_masters) { #if USE_TBBBIND_2_4 std::call_once(my_initialization_state, [this, &max_concurrency_, &reserved_for_masters] { - tbb::task_arena::initialize(max_concurrency_, reserved_for_masters); + my_task_arena.initialize(max_concurrency_, reserved_for_masters); my_binding_observer = detail::construct_binding_observer( - *this, tbb::task_arena::max_concurrency(), my_constraints); + my_task_arena, my_task_arena.max_concurrency(), my_constraints); }); #elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT my_constraints.max_concurrency = max_concurrency_; - tbb::task_arena::initialize(convert_constraints(my_constraints), reserved_for_masters); + my_task_arena.initialize(convert_constraints(my_constraints), reserved_for_masters); #else - tbb::task_arena::initialize(max_concurrency_, reserved_for_masters); + my_task_arena.initialize(max_concurrency_, reserved_for_masters); #endif } @@ -202,28 +194,24 @@ void task_arena::initialize(constraints constraints_, unsigned reserved_for_mast std::call_once(my_initialization_state, [this, &constraints_, &reserved_for_masters] { my_constraints = constraints_; #if USE_TBBBIND_2_4 - tbb::task_arena::initialize(info::default_concurrency(constraints_), reserved_for_masters); + my_task_arena.initialize(info::default_concurrency(constraints_), reserved_for_masters); my_binding_observer = detail::construct_binding_observer( - *this, tbb::task_arena::max_concurrency(), my_constraints); + my_task_arena, my_task_arena.max_concurrency(), my_constraints); #elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT - tbb::task_arena::initialize(convert_constraints(my_constraints), reserved_for_masters); + my_task_arena.initialize(convert_constraints(my_constraints), reserved_for_masters); #else - tbb::task_arena::initialize(my_constraints.max_concurrency, reserved_for_masters); + my_task_arena.initialize(my_constraints.max_concurrency, reserved_for_masters); #endif }); } -int task_arena::max_concurrency() { - initialize(); - return tbb::task_arena::max_concurrency(); +task_arena::operator tbb::task_arena&() { + return my_task_arena; } -task_arena::~task_arena() { -#if USE_TBBBIND_2_4 - if (my_binding_observer != nullptr) { - detail::destroy_binding_observer(my_binding_observer); - } -#endif +int task_arena::max_concurrency() { + initialize(); + return my_task_arena.max_concurrency(); } namespace info { diff --git a/inference-engine/src/inference_engine/threading/ie_parallel_custom_arena.hpp b/inference-engine/src/inference_engine/threading/ie_parallel_custom_arena.hpp index c8005ba82db..98e6f58bdd8 100755 --- a/inference-engine/src/inference_engine/threading/ie_parallel_custom_arena.hpp +++ b/inference-engine/src/inference_engine/threading/ie_parallel_custom_arena.hpp @@ -18,6 +18,7 @@ #include #include #include +#include namespace custom { @@ -26,7 +27,7 @@ using core_type_id = int; namespace detail { struct constraints { - constraints(numa_node_id id = -1, int maximal_concurrency = -1) + constraints(numa_node_id id = tbb::task_arena::automatic, int maximal_concurrency = tbb::task_arena::automatic) : numa_id{id} , max_concurrency{maximal_concurrency} , core_type{tbb::task_arena::automatic} @@ -56,13 +57,34 @@ struct constraints { int max_threads_per_core = tbb::task_arena::automatic; }; -class binding_observer; +class binding_handler; + +class binding_observer : public tbb::task_scheduler_observer { + binding_handler* my_binding_handler; +public: + binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c); + ~binding_observer(); + + void on_scheduler_entry(bool) override; + void on_scheduler_exit(bool) override; +}; + +struct binding_observer_deleter { + void operator()(binding_observer* observer) const { + observer->observe(false); + delete observer; + } +}; + +using binding_oberver_ptr = std::unique_ptr; + } // namespace detail -class task_arena : public tbb::task_arena { +class task_arena { + tbb::task_arena my_task_arena; std::once_flag my_initialization_state; detail::constraints my_constraints; - detail::binding_observer* my_binding_observer; + detail::binding_oberver_ptr my_binding_observer; public: using constraints = detail::constraints; @@ -76,20 +98,20 @@ public: void initialize(int max_concurrency_, unsigned reserved_for_masters = 1); void initialize(constraints constraints_, unsigned reserved_for_masters = 1); + explicit operator tbb::task_arena&(); + int max_concurrency(); template void enqueue(F&& f) { initialize(); - tbb::task_arena::enqueue(std::forward(f)); + my_task_arena.enqueue(std::forward(f)); } template auto execute(F&& f) -> decltype(f()) { initialize(); - return tbb::task_arena::execute(std::forward(f)); + return my_task_arena.execute(std::forward(f)); } - - ~task_arena(); }; namespace info {