Memory leaks in tbbbind and onednn were fixed (#8825)

2021-12-01 12:56:29 +03:00 · 2021-12-01 12:56:29 +03:00 · 8a1b63ec51
commit 8a1b63ec51
parent 0c1902b8c5
2 changed files with 108 additions and 76 deletions
--- a/inference-engine/thirdparty/CMakeLists.txt
+++ b/inference-engine/thirdparty/CMakeLists.txt
@ -29,7 +29,11 @@ endif()

 function(ie_add_mkldnn)
    set(DNNL_ENABLE_JIT_PROFILING ${BUILD_SHARED_LIBS} CACHE BOOL "" FORCE)
-    set(DNNL_ENABLE_ITT_TASKS ${BUILD_SHARED_LIBS} CACHE BOOL "" FORCE)
+    if(BUILD_SHARED_LIBS AND ENABLE_PROFILING_ITT)
+        set(DNNL_ENABLE_ITT_TASKS ON CACHE BOOL "" FORCE)
+    else()
+        set(DNNL_ENABLE_ITT_TASKS OFF CACHE BOOL "" FORCE)
+    endif()
    set(DNNL_ENABLE_CONCURRENT_EXEC ON CACHE BOOL "" FORCE)
    set(DNNL_ENABLE_PRIMITIVE_CACHE OFF CACHE BOOL "" FORCE) ## TODO: try it later
    set(DNNL_ENABLE_MAX_CPU_ISA ON CACHE BOOL "" FORCE)
--- a/src/inference/src/threading/ie_parallel_custom_arena.cpp
+++ b/src/inference/src/threading/ie_parallel_custom_arena.cpp
@ -28,6 +28,7 @@ void __TBB_internal_initialize_system_topology(std::size_t groups_num,
                                               int*& numa_indexes_list,
                                               int& core_types_count,
                                               int*& core_types_indexes_list);
+void __TBB_internal_destroy_system_topology();
 binding_handler* __TBB_internal_allocate_binding_handler(int number_of_slots,
                                                         int numa_id,
                                                         int core_type_id,
@ -38,25 +39,6 @@ void __TBB_internal_restore_affinity(binding_handler* handler_ptr, int slot_num)
 int __TBB_internal_get_default_concurrency(int numa_id, int core_type_id, int max_threads_per_core);
 }

-static int get_processors_group_num() {
-#        if defined(_WIN32) || defined(_WIN64)
-    SYSTEM_INFO si;
-    GetNativeSystemInfo(&si);
-
-    DWORD_PTR pam, sam, m = 1;
-    GetProcessAffinityMask(GetCurrentProcess(), &pam, &sam);
-    int nproc = 0;
-    for (std::size_t i = 0; i < sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1) {
-        if (pam & m)
-            ++nproc;
-    }
-    if (nproc == static_cast<int>(si.dwNumberOfProcessors)) {
-        return GetActiveProcessorGroupCount();
-    }
-#        endif
-    return 1;
-}
-
 static bool is_binding_environment_valid() {
 #        if defined(_WIN32) && !defined(_WIN64)
    static bool result = [] {
@ -73,37 +55,116 @@ static bool is_binding_environment_valid() {
 #        endif /* _WIN32 && !_WIN64 */
 }

-static int numa_nodes_count = 0;
-static int* numa_nodes_indexes = nullptr;
+#    endif

-static int core_types_count = 0;
-static int* core_types_indexes = nullptr;
-
-static void initialize_system_topology() {
-    static std::once_flag is_topology_initialized;
-
-    std::call_once(is_topology_initialized, [&] {
+class TBBbindSystemTopology {
+    TBBbindSystemTopology() {
+#    if USE_TBBBIND_2_5
        if (is_binding_environment_valid()) {
            __TBB_internal_initialize_system_topology(get_processors_group_num(),
                                                      numa_nodes_count,
                                                      numa_nodes_indexes,
                                                      core_types_count,
                                                      core_types_indexes);
-        } else {
-            static int dummy_index = task_arena::automatic;
-
-            numa_nodes_count = 1;
-            numa_nodes_indexes = &dummy_index;
-
-            core_types_count = 1;
-            core_types_indexes = &dummy_index;
        }
-    });
+#    endif
+    }
+
+public:
+    ~TBBbindSystemTopology() {
+#    if USE_TBBBIND_2_5
+        if (is_binding_environment_valid()) {
+            __TBB_internal_destroy_system_topology();
+        }
+#    endif
+    }
+
+    std::vector<numa_node_id> numa_nodes() const {
+#    if USE_TBBBIND_2_5
+        std::vector<numa_node_id> node_indexes(numa_nodes_count);
+        std::memcpy(node_indexes.data(), numa_nodes_indexes, numa_nodes_count * sizeof(int));
+        return node_indexes;
+#    elif TBB_NUMA_SUPPORT_PRESENT
+        return tbb::info::numa_nodes();
+#    else
+        return {tbb::task_arena::automatic};
+#    endif
+    }
+
+    std::vector<core_type_id> core_types() const {
+#    if USE_TBBBIND_2_5
+        std::vector<numa_node_id> core_type_indexes(core_types_count);
+        std::memcpy(core_type_indexes.data(), core_types_indexes, core_types_count * sizeof(int));
+        return core_type_indexes;
+#    elif TBB_HYBRID_CPUS_SUPPORT_PRESENT
+        return tbb::info::core_types();
+#    else
+        return {tbb::task_arena::automatic};
+#    endif
+    }
+
+    int default_concurrency(task_arena::constraints c) const {
+        if (c.max_concurrency > 0) {
+            return c.max_concurrency;
+        }
+#    if USE_TBBBIND_2_5
+        if (is_binding_environment_valid()) {
+            return __TBB_internal_get_default_concurrency(c.numa_id, c.core_type, c.max_threads_per_core);
+        }
+        return tbb::this_task_arena::max_concurrency();
+#    elif TBB_HYBRID_CPUS_SUPPORT_PRESENT
+        return tbb::info::default_concurrency(convert_constraints(c));
+#    elif TBB_NUMA_SUPPORT_PRESENT
+        return tbb::info::default_concurrency(c.numa_id);
+#    else
+        return tbb::this_task_arena::max_concurrency();
+#    endif
+    }
+
+    friend const TBBbindSystemTopology& system_topology();
+
+private:
+    int get_processors_group_num() const {
+#    if defined(_WIN32) || defined(_WIN64)
+        SYSTEM_INFO si;
+        GetNativeSystemInfo(&si);
+
+        DWORD_PTR pam, sam, m = 1;
+        GetProcessAffinityMask(GetCurrentProcess(), &pam, &sam);
+        int nproc = 0;
+        for (std::size_t i = 0; i < sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1) {
+            if (pam & m)
+                ++nproc;
+        }
+        if (nproc == static_cast<int>(si.dwNumberOfProcessors)) {
+            return GetActiveProcessorGroupCount();
+        }
+#    endif
+        return 1;
+    }
+
+private:
+#    if USE_TBBBIND_2_5
+    int dummy_index = task_arena::automatic;
+
+    int numa_nodes_count = 1;
+    int* numa_nodes_indexes = &dummy_index;
+
+    int core_types_count = 1;
+    int* core_types_indexes = &dummy_index;
+#    endif
+};
+
+const TBBbindSystemTopology& system_topology() {
+    static TBBbindSystemTopology topology;
+    return topology;
 }

+#    if USE_TBBBIND_2_5
+
 binding_observer::binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c)
    : task_scheduler_observer(ta) {
-    detail::initialize_system_topology();
+    detail::system_topology();
    my_binding_handler =
        detail::__TBB_internal_allocate_binding_handler(num_slots, c.numa_id, c.core_type, c.max_threads_per_core);
 }
@ -219,52 +280,19 @@ int task_arena::max_concurrency() {

 namespace info {
 std::vector<numa_node_id> numa_nodes() {
-#    if USE_TBBBIND_2_5
-    detail::initialize_system_topology();
-    std::vector<numa_node_id> node_indexes(detail::numa_nodes_count);
-    std::memcpy(node_indexes.data(), detail::numa_nodes_indexes, detail::numa_nodes_count * sizeof(int));
-    return node_indexes;
-#    elif TBB_NUMA_SUPPORT_PRESENT
-    return tbb::info::numa_nodes();
-#    else
-    return {tbb::task_arena::automatic};
-#    endif
+    return detail::system_topology().numa_nodes();
 }

 std::vector<core_type_id> core_types() {
-#    if USE_TBBBIND_2_5
-    detail::initialize_system_topology();
-    std::vector<numa_node_id> core_type_indexes(detail::core_types_count);
-    std::memcpy(core_type_indexes.data(), detail::core_types_indexes, detail::core_types_count * sizeof(int));
-    return core_type_indexes;
-#    elif TBB_HYBRID_CPUS_SUPPORT_PRESENT
-    return tbb::info::core_types();
-#    else
-    return {tbb::task_arena::automatic};
-#    endif
+    return detail::system_topology().core_types();
 }

 int default_concurrency(task_arena::constraints c) {
-    if (c.max_concurrency > 0) {
-        return c.max_concurrency;
-    }
-#    if USE_TBBBIND_2_5
-    if (detail::is_binding_environment_valid()) {
-        detail::initialize_system_topology();
-        return detail::__TBB_internal_get_default_concurrency(c.numa_id, c.core_type, c.max_threads_per_core);
-    }
-    return tbb::this_task_arena::max_concurrency();
-#    elif TBB_HYBRID_CPUS_SUPPORT_PRESENT
-    return tbb::info::default_concurrency(convert_constraints(c));
-#    elif TBB_NUMA_SUPPORT_PRESENT
-    return tbb::info::default_concurrency(c.numa_id);
-#    else
-    return tbb::this_task_arena::max_concurrency();
-#    endif
+    return detail::system_topology().default_concurrency(c);
 }

 int default_concurrency(numa_node_id id) {
-    return default_concurrency(task_arena::constraints{}.set_numa_id(id));
+    return detail::system_topology().default_concurrency(task_arena::constraints{}.set_numa_id(id));
 }

 }  // namespace info