Move internal api to ov (#15964)
* Move cpu streams executor to new API * Remove legacy headers from new dev API * Fixed build issues * Fixed build * Fixed typo * Fixed typo * Fixed build * Fixed code style * Add exception for template constructor of SoPtr
This commit is contained in:
parent
21ac61fef5
commit
113aefa3ff
@ -12,7 +12,7 @@ TemplateNonTypeParameter: '^\w*$'
|
||||
ClassTemplate: '^([A-Z][\w]+|element_type_traits)$'
|
||||
TemplateTypeParameter: '^\w*$'
|
||||
ParameterName: '^\w*$'
|
||||
FunctionTemplate: '^(operator.+|[\w]+|Impl<.*>)$'
|
||||
FunctionTemplate: '^(operator.+|[\w]+|SoPtr.+|Impl<.*>)$'
|
||||
TypeAliasName: '^\w+$'
|
||||
VariableReference: '^\w+$'
|
||||
|
||||
|
@ -13,14 +13,14 @@
|
||||
|
||||
#include "openvino/runtime/icompiled_model.hpp"
|
||||
#include "openvino/runtime/properties.hpp"
|
||||
#include "openvino/runtime/so_ptr.hpp"
|
||||
#include "openvino/runtime/tensor.hpp"
|
||||
#include "so_ptr.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
/**
|
||||
* @interface ICore
|
||||
* @brief Minimal ICore interface to allow plugin to get information from Core Inference Engine class.
|
||||
* @brief Minimal ICore interface to allow plugin to get information from Core OpenVINO class.
|
||||
* @ingroup ov_dev_api_plugin_api
|
||||
*/
|
||||
class ICore {
|
||||
|
92
src/inference/dev_api/openvino/runtime/so_ptr.hpp
Normal file
92
src/inference/dev_api/openvino/runtime/so_ptr.hpp
Normal file
@ -0,0 +1,92 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief This is a wrapper class for handling plugin instantiation and releasing resources
|
||||
* @file openvino/runtime/so_ptr.hpp
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
#include <functional>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>

#include "openvino/runtime/common.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
/**
|
||||
* @brief This class instantiate object using shared library
|
||||
* @tparam T An type of object SoPtr can hold
|
||||
*/
|
||||
template <class T>
|
||||
struct SoPtr {
|
||||
/**
|
||||
* @brief Default constructor
|
||||
*/
|
||||
SoPtr() = default;
|
||||
|
||||
/**
|
||||
* @brief Destructor preserves unloading order of implementation object and reference to library
|
||||
*/
|
||||
~SoPtr() {
|
||||
_ptr = {};
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Constructs an object with existing shared object reference and loaded pointer
|
||||
* @param ptr pointer to the loaded object
|
||||
* @param so Existing reference to library
|
||||
*/
|
||||
SoPtr(const std::shared_ptr<T>& ptr, const std::shared_ptr<void>& so) : _ptr{ptr}, _so{so} {}
|
||||
|
||||
/**
|
||||
* @brief The copy-like constructor, can create So Pointer that dereferenced into child type if T is derived of U
|
||||
* @param that copied SoPtr object
|
||||
*/
|
||||
template <typename U>
|
||||
SoPtr(const SoPtr<U>& that) : _ptr{std::dynamic_pointer_cast<T>(that._ptr)},
|
||||
_so{that._so} {
|
||||
IE_ASSERT(_ptr != nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Standard pointer operator
|
||||
* @return underlined interface with disabled Release method
|
||||
*/
|
||||
T* operator->() const noexcept {
|
||||
return _ptr.get();
|
||||
}
|
||||
|
||||
explicit operator bool() const noexcept {
|
||||
return _ptr != nullptr;
|
||||
}
|
||||
|
||||
friend bool operator==(std::nullptr_t, const SoPtr& ptr) noexcept {
|
||||
return !ptr;
|
||||
}
|
||||
friend bool operator==(const SoPtr& ptr, std::nullptr_t) noexcept {
|
||||
return !ptr;
|
||||
}
|
||||
friend bool operator!=(std::nullptr_t, const SoPtr& ptr) noexcept {
|
||||
return static_cast<bool>(ptr);
|
||||
}
|
||||
friend bool operator!=(const SoPtr& ptr, std::nullptr_t) noexcept {
|
||||
return static_cast<bool>(ptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Gets a smart pointer to the custom object
|
||||
*/
|
||||
std::shared_ptr<T> _ptr;
|
||||
|
||||
/**
|
||||
* @brief The shared object or dynamic loaded library
|
||||
*/
|
||||
std::shared_ptr<void> _so;
|
||||
};
|
||||
|
||||
} // namespace ov
|
@ -12,7 +12,6 @@
|
||||
#include "openvino/runtime/common.hpp"
|
||||
#include "openvino/runtime/threading/istreams_executor.hpp"
|
||||
#include "openvino/runtime/threading/itask_executor.hpp"
|
||||
#include "threading/ie_istreams_executor.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
|
@ -0,0 +1,37 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @file openvino/runtime/threading/immediate_executor.hpp
|
||||
* @brief A header file for OpenVINO Immediate Executor implementation
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "threading/ie_itask_executor.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace threading {
|
||||
|
||||
/**
|
||||
* @brief Task executor implementation that just run tasks in current thread during calling of run() method
|
||||
* @ingroup ov_dev_api_threading
|
||||
*/
|
||||
class ImmediateExecutor : public ITaskExecutor {
|
||||
public:
|
||||
/**
|
||||
* @brief Destroys the object.
|
||||
*/
|
||||
~ImmediateExecutor() override = default;
|
||||
|
||||
void run(Task task) override {
|
||||
task();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace threading
|
||||
} // namespace ov
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
/**
|
||||
* @file ie_istreams_executor.hpp
|
||||
* @brief A header file for Inference Engine Streams-based Executor Interface
|
||||
* @file openvino/runtime/threading/istreams_executor.hpp
|
||||
* @brief A header file for OpenVINO Streams-based Executor Interface
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
@ -85,7 +85,7 @@ public:
|
||||
|
||||
std::string _name; //!< Used by `ITT` to name executor threads
|
||||
int _streams = 1; //!< Number of streams.
|
||||
int _threadsPerStream = 0; //!< Number of threads per stream that executes `ie_parallel` calls
|
||||
int _threadsPerStream = 0; //!< Number of threads per stream that executes `ov_parallel` calls
|
||||
ThreadBindingType _threadBindingType = ThreadBindingType::NONE; //!< Thread binding to hardware resource type.
|
||||
//!< No binding by default
|
||||
int _threadBindingStep = 1; //!< In case of @ref CORES binding offset type
|
||||
|
@ -0,0 +1,124 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
/**
|
||||
* @brief A file containing thread local class implementation.
|
||||
* @file openvino/runtime/threading/thread_local.hpp
|
||||
*/
|
||||
|
||||
#include "openvino/core/parallel.hpp"
|
||||
|
||||
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
|
||||
# include <tbb/enumerable_thread_specific.h>
|
||||
#else
|
||||
# include <functional>
|
||||
# include <memory>
|
||||
# include <mutex>
|
||||
# include <thread>
|
||||
# include <unordered_map>
|
||||
# include <utility>
|
||||
#endif
|
||||
|
||||
namespace ov {
|
||||
namespace threading {
|
||||
|
||||
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
|
||||
|
||||
/**
|
||||
* @brief A wrapper class to keep object to be thread local.
|
||||
* @ingroup ov_dev_api_threading
|
||||
* @tparam T A type of object to keep thread local.
|
||||
*/
|
||||
template <typename T>
|
||||
using ThreadLocal = tbb::enumerable_thread_specific<T>;
|
||||
|
||||
#else
|
||||
|
||||
template <typename T>
|
||||
struct ThreadLocal {
|
||||
using Map = std::unordered_map<std::thread::id, T>;
|
||||
using Create = std::function<T()>;
|
||||
Map _map;
|
||||
mutable std::mutex _mutex;
|
||||
Create _create;
|
||||
|
||||
ThreadLocal()
|
||||
: _create{[] {
|
||||
return T{};
|
||||
}} {}
|
||||
explicit ThreadLocal(const T& init)
|
||||
: _create{[init] {
|
||||
return init;
|
||||
}} {}
|
||||
ThreadLocal(ThreadLocal&& other) : _map{std::move(other._map)}, _create{std::move(other._create)} {}
|
||||
ThreadLocal& operator=(ThreadLocal&& other) {
|
||||
_map = std::move(other._map);
|
||||
_create = std::move(other._create);
|
||||
return *this;
|
||||
}
|
||||
ThreadLocal(const ThreadLocal&) = delete;
|
||||
ThreadLocal& operator=(const ThreadLocal&&) = delete;
|
||||
explicit ThreadLocal(const Create& create_) : _create{create_} {}
|
||||
|
||||
T& local() {
|
||||
auto threadId = std::this_thread::get_id();
|
||||
std::lock_guard<std::mutex> lock{_mutex};
|
||||
auto itThreadLocal = _map.find(threadId);
|
||||
if (itThreadLocal != _map.end()) {
|
||||
return itThreadLocal->second;
|
||||
} else {
|
||||
return _map.emplace(threadId, _create()).first->second;
|
||||
}
|
||||
}
|
||||
|
||||
auto size() const -> decltype(_map.size()) {
|
||||
std::lock_guard<std::mutex> lock{_mutex};
|
||||
return _map.size();
|
||||
}
|
||||
|
||||
// WARNING: Thread Unsafe
|
||||
template <typename It>
|
||||
struct Iterator {
|
||||
It it;
|
||||
bool operator!=(const Iterator& other) {
|
||||
return it != other.it;
|
||||
}
|
||||
Iterator& operator++() {
|
||||
++it;
|
||||
return *this;
|
||||
}
|
||||
auto operator*() -> decltype(it->second) {
|
||||
return it->second;
|
||||
}
|
||||
auto operator-> () -> decltype(&(it->second)) {
|
||||
return &(it->second);
|
||||
}
|
||||
auto operator*() const -> decltype(it->second) {
|
||||
return it->second;
|
||||
}
|
||||
auto operator-> () const -> decltype(&(it->second)) {
|
||||
return &(it->second);
|
||||
}
|
||||
};
|
||||
|
||||
auto begin() -> Iterator<decltype(_map.begin())> {
|
||||
return {_map.begin()};
|
||||
}
|
||||
auto end() -> Iterator<decltype(_map.end())> {
|
||||
return {_map.end()};
|
||||
}
|
||||
auto begin() const -> Iterator<decltype(_map.begin())> const {
|
||||
return {_map.begin()};
|
||||
}
|
||||
auto end() const -> Iterator<decltype(_map.end())> const {
|
||||
return {_map.end()};
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace threading
|
||||
} // namespace ov
|
@ -8,85 +8,4 @@
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
#include "openvino/runtime/common.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
/**
|
||||
* @brief This class instantiate object using shared library
|
||||
 * @tparam T A type of object SoPtr can hold
|
||||
*/
|
||||
template <class T>
|
||||
struct SoPtr {
|
||||
/**
|
||||
* @brief Default constructor
|
||||
*/
|
||||
SoPtr() = default;
|
||||
|
||||
/**
|
||||
* @brief Destructor preserves unloading order of implementation object and reference to library
|
||||
*/
|
||||
~SoPtr() {
|
||||
_ptr = {};
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Constructs an object with existing shared object reference and loaded pointer
|
||||
* @param ptr pointer to the loaded object
|
||||
* @param so Existing reference to library
|
||||
*/
|
||||
SoPtr(const std::shared_ptr<T>& ptr, const std::shared_ptr<void>& so) : _ptr{ptr}, _so{so} {}
|
||||
|
||||
/**
|
||||
* @brief The copy-like constructor, can create So Pointer that dereferenced into child type if T is derived of U
|
||||
* @param that copied SoPtr object
|
||||
*/
|
||||
template <typename U>
|
||||
SoPtr(const SoPtr<U>& that) : _ptr{std::dynamic_pointer_cast<T>(that._ptr)},
|
||||
_so{that._so} {
|
||||
IE_ASSERT(_ptr != nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Standard pointer operator
|
||||
* @return underlined interface with disabled Release method
|
||||
*/
|
||||
T* operator->() const noexcept {
|
||||
return _ptr.get();
|
||||
}
|
||||
|
||||
explicit operator bool() const noexcept {
|
||||
return _ptr != nullptr;
|
||||
}
|
||||
|
||||
friend bool operator==(std::nullptr_t, const SoPtr& ptr) noexcept {
|
||||
return !ptr;
|
||||
}
|
||||
friend bool operator==(const SoPtr& ptr, std::nullptr_t) noexcept {
|
||||
return !ptr;
|
||||
}
|
||||
friend bool operator!=(std::nullptr_t, const SoPtr& ptr) noexcept {
|
||||
return static_cast<bool>(ptr);
|
||||
}
|
||||
friend bool operator!=(const SoPtr& ptr, std::nullptr_t) noexcept {
|
||||
return static_cast<bool>(ptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Gets a smart pointer to the custom object
|
||||
*/
|
||||
std::shared_ptr<T> _ptr;
|
||||
|
||||
/**
|
||||
* @brief The shared object or dynamic loaded library
|
||||
*/
|
||||
std::shared_ptr<void> _so;
|
||||
};
|
||||
|
||||
} // namespace ov
|
||||
#include "openvino/runtime/so_ptr.hpp"
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "openvino/runtime/threading/immediate_executor.hpp"
|
||||
#include "threading/ie_itask_executor.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
@ -33,10 +33,10 @@
|
||||
#include "openvino/runtime/ivariable_state.hpp"
|
||||
#include "openvino/runtime/profiling_info.hpp"
|
||||
#include "openvino/runtime/remote_context.hpp"
|
||||
#include "openvino/runtime/so_ptr.hpp"
|
||||
#include "openvino/runtime/tensor.hpp"
|
||||
#include "openvino/runtime/threading/executor_manager.hpp"
|
||||
#include "openvino/runtime/variable_state.hpp"
|
||||
#include "so_ptr.hpp"
|
||||
#include "threading/ie_executor_manager.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
|
@ -8,16 +8,16 @@
|
||||
|
||||
#include "openvino/runtime/isync_infer_request.hpp"
|
||||
#include "openvino/runtime/ivariable_state.hpp"
|
||||
#include "openvino/runtime/threading/immediate_executor.hpp"
|
||||
#include "openvino/runtime/threading/istreams_executor.hpp"
|
||||
#include "openvino/runtime/variable_state.hpp"
|
||||
#include "threading/ie_immediate_executor.hpp"
|
||||
#include "threading/ie_istreams_executor.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
struct ImmediateStreamsExecutor : public ov::threading::ITaskExecutor {
|
||||
explicit ImmediateStreamsExecutor(const std::shared_ptr<ov::threading::IStreamsExecutor>& streamsExecutor)
|
||||
: _streamsExecutor{streamsExecutor} {}
|
||||
void run(InferenceEngine::Task task) override {
|
||||
void run(ov::threading::Task task) override {
|
||||
_streamsExecutor->execute(std::move(task));
|
||||
}
|
||||
std::shared_ptr<ov::threading::IStreamsExecutor> _streamsExecutor;
|
||||
@ -40,10 +40,10 @@ ov::IAsyncInferRequest::IAsyncInferRequest(const std::shared_ptr<IInferRequest>&
|
||||
m_sync_request->infer();
|
||||
}}};
|
||||
if (m_sync_request)
|
||||
m_sync_pipeline = {{std::make_shared<InferenceEngine::ImmediateExecutor>(), [this] {
|
||||
m_sync_pipeline = {{std::make_shared<ov::threading::ImmediateExecutor>(), [this] {
|
||||
m_sync_request->infer();
|
||||
}}};
|
||||
auto streams_executor = std::dynamic_pointer_cast<InferenceEngine::IStreamsExecutor>(m_request_executor);
|
||||
auto streams_executor = std::dynamic_pointer_cast<ov::threading::IStreamsExecutor>(m_request_executor);
|
||||
if (streams_executor != nullptr) {
|
||||
m_sync_pipeline = {{std::make_shared<ImmediateStreamsExecutor>(std::move(streams_executor)), [this] {
|
||||
m_sync_request->infer();
|
||||
@ -123,7 +123,7 @@ void ov::IAsyncInferRequest::run_first_stage(const Pipeline::iterator itBeginSta
|
||||
firstStageExecutor->run(make_next_stage_task(itBeginStage, itEndStage, std::move(callbackExecutor)));
|
||||
}
|
||||
|
||||
InferenceEngine::Task ov::IAsyncInferRequest::make_next_stage_task(
|
||||
ov::threading::Task ov::IAsyncInferRequest::make_next_stage_task(
|
||||
const Pipeline::iterator itStage,
|
||||
const Pipeline::iterator itEndStage,
|
||||
const std::shared_ptr<ov::threading::ITaskExecutor> callbackExecutor) {
|
||||
|
@ -4,7 +4,6 @@
|
||||
|
||||
#include "openvino/runtime/icompiled_model.hpp"
|
||||
|
||||
#include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp"
|
||||
#include "icompiled_model_wrapper.hpp"
|
||||
#include "openvino/core/model.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
@ -11,12 +11,12 @@
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "dev/threading/parallel_custom_arena.hpp"
|
||||
#include "dev/threading/thread_affinity.hpp"
|
||||
#include "openvino/itt.hpp"
|
||||
#include "openvino/runtime/system_conf.hpp"
|
||||
#include "openvino/runtime/threading/executor_manager.hpp"
|
||||
#include "threading/ie_parallel_custom_arena.hpp"
|
||||
#include "threading/ie_thread_affinity.hpp"
|
||||
#include "threading/ie_thread_local.hpp"
|
||||
#include "openvino/runtime/threading/thread_local.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace threading {
|
||||
@ -24,13 +24,13 @@ struct CPUStreamsExecutor::Impl {
|
||||
struct Stream {
|
||||
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
|
||||
struct Observer : public custom::task_scheduler_observer {
|
||||
InferenceEngine::CpuSet _mask;
|
||||
CpuSet _mask;
|
||||
int _ncpus = 0;
|
||||
int _threadBindingStep = 0;
|
||||
int _offset = 0;
|
||||
int _cpuIdxOffset = 0;
|
||||
Observer(custom::task_arena& arena,
|
||||
InferenceEngine::CpuSet mask,
|
||||
CpuSet mask,
|
||||
int ncpus,
|
||||
const int streamId,
|
||||
const int threadsPerStream,
|
||||
@ -44,14 +44,14 @@ struct CPUStreamsExecutor::Impl {
|
||||
_offset{streamId * threadsPerStream + threadBindingOffset},
|
||||
_cpuIdxOffset(cpuIdxOffset) {}
|
||||
void on_scheduler_entry(bool) override {
|
||||
InferenceEngine::PinThreadToVacantCore(_offset + tbb::this_task_arena::current_thread_index(),
|
||||
_threadBindingStep,
|
||||
_ncpus,
|
||||
_mask,
|
||||
_cpuIdxOffset);
|
||||
pin_thread_to_vacant_core(_offset + tbb::this_task_arena::current_thread_index(),
|
||||
_threadBindingStep,
|
||||
_ncpus,
|
||||
_mask,
|
||||
_cpuIdxOffset);
|
||||
}
|
||||
void on_scheduler_exit(bool) override {
|
||||
PinCurrentThreadByMask(_ncpus, _mask);
|
||||
pin_current_thread_by_mask(_ncpus, _mask);
|
||||
}
|
||||
~Observer() override = default;
|
||||
};
|
||||
@ -136,9 +136,9 @@ struct CPUStreamsExecutor::Impl {
|
||||
_taskArena.reset(new custom::task_arena{custom::task_arena::constraints{}
|
||||
.set_core_type(selected_core_type)
|
||||
.set_max_concurrency(max_concurrency)});
|
||||
InferenceEngine::CpuSet processMask;
|
||||
CpuSet processMask;
|
||||
int ncpus = 0;
|
||||
std::tie(processMask, ncpus) = InferenceEngine::GetProcessMask();
|
||||
std::tie(processMask, ncpus) = get_process_mask();
|
||||
if (nullptr != processMask) {
|
||||
_observer.reset(new Observer{*_taskArena,
|
||||
std::move(processMask),
|
||||
@ -157,9 +157,9 @@ struct CPUStreamsExecutor::Impl {
|
||||
(ThreadBindingType::CORES == _impl->_config._threadBindingType)) {
|
||||
_taskArena.reset(new custom::task_arena{concurrency});
|
||||
if (ThreadBindingType::CORES == _impl->_config._threadBindingType) {
|
||||
InferenceEngine::CpuSet processMask;
|
||||
CpuSet processMask;
|
||||
int ncpus = 0;
|
||||
std::tie(processMask, ncpus) = InferenceEngine::GetProcessMask();
|
||||
std::tie(processMask, ncpus) = get_process_mask();
|
||||
if (nullptr != processMask) {
|
||||
_observer.reset(new Observer{*_taskArena,
|
||||
std::move(processMask),
|
||||
@ -175,32 +175,29 @@ struct CPUStreamsExecutor::Impl {
|
||||
#elif OV_THREAD == OV_THREAD_OMP
|
||||
omp_set_num_threads(_impl->_config._threadsPerStream);
|
||||
if (!checkOpenMpEnvVars(false) && (ThreadBindingType::NONE != _impl->_config._threadBindingType)) {
|
||||
InferenceEngine::CpuSet processMask;
|
||||
CpuSet processMask;
|
||||
int ncpus = 0;
|
||||
std::tie(processMask, ncpus) = InferenceEngine::GetProcessMask();
|
||||
std::tie(processMask, ncpus) = get_process_mask();
|
||||
if (nullptr != processMask) {
|
||||
parallel_nt(_impl->_config._threadsPerStream, [&](int threadIndex, int threadsPerStream) {
|
||||
int thrIdx = _streamId * _impl->_config._threadsPerStream + threadIndex +
|
||||
_impl->_config._threadBindingOffset;
|
||||
InferenceEngine::PinThreadToVacantCore(thrIdx,
|
||||
_impl->_config._threadBindingStep,
|
||||
ncpus,
|
||||
processMask);
|
||||
pin_thread_to_vacant_core(thrIdx, _impl->_config._threadBindingStep, ncpus, processMask);
|
||||
});
|
||||
}
|
||||
}
|
||||
#elif OV_THREAD == OV_THREAD_SEQ
|
||||
if (ThreadBindingType::NUMA == _impl->_config._threadBindingType) {
|
||||
InferenceEngine::PinCurrentThreadToSocket(_numaNodeId);
|
||||
pin_current_thread_to_socket(_numaNodeId);
|
||||
} else if (ThreadBindingType::CORES == _impl->_config._threadBindingType) {
|
||||
InferenceEngine::CpuSet processMask;
|
||||
CpuSet processMask;
|
||||
int ncpus = 0;
|
||||
std::tie(processMask, ncpus) = InferenceEngine::GetProcessMask();
|
||||
std::tie(processMask, ncpus) = get_process_mask();
|
||||
if (nullptr != processMask) {
|
||||
InferenceEngine::PinThreadToVacantCore(_streamId + _impl->_config._threadBindingOffset,
|
||||
_impl->_config._threadBindingStep,
|
||||
ncpus,
|
||||
processMask);
|
||||
pin_thread_to_vacant_core(_streamId + _impl->_config._threadBindingOffset,
|
||||
_impl->_config._threadBindingStep,
|
||||
ncpus,
|
||||
processMask);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -342,7 +339,7 @@ struct CPUStreamsExecutor::Impl {
|
||||
std::queue<Task> _taskQueue;
|
||||
bool _isStopped = false;
|
||||
std::vector<int> _usedNumaNodes;
|
||||
InferenceEngine::ThreadLocal<std::shared_ptr<Stream>> _streams;
|
||||
ov::threading::ThreadLocal<std::shared_ptr<Stream>> _streams;
|
||||
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
|
||||
// stream id mapping to the core type
|
||||
// stored in the reversed order (so the big cores, with the highest core_type_id value, are populated first)
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include "openvino/core/parallel.hpp"
|
||||
#include "openvino/runtime/properties.hpp"
|
||||
#include "openvino/runtime/threading/cpu_streams_executor.hpp"
|
||||
#include "threading/ie_cpu_streams_executor.hpp"
|
||||
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
|
||||
# if (TBB_INTERFACE_VERSION < 12000)
|
||||
# include <tbb/task_scheduler_init.h>
|
||||
|
48
src/inference/src/dev/threading/itt.hpp
Normal file
48
src/inference/src/dev/threading/itt.hpp
Normal file
@ -0,0 +1,48 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief Defines openvino tbbbind domains for tracing
|
||||
* @file openvino/runtime/threading/itt.hpp
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "openvino/cc/selective_build.h"
|
||||
#include "openvino/core/except.hpp"
|
||||
#include "openvino/itt.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace tbbbind {
|
||||
namespace itt {
|
||||
namespace domains {
|
||||
OV_ITT_DOMAIN(tbb_bind);
|
||||
} // namespace domains
|
||||
} // namespace itt
|
||||
} // namespace tbbbind
|
||||
} // namespace ov
|
||||
|
||||
OV_CC_DOMAINS(tbb_bind);
|
||||
|
||||
/*
|
||||
* TBB_BIND_SCOPE macro allows to disable parts of tbb_bind calling if they are not used.
|
||||
*/
|
||||
#if defined(SELECTIVE_BUILD_ANALYZER)
|
||||
|
||||
# define TBB_BIND_SCOPE(region) OV_SCOPE(tbb_bind, region)
|
||||
# define TBB_BIND_NUMA_ENABLED OV_SCOPE(tbb_bind, NUMA)
|
||||
|
||||
#elif defined(SELECTIVE_BUILD)
|
||||
|
||||
# define TBB_BIND_SCOPE(region) \
|
||||
if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(tbb_bind, _, NUMA)) == 1 && \
|
||||
OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(tbb_bind, _, region)) == 1)
|
||||
# define TBB_BIND_NUMA_ENABLED
|
||||
|
||||
#else
|
||||
|
||||
# define TBB_BIND_SCOPE(region)
|
||||
# define TBB_BIND_NUMA_ENABLED
|
||||
#endif
|
||||
|
@ -1,13 +1,13 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include "ie_parallel_custom_arena.hpp"
|
||||
#include "dev/threading/parallel_custom_arena.hpp"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "dev/threading/itt.hpp"
|
||||
|
||||
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
|
||||
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
|
||||
|
||||
# ifndef TBBBIND_2_5_AVAILABLE
|
||||
# define TBBBIND_2_5_AVAILABLE 0
|
||||
@ -329,4 +329,4 @@ int default_concurrency(numa_node_id id) {
|
||||
|
||||
} // namespace info
|
||||
} // namespace custom
|
||||
#endif /*IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO*/
|
||||
#endif /*OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO*/
|
141
src/inference/src/dev/threading/parallel_custom_arena.hpp
Normal file
141
src/inference/src/dev/threading/parallel_custom_arena.hpp
Normal file
@ -0,0 +1,141 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief Contains declarations and custom threading interfaces based on TBB info and task_arena APIs.
|
||||
*
|
||||
* @file dev/threading/parallel_custom_arena.hpp
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "openvino/core/parallel.hpp"
|
||||
|
||||
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
|
||||
|
||||
# include <cstddef>
|
||||
# include <memory>
|
||||
# include <mutex>
|
||||
# include <type_traits>
|
||||
# include <vector>
|
||||
|
||||
namespace custom {
|
||||
|
||||
using numa_node_id = int;
|
||||
using core_type_id = int;
|
||||
|
||||
namespace detail {
|
||||
struct constraints {
|
||||
constraints(numa_node_id id = tbb::task_arena::automatic, int maximal_concurrency = tbb::task_arena::automatic)
|
||||
: numa_id{id},
|
||||
max_concurrency{maximal_concurrency},
|
||||
core_type{tbb::task_arena::automatic},
|
||||
max_threads_per_core{tbb::task_arena::automatic} {}
|
||||
|
||||
constraints& set_numa_id(numa_node_id id) {
|
||||
numa_id = id;
|
||||
return *this;
|
||||
}
|
||||
constraints& set_max_concurrency(int maximal_concurrency) {
|
||||
max_concurrency = maximal_concurrency;
|
||||
return *this;
|
||||
}
|
||||
constraints& set_core_type(core_type_id id) {
|
||||
core_type = id;
|
||||
return *this;
|
||||
}
|
||||
constraints& set_max_threads_per_core(int threads_number) {
|
||||
max_threads_per_core = threads_number;
|
||||
return *this;
|
||||
}
|
||||
|
||||
numa_node_id numa_id = tbb::task_arena::automatic;
|
||||
int max_concurrency = tbb::task_arena::automatic;
|
||||
core_type_id core_type = tbb::task_arena::automatic;
|
||||
int max_threads_per_core = tbb::task_arena::automatic;
|
||||
};
|
||||
|
||||
class binding_handler;
|
||||
|
||||
class binding_observer : public tbb::task_scheduler_observer {
|
||||
binding_handler* my_binding_handler;
|
||||
|
||||
public:
|
||||
binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c);
|
||||
~binding_observer();
|
||||
|
||||
void on_scheduler_entry(bool) override;
|
||||
void on_scheduler_exit(bool) override;
|
||||
};
|
||||
|
||||
struct binding_observer_deleter {
|
||||
void operator()(binding_observer* observer) const {
|
||||
observer->observe(false);
|
||||
delete observer;
|
||||
}
|
||||
};
|
||||
|
||||
using binding_oberver_ptr = std::unique_ptr<binding_observer, binding_observer_deleter>;
|
||||
|
||||
} // namespace detail
|
||||
|
||||
class task_arena {
|
||||
tbb::task_arena my_task_arena;
|
||||
std::once_flag my_initialization_state;
|
||||
detail::constraints my_constraints;
|
||||
detail::binding_oberver_ptr my_binding_observer;
|
||||
|
||||
public:
|
||||
using constraints = detail::constraints;
|
||||
static const int automatic = tbb::task_arena::automatic;
|
||||
|
||||
task_arena(int max_concurrency_ = automatic, unsigned reserved_for_masters = 1);
|
||||
task_arena(const constraints& constraints_, unsigned reserved_for_masters = 1);
|
||||
task_arena(const task_arena& s);
|
||||
|
||||
void initialize();
|
||||
void initialize(int max_concurrency_, unsigned reserved_for_masters = 1);
|
||||
void initialize(constraints constraints_, unsigned reserved_for_masters = 1);
|
||||
|
||||
explicit operator tbb::task_arena&();
|
||||
|
||||
int max_concurrency();
|
||||
|
||||
template <typename F>
|
||||
void enqueue(F&& f) {
|
||||
initialize();
|
||||
my_task_arena.enqueue(std::forward<F>(f));
|
||||
}
|
||||
template <typename F>
|
||||
auto execute(F&& f) -> decltype(f()) {
|
||||
initialize();
|
||||
return my_task_arena.execute(std::forward<F>(f));
|
||||
}
|
||||
};
|
||||
|
||||
struct task_scheduler_observer : public tbb::task_scheduler_observer {
|
||||
task_scheduler_observer(custom::task_arena& arena)
|
||||
: tbb::task_scheduler_observer(static_cast<tbb::task_arena&>(arena)),
|
||||
my_arena(arena) {}
|
||||
|
||||
void observe(bool state = true) {
|
||||
if (state) {
|
||||
my_arena.initialize();
|
||||
}
|
||||
tbb::task_scheduler_observer::observe(state);
|
||||
}
|
||||
|
||||
custom::task_arena& my_arena;
|
||||
};
|
||||
|
||||
namespace info {
|
||||
std::vector<numa_node_id> numa_nodes();
|
||||
std::vector<core_type_id> core_types();
|
||||
|
||||
int default_concurrency(numa_node_id id = task_arena::automatic);
|
||||
int default_concurrency(task_arena::constraints c);
|
||||
} // namespace info
|
||||
} // namespace custom
|
||||
#endif /*(OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)*/
|
||||
|
119
src/inference/src/dev/threading/thread_affinity.cpp
Normal file
119
src/inference/src/dev/threading/thread_affinity.cpp
Normal file
@ -0,0 +1,119 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "dev/threading/thread_affinity.hpp"
|
||||
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
#include "openvino/runtime/system_conf.hpp"
|
||||
|
||||
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
# include <sched.h>
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
namespace ov {
|
||||
namespace threading {
|
||||
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
std::tuple<CpuSet, int> get_process_mask() {
|
||||
for (int ncpus = sizeof(cpu_set_t) / CHAR_BIT; ncpus < 32768 /* reasonable limit of #cores*/; ncpus <<= 1) {
|
||||
CpuSet mask{CPU_ALLOC(ncpus)};
|
||||
if (nullptr == mask)
|
||||
break;
|
||||
const size_t size = CPU_ALLOC_SIZE(ncpus);
|
||||
CPU_ZERO_S(size, mask.get());
|
||||
// the result fits the mask
|
||||
if (0 == sched_getaffinity(getpid(), size, mask.get())) {
|
||||
return std::make_tuple(std::move(mask), ncpus);
|
||||
}
|
||||
// other error
|
||||
if (errno != EINVAL)
|
||||
break;
|
||||
}
|
||||
return std::make_tuple(nullptr, 0);
|
||||
}
|
||||
|
||||
/* Release the cores affinity mask for the current process */
|
||||
void release_process_mask(cpu_set_t* mask) {
|
||||
if (nullptr != mask)
|
||||
CPU_FREE(mask);
|
||||
}
|
||||
|
||||
bool pin_current_thread_by_mask(int ncores, const CpuSet& procMask) {
|
||||
return 0 == sched_setaffinity(0, CPU_ALLOC_SIZE(ncores), procMask.get());
|
||||
}
|
||||
|
||||
bool pin_thread_to_vacant_core(int thrIdx, int hyperthreads, int ncores, const CpuSet& procMask, int cpuIdxOffset) {
|
||||
if (procMask == nullptr)
|
||||
return false;
|
||||
const size_t size = CPU_ALLOC_SIZE(ncores);
|
||||
const int num_cpus = CPU_COUNT_S(size, procMask.get());
|
||||
thrIdx %= num_cpus; // To limit unique number in [; num_cpus-1] range
|
||||
// Place threads with specified step
|
||||
int cpu_idx = cpuIdxOffset;
|
||||
for (int i = 0, offset = 0; i < thrIdx; ++i) {
|
||||
cpu_idx += hyperthreads;
|
||||
if (cpu_idx >= num_cpus)
|
||||
cpu_idx = ++offset;
|
||||
}
|
||||
|
||||
// Find index of 'cpu_idx'-th bit that equals to 1
|
||||
int mapped_idx = cpuIdxOffset - 1;
|
||||
while (cpu_idx >= cpuIdxOffset) {
|
||||
mapped_idx++;
|
||||
if (CPU_ISSET_S(mapped_idx, size, procMask.get()))
|
||||
--cpu_idx;
|
||||
}
|
||||
|
||||
CpuSet targetMask{CPU_ALLOC(ncores)};
|
||||
CPU_ZERO_S(size, targetMask.get());
|
||||
CPU_SET_S(mapped_idx, size, targetMask.get());
|
||||
bool res = pin_current_thread_by_mask(ncores, targetMask);
|
||||
return res;
|
||||
}
|
||||
|
||||
bool pin_current_thread_to_socket(int socket) {
|
||||
const int sockets = ov::get_available_numa_nodes().size();
|
||||
const int cores = ov::get_number_of_cpu_cores();
|
||||
const int cores_per_socket = cores / sockets;
|
||||
|
||||
int ncpus = 0;
|
||||
CpuSet mask;
|
||||
std::tie(mask, ncpus) = get_process_mask();
|
||||
CpuSet targetMask{CPU_ALLOC(ncpus)};
|
||||
const size_t size = CPU_ALLOC_SIZE(ncpus);
|
||||
CPU_ZERO_S(size, targetMask.get());
|
||||
|
||||
for (int core = socket * cores_per_socket; core < (socket + 1) * cores_per_socket; core++) {
|
||||
CPU_SET_S(core, size, targetMask.get());
|
||||
}
|
||||
// respect the user-defined mask for the entire process
|
||||
CPU_AND_S(size, targetMask.get(), targetMask.get(), mask.get());
|
||||
bool res = false;
|
||||
if (CPU_COUNT_S(size, targetMask.get())) { // if we have non-zero mask to set
|
||||
res = pin_current_thread_by_mask(ncpus, targetMask);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
#else // no threads pinning/binding on Win/MacOS
|
||||
std::tuple<CpuSet, int> get_process_mask() {
|
||||
return std::make_tuple(nullptr, 0);
|
||||
}
|
||||
void release_process_mask(cpu_set_t*) {}
|
||||
|
||||
bool pin_thread_to_vacant_core(int thrIdx, int hyperthreads, int ncores, const CpuSet& procMask, int cpuIdxOffset) {
|
||||
return false;
|
||||
}
|
||||
bool pin_current_thread_by_mask(int ncores, const CpuSet& procMask) {
|
||||
return false;
|
||||
}
|
||||
bool pin_current_thread_to_socket(int socket) {
|
||||
return false;
|
||||
}
|
||||
#endif // !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
} // namespace threading
|
||||
} // namespace ov
|
93
src/inference/src/dev/threading/thread_affinity.hpp
Normal file
93
src/inference/src/dev/threading/thread_affinity.hpp
Normal file
@ -0,0 +1,93 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
|
||||
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
# include <sched.h>
|
||||
#endif
|
||||
|
||||
namespace ov {
|
||||
namespace threading {
|
||||
|
||||
#if (defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
using cpu_set_t = void;
|
||||
#endif // (defined(__APPLE__) || defined(_WIN32))
|
||||
|
||||
/**
|
||||
* @brief Release the cores affinity mask for the current process
|
||||
* @ingroup ov_dev_api_threading
|
||||
*
|
||||
* @param mask The mask
|
||||
*/
|
||||
void release_process_mask(cpu_set_t* mask);
|
||||
|
||||
/**
|
||||
* @brief Deleter for process mask
|
||||
* @ingroup ov_dev_api_threading
|
||||
*/
|
||||
struct ReleaseProcessMaskDeleter {
|
||||
/**
|
||||
* @brief A callable operator to release object
|
||||
*
|
||||
* @param mask The mask to release
|
||||
*/
|
||||
void operator()(cpu_set_t* mask) const {
|
||||
release_process_mask(mask);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief A unique pointer to CPU set structure with the ReleaseProcessMaskDeleter deleter
|
||||
* @ingroup ov_dev_api_threading
|
||||
*/
|
||||
using CpuSet = std::unique_ptr<cpu_set_t, ReleaseProcessMaskDeleter>;
|
||||
|
||||
/**
|
||||
* @brief Get the cores affinity mask for the current process
|
||||
* @ingroup ov_dev_api_threading
|
||||
* @return A core affinity mask
|
||||
*/
|
||||
std::tuple<CpuSet, int> get_process_mask();
|
||||
|
||||
/**
|
||||
* @brief Pins current thread to a set of cores determined by the mask
|
||||
* @ingroup ov_dev_api_threading
|
||||
*
|
||||
* @param[in] thrIdx The thr index
|
||||
* @param[in] hyperThreads The hyper threads
|
||||
* @param[in] ncores The ncores
|
||||
* @param[in] processMask The process mask
|
||||
* @return `True` in case of success, `false` otherwise
|
||||
*/
|
||||
bool pin_thread_to_vacant_core(int thrIdx,
|
||||
int hyperThreads,
|
||||
int ncores,
|
||||
const CpuSet& processMask,
|
||||
int cpuIdxOffset = 0);
|
||||
|
||||
/**
|
||||
* @brief Pins thread to a spare core in the round-robin scheme, while respecting the given process mask.
|
||||
* The function can also handle the hyper-threading (by populating the physical cores first)
|
||||
* @ingroup ov_dev_api_threading
|
||||
*
|
||||
* @param[in] ncores The ncores
|
||||
* @param[in] processMask The process mask
|
||||
* @return `True` in case of success, `false` otherwise
|
||||
*/
|
||||
bool pin_current_thread_by_mask(int ncores, const CpuSet& processMask);
|
||||
|
||||
/**
|
||||
* @brief Pins a current thread to a socket.
|
||||
* @ingroup ov_dev_api_threading
|
||||
*
|
||||
* @param[in] socket The socket id
|
||||
* @return `True` in case of success, `false` otherwise
|
||||
*/
|
||||
bool pin_current_thread_to_socket(int socket);
|
||||
} // namespace threading
|
||||
} // namespace ov
|
@ -13,10 +13,10 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ie_common.h"
|
||||
#include "ie_system_conf.h"
|
||||
#include "dev/threading/parallel_custom_arena.hpp"
|
||||
#include "openvino/core/except.hpp"
|
||||
#include "openvino/runtime/system_conf.hpp"
|
||||
#include "streams_executor.hpp"
|
||||
#include "threading/ie_parallel_custom_arena.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
@ -242,7 +242,7 @@ void parse_processor_info_linux(const int _processors,
|
||||
}
|
||||
};
|
||||
|
||||
#if !((IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO))
|
||||
#if !((OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO))
|
||||
std::vector<int> get_available_numa_nodes() {
|
||||
std::vector<int> nodes((0 == cpu._sockets) ? 1 : cpu._sockets);
|
||||
std::iota(std::begin(nodes), std::end(nodes), 0);
|
||||
@ -252,7 +252,7 @@ std::vector<int> get_available_numa_nodes() {
|
||||
int get_number_of_cpu_cores(bool bigCoresOnly) {
|
||||
unsigned numberOfProcessors = cpu._processors;
|
||||
unsigned totalNumberOfCpuCores = cpu._cores;
|
||||
IE_ASSERT(totalNumberOfCpuCores != 0);
|
||||
OPENVINO_ASSERT(totalNumberOfCpuCores != 0);
|
||||
cpu_set_t usedCoreSet, currentCoreSet, currentCpuSet;
|
||||
CPU_ZERO(¤tCpuSet);
|
||||
CPU_ZERO(&usedCoreSet);
|
||||
@ -270,7 +270,7 @@ int get_number_of_cpu_cores(bool bigCoresOnly) {
|
||||
}
|
||||
}
|
||||
int phys_cores = CPU_COUNT(¤tCoreSet);
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
|
||||
auto core_types = custom::info::core_types();
|
||||
if (bigCoresOnly && core_types.size() > 1) /*Hybrid CPU*/ {
|
||||
phys_cores = custom::info::default_concurrency(
|
||||
|
@ -11,9 +11,9 @@
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "dev/threading/parallel_custom_arena.hpp"
|
||||
#include "openvino/runtime/system_conf.hpp"
|
||||
#include "streams_executor.hpp"
|
||||
#include "threading/ie_parallel_custom_arena.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
@ -189,7 +189,7 @@ int get_number_of_cpu_cores(bool bigCoresOnly) {
|
||||
phys_cores++;
|
||||
} while (offset < sz);
|
||||
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
|
||||
auto core_types = custom::info::core_types();
|
||||
if (bigCoresOnly && core_types.size() > 1) /*Hybrid CPU*/ {
|
||||
phys_cores = custom::info::default_concurrency(
|
||||
@ -199,7 +199,7 @@ int get_number_of_cpu_cores(bool bigCoresOnly) {
|
||||
return phys_cores;
|
||||
}
|
||||
|
||||
#if !(IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
#if !(OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
|
||||
// OMP/SEQ threading on the Windows doesn't support NUMA
|
||||
std::vector<int> get_available_numa_nodes() {
|
||||
return {-1};
|
||||
|
@ -8,8 +8,8 @@
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#include "dev/threading/parallel_custom_arena.hpp"
|
||||
#include "openvino/core/visibility.hpp"
|
||||
#include "threading/ie_parallel_custom_arena.hpp"
|
||||
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#define XBYAK_UNDEF_JNL
|
||||
@ -147,7 +147,7 @@ bool check_open_mp_env_vars(bool include_omp_num_threads) {
|
||||
int get_number_of_cpu_cores(bool) {
|
||||
return parallel_get_max_threads();
|
||||
}
|
||||
# if !((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
|
||||
# if !((OV_THREAD == OV_THREAD_TBB) || (OV_THREAD == OV_THREAD_TBB_AUTO))
|
||||
std::vector<int> get_available_numa_nodes() {
|
||||
return {-1};
|
||||
}
|
||||
@ -158,7 +158,7 @@ int get_number_of_logical_cpu_cores(bool) {
|
||||
#else
|
||||
int get_number_of_logical_cpu_cores(bool bigCoresOnly) {
|
||||
int logical_cores = parallel_get_max_threads();
|
||||
# if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
# if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
|
||||
auto core_types = custom::info::core_types();
|
||||
if (bigCoresOnly && core_types.size() > 1) /*Hybrid CPU*/ {
|
||||
logical_cores = custom::info::default_concurrency(
|
||||
@ -169,7 +169,7 @@ int get_number_of_logical_cpu_cores(bool bigCoresOnly) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
|
||||
#if ((OV_THREAD == OV_THREAD_TBB) || (OV_THREAD == OV_THREAD_TBB_AUTO))
|
||||
std::vector<int> get_available_numa_nodes() {
|
||||
return custom::info::numa_nodes();
|
||||
}
|
||||
|
@ -10,132 +10,4 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ie_parallel.hpp"
|
||||
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
namespace custom {
|
||||
|
||||
using numa_node_id = int;
|
||||
using core_type_id = int;
|
||||
|
||||
namespace detail {
|
||||
struct constraints {
|
||||
constraints(numa_node_id id = tbb::task_arena::automatic, int maximal_concurrency = tbb::task_arena::automatic)
|
||||
: numa_id{id}
|
||||
, max_concurrency{maximal_concurrency}
|
||||
, core_type{tbb::task_arena::automatic}
|
||||
, max_threads_per_core{tbb::task_arena::automatic}
|
||||
{}
|
||||
|
||||
constraints& set_numa_id(numa_node_id id) {
|
||||
numa_id = id;
|
||||
return *this;
|
||||
}
|
||||
constraints& set_max_concurrency(int maximal_concurrency) {
|
||||
max_concurrency = maximal_concurrency;
|
||||
return *this;
|
||||
}
|
||||
constraints& set_core_type(core_type_id id) {
|
||||
core_type = id;
|
||||
return *this;
|
||||
}
|
||||
constraints& set_max_threads_per_core(int threads_number) {
|
||||
max_threads_per_core = threads_number;
|
||||
return *this;
|
||||
}
|
||||
|
||||
numa_node_id numa_id = tbb::task_arena::automatic;
|
||||
int max_concurrency = tbb::task_arena::automatic;
|
||||
core_type_id core_type = tbb::task_arena::automatic;
|
||||
int max_threads_per_core = tbb::task_arena::automatic;
|
||||
};
|
||||
|
||||
class binding_handler;
|
||||
|
||||
class binding_observer : public tbb::task_scheduler_observer {
|
||||
binding_handler* my_binding_handler;
|
||||
public:
|
||||
binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c);
|
||||
~binding_observer();
|
||||
|
||||
void on_scheduler_entry(bool) override;
|
||||
void on_scheduler_exit(bool) override;
|
||||
};
|
||||
|
||||
struct binding_observer_deleter {
|
||||
void operator()(binding_observer* observer) const {
|
||||
observer->observe(false);
|
||||
delete observer;
|
||||
}
|
||||
};
|
||||
|
||||
using binding_oberver_ptr = std::unique_ptr<binding_observer, binding_observer_deleter>;
|
||||
|
||||
} // namespace detail
|
||||
|
||||
class task_arena {
|
||||
tbb::task_arena my_task_arena;
|
||||
std::once_flag my_initialization_state;
|
||||
detail::constraints my_constraints;
|
||||
detail::binding_oberver_ptr my_binding_observer;
|
||||
|
||||
public:
|
||||
using constraints = detail::constraints;
|
||||
static const int automatic = tbb::task_arena::automatic;
|
||||
|
||||
task_arena(int max_concurrency_ = automatic, unsigned reserved_for_masters = 1);
|
||||
task_arena(const constraints& constraints_, unsigned reserved_for_masters = 1);
|
||||
task_arena(const task_arena &s);
|
||||
|
||||
void initialize();
|
||||
void initialize(int max_concurrency_, unsigned reserved_for_masters = 1);
|
||||
void initialize(constraints constraints_, unsigned reserved_for_masters = 1);
|
||||
|
||||
explicit operator tbb::task_arena&();
|
||||
|
||||
int max_concurrency();
|
||||
|
||||
template<typename F>
|
||||
void enqueue(F&& f) {
|
||||
initialize();
|
||||
my_task_arena.enqueue(std::forward<F>(f));
|
||||
}
|
||||
template<typename F>
|
||||
auto execute(F&& f) -> decltype(f()) {
|
||||
initialize();
|
||||
return my_task_arena.execute(std::forward<F>(f));
|
||||
}
|
||||
};
|
||||
|
||||
struct task_scheduler_observer: public tbb::task_scheduler_observer {
|
||||
task_scheduler_observer(custom::task_arena& arena) :
|
||||
tbb::task_scheduler_observer(static_cast<tbb::task_arena&>(arena)),
|
||||
my_arena(arena)
|
||||
{}
|
||||
|
||||
void observe(bool state = true) {
|
||||
if (state) {
|
||||
my_arena.initialize();
|
||||
}
|
||||
tbb::task_scheduler_observer::observe(state);
|
||||
}
|
||||
|
||||
custom::task_arena& my_arena;
|
||||
};
|
||||
|
||||
namespace info {
|
||||
std::vector<numa_node_id> numa_nodes();
|
||||
std::vector<core_type_id> core_types();
|
||||
|
||||
int default_concurrency(numa_node_id id = task_arena::automatic);
|
||||
int default_concurrency(task_arena::constraints c);
|
||||
} // namespace info
|
||||
} // namespace custom
|
||||
#endif /*(IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)*/
|
||||
#include "dev/threading/parallel_custom_arena.hpp"
|
||||
|
@ -4,114 +4,26 @@
|
||||
|
||||
#include "threading/ie_thread_affinity.hpp"
|
||||
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
#include "ie_system_conf.h"
|
||||
|
||||
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
# include <sched.h>
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
#include "dev/threading/thread_affinity.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
|
||||
std::tuple<CpuSet, int> GetProcessMask() {
|
||||
for (int ncpus = sizeof(cpu_set_t) / CHAR_BIT; ncpus < 32768 /* reasonable limit of #cores*/; ncpus <<= 1) {
|
||||
CpuSet mask{CPU_ALLOC(ncpus)};
|
||||
if (nullptr == mask)
|
||||
break;
|
||||
const size_t size = CPU_ALLOC_SIZE(ncpus);
|
||||
CPU_ZERO_S(size, mask.get());
|
||||
// the result fits the mask
|
||||
if (0 == sched_getaffinity(getpid(), size, mask.get())) {
|
||||
return std::make_tuple(std::move(mask), ncpus);
|
||||
}
|
||||
// other error
|
||||
if (errno != EINVAL)
|
||||
break;
|
||||
}
|
||||
return std::make_tuple(nullptr, 0);
|
||||
return ov::threading::get_process_mask();
|
||||
}
|
||||
|
||||
/* Release the cores affinity mask for the current process */
|
||||
void ReleaseProcessMask(cpu_set_t* mask) {
|
||||
if (nullptr != mask)
|
||||
CPU_FREE(mask);
|
||||
}
|
||||
|
||||
bool PinCurrentThreadByMask(int ncores, const CpuSet& procMask) {
|
||||
return 0 == sched_setaffinity(0, CPU_ALLOC_SIZE(ncores), procMask.get());
|
||||
ov::threading::release_process_mask(mask);
|
||||
}
|
||||
|
||||
bool PinThreadToVacantCore(int thrIdx, int hyperthreads, int ncores, const CpuSet& procMask, int cpuIdxOffset) {
|
||||
if (procMask == nullptr)
|
||||
return false;
|
||||
const size_t size = CPU_ALLOC_SIZE(ncores);
|
||||
const int num_cpus = CPU_COUNT_S(size, procMask.get());
|
||||
thrIdx %= num_cpus; // To limit unique number in [; num_cpus-1] range
|
||||
// Place threads with specified step
|
||||
int cpu_idx = cpuIdxOffset;
|
||||
for (int i = 0, offset = 0; i < thrIdx; ++i) {
|
||||
cpu_idx += hyperthreads;
|
||||
if (cpu_idx >= num_cpus)
|
||||
cpu_idx = ++offset;
|
||||
}
|
||||
|
||||
// Find index of 'cpu_idx'-th bit that equals to 1
|
||||
int mapped_idx = cpuIdxOffset - 1;
|
||||
while (cpu_idx >= cpuIdxOffset) {
|
||||
mapped_idx++;
|
||||
if (CPU_ISSET_S(mapped_idx, size, procMask.get()))
|
||||
--cpu_idx;
|
||||
}
|
||||
|
||||
CpuSet targetMask{CPU_ALLOC(ncores)};
|
||||
CPU_ZERO_S(size, targetMask.get());
|
||||
CPU_SET_S(mapped_idx, size, targetMask.get());
|
||||
bool res = PinCurrentThreadByMask(ncores, targetMask);
|
||||
return res;
|
||||
}
|
||||
|
||||
bool PinCurrentThreadToSocket(int socket) {
|
||||
const int sockets = InferenceEngine::getAvailableNUMANodes().size();
|
||||
const int cores = InferenceEngine::getNumberOfCPUCores();
|
||||
const int cores_per_socket = cores / sockets;
|
||||
|
||||
int ncpus = 0;
|
||||
CpuSet mask;
|
||||
std::tie(mask, ncpus) = GetProcessMask();
|
||||
CpuSet targetMask{CPU_ALLOC(ncpus)};
|
||||
const size_t size = CPU_ALLOC_SIZE(ncpus);
|
||||
CPU_ZERO_S(size, targetMask.get());
|
||||
|
||||
for (int core = socket * cores_per_socket; core < (socket + 1) * cores_per_socket; core++) {
|
||||
CPU_SET_S(core, size, targetMask.get());
|
||||
}
|
||||
// respect the user-defined mask for the entire process
|
||||
CPU_AND_S(size, targetMask.get(), targetMask.get(), mask.get());
|
||||
bool res = false;
|
||||
if (CPU_COUNT_S(size, targetMask.get())) { // if we have non-zero mask to set
|
||||
res = PinCurrentThreadByMask(ncpus, targetMask);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
#else // no threads pinning/binding on Win/MacOS
|
||||
std::tuple<CpuSet, int> GetProcessMask() {
|
||||
return std::make_tuple(nullptr, 0);
|
||||
}
|
||||
void ReleaseProcessMask(cpu_set_t*) {}
|
||||
|
||||
bool PinThreadToVacantCore(int thrIdx, int hyperthreads, int ncores, const CpuSet& procMask, int cpuIdxOffset) {
|
||||
return false;
|
||||
return ov::threading::pin_thread_to_vacant_core(thrIdx, hyperthreads, ncores, procMask, cpuIdxOffset);
|
||||
}
|
||||
bool PinCurrentThreadByMask(int ncores, const CpuSet& procMask) {
|
||||
return false;
|
||||
return ov::threading::pin_current_thread_by_mask(ncores, procMask);
|
||||
}
|
||||
bool PinCurrentThreadToSocket(int socket) {
|
||||
return false;
|
||||
return ov::threading::pin_current_thread_to_socket(socket);
|
||||
}
|
||||
#endif // !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
|
||||
#include "ie_api.h"
|
||||
#include "dev/threading/thread_affinity.hpp"
|
||||
|
||||
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
# include <sched.h>
|
||||
@ -15,7 +15,7 @@
|
||||
|
||||
namespace InferenceEngine {
|
||||
#if (defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
using cpu_set_t = void;
|
||||
using cpu_set_t = ov::threading::cpu_set_t;
|
||||
#endif // (defined(__APPLE__) || defined(_WIN32))
|
||||
|
||||
/**
|
||||
@ -26,26 +26,9 @@ using cpu_set_t = void;
|
||||
*/
|
||||
void ReleaseProcessMask(cpu_set_t* mask);
|
||||
|
||||
/**
|
||||
* @brief Deleter for process mask
|
||||
* @ingroup ie_dev_api_threading
|
||||
*/
|
||||
struct ReleaseProcessMaskDeleter {
|
||||
/**
|
||||
* @brief A callable operator to release object
|
||||
*
|
||||
* @param mask The mask to release
|
||||
*/
|
||||
void operator()(cpu_set_t* mask) const {
|
||||
ReleaseProcessMask(mask);
|
||||
}
|
||||
};
|
||||
using ReleaseProcessMaskDeleter = ov::threading::ReleaseProcessMaskDeleter;
|
||||
|
||||
/**
|
||||
* @brief A unique pointer to CPU set structure with the ReleaseProcessMaskDeleter deleter
|
||||
* @ingroup ie_dev_api_threading
|
||||
*/
|
||||
using CpuSet = std::unique_ptr<cpu_set_t, ReleaseProcessMaskDeleter>;
|
||||
using CpuSet = ov::threading::CpuSet;
|
||||
|
||||
/**
|
||||
* @brief Get the cores affinity mask for the current process
|
||||
|
@ -9,41 +9,4 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <openvino/cc/selective_build.h>
|
||||
|
||||
#include <openvino/itt.hpp>
|
||||
|
||||
#include "openvino/core/except.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace tbbbind {
|
||||
namespace itt {
|
||||
namespace domains {
|
||||
OV_ITT_DOMAIN(tbb_bind);
|
||||
} // namespace domains
|
||||
} // namespace itt
|
||||
} // namespace tbbbind
|
||||
} // namespace ov
|
||||
|
||||
OV_CC_DOMAINS(tbb_bind);
|
||||
|
||||
/*
|
||||
* TBB_BIND_SCOPE macro allows to disable parts of tbb_bind calling if they are not used.
|
||||
*/
|
||||
#if defined(SELECTIVE_BUILD_ANALYZER)
|
||||
|
||||
# define TBB_BIND_SCOPE(region) OV_SCOPE(tbb_bind, region)
|
||||
# define TBB_BIND_NUMA_ENABLED OV_SCOPE(tbb_bind, NUMA)
|
||||
|
||||
#elif defined(SELECTIVE_BUILD)
|
||||
|
||||
# define TBB_BIND_SCOPE(region) \
|
||||
if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(tbb_bind, _, NUMA)) == 1 && \
|
||||
OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(tbb_bind, _, region)) == 1)
|
||||
# define TBB_BIND_NUMA_ENABLED
|
||||
|
||||
#else
|
||||
|
||||
# define TBB_BIND_SCOPE(region)
|
||||
# define TBB_BIND_NUMA_ENABLED
|
||||
#endif
|
||||
#include "dev/threading/itt.hpp"
|
||||
|
Loading…
Reference in New Issue
Block a user