Enable CPU thread pinning on Windows.

What the hunks below do:
  * cpu_streams_executor_internal.cpp: on Windows, cpu_reserve is now cleared only when
    proc_type_table has more than one row; on macOS it is still always cleared.
  * thread_affinity.hpp / thread_affinity.cpp: cpu_set_t becomes DWORD_PTR on Windows and the
    affinity helpers get Win32 implementations built on GetProcessAffinityMask() and
    SetThreadAffinityMask(). The helpers reject a null mask and out-of-range processor ids
    instead of dereferencing or shifting blindly.
  * cpu_map_scheduling.cpp: an explicitly changed pinning property is taken first; with TBB on
    Windows the computed default is off, and pinning is forced off when proc_type_table has
    more than one row.
  * unit tests: the per-platform expectation branches are removed and the streams e2e suite is
    also instantiated on Windows.

NOTE(review): the stream-type expectations in cpu_stream_info_test.cpp are now unconditional.
Confirm that the enclosing guard (outside these hunks) keeps them from running on macOS, where
cpu_reserve is still forced to false.
NOTE(review): indentation and angle-bracket arguments (template arguments, include targets)
inside the hunks were restored from context. Verify against the target tree, or apply with
whitespace tolerance (git apply --ignore-whitespace).

diff --git a/src/inference/src/dev/threading/cpu_streams_executor_internal.cpp b/src/inference/src/dev/threading/cpu_streams_executor_internal.cpp
index c05509e225c..7936130997b 100644
--- a/src/inference/src/dev/threading/cpu_streams_executor_internal.cpp
+++ b/src/inference/src/dev/threading/cpu_streams_executor_internal.cpp
@@ -51,8 +51,13 @@ void get_cur_stream_info(const int stream_id,
         max_threads_per_core = 2;
     }
 
-#if defined(_WIN32) || defined(__APPLE__)
+#if defined(__APPLE__)
     cpu_reserve = false;
+#elif defined(_WIN32)
+    // Windows: CPU reservation is dropped only when proc_type_table has more than one row.
+    if (proc_type_table.size() > 1) {
+        cpu_reserve = false;
+    }
 #endif
     if (cpu_reserve) {
         stream_type = STREAM_WITH_OBSERVE;
diff --git a/src/inference/src/dev/threading/thread_affinity.cpp b/src/inference/src/dev/threading/thread_affinity.cpp
index 1e74c5cd055..887e89237b5 100644
--- a/src/inference/src/dev/threading/thread_affinity.cpp
+++ b/src/inference/src/dev/threading/thread_affinity.cpp
@@ -110,7 +110,60 @@ bool pin_current_thread_to_socket(int socket) {
     }
     return res;
 }
-#else // no threads pinning/binding on Win/MacOS
+#elif defined(_WIN32)
+// Returns the current process affinity mask wrapped in a CpuSet, or a null CpuSet when
+// GetProcessAffinityMask() fails. The second tuple element is always 0 on this platform.
+std::tuple<CpuSet, int> get_process_mask() {
+    DWORD_PTR pro_mask = 0;
+    DWORD_PTR sys_mask = 0;
+    if (0 != GetProcessAffinityMask(GetCurrentProcess(), &pro_mask, &sys_mask)) {
+        CpuSet mask(new DWORD_PTR(pro_mask));
+        return std::make_tuple(std::move(mask), 0);
+    }
+    return std::make_tuple(nullptr, 0);
+}
+
+// Frees a mask allocated by get_process_mask(); deleting nullptr is a no-op.
+// NOTE(review): CpuSet is declared outside this patch; its deleter is expected to route
+// through this function, otherwise the DWORD_PTR allocated above is never freed - confirm.
+void release_process_mask(cpu_set_t* mask) {
+    delete mask;
+}
+
+// Pins the calling thread to the logical processor cpu_ids[thrIdx].
+// Returns false when thrIdx is outside cpu_ids, when the processor id does not fit into a
+// DWORD_PTR mask, or when SetThreadAffinityMask() fails. The other parameters are unused here.
+bool pin_thread_to_vacant_core(int thrIdx,
+                               int hyperthreads,
+                               int ncores,
+                               const CpuSet& procMask,
+                               const std::vector<int>& cpu_ids,
+                               int cpuIdxOffset) {
+    if (thrIdx < 0 || static_cast<size_t>(thrIdx) >= cpu_ids.size()) {
+        return false;
+    }
+    const int cpu_id = cpu_ids[thrIdx];
+    // Shifting by a negative amount or by >= the mask width is undefined behaviour.
+    if (cpu_id < 0 || cpu_id >= static_cast<int>(sizeof(DWORD_PTR) * 8)) {
+        return false;
+    }
+    return 0 != SetThreadAffinityMask(GetCurrentThread(), DWORD_PTR(1) << cpu_id);
+}
+
+// Restricts the calling thread to the processors in procMask; ncores is unused here.
+// Returns false for a null mask (get_process_mask() yields one on failure) or on API failure.
+bool pin_current_thread_by_mask(int ncores, const CpuSet& procMask) {
+    if (!procMask) {
+        return false;
+    }
+    return 0 != SetThreadAffinityMask(GetCurrentThread(), *procMask);
+}
+
+// Socket-level pinning is not implemented on Windows; always reports failure.
+bool pin_current_thread_to_socket(int socket) {
+    return false;
+}
+#else // no threads pinning/binding on MacOS
 std::tuple<CpuSet, int> get_process_mask() {
     return std::make_tuple(nullptr, 0);
 }
diff --git a/src/inference/src/dev/threading/thread_affinity.hpp b/src/inference/src/dev/threading/thread_affinity.hpp
index 1f971d64275..20c0f7d513a 100644
--- a/src/inference/src/dev/threading/thread_affinity.hpp
+++ b/src/inference/src/dev/threading/thread_affinity.hpp
@@ -11,13 +11,20 @@
 #if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
 #    include <sched.h>
 #endif
+#if defined(_WIN32)
+#    include <windows.h>
+
+#    include <thread>
+#endif
 
 namespace ov {
 namespace threading {
 
-#if (defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
+#if (defined(__APPLE__) || defined(__EMSCRIPTEN__))
 using cpu_set_t = void;
-#endif // (defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
+#elif defined(_WIN32)
+using cpu_set_t = DWORD_PTR;
+#endif
 
 /**
  * @brief Release the cores affinity mask for the current process
diff --git a/src/inference/tests/unit/cpu_stream_info_test.cpp b/src/inference/tests/unit/cpu_stream_info_test.cpp
index 51d07015077..56d651a031b 100644
--- a/src/inference/tests/unit/cpu_stream_info_test.cpp
+++ b/src/inference/tests/unit/cpu_stream_info_test.cpp
@@ -357,21 +357,12 @@ LinuxCpuStreamTypeCase _1sockets_4cores_binding = {
     },
     {{8, 4, 0, 4, 0, 0}},
     {{4, MAIN_CORE_PROC, 1, 0, 0}},
-#    if defined(__linux__)
     {
         STREAM_WITH_OBSERVE,
         STREAM_WITH_OBSERVE,
         STREAM_WITH_OBSERVE,
         STREAM_WITH_OBSERVE,
     },
-#    else
-    {
-        STREAM_WITHOUT_PARAM,
-        STREAM_WITHOUT_PARAM,
-        STREAM_WITHOUT_PARAM,
-        STREAM_WITHOUT_PARAM,
-    },
-#    endif
     {1, 1, 1, 1},
     {
         MAIN_CORE_PROC,
@@ -423,17 +414,10 @@ LinuxCpuStreamTypeCase _1sockets_12cores_pcore_binding = {
     },
     {{20, 8, 4, 8, 0, 0}},
     {{2, MAIN_CORE_PROC, 4, 0, 0}},
-#    if defined(__linux__)
     {
         STREAM_WITH_OBSERVE,
         STREAM_WITH_OBSERVE,
     },
-#    else
-    {
-        STREAM_WITH_CORE_TYPE,
-        STREAM_WITH_CORE_TYPE,
-    },
-#    endif
     {4, 4},
     {
         MAIN_CORE_PROC,
@@ -488,21 +472,12 @@ LinuxCpuStreamTypeCase _1sockets_12cores_ecore_binding = {
     },
     {{20, 8, 4, 8, 0, 0}},
     {{4, EFFICIENT_CORE_PROC, 1, 0, 0}},
-#    if defined(__linux__)
     {
         STREAM_WITH_OBSERVE,
         STREAM_WITH_OBSERVE,
         STREAM_WITH_OBSERVE,
         STREAM_WITH_OBSERVE,
     },
-#    else
-    {
-        STREAM_WITH_CORE_TYPE,
-        STREAM_WITH_CORE_TYPE,
-        STREAM_WITH_CORE_TYPE,
-        STREAM_WITH_CORE_TYPE,
-    },
-#    endif
     {1, 1, 1, 1},
     {
         EFFICIENT_CORE_PROC,
diff --git a/src/plugins/intel_cpu/src/cpu_map_scheduling.cpp b/src/plugins/intel_cpu/src/cpu_map_scheduling.cpp
index 3554de0636d..9efcc23ddf2 100644
--- a/src/plugins/intel_cpu/src/cpu_map_scheduling.cpp
+++ b/src/plugins/intel_cpu/src/cpu_map_scheduling.cpp
@@ -79,19 +79,31 @@ bool get_cpu_pinning(bool& input_value,
     int num_sockets = get_default_latency_streams(latency_threading_mode);
     bool latency = num_streams <= num_sockets && num_streams > 0;
 
-    if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 &&
-        proc_type_table[0][EFFICIENT_CORE_PROC] < proc_type_table[0][ALL_PROC]) {
-        result_value =
-            input_changed
-                ? input_value
-                : ((latency || bind_type == threading::IStreamsExecutor::ThreadBindingType::NUMA) ? false : true);
+    if (input_changed) {
+        result_value = input_value;
     } else {
-        result_value = input_changed
-                           ? input_value
-                           : (bind_type == threading::IStreamsExecutor::ThreadBindingType::NUMA ? false : true);
+        if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 &&
+            proc_type_table[0][EFFICIENT_CORE_PROC] < proc_type_table[0][ALL_PROC]) {
+            result_value =
+                ((latency || bind_type == threading::IStreamsExecutor::ThreadBindingType::NUMA) ? false : true);
+        } else {
+            result_value = (bind_type == threading::IStreamsExecutor::ThreadBindingType::NUMA ? false : true);
+        }
+#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
+#    if defined(_WIN32)
+        // Windows + TBB: the default computed above is discarded, pinning stays off unless requested.
+        result_value = false;
+#    endif
+#endif
     }
 #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
-#    if defined(__APPLE__) || defined(_WIN32)
+#    if defined(_WIN32)
+    // Even an explicit request is overridden when proc_type_table has more than one row.
+    if (proc_type_table.size() > 1) {
+        result_value = false;
+    }
+#    endif
+#    if defined(__APPLE__)
     result_value = false;
 #    endif
 #endif
diff --git a/src/plugins/intel_cpu/tests/unit/streams_info/streams_e2e_test.cpp b/src/plugins/intel_cpu/tests/unit/streams_info/streams_e2e_test.cpp
index 05ad350a5f6..1bcdb6f84ef 100644
--- a/src/plugins/intel_cpu/tests/unit/streams_info/streams_e2e_test.cpp
+++ b/src/plugins/intel_cpu/tests/unit/streams_info/streams_e2e_test.cpp
@@ -578,7 +578,7 @@ StreamGenerateionTestCase generation_tput_2sockets_48cores_9 = {
     {{24, MAIN_CORE_PROC, 1, 0, 0}, {24, MAIN_CORE_PROC, 1, 1, 1}},
 };
 
-#ifdef __linux__
+#if defined(__linux__) || defined(_WIN32)
 INSTANTIATE_TEST_SUITE_P(smoke_StreamsGeneration,
                          StreamGenerationTests,
                          ::testing::Values(generation_latency_1sockets_14cores_3,