CPU pinning on Windows (#19405)

* add cpu pinning on windows

* remove pinning limitation on windows

* only support machines with one NUMA node

* fix code style

* fix build error on macos

* set mask initial value

* fix test failure on windows

* fix build failure on macos, add limitation on windows machine with two sockets

* fix test failure on windows

* fix test failure

* fix comments
This commit is contained in:
Sun Xiaoxia 2023-09-23 11:28:15 +08:00 committed by GitHub
parent f735999e7b
commit 678e919b13
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 62 additions and 40 deletions

View File

@ -51,8 +51,12 @@ void get_cur_stream_info(const int stream_id,
max_threads_per_core = 2; max_threads_per_core = 2;
} }
#if defined(_WIN32) || defined(__APPLE__) #if defined(__APPLE__)
cpu_reserve = false; cpu_reserve = false;
#elif defined(_WIN32)
if (proc_type_table.size() > 1) {
cpu_reserve = false;
}
#endif #endif
if (cpu_reserve) { if (cpu_reserve) {
stream_type = STREAM_WITH_OBSERVE; stream_type = STREAM_WITH_OBSERVE;

View File

@ -110,7 +110,33 @@ bool pin_current_thread_to_socket(int socket) {
} }
return res; return res;
} }
#else // no threads pinning/binding on Win/MacOS #elif defined(_WIN32)
std::tuple<CpuSet, int> get_process_mask() {
DWORD_PTR pro_mask, sys_mask;
if (0 != GetProcessAffinityMask(GetCurrentProcess(), &pro_mask, &sys_mask)) {
CpuSet mask(new DWORD_PTR(pro_mask));
return std::make_tuple(std::move(mask), 0);
}
return std::make_tuple(nullptr, 0);
}
// Windows branch of release_process_mask: the body is empty, so the argument is ignored and nothing is freed.
// NOTE(review): get_process_mask() in this branch allocates the mask with `new`; confirm that the CpuSet
// deleter releases it, otherwise that allocation is never freed.
void release_process_mask(cpu_set_t*) {}
/**
 * @brief Pins the calling thread to the logical processor listed at cpu_ids[thrIdx] (Windows branch).
 *
 * Only thrIdx and cpu_ids are used in this branch; hyperthreads, ncores, procMask and cpuIdxOffset are
 * accepted for signature compatibility and ignored.
 *
 * @param thrIdx  Index into cpu_ids selecting the processor for the calling thread.
 * @param cpu_ids Logical processor ids, one per thread index.
 * @return true when SetThreadAffinityMask succeeds; false when it fails, when thrIdx is outside cpu_ids,
 *         or when the selected id cannot be represented as a bit of a DWORD_PTR affinity mask.
 */
bool pin_thread_to_vacant_core(int thrIdx,
                               int hyperthreads,
                               int ncores,
                               const CpuSet& procMask,
                               const std::vector<int>& cpu_ids,
                               int cpuIdxOffset) {
    // Guard the lookup: an out-of-range thread index would read past the end of cpu_ids.
    if (thrIdx < 0 || static_cast<std::vector<int>::size_type>(thrIdx) >= cpu_ids.size()) {
        return false;
    }
    const int cpu_id = cpu_ids[thrIdx];
    // Shifting by a negative amount or by >= the width of DWORD_PTR is undefined behaviour.
    constexpr int mask_bits = static_cast<int>(sizeof(DWORD_PTR) * 8);
    if (cpu_id < 0 || cpu_id >= mask_bits) {
        return false;
    }
    return 0 != SetThreadAffinityMask(GetCurrentThread(), DWORD_PTR(1) << cpu_id);
}
bool pin_current_thread_by_mask(int ncores, const CpuSet& procMask) {
DWORD_PTR mask = static_cast<DWORD_PTR>(*procMask.get());
return 0 != SetThreadAffinityMask(GetCurrentThread(), mask);
}
/**
 * @brief Socket-level pinning stub for the Windows branch.
 * @param socket Ignored.
 * @return Always false; no affinity change is attempted.
 */
bool pin_current_thread_to_socket(int socket) {
    static_cast<void>(socket);  // parameter intentionally unused
    return false;
}
#else // no threads pinning/binding on MacOS
std::tuple<CpuSet, int> get_process_mask() { std::tuple<CpuSet, int> get_process_mask() {
return std::make_tuple(nullptr, 0); return std::make_tuple(nullptr, 0);
} }

View File

@ -11,13 +11,20 @@
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32)) #if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
# include <sched.h> # include <sched.h>
#endif #endif
#if defined(_WIN32)
# include <windows.h>
# include <thread>
#endif
namespace ov { namespace ov {
namespace threading { namespace threading {
#if (defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32)) #if (defined(__APPLE__) || defined(__EMSCRIPTEN__))
using cpu_set_t = void; using cpu_set_t = void;
#endif // (defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32)) #elif defined(_WIN32)
using cpu_set_t = DWORD_PTR;
#endif
/** /**
* @brief Release the cores affinity mask for the current process * @brief Release the cores affinity mask for the current process

View File

@ -357,21 +357,12 @@ LinuxCpuStreamTypeCase _1sockets_4cores_binding = {
}, },
{{8, 4, 0, 4, 0, 0}}, {{8, 4, 0, 4, 0, 0}},
{{4, MAIN_CORE_PROC, 1, 0, 0}}, {{4, MAIN_CORE_PROC, 1, 0, 0}},
# if defined(__linux__)
{ {
STREAM_WITH_OBSERVE, STREAM_WITH_OBSERVE,
STREAM_WITH_OBSERVE, STREAM_WITH_OBSERVE,
STREAM_WITH_OBSERVE, STREAM_WITH_OBSERVE,
STREAM_WITH_OBSERVE, STREAM_WITH_OBSERVE,
}, },
# else
{
STREAM_WITHOUT_PARAM,
STREAM_WITHOUT_PARAM,
STREAM_WITHOUT_PARAM,
STREAM_WITHOUT_PARAM,
},
# endif
{1, 1, 1, 1}, {1, 1, 1, 1},
{ {
MAIN_CORE_PROC, MAIN_CORE_PROC,
@ -423,17 +414,10 @@ LinuxCpuStreamTypeCase _1sockets_12cores_pcore_binding = {
}, },
{{20, 8, 4, 8, 0, 0}}, {{20, 8, 4, 8, 0, 0}},
{{2, MAIN_CORE_PROC, 4, 0, 0}}, {{2, MAIN_CORE_PROC, 4, 0, 0}},
# if defined(__linux__)
{ {
STREAM_WITH_OBSERVE, STREAM_WITH_OBSERVE,
STREAM_WITH_OBSERVE, STREAM_WITH_OBSERVE,
}, },
# else
{
STREAM_WITH_CORE_TYPE,
STREAM_WITH_CORE_TYPE,
},
# endif
{4, 4}, {4, 4},
{ {
MAIN_CORE_PROC, MAIN_CORE_PROC,
@ -488,21 +472,12 @@ LinuxCpuStreamTypeCase _1sockets_12cores_ecore_binding = {
}, },
{{20, 8, 4, 8, 0, 0}}, {{20, 8, 4, 8, 0, 0}},
{{4, EFFICIENT_CORE_PROC, 1, 0, 0}}, {{4, EFFICIENT_CORE_PROC, 1, 0, 0}},
# if defined(__linux__)
{ {
STREAM_WITH_OBSERVE, STREAM_WITH_OBSERVE,
STREAM_WITH_OBSERVE, STREAM_WITH_OBSERVE,
STREAM_WITH_OBSERVE, STREAM_WITH_OBSERVE,
STREAM_WITH_OBSERVE, STREAM_WITH_OBSERVE,
}, },
# else
{
STREAM_WITH_CORE_TYPE,
STREAM_WITH_CORE_TYPE,
STREAM_WITH_CORE_TYPE,
STREAM_WITH_CORE_TYPE,
},
# endif
{1, 1, 1, 1}, {1, 1, 1, 1},
{ {
EFFICIENT_CORE_PROC, EFFICIENT_CORE_PROC,

View File

@ -79,19 +79,29 @@ bool get_cpu_pinning(bool& input_value,
int num_sockets = get_default_latency_streams(latency_threading_mode); int num_sockets = get_default_latency_streams(latency_threading_mode);
bool latency = num_streams <= num_sockets && num_streams > 0; bool latency = num_streams <= num_sockets && num_streams > 0;
if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 && if (input_changed) {
proc_type_table[0][EFFICIENT_CORE_PROC] < proc_type_table[0][ALL_PROC]) { result_value = input_value;
result_value =
input_changed
? input_value
: ((latency || bind_type == threading::IStreamsExecutor::ThreadBindingType::NUMA) ? false : true);
} else { } else {
result_value = input_changed if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 &&
? input_value proc_type_table[0][EFFICIENT_CORE_PROC] < proc_type_table[0][ALL_PROC]) {
: (bind_type == threading::IStreamsExecutor::ThreadBindingType::NUMA ? false : true); result_value =
((latency || bind_type == threading::IStreamsExecutor::ThreadBindingType::NUMA) ? false : true);
} else {
result_value = (bind_type == threading::IStreamsExecutor::ThreadBindingType::NUMA ? false : true);
}
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
# if defined(_WIN32)
result_value = false;
# endif
#endif
} }
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
# if defined(__APPLE__) || defined(_WIN32) # if defined(_WIN32)
if (proc_type_table.size() > 1) {
result_value = false;
}
# endif
# if defined(__APPLE__)
result_value = false; result_value = false;
# endif # endif
#endif #endif

View File

@ -578,7 +578,7 @@ StreamGenerateionTestCase generation_tput_2sockets_48cores_9 = {
{{24, MAIN_CORE_PROC, 1, 0, 0}, {24, MAIN_CORE_PROC, 1, 1, 1}}, {{24, MAIN_CORE_PROC, 1, 0, 0}, {24, MAIN_CORE_PROC, 1, 1, 1}},
}; };
#ifdef __linux__ #if defined (__linux__) || defined(_WIN32)
INSTANTIATE_TEST_SUITE_P(smoke_StreamsGeneration, INSTANTIATE_TEST_SUITE_P(smoke_StreamsGeneration,
StreamGenerationTests, StreamGenerationTests,
::testing::Values(generation_latency_1sockets_14cores_3, ::testing::Values(generation_latency_1sockets_14cores_3,