CPU pinning on Windows (#19405)
* add CPU pinning on Windows * remove pinning limitation on Windows * only support machines with one NUMA node * fix code style * fix build error on macOS * set mask initial value * fix test failure on Windows * fix build failure on macOS, add limitation on Windows machines with two sockets * fix test failure on Windows * fix test failure * fix comments
This commit is contained in:
parent
f735999e7b
commit
678e919b13
@ -51,8 +51,12 @@ void get_cur_stream_info(const int stream_id,
|
||||
max_threads_per_core = 2;
|
||||
}
|
||||
|
||||
#if defined(_WIN32) || defined(__APPLE__)
|
||||
#if defined(__APPLE__)
|
||||
cpu_reserve = false;
|
||||
#elif defined(_WIN32)
|
||||
if (proc_type_table.size() > 1) {
|
||||
cpu_reserve = false;
|
||||
}
|
||||
#endif
|
||||
if (cpu_reserve) {
|
||||
stream_type = STREAM_WITH_OBSERVE;
|
||||
|
@ -110,7 +110,33 @@ bool pin_current_thread_to_socket(int socket) {
|
||||
}
|
||||
return res;
|
||||
}
|
||||
#else // no threads pinning/binding on Win/MacOS
|
||||
#elif defined(_WIN32)
|
||||
std::tuple<CpuSet, int> get_process_mask() {
|
||||
DWORD_PTR pro_mask, sys_mask;
|
||||
if (0 != GetProcessAffinityMask(GetCurrentProcess(), &pro_mask, &sys_mask)) {
|
||||
CpuSet mask(new DWORD_PTR(pro_mask));
|
||||
return std::make_tuple(std::move(mask), 0);
|
||||
}
|
||||
return std::make_tuple(nullptr, 0);
|
||||
}
|
||||
/**
 * @brief Releases a process affinity mask (Windows).
 *
 * The Windows get_process_mask() allocates the mask with `new DWORD_PTR(...)`, so it is freed
 * with `delete` here instead of being left allocated.
 * NOTE(review): this assumes CpuSet's deleter forwards to this function and that no caller
 * frees the pointer a second time - confirm against the CpuSet declaration.
 *
 * @param mask Mask previously allocated by get_process_mask(); may be null
 */
void release_process_mask(cpu_set_t* mask) {
    delete mask;  // deleting a null pointer is a no-op
}
|
||||
|
||||
bool pin_thread_to_vacant_core(int thrIdx,
|
||||
int hyperthreads,
|
||||
int ncores,
|
||||
const CpuSet& procMask,
|
||||
const std::vector<int>& cpu_ids,
|
||||
int cpuIdxOffset) {
|
||||
return 0 != SetThreadAffinityMask(GetCurrentThread(), DWORD_PTR(1) << cpu_ids[thrIdx]);
|
||||
}
|
||||
bool pin_current_thread_by_mask(int ncores, const CpuSet& procMask) {
|
||||
DWORD_PTR mask = static_cast<DWORD_PTR>(*procMask.get());
|
||||
return 0 != SetThreadAffinityMask(GetCurrentThread(), mask);
|
||||
}
|
||||
/**
 * @brief Windows variant of socket pinning: performs no pinning and always reports failure.
 *
 * @param socket Ignored
 * @return Always false
 */
bool pin_current_thread_to_socket(int socket) {
    static_cast<void>(socket);  // intentionally unused
    constexpr bool pinned = false;
    return pinned;
}
|
||||
#else // no threads pinning/binding on MacOS
|
||||
/**
 * @brief Fallback for platforms without thread pinning/binding: no process mask is available.
 *
 * @return A tuple holding a null CpuSet and 0
 */
std::tuple<CpuSet, int> get_process_mask() {
    return std::tuple<CpuSet, int>(nullptr, 0);
}
|
||||
|
@ -11,13 +11,20 @@
|
||||
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
# include <sched.h>
|
||||
#endif
|
||||
#if defined(_WIN32)
|
||||
# include <windows.h>
|
||||
|
||||
# include <thread>
|
||||
#endif
|
||||
|
||||
namespace ov {
|
||||
namespace threading {
|
||||
|
||||
#if (defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
#if (defined(__APPLE__) || defined(__EMSCRIPTEN__))
|
||||
using cpu_set_t = void;
|
||||
#endif // (defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
#elif defined(_WIN32)
|
||||
using cpu_set_t = DWORD_PTR;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Release the cores affinity mask for the current process
|
||||
|
@ -357,21 +357,12 @@ LinuxCpuStreamTypeCase _1sockets_4cores_binding = {
|
||||
},
|
||||
{{8, 4, 0, 4, 0, 0}},
|
||||
{{4, MAIN_CORE_PROC, 1, 0, 0}},
|
||||
# if defined(__linux__)
|
||||
{
|
||||
STREAM_WITH_OBSERVE,
|
||||
STREAM_WITH_OBSERVE,
|
||||
STREAM_WITH_OBSERVE,
|
||||
STREAM_WITH_OBSERVE,
|
||||
},
|
||||
# else
|
||||
{
|
||||
STREAM_WITHOUT_PARAM,
|
||||
STREAM_WITHOUT_PARAM,
|
||||
STREAM_WITHOUT_PARAM,
|
||||
STREAM_WITHOUT_PARAM,
|
||||
},
|
||||
# endif
|
||||
{1, 1, 1, 1},
|
||||
{
|
||||
MAIN_CORE_PROC,
|
||||
@ -423,17 +414,10 @@ LinuxCpuStreamTypeCase _1sockets_12cores_pcore_binding = {
|
||||
},
|
||||
{{20, 8, 4, 8, 0, 0}},
|
||||
{{2, MAIN_CORE_PROC, 4, 0, 0}},
|
||||
# if defined(__linux__)
|
||||
{
|
||||
STREAM_WITH_OBSERVE,
|
||||
STREAM_WITH_OBSERVE,
|
||||
},
|
||||
# else
|
||||
{
|
||||
STREAM_WITH_CORE_TYPE,
|
||||
STREAM_WITH_CORE_TYPE,
|
||||
},
|
||||
# endif
|
||||
{4, 4},
|
||||
{
|
||||
MAIN_CORE_PROC,
|
||||
@ -488,21 +472,12 @@ LinuxCpuStreamTypeCase _1sockets_12cores_ecore_binding = {
|
||||
},
|
||||
{{20, 8, 4, 8, 0, 0}},
|
||||
{{4, EFFICIENT_CORE_PROC, 1, 0, 0}},
|
||||
# if defined(__linux__)
|
||||
{
|
||||
STREAM_WITH_OBSERVE,
|
||||
STREAM_WITH_OBSERVE,
|
||||
STREAM_WITH_OBSERVE,
|
||||
STREAM_WITH_OBSERVE,
|
||||
},
|
||||
# else
|
||||
{
|
||||
STREAM_WITH_CORE_TYPE,
|
||||
STREAM_WITH_CORE_TYPE,
|
||||
STREAM_WITH_CORE_TYPE,
|
||||
STREAM_WITH_CORE_TYPE,
|
||||
},
|
||||
# endif
|
||||
{1, 1, 1, 1},
|
||||
{
|
||||
EFFICIENT_CORE_PROC,
|
||||
|
@ -79,19 +79,29 @@ bool get_cpu_pinning(bool& input_value,
|
||||
int num_sockets = get_default_latency_streams(latency_threading_mode);
|
||||
bool latency = num_streams <= num_sockets && num_streams > 0;
|
||||
|
||||
if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 &&
|
||||
proc_type_table[0][EFFICIENT_CORE_PROC] < proc_type_table[0][ALL_PROC]) {
|
||||
result_value =
|
||||
input_changed
|
||||
? input_value
|
||||
: ((latency || bind_type == threading::IStreamsExecutor::ThreadBindingType::NUMA) ? false : true);
|
||||
if (input_changed) {
|
||||
result_value = input_value;
|
||||
} else {
|
||||
result_value = input_changed
|
||||
? input_value
|
||||
: (bind_type == threading::IStreamsExecutor::ThreadBindingType::NUMA ? false : true);
|
||||
if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 &&
|
||||
proc_type_table[0][EFFICIENT_CORE_PROC] < proc_type_table[0][ALL_PROC]) {
|
||||
result_value =
|
||||
((latency || bind_type == threading::IStreamsExecutor::ThreadBindingType::NUMA) ? false : true);
|
||||
} else {
|
||||
result_value = (bind_type == threading::IStreamsExecutor::ThreadBindingType::NUMA ? false : true);
|
||||
}
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
# if defined(_WIN32)
|
||||
result_value = false;
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
# if defined(__APPLE__) || defined(_WIN32)
|
||||
# if defined(_WIN32)
|
||||
if (proc_type_table.size() > 1) {
|
||||
result_value = false;
|
||||
}
|
||||
# endif
|
||||
# if defined(__APPLE__)
|
||||
result_value = false;
|
||||
# endif
|
||||
#endif
|
||||
|
@ -578,7 +578,7 @@ StreamGenerateionTestCase generation_tput_2sockets_48cores_9 = {
|
||||
{{24, MAIN_CORE_PROC, 1, 0, 0}, {24, MAIN_CORE_PROC, 1, 1, 1}},
|
||||
};
|
||||
|
||||
#ifdef __linux__
|
||||
#if defined (__linux__) || defined(_WIN32)
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_StreamsGeneration,
|
||||
StreamGenerationTests,
|
||||
::testing::Values(generation_latency_1sockets_14cores_3,
|
||||
|
Loading…
Reference in New Issue
Block a user