CPU pinning on Windows (#19405)
* add cpu pinning on windows
* remove pinning limitation on windows
* only support the machine with one numa node
* fix code style
* fix build error on macos
* set mask initial value
* fix test failure on windows
* fix build failure on macos, add limitation on windows machine with two sockets
* fix test failure on windows
* fix test failure
* fix comments
This commit is contained in:
parent
f735999e7b
commit
678e919b13
@ -51,8 +51,12 @@ void get_cur_stream_info(const int stream_id,
|
|||||||
max_threads_per_core = 2;
|
max_threads_per_core = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(_WIN32) || defined(__APPLE__)
|
#if defined(__APPLE__)
|
||||||
cpu_reserve = false;
|
cpu_reserve = false;
|
||||||
|
#elif defined(_WIN32)
|
||||||
|
if (proc_type_table.size() > 1) {
|
||||||
|
cpu_reserve = false;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
if (cpu_reserve) {
|
if (cpu_reserve) {
|
||||||
stream_type = STREAM_WITH_OBSERVE;
|
stream_type = STREAM_WITH_OBSERVE;
|
||||||
|
@ -110,7 +110,33 @@ bool pin_current_thread_to_socket(int socket) {
|
|||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
#else // no threads pinning/binding on Win/MacOS
|
#elif defined(_WIN32)
|
||||||
|
std::tuple<CpuSet, int> get_process_mask() {
|
||||||
|
DWORD_PTR pro_mask, sys_mask;
|
||||||
|
if (0 != GetProcessAffinityMask(GetCurrentProcess(), &pro_mask, &sys_mask)) {
|
||||||
|
CpuSet mask(new DWORD_PTR(pro_mask));
|
||||||
|
return std::make_tuple(std::move(mask), 0);
|
||||||
|
}
|
||||||
|
return std::make_tuple(nullptr, 0);
|
||||||
|
}
|
||||||
|
void release_process_mask(cpu_set_t*) {}
|
||||||
|
|
||||||
|
bool pin_thread_to_vacant_core(int thrIdx,
|
||||||
|
int hyperthreads,
|
||||||
|
int ncores,
|
||||||
|
const CpuSet& procMask,
|
||||||
|
const std::vector<int>& cpu_ids,
|
||||||
|
int cpuIdxOffset) {
|
||||||
|
return 0 != SetThreadAffinityMask(GetCurrentThread(), DWORD_PTR(1) << cpu_ids[thrIdx]);
|
||||||
|
}
|
||||||
|
bool pin_current_thread_by_mask(int ncores, const CpuSet& procMask) {
|
||||||
|
DWORD_PTR mask = static_cast<DWORD_PTR>(*procMask.get());
|
||||||
|
return 0 != SetThreadAffinityMask(GetCurrentThread(), mask);
|
||||||
|
}
|
||||||
|
bool pin_current_thread_to_socket(int socket) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
#else // no threads pinning/binding on MacOS
|
||||||
std::tuple<CpuSet, int> get_process_mask() {
|
std::tuple<CpuSet, int> get_process_mask() {
|
||||||
return std::make_tuple(nullptr, 0);
|
return std::make_tuple(nullptr, 0);
|
||||||
}
|
}
|
||||||
|
@ -11,13 +11,20 @@
|
|||||||
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||||
# include <sched.h>
|
# include <sched.h>
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(_WIN32)
|
||||||
|
# include <windows.h>
|
||||||
|
|
||||||
|
# include <thread>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace ov {
|
namespace ov {
|
||||||
namespace threading {
|
namespace threading {
|
||||||
|
|
||||||
#if (defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
#if (defined(__APPLE__) || defined(__EMSCRIPTEN__))
|
||||||
using cpu_set_t = void;
|
using cpu_set_t = void;
|
||||||
#endif // (defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
#elif defined(_WIN32)
|
||||||
|
using cpu_set_t = DWORD_PTR;
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Release the cores affinity mask for the current process
|
* @brief Release the cores affinity mask for the current process
|
||||||
|
@ -357,21 +357,12 @@ LinuxCpuStreamTypeCase _1sockets_4cores_binding = {
|
|||||||
},
|
},
|
||||||
{{8, 4, 0, 4, 0, 0}},
|
{{8, 4, 0, 4, 0, 0}},
|
||||||
{{4, MAIN_CORE_PROC, 1, 0, 0}},
|
{{4, MAIN_CORE_PROC, 1, 0, 0}},
|
||||||
# if defined(__linux__)
|
|
||||||
{
|
{
|
||||||
STREAM_WITH_OBSERVE,
|
STREAM_WITH_OBSERVE,
|
||||||
STREAM_WITH_OBSERVE,
|
STREAM_WITH_OBSERVE,
|
||||||
STREAM_WITH_OBSERVE,
|
STREAM_WITH_OBSERVE,
|
||||||
STREAM_WITH_OBSERVE,
|
STREAM_WITH_OBSERVE,
|
||||||
},
|
},
|
||||||
# else
|
|
||||||
{
|
|
||||||
STREAM_WITHOUT_PARAM,
|
|
||||||
STREAM_WITHOUT_PARAM,
|
|
||||||
STREAM_WITHOUT_PARAM,
|
|
||||||
STREAM_WITHOUT_PARAM,
|
|
||||||
},
|
|
||||||
# endif
|
|
||||||
{1, 1, 1, 1},
|
{1, 1, 1, 1},
|
||||||
{
|
{
|
||||||
MAIN_CORE_PROC,
|
MAIN_CORE_PROC,
|
||||||
@ -423,17 +414,10 @@ LinuxCpuStreamTypeCase _1sockets_12cores_pcore_binding = {
|
|||||||
},
|
},
|
||||||
{{20, 8, 4, 8, 0, 0}},
|
{{20, 8, 4, 8, 0, 0}},
|
||||||
{{2, MAIN_CORE_PROC, 4, 0, 0}},
|
{{2, MAIN_CORE_PROC, 4, 0, 0}},
|
||||||
# if defined(__linux__)
|
|
||||||
{
|
{
|
||||||
STREAM_WITH_OBSERVE,
|
STREAM_WITH_OBSERVE,
|
||||||
STREAM_WITH_OBSERVE,
|
STREAM_WITH_OBSERVE,
|
||||||
},
|
},
|
||||||
# else
|
|
||||||
{
|
|
||||||
STREAM_WITH_CORE_TYPE,
|
|
||||||
STREAM_WITH_CORE_TYPE,
|
|
||||||
},
|
|
||||||
# endif
|
|
||||||
{4, 4},
|
{4, 4},
|
||||||
{
|
{
|
||||||
MAIN_CORE_PROC,
|
MAIN_CORE_PROC,
|
||||||
@ -488,21 +472,12 @@ LinuxCpuStreamTypeCase _1sockets_12cores_ecore_binding = {
|
|||||||
},
|
},
|
||||||
{{20, 8, 4, 8, 0, 0}},
|
{{20, 8, 4, 8, 0, 0}},
|
||||||
{{4, EFFICIENT_CORE_PROC, 1, 0, 0}},
|
{{4, EFFICIENT_CORE_PROC, 1, 0, 0}},
|
||||||
# if defined(__linux__)
|
|
||||||
{
|
{
|
||||||
STREAM_WITH_OBSERVE,
|
STREAM_WITH_OBSERVE,
|
||||||
STREAM_WITH_OBSERVE,
|
STREAM_WITH_OBSERVE,
|
||||||
STREAM_WITH_OBSERVE,
|
STREAM_WITH_OBSERVE,
|
||||||
STREAM_WITH_OBSERVE,
|
STREAM_WITH_OBSERVE,
|
||||||
},
|
},
|
||||||
# else
|
|
||||||
{
|
|
||||||
STREAM_WITH_CORE_TYPE,
|
|
||||||
STREAM_WITH_CORE_TYPE,
|
|
||||||
STREAM_WITH_CORE_TYPE,
|
|
||||||
STREAM_WITH_CORE_TYPE,
|
|
||||||
},
|
|
||||||
# endif
|
|
||||||
{1, 1, 1, 1},
|
{1, 1, 1, 1},
|
||||||
{
|
{
|
||||||
EFFICIENT_CORE_PROC,
|
EFFICIENT_CORE_PROC,
|
||||||
|
@ -79,19 +79,29 @@ bool get_cpu_pinning(bool& input_value,
|
|||||||
int num_sockets = get_default_latency_streams(latency_threading_mode);
|
int num_sockets = get_default_latency_streams(latency_threading_mode);
|
||||||
bool latency = num_streams <= num_sockets && num_streams > 0;
|
bool latency = num_streams <= num_sockets && num_streams > 0;
|
||||||
|
|
||||||
|
if (input_changed) {
|
||||||
|
result_value = input_value;
|
||||||
|
} else {
|
||||||
if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 &&
|
if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 &&
|
||||||
proc_type_table[0][EFFICIENT_CORE_PROC] < proc_type_table[0][ALL_PROC]) {
|
proc_type_table[0][EFFICIENT_CORE_PROC] < proc_type_table[0][ALL_PROC]) {
|
||||||
result_value =
|
result_value =
|
||||||
input_changed
|
((latency || bind_type == threading::IStreamsExecutor::ThreadBindingType::NUMA) ? false : true);
|
||||||
? input_value
|
|
||||||
: ((latency || bind_type == threading::IStreamsExecutor::ThreadBindingType::NUMA) ? false : true);
|
|
||||||
} else {
|
} else {
|
||||||
result_value = input_changed
|
result_value = (bind_type == threading::IStreamsExecutor::ThreadBindingType::NUMA ? false : true);
|
||||||
? input_value
|
|
||||||
: (bind_type == threading::IStreamsExecutor::ThreadBindingType::NUMA ? false : true);
|
|
||||||
}
|
}
|
||||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||||
# if defined(__APPLE__) || defined(_WIN32)
|
# if defined(_WIN32)
|
||||||
|
result_value = false;
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||||
|
# if defined(_WIN32)
|
||||||
|
if (proc_type_table.size() > 1) {
|
||||||
|
result_value = false;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
# if defined(__APPLE__)
|
||||||
result_value = false;
|
result_value = false;
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -578,7 +578,7 @@ StreamGenerateionTestCase generation_tput_2sockets_48cores_9 = {
|
|||||||
{{24, MAIN_CORE_PROC, 1, 0, 0}, {24, MAIN_CORE_PROC, 1, 1, 1}},
|
{{24, MAIN_CORE_PROC, 1, 0, 0}, {24, MAIN_CORE_PROC, 1, 1, 1}},
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef __linux__
|
#if defined (__linux__) || defined(_WIN32)
|
||||||
INSTANTIATE_TEST_SUITE_P(smoke_StreamsGeneration,
|
INSTANTIATE_TEST_SUITE_P(smoke_StreamsGeneration,
|
||||||
StreamGenerationTests,
|
StreamGenerationTests,
|
||||||
::testing::Values(generation_latency_1sockets_14cores_3,
|
::testing::Values(generation_latency_1sockets_14cores_3,
|
||||||
|
Loading…
Reference in New Issue
Block a user