[CPU] HOT FIX: allow latency mode (streams = 1) to use hyper threading processors (#17592)

This commit is contained in:
Wanglei Shen 2023-05-25 15:12:56 +08:00 committed by GitHub
parent b93b863bac
commit 29f1ba9f42
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 546 additions and 122 deletions

View File

@ -42,18 +42,25 @@ std::vector<std::vector<int>> apply_scheduling_core_type(const ov::hint::Schedul
return result_table;
}
std::vector<std::vector<int>> apply_hyper_threading(bool& input_value,
const bool input_changed,
std::vector<std::vector<int>> apply_hyper_threading(bool& input_ht_hint,
const bool input_ht_changed,
const std::string input_pm_hint,
const std::vector<std::vector<int>>& proc_type_table) {
std::vector<std::vector<int>> result_table = proc_type_table;
if ((proc_type_table[0][HYPER_THREADING_PROC] > 0) &&
(((!input_value) && input_changed) || ((!input_changed) && (proc_type_table.size() > 1)))) {
if (proc_type_table[0][HYPER_THREADING_PROC] > 0) {
if (((!input_ht_hint) && input_ht_changed) || ((!input_ht_changed) && (input_pm_hint == "LATENCY")) ||
((!input_ht_changed) && (input_pm_hint == "THROUGHPUT") && (proc_type_table.size() > 1))) {
for (auto& i : result_table) {
i[ALL_PROC] -= i[HYPER_THREADING_PROC];
i[HYPER_THREADING_PROC] = 0;
}
input_value = false;
input_ht_hint = false;
} else {
input_ht_hint = true;
}
} else {
input_ht_hint = false;
}
return result_table;

View File

@ -28,13 +28,15 @@ std::vector<std::vector<int>> apply_scheduling_core_type(const ov::hint::Schedul
/**
* @brief Limit the available CPU resources in the processor type table according to the hyper-threading property
* @param[in] input_type indicate value of property enable_hyper_threading.
* @param[in] input_changed indicate if value is set by user.
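* @param[in,out] input_ht_hint value of the enable_hyper_threading property; on return it is true only if hyper-threading processors remain in the returned table.
* @param[in] input_ht_changed indicates whether the enable_hyper_threading value was set by the user.
* @param[in] input_pm_hint indicates the value of the performance_mode property.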
* @param[in] proc_type_table candidate processors available at this time
* @return a copy of proc_type_table with the unmatched processors removed
*/
std::vector<std::vector<int>> apply_hyper_threading(bool& input_type,
const bool input_changed,
std::vector<std::vector<int>> apply_hyper_threading(bool& input_ht_hint,
const bool input_ht_changed,
const std::string input_pm_hint,
const std::vector<std::vector<int>>& proc_type_table);
/**

View File

@ -30,26 +30,10 @@ std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
std::vector<int> stream_info(CPU_STREAMS_TABLE_SIZE);
std::vector<std::vector<int>> streams_info_table;
if (1 == input_streams) {
stream_info[NUMBER_OF_STREAMS] = 1;
int limit_threads = (input_threads == 0) ? model_prefer_threads : input_threads;
if (proc_type_table[0][ALL_PROC] == proc_type_table[0][EFFICIENT_CORE_PROC]) {
stream_info[PROC_TYPE] = EFFICIENT_CORE_PROC;
stream_info[THREADS_PER_STREAM] = (input_threads == 0)
? proc_type_table[0][EFFICIENT_CORE_PROC]
: std::min(proc_type_table[0][EFFICIENT_CORE_PROC], limit_threads);
streams_info_table.push_back(stream_info);
} else if ((proc_type_table[0][EFFICIENT_CORE_PROC] > 0) &&
((limit_threads == 0) || (limit_threads > proc_type_table[0][MAIN_CORE_PROC]))) {
stream_info[PROC_TYPE] = ALL_PROC;
int n_threads = std::accumulate(proc_type_table[0].begin() + MAIN_CORE_PROC,
proc_type_table[0].begin() + HYPER_THREADING_PROC,
0);
stream_info[THREADS_PER_STREAM] = (limit_threads == 0) ? n_threads : std::min(n_threads, limit_threads);
streams_info_table.push_back(stream_info);
auto UpdateMixStreamInfo = [&]() {
stream_info[NUMBER_OF_STREAMS] = 0;
n_threads = stream_info[THREADS_PER_STREAM];
for (int n = MAIN_CORE_PROC; n < HYPER_THREADING_PROC; n++) {
int n_threads = stream_info[THREADS_PER_STREAM];
for (int n = MAIN_CORE_PROC; n <= HYPER_THREADING_PROC; n++) {
if (0 != proc_type_table[0][n]) {
stream_info[PROC_TYPE] = n;
if (n_threads <= proc_type_table[0][n]) {
@ -63,28 +47,58 @@ std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
}
}
}
} else {
};
if (1 == input_streams) {
stream_info[NUMBER_OF_STREAMS] = 1;
if (input_threads > 0) {
stream_info[THREADS_PER_STREAM] = std::min(proc_type_table[0][ALL_PROC], input_threads);
if ((stream_info[THREADS_PER_STREAM] > proc_type_table[0][MAIN_CORE_PROC]) &&
(proc_type_table[0][MAIN_CORE_PROC] > 0) && (proc_type_table[0][EFFICIENT_CORE_PROC] > 0)) {
stream_info[PROC_TYPE] = ALL_PROC;
streams_info_table.push_back(stream_info);
UpdateMixStreamInfo();
} else if ((stream_info[THREADS_PER_STREAM] <= proc_type_table[0][MAIN_CORE_PROC]) ||
(proc_type_table[0][EFFICIENT_CORE_PROC] == 0)) {
stream_info[PROC_TYPE] = MAIN_CORE_PROC;
stream_info[THREADS_PER_STREAM] = (limit_threads == 0)
? proc_type_table[0][MAIN_CORE_PROC]
: std::min(proc_type_table[0][MAIN_CORE_PROC], limit_threads);
streams_info_table.push_back(stream_info);
} else {
stream_info[PROC_TYPE] = EFFICIENT_CORE_PROC;
streams_info_table.push_back(stream_info);
}
} else {
if (proc_type_table[0][ALL_PROC] == proc_type_table[0][EFFICIENT_CORE_PROC]) {
stream_info[PROC_TYPE] = EFFICIENT_CORE_PROC;
stream_info[THREADS_PER_STREAM] =
(model_prefer_threads == 0)
? proc_type_table[0][EFFICIENT_CORE_PROC]
: std::min(proc_type_table[0][EFFICIENT_CORE_PROC], model_prefer_threads);
streams_info_table.push_back(stream_info);
} else if ((proc_type_table[0][EFFICIENT_CORE_PROC] > 0) &&
((model_prefer_threads == 0) || (model_prefer_threads > proc_type_table[0][MAIN_CORE_PROC]))) {
stream_info[PROC_TYPE] = ALL_PROC;
stream_info[THREADS_PER_STREAM] =
(model_prefer_threads == 0 || model_prefer_threads > proc_type_table[0][MAIN_CORE_PROC])
? proc_type_table[0][ALL_PROC]
: proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][HYPER_THREADING_PROC];
streams_info_table.push_back(stream_info);
UpdateMixStreamInfo();
} else {
stream_info[PROC_TYPE] = MAIN_CORE_PROC;
stream_info[THREADS_PER_STREAM] =
proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][HYPER_THREADING_PROC];
streams_info_table.push_back(stream_info);
}
}
return streams_info_table;
} else {
int n_streams = 0;
int n_threads = 0;
int n_threads_per_stream = 0;
int base_type = MAIN_CORE_PROC;
if (proc_type_table.size() == 1) {
n_threads = (0 == input_threads) ? proc_type_table[0][ALL_PROC]
: std::min(proc_type_table[0][ALL_PROC], input_threads);
} else {
n_threads = (0 == input_threads) ? proc_type_table[0][MAIN_CORE_PROC]
: std::min(proc_type_table[0][MAIN_CORE_PROC], input_threads);
}
n_threads =
(0 == input_threads) ? proc_type_table[0][ALL_PROC] : std::min(proc_type_table[0][ALL_PROC], input_threads);
if (0 != input_streams) {
base_type = (proc_type_table[0][MAIN_CORE_PROC] == 0) ? EFFICIENT_CORE_PROC : MAIN_CORE_PROC;
@ -108,7 +122,8 @@ std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
} else {
base_type = (proc_type_table[0][MAIN_CORE_PROC] == 0) ? EFFICIENT_CORE_PROC : MAIN_CORE_PROC;
if (0 == model_prefer_threads) {
int n_proc = std::min(n_threads, proc_type_table[0][base_type]);
int n_proc = (proc_type_table.size() == 1) ? std::min(n_threads, proc_type_table[0][base_type])
: std::min(n_threads, proc_type_table[1][base_type]);
if (0 == n_proc % 4) {
n_threads_per_stream = 4;
} else if (0 == n_proc % 5) {
@ -126,18 +141,14 @@ std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
n_threads_per_stream =
std::min(static_cast<int>(n_threads / n_streams), proc_type_table[0][base_type]);
} else {
while (n_streams < n_threads_per_stream) {
if (1 == n_threads_per_stream) {
break;
} else {
n_threads_per_stream = static_cast<int>((n_threads_per_stream * 2 - 1) / 2);
while (n_streams * 2 <= n_threads_per_stream) {
n_threads_per_stream = static_cast<int>(n_threads_per_stream / 2);
n_threads_per_stream = static_cast<int>(
proc_type_table[0][base_type] /
((proc_type_table[0][base_type] + n_threads_per_stream - 1) / n_threads_per_stream));
n_streams = static_cast<int>(n_threads / n_threads_per_stream);
}
}
}
} else if ((1 == model_prefer_threads) && (proc_type_table[0][EFFICIENT_CORE_PROC] > 0) &&
(proc_type_table[0][MAIN_CORE_PROC] > 0) && (n_threads > proc_type_table[0][MAIN_CORE_PROC])) {
n_streams = (n_threads >= proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][EFFICIENT_CORE_PROC])
@ -270,7 +281,10 @@ void get_num_streams(const int streams,
executor_config._orig_proc_type_table = orig_proc_type_table;
std::vector<std::vector<int>> proc_type_table =
apply_scheduling_core_type(config.schedulingCoreType, orig_proc_type_table);
proc_type_table = apply_hyper_threading(config.enableHyperThreading, config.changedHyperThreading, proc_type_table);
proc_type_table = apply_hyper_threading(config.enableHyperThreading,
config.changedHyperThreading,
config.perfHintsConfig.ovPerfHint,
proc_type_table);
executor_config._proc_type_table = proc_type_table;
executor_config._cpu_pinning = get_cpu_pinning(config.enableCpuPinning,
config.changedCpuPinning,

View File

@ -201,10 +201,10 @@ const std::vector<ov::AnyMap> configsDeviceProperties = {
{ov::device::properties(ov::AnyMap{{"CPU", ov::AnyMap{ov::num_streams(3)}}})}};
const std::vector<ov::AnyMap> configsDevicePropertiesDouble = {
{ov::device::properties("CPU", ov::num_streams(5)), ov::num_streams(3)},
{ov::device::properties("CPU", ov::num_streams(5)),
{ov::device::properties("CPU", ov::num_streams(3)), ov::num_streams(5)},
{ov::device::properties("CPU", ov::num_streams(3)),
ov::device::properties(ov::AnyMap{{"CPU", ov::AnyMap{ov::num_streams(7)}}}),
ov::num_streams(3)},
ov::num_streams(5)},
{ov::device::properties("CPU", ov::num_streams(3)), ov::device::properties("CPU", ov::num_streams(5))},
{ov::device::properties("CPU", ov::num_streams(3)),
ov::device::properties(ov::AnyMap{{"CPU", ov::AnyMap{ov::num_streams(5)}}})},

View File

@ -83,10 +83,12 @@ INSTANTIATE_TEST_SUITE_P(SchedulingCoreTypeTable,
_1sockets_E_CORE_ONLY));
struct UseHTTestCase {
bool use_ht_value;
bool use_ht_changed;
bool input_ht_value;
bool input_ht_changed;
std::string input_pm_hint;
std::vector<std::vector<int>> proc_type_table;
std::vector<std::vector<int>> result_table;
bool output_ht_value;
};
class UseHTTests : public CommonTestUtils::TestsCommon, public testing::WithParamInterface<std::tuple<UseHTTestCase>> {
@ -95,80 +97,260 @@ public:
auto test_data = std::get<0>(GetParam());
std::vector<std::vector<int>> test_result_table =
ov::intel_cpu::apply_hyper_threading(test_data.use_ht_value, test_data.use_ht_changed, test_data.proc_type_table);
ov::intel_cpu::apply_hyper_threading(test_data.input_ht_value,
test_data.input_ht_changed,
test_data.input_pm_hint,
test_data.proc_type_table);
ASSERT_EQ(test_data.result_table, test_result_table);
ASSERT_EQ(test_data.input_ht_value, test_data.output_ht_value);
}
};
UseHTTestCase _2sockets_false = {
UseHTTestCase _2sockets_false_latency = {
false,
true,
"LATENCY",
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
false,
};
UseHTTestCase _2sockets_true = {
true,
true,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
};
UseHTTestCase _2sockets_default_1 = {
false,
UseHTTestCase _2sockets_false_throughput = {
false,
true,
"THROUGHPUT",
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
false,
};
UseHTTestCase _2sockets_default_2 = {
UseHTTestCase _2sockets_true_latency = {
true,
true,
"LATENCY",
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
true,
};
// UseHTTestCase initializer order (6 values): input_ht_value, input_ht_changed, input_pm_hint,
// proc_type_table, result_table (expected), output_ht_value (expected hint after the call).
// Table columns appear to be {ALL_PROC, MAIN_CORE_PROC, EFFICIENT_CORE_PROC, HYPER_THREADING_PROC};
// presumably the first row is the platform total and later rows are per socket - TODO confirm.

// User explicitly enabled hyper-threading, THROUGHPUT mode: table is unchanged and the hint stays true.
UseHTTestCase _2sockets_true_throughput = {
true,
true,
"THROUGHPUT",
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
true,
};
// Value not set by the user, LATENCY mode: hyper-threading processors are removed and the hint is false.
UseHTTestCase _2sockets_default_1_latency = {
false,
false,
"LATENCY",
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
false,
};
UseHTTestCase _1sockets_false = {
UseHTTestCase _2sockets_default_1_throughput = {
false,
false,
"THROUGHPUT",
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
false,
};
// UseHTTestCase initializer order (6 values): input_ht_value, input_ht_changed, input_pm_hint,
// proc_type_table, result_table (expected), output_ht_value (expected hint after the call).
// Table columns appear to be {ALL_PROC, MAIN_CORE_PROC, EFFICIENT_CORE_PROC, HYPER_THREADING_PROC}.

// Default value true (not set by the user), LATENCY mode: hyper-threading processors are removed, hint becomes false.
UseHTTestCase _2sockets_default_2_latency = {
true,
false,
"LATENCY",
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
false,
};
// Default value true (not set by the user), THROUGHPUT mode with a multi-row table:
// hyper-threading processors are removed, hint becomes false.
UseHTTestCase _2sockets_default_2_throughput = {
true,
false,
"THROUGHPUT",
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
false,
};
// Single-row hybrid table; user explicitly disabled hyper-threading, LATENCY mode: HT column is zeroed.
UseHTTestCase _1sockets_1_false_latency = {
false,
true,
"LATENCY",
{{20, 6, 8, 6}},
{{14, 6, 8, 0}},
false,
};
UseHTTestCase _1sockets_true = {
true,
UseHTTestCase _1sockets_1_false_throughput = {
false,
true,
"THROUGHPUT",
{{20, 6, 8, 6}},
{{20, 6, 8, 6}},
{{14, 6, 8, 0}},
false,
};
UseHTTestCase _1sockets_default_1 = {
false,
false,
UseHTTestCase _1sockets_1_true_latency = {
true,
true,
"LATENCY",
{{20, 6, 8, 6}},
{{20, 6, 8, 6}},
true,
};
UseHTTestCase _1sockets_default_2 = {
UseHTTestCase _1sockets_1_true_throughput = {
true,
true,
"THROUGHPUT",
{{20, 6, 8, 6}},
{{20, 6, 8, 6}},
true,
};
// UseHTTestCase initializer order (6 values): input_ht_value, input_ht_changed, input_pm_hint,
// proc_type_table, result_table (expected), output_ht_value (expected hint after the call).
// Table columns appear to be {ALL_PROC, MAIN_CORE_PROC, EFFICIENT_CORE_PROC, HYPER_THREADING_PROC}.
// "_1sockets_1_*" cases use a single-row table with efficient cores ({20, 6, 8, 6});
// "_1sockets_2_*" cases use a single-row table without efficient cores ({12, 6, 0, 6}).

// Not set by the user (default false), LATENCY: hyper-threading processors removed, hint false.
UseHTTestCase _1sockets_1_default_1_latency = {
false,
false,
"LATENCY",
{{20, 6, 8, 6}},
{{14, 6, 8, 0}},
false,
};
// Not set by the user (default false), THROUGHPUT, single-row table: table kept, hint becomes true.
UseHTTestCase _1sockets_1_default_1_throughput = {
false,
false,
"THROUGHPUT",
{{20, 6, 8, 6}},
{{20, 6, 8, 6}},
true,
};
// Not set by the user (default true), LATENCY: hyper-threading processors removed, hint becomes false.
UseHTTestCase _1sockets_1_default_2_latency = {
true,
false,
"LATENCY",
{{20, 6, 8, 6}},
{{14, 6, 8, 0}},
false,
};
// Not set by the user (default true), THROUGHPUT, single-row table: table kept, hint stays true.
UseHTTestCase _1sockets_1_default_2_throughput = {
true,
false,
"THROUGHPUT",
{{20, 6, 8, 6}},
{{20, 6, 8, 6}},
true,
};
// User explicitly disabled hyper-threading, LATENCY: hyper-threading processors removed.
UseHTTestCase _1sockets_2_false_latency = {
false,
true,
"LATENCY",
{{12, 6, 0, 6}},
{{6, 6, 0, 0}},
false,
};
// User explicitly disabled hyper-threading, THROUGHPUT: hyper-threading processors removed.
UseHTTestCase _1sockets_2_false_throughput = {
false,
true,
"THROUGHPUT",
{{12, 6, 0, 6}},
{{6, 6, 0, 0}},
false,
};
// User explicitly enabled hyper-threading, LATENCY: table kept, hint stays true.
UseHTTestCase _1sockets_2_true_latency = {
true,
true,
"LATENCY",
{{12, 6, 0, 6}},
{{12, 6, 0, 6}},
true,
};
// User explicitly enabled hyper-threading, THROUGHPUT: table kept, hint stays true.
UseHTTestCase _1sockets_2_true_throughput = {
true,
true,
"THROUGHPUT",
{{12, 6, 0, 6}},
{{12, 6, 0, 6}},
true,
};
// Not set by the user (default false), LATENCY: hyper-threading processors removed, hint false.
UseHTTestCase _1sockets_2_default_1_latency = {
false,
false,
"LATENCY",
{{12, 6, 0, 6}},
{{6, 6, 0, 0}},
false,
};
// Not set by the user (default false), THROUGHPUT, single-row table: table kept, hint becomes true.
UseHTTestCase _1sockets_2_default_1_throughput = {
false,
false,
"THROUGHPUT",
{{12, 6, 0, 6}},
{{12, 6, 0, 6}},
true,
};
// Not set by the user (default true), LATENCY: hyper-threading processors removed, hint becomes false.
UseHTTestCase _1sockets_2_default_2_latency = {
true,
false,
"LATENCY",
{{12, 6, 0, 6}},
{{6, 6, 0, 0}},
false,
};
// Not set by the user (default true), THROUGHPUT, single-row table: table kept, hint stays true.
UseHTTestCase _1sockets_2_default_2_throughput = {
true,
false,
"THROUGHPUT",
{{12, 6, 0, 6}},
{{12, 6, 0, 6}},
true,
};
TEST_P(UseHTTests, UseHT) {}
INSTANTIATE_TEST_SUITE_P(UseHTTable,
UseHTTests,
testing::Values(_2sockets_false,
_2sockets_true,
_2sockets_default_1,
_2sockets_default_2,
_1sockets_false,
_1sockets_true,
_1sockets_default_1,
_1sockets_default_2));
testing::Values(_2sockets_false_latency,
_2sockets_true_latency,
_2sockets_default_1_latency,
_2sockets_default_2_latency,
_1sockets_1_false_latency,
_1sockets_1_true_latency,
_1sockets_1_default_1_latency,
_1sockets_1_default_2_latency,
_1sockets_2_false_latency,
_1sockets_2_true_latency,
_1sockets_2_default_1_latency,
_1sockets_2_default_2_latency,
_2sockets_false_throughput,
_2sockets_true_throughput,
_2sockets_default_1_throughput,
_2sockets_default_2_throughput,
_1sockets_1_false_throughput,
_1sockets_1_true_throughput,
_1sockets_1_default_1_throughput,
_1sockets_1_default_2_throughput,
_1sockets_2_false_throughput,
_1sockets_2_true_throughput,
_1sockets_2_default_1_throughput,
_1sockets_2_default_2_throughput));
struct StreamsCalculationTestCase {
int input_streams;
@ -202,7 +384,7 @@ StreamsCalculationTestCase _2sockets_104cores_latency_1 = {
0,
0,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{1, MAIN_CORE_PROC, 104}},
{{1, MAIN_CORE_PROC, 208}},
};
StreamsCalculationTestCase _2sockets_104cores_latency_2 = {
@ -223,13 +405,31 @@ StreamsCalculationTestCase _2sockets_104cores_latency_3 = {
{{1, MAIN_CORE_PROC, 20}},
};
// StreamsCalculationTestCase initializer: four ints, then proc_type_table, then the expected streams table.
// The first int is input_streams; the others appear to be input_threads, a third input (presumably the
// infer-request count) and model_prefer_threads - TODO confirm against the struct declaration.
// Expected rows read as {NUMBER_OF_STREAMS, PROC_TYPE, THREADS_PER_STREAM}.

// One stream with 208 threads requested on a table that still has hyper-threading processors:
// a single MAIN_CORE_PROC stream of 208 threads is expected.
StreamsCalculationTestCase _2sockets_104cores_latency_4 = {
1,
208,
0,
0,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{1, MAIN_CORE_PROC, 208}},
};
// One stream, no thread limit, table without hyper-threading processors:
// a single MAIN_CORE_PROC stream of 104 threads is expected.
StreamsCalculationTestCase _2sockets_104cores_latency_5 = {
1,
0,
0,
0,
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
{{1, MAIN_CORE_PROC, 104}},
};
StreamsCalculationTestCase _2sockets_104cores_tput_1 = {
0,
0,
0,
0,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{26, MAIN_CORE_PROC, 4}},
{{52, MAIN_CORE_PROC, 4}},
};
StreamsCalculationTestCase _2sockets_104cores_tput_2 = {
@ -238,7 +438,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_2 = {
0,
0,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{2, MAIN_CORE_PROC, 52}},
{{2, MAIN_CORE_PROC, 104}},
};
StreamsCalculationTestCase _2sockets_104cores_tput_3 = {
@ -265,7 +465,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_5 = {
0,
1,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{104, MAIN_CORE_PROC, 1}},
{{208, MAIN_CORE_PROC, 1}},
};
StreamsCalculationTestCase _2sockets_104cores_tput_6 = {
@ -274,7 +474,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_6 = {
0,
2,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{52, MAIN_CORE_PROC, 2}},
{{104, MAIN_CORE_PROC, 2}},
};
StreamsCalculationTestCase _2sockets_104cores_tput_7 = {
@ -283,7 +483,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_7 = {
0,
8,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{13, MAIN_CORE_PROC, 8}},
{{26, MAIN_CORE_PROC, 8}},
};
StreamsCalculationTestCase _2sockets_104cores_tput_8 = {
@ -310,7 +510,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_10 = {
2,
0,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{2, MAIN_CORE_PROC, 52}},
{{2, MAIN_CORE_PROC, 104}},
};
StreamsCalculationTestCase _2sockets_104cores_tput_11 = {
@ -319,7 +519,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_11 = {
5,
0,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{2, MAIN_CORE_PROC, 52}},
{{2, MAIN_CORE_PROC, 104}},
};
StreamsCalculationTestCase _2sockets_104cores_tput_12 = {
@ -328,6 +528,78 @@ StreamsCalculationTestCase _2sockets_104cores_tput_12 = {
2,
2,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{2, MAIN_CORE_PROC, 104}},
};
// Throughput cases on a two-socket table with no hyper-threading processors ({104, 104, 0, 0} total).
// StreamsCalculationTestCase initializer: four ints, then proc_type_table, then the expected streams table.
// The first int is input_streams; the others appear to be input_threads, a third input (presumably the
// infer-request count) and model_prefer_threads - TODO confirm against the struct declaration.
// Expected rows read as {NUMBER_OF_STREAMS, PROC_TYPE, THREADS_PER_STREAM}.

// All inputs zero: 26 streams of 4 threads expected.
StreamsCalculationTestCase _2sockets_104cores_tput_13 = {
0,
0,
0,
0,
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
{{26, MAIN_CORE_PROC, 4}},
};
// input_streams = 2: 2 streams of 52 threads expected.
StreamsCalculationTestCase _2sockets_104cores_tput_14 = {
2,
0,
0,
0,
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
{{2, MAIN_CORE_PROC, 52}},
};
// Fourth input = 1: 104 single-thread streams expected.
StreamsCalculationTestCase _2sockets_104cores_tput_15 = {
0,
0,
0,
1,
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
{{104, MAIN_CORE_PROC, 1}},
};
// Fourth input = 2: 52 streams of 2 threads expected.
StreamsCalculationTestCase _2sockets_104cores_tput_16 = {
0,
0,
0,
2,
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
{{52, MAIN_CORE_PROC, 2}},
};
// Fourth input = 8: 13 streams of 8 threads expected.
StreamsCalculationTestCase _2sockets_104cores_tput_17 = {
0,
0,
0,
8,
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
{{13, MAIN_CORE_PROC, 8}},
};
// Third input = 2: 2 streams of 52 threads expected.
StreamsCalculationTestCase _2sockets_104cores_tput_18 = {
0,
0,
2,
0,
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
{{2, MAIN_CORE_PROC, 52}},
};
// input_streams = 2 with third input = 5: still 2 streams of 52 threads expected.
StreamsCalculationTestCase _2sockets_104cores_tput_19 = {
2,
0,
5,
0,
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
{{2, MAIN_CORE_PROC, 52}},
};
// Third input = 2 and fourth input = 2: 2 streams of 52 threads expected.
StreamsCalculationTestCase _2sockets_104cores_tput_20 = {
0,
0,
2,
2,
{{104, 104, 0, 0}, {52, 52, 0, 0}, {52, 52, 0, 0}},
{{2, MAIN_CORE_PROC, 52}},
};
@ -381,7 +653,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_1 = {
0,
0,
0,
{{20, 6, 8, 6}},
{{14, 6, 8, 0}},
{{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}},
};
@ -400,7 +672,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_3 = {
0,
6,
{{20, 6, 8, 6}},
{{1, MAIN_CORE_PROC, 6}},
{{1, MAIN_CORE_PROC, 12}},
};
StreamsCalculationTestCase _1sockets_14cores_latency_4 = {
@ -409,7 +681,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_4 = {
0,
14,
{{20, 6, 8, 6}},
{{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}},
{{1, ALL_PROC, 20}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}, {0, HYPER_THREADING_PROC, 6}},
};
StreamsCalculationTestCase _1sockets_14cores_latency_5 = {
@ -418,6 +690,42 @@ StreamsCalculationTestCase _1sockets_14cores_latency_5 = {
2,
14,
{{20, 6, 8, 6}},
{{1, ALL_PROC, 20}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}, {0, HYPER_THREADING_PROC, 6}},
};
// Single-stream (input_streams = 1) cases on a hybrid single-row table.
// StreamsCalculationTestCase initializer: four ints, then proc_type_table, then the expected streams table.
// The first int is input_streams; the others appear to be input_threads, a third input (presumably the
// infer-request count) and model_prefer_threads - TODO confirm against the struct declaration.
// Expected rows read as {NUMBER_OF_STREAMS, PROC_TYPE, THREADS_PER_STREAM}; rows with 0 streams
// list the per-processor-type split of the ALL_PROC stream above them.

// Hyper-threading processors present: one ALL_PROC stream of 20 threads split over main, efficient and HT.
StreamsCalculationTestCase _1sockets_14cores_latency_6 = {
1,
0,
0,
0,
{{20, 6, 8, 6}},
{{1, ALL_PROC, 20}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}, {0, HYPER_THREADING_PROC, 6}},
};
// No hyper-threading processors, fourth input = 6: one MAIN_CORE_PROC stream of 6 threads.
StreamsCalculationTestCase _1sockets_14cores_latency_7 = {
1,
0,
0,
6,
{{14, 6, 8, 0}},
{{1, MAIN_CORE_PROC, 6}},
};
// No hyper-threading processors, fourth input = 14: one ALL_PROC stream of 14 threads (6 main + 8 efficient).
StreamsCalculationTestCase _1sockets_14cores_latency_8 = {
1,
0,
0,
14,
{{14, 6, 8, 0}},
{{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}},
};
// Same as latency_8 but with third input = 2; the expected table is identical.
StreamsCalculationTestCase _1sockets_14cores_latency_9 = {
1,
0,
2,
14,
{{14, 6, 8, 0}},
{{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}},
};
@ -565,14 +873,13 @@ StreamsCalculationTestCase _1sockets_14cores_tput_16 = {
{{6, MAIN_CORE_PROC, 1}, {4, EFFICIENT_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 1}},
};
StreamsCalculationTestCase _1sockets_10cores_latency_1 = {
1,
0,
0,
0,
{{12, 2, 8, 2}},
{{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}},
{{1, ALL_PROC, 12}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}, {0, HYPER_THREADING_PROC, 2}},
};
StreamsCalculationTestCase _1sockets_10cores_latency_2 = {
@ -590,7 +897,7 @@ StreamsCalculationTestCase _1sockets_10cores_latency_3 = {
0,
2,
{{12, 2, 8, 2}},
{{1, MAIN_CORE_PROC, 2}},
{{1, MAIN_CORE_PROC, 4}},
};
StreamsCalculationTestCase _1sockets_10cores_latency_4 = {
@ -599,6 +906,33 @@ StreamsCalculationTestCase _1sockets_10cores_latency_4 = {
0,
10,
{{12, 2, 8, 2}},
{{1, ALL_PROC, 12}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}, {0, HYPER_THREADING_PROC, 2}},
};
// Single-stream (input_streams = 1) cases on a hybrid table without hyper-threading processors ({10, 2, 8, 0}).
// StreamsCalculationTestCase initializer: four ints, then proc_type_table, then the expected streams table.
// The first int is input_streams; the others appear to be input_threads, a third input (presumably the
// infer-request count) and model_prefer_threads - TODO confirm against the struct declaration.
// Expected rows read as {NUMBER_OF_STREAMS, PROC_TYPE, THREADS_PER_STREAM}.

// Fourth input = 0: one ALL_PROC stream of 10 threads (2 main + 8 efficient).
StreamsCalculationTestCase _1sockets_10cores_latency_5 = {
1,
0,
0,
0,
{{10, 2, 8, 0}},
{{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}},
};
// Fourth input = 2: one MAIN_CORE_PROC stream of 2 threads.
StreamsCalculationTestCase _1sockets_10cores_latency_6 = {
1,
0,
0,
2,
{{10, 2, 8, 0}},
{{1, MAIN_CORE_PROC, 2}},
};
// Fourth input = 10: one ALL_PROC stream of 10 threads (2 main + 8 efficient).
StreamsCalculationTestCase _1sockets_10cores_latency_7 = {
1,
0,
0,
10,
{{10, 2, 8, 0}},
{{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}},
};
@ -662,7 +996,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_1 = {
0,
0,
{{12, 4, 4, 4}},
{{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}},
{{1, ALL_PROC, 12}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}, {0, HYPER_THREADING_PROC, 4}},
};
StreamsCalculationTestCase _1sockets_8cores_latency_2 = {
@ -671,7 +1005,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_2 = {
0,
0,
{{12, 4, 4, 4}},
{{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}},
{{1, ALL_PROC, 12}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}, {0, HYPER_THREADING_PROC, 4}},
};
StreamsCalculationTestCase _1sockets_8cores_latency_3 = {
@ -680,7 +1014,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_3 = {
0,
4,
{{12, 4, 4, 4}},
{{1, MAIN_CORE_PROC, 4}},
{{1, MAIN_CORE_PROC, 8}},
};
StreamsCalculationTestCase _1sockets_8cores_latency_4 = {
@ -689,6 +1023,33 @@ StreamsCalculationTestCase _1sockets_8cores_latency_4 = {
0,
8,
{{12, 4, 4, 4}},
{{1, ALL_PROC, 12}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}, {0, HYPER_THREADING_PROC, 4}},
};
// Single-stream (input_streams = 1) cases on a hybrid table without hyper-threading processors ({8, 4, 4, 0}).
// StreamsCalculationTestCase initializer: four ints, then proc_type_table, then the expected streams table.
// The first int is input_streams; the others appear to be input_threads, a third input (presumably the
// infer-request count) and model_prefer_threads - TODO confirm against the struct declaration.
// Expected rows read as {NUMBER_OF_STREAMS, PROC_TYPE, THREADS_PER_STREAM}.

// Fourth input = 0: one ALL_PROC stream of 8 threads (4 main + 4 efficient).
StreamsCalculationTestCase _1sockets_8cores_latency_5 = {
1,
0,
0,
0,
{{8, 4, 4, 0}},
{{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}},
};
// Fourth input = 4: one MAIN_CORE_PROC stream of 4 threads.
StreamsCalculationTestCase _1sockets_8cores_latency_6 = {
1,
0,
0,
4,
{{8, 4, 4, 0}},
{{1, MAIN_CORE_PROC, 4}},
};
// Fourth input = 8: one ALL_PROC stream of 8 threads (4 main + 4 efficient).
StreamsCalculationTestCase _1sockets_8cores_latency_7 = {
1,
0,
0,
8,
{{8, 4, 4, 0}},
{{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}},
};
@ -698,7 +1059,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_1 = {
0,
0,
{{12, 4, 4, 4}},
{{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}, {2, HYPER_THREADING_PROC, 2}},
{{1, MAIN_CORE_PROC, 4}, {1, EFFICIENT_CORE_PROC, 4}, {1, HYPER_THREADING_PROC, 4}},
};
StreamsCalculationTestCase _1sockets_8cores_tput_2 = {
@ -755,13 +1116,22 @@ StreamsCalculationTestCase _1sockets_8cores_tput_7 = {
{{4, MAIN_CORE_PROC, 1}, {2, EFFICIENT_CORE_PROC, 2}, {4, HYPER_THREADING_PROC, 1}},
};
// Throughput case (all four integer inputs zero) on a hybrid table without hyper-threading processors.
// Expected rows read as {NUMBER_OF_STREAMS, PROC_TYPE, THREADS_PER_STREAM}:
// 2 streams of 2 threads on main cores plus 2 streams of 2 threads on efficient cores.
StreamsCalculationTestCase _1sockets_8cores_tput_8 = {
0,
0,
0,
0,
{{8, 4, 4, 0}},
{{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}},
};
StreamsCalculationTestCase _1sockets_6cores_latency_1 = {
1,
0,
0,
0,
{{12, 6, 0, 6}},
{{1, MAIN_CORE_PROC, 6}},
{{1, MAIN_CORE_PROC, 12}},
};
StreamsCalculationTestCase _1sockets_6cores_latency_2 = {
@ -770,6 +1140,15 @@ StreamsCalculationTestCase _1sockets_6cores_latency_2 = {
0,
0,
{{12, 6, 0, 6}},
{{1, MAIN_CORE_PROC, 12}},
};
// Single-stream case (input_streams = 1, other integer inputs zero) on a table with only main cores
// and no hyper-threading processors. Expected row reads as {NUMBER_OF_STREAMS, PROC_TYPE, THREADS_PER_STREAM}:
// one MAIN_CORE_PROC stream of 6 threads.
StreamsCalculationTestCase _1sockets_6cores_latency_3 = {
1,
0,
0,
0,
{{6, 6, 0, 0}},
{{1, MAIN_CORE_PROC, 6}},
};
@ -797,7 +1176,7 @@ StreamsCalculationTestCase _1sockets_6cores_tput_3 = {
0,
0,
{{12, 6, 0, 6}},
{{3, MAIN_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 2}},
{{2, MAIN_CORE_PROC, 3}},
};
StreamsCalculationTestCase _1sockets_6cores_tput_4 = {
@ -842,7 +1221,7 @@ StreamsCalculationTestCase _1sockets_ecores_latency_4 = {
0,
4,
{{16, 0, 16, 0}},
{{1, EFFICIENT_CORE_PROC, 16}},
{{1, EFFICIENT_CORE_PROC, 4}},
};
StreamsCalculationTestCase _1sockets_ecores_tput_1 = {
@ -906,6 +1285,8 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
testing::Values(_2sockets_104cores_latency_1,
_2sockets_104cores_latency_2,
_2sockets_104cores_latency_3,
_2sockets_104cores_latency_4,
_2sockets_104cores_latency_5,
_2sockets_104cores_tput_1,
_2sockets_104cores_tput_2,
_2sockets_104cores_tput_3,
@ -918,6 +1299,14 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
_2sockets_104cores_tput_10,
_2sockets_104cores_tput_11,
_2sockets_104cores_tput_12,
_2sockets_104cores_tput_13,
_2sockets_104cores_tput_14,
_2sockets_104cores_tput_15,
_2sockets_104cores_tput_16,
_2sockets_104cores_tput_17,
_2sockets_104cores_tput_18,
_2sockets_104cores_tput_19,
_2sockets_104cores_tput_20,
_2sockets_48cores_latency_1,
_2sockets_48cores_tput_1,
_2sockets_48cores_tput_2,
@ -928,6 +1317,10 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
_1sockets_14cores_latency_3,
_1sockets_14cores_latency_4,
_1sockets_14cores_latency_5,
_1sockets_14cores_latency_6,
_1sockets_14cores_latency_7,
_1sockets_14cores_latency_8,
_1sockets_14cores_latency_9,
_1sockets_14cores_tput_1,
_1sockets_14cores_tput_2,
_1sockets_14cores_tput_3,
@ -948,6 +1341,9 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
_1sockets_10cores_latency_2,
_1sockets_10cores_latency_3,
_1sockets_10cores_latency_4,
_1sockets_10cores_latency_5,
_1sockets_10cores_latency_6,
_1sockets_10cores_latency_7,
_1sockets_10cores_tput_1,
_1sockets_10cores_tput_2,
_1sockets_10cores_tput_3,
@ -958,6 +1354,9 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
_1sockets_8cores_latency_2,
_1sockets_8cores_latency_3,
_1sockets_8cores_latency_4,
_1sockets_8cores_latency_5,
_1sockets_8cores_latency_6,
_1sockets_8cores_latency_7,
_1sockets_8cores_tput_1,
_1sockets_8cores_tput_2,
_1sockets_8cores_tput_3,
@ -965,8 +1364,10 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
_1sockets_8cores_tput_5,
_1sockets_8cores_tput_6,
_1sockets_8cores_tput_7,
_1sockets_8cores_tput_8,
_1sockets_6cores_latency_1,
_1sockets_6cores_latency_2,
_1sockets_6cores_latency_3,
_1sockets_6cores_tput_1,
_1sockets_6cores_tput_2,
_1sockets_6cores_tput_3,