include nireq during streams calculation (#16378)

* include nireq during streams calculation

* update description for comments

* update description
This commit is contained in:
Shen, Wanglei 2023-03-24 15:27:13 +08:00 committed by GitHub
parent 3f4b1e8205
commit 613b66ba35
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 201 additions and 28 deletions

View File

@ -19,6 +19,7 @@ namespace intel_cpu {
std::vector<std::vector<int>> get_streams_info_table(const int input_streams, std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
const int input_threads, const int input_threads,
const int input_infer_requests,
const int model_prefer_threads, const int model_prefer_threads,
const std::vector<std::vector<int>> proc_type_table) { const std::vector<std::vector<int>> proc_type_table) {
std::vector<int> stream_info(CPU_STREAMS_TABLE_SIZE); std::vector<int> stream_info(CPU_STREAMS_TABLE_SIZE);
@ -74,13 +75,12 @@ std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
} }
if (0 != input_streams) { if (0 != input_streams) {
if (input_streams >= n_threads) { n_streams = (input_infer_requests > 0) ? std::min(input_streams, input_infer_requests) : input_streams;
if (n_streams >= n_threads) {
n_streams = n_threads; n_streams = n_threads;
n_threads_per_stream = 1; n_threads_per_stream = 1;
} else { } else {
n_streams = input_streams; n_threads_per_stream = std::min(std::max(1, n_threads / n_streams), proc_type_table[0][MAIN_CORE_PROC]);
n_threads_per_stream =
std::min(std::max(1, n_threads / input_streams), proc_type_table[0][MAIN_CORE_PROC]);
if (proc_type_table.size() == 1) { if (proc_type_table.size() == 1) {
if ((n_threads_per_stream > proc_type_table[0][MAIN_CORE_PROC]) && if ((n_threads_per_stream > proc_type_table[0][MAIN_CORE_PROC]) &&
(n_threads_per_stream < proc_type_table[0][MAIN_CORE_PROC] * 2)) { (n_threads_per_stream < proc_type_table[0][MAIN_CORE_PROC] * 2)) {
@ -107,21 +107,29 @@ std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
n_threads_per_stream = (n_proc > 16) ? 4 : std::max(1, static_cast<int>(n_proc / 4)); n_threads_per_stream = (n_proc > 16) ? 4 : std::max(1, static_cast<int>(n_proc / 4));
} }
n_streams = static_cast<int>(n_threads / n_threads_per_stream); n_streams = static_cast<int>(n_threads / n_threads_per_stream);
if ((input_infer_requests > 0) && (n_streams > input_infer_requests)) {
while (n_streams < n_threads_per_stream) { n_streams = input_infer_requests;
if (1 == n_threads_per_stream) { n_threads_per_stream =
break; std::min(static_cast<int>(n_threads / n_streams), proc_type_table[0][MAIN_CORE_PROC]);
} else { } else {
n_threads_per_stream = static_cast<int>((n_threads_per_stream * 2 - 1) / 2); while (n_streams < n_threads_per_stream) {
n_threads_per_stream = static_cast<int>( if (1 == n_threads_per_stream) {
proc_type_table[0][MAIN_CORE_PROC] / break;
((proc_type_table[0][MAIN_CORE_PROC] + n_threads_per_stream - 1) / n_threads_per_stream)); } else {
n_streams = static_cast<int>(n_threads / n_threads_per_stream); n_threads_per_stream = static_cast<int>((n_threads_per_stream * 2 - 1) / 2);
n_threads_per_stream =
static_cast<int>(proc_type_table[0][MAIN_CORE_PROC] /
((proc_type_table[0][MAIN_CORE_PROC] + n_threads_per_stream - 1) /
n_threads_per_stream));
n_streams = static_cast<int>(n_threads / n_threads_per_stream);
}
} }
} }
} else { } else {
n_streams = ((n_threads + model_prefer_threads - 1) / model_prefer_threads); n_streams = ((n_threads + model_prefer_threads - 1) / model_prefer_threads);
n_threads_per_stream = static_cast<int>(n_threads / n_streams); n_streams = (input_infer_requests > 0) ? std::min(n_streams, input_infer_requests) : n_streams;
n_threads_per_stream =
std::min(static_cast<int>(n_threads / n_streams), proc_type_table[0][MAIN_CORE_PROC]);
} }
} }

View File

@ -14,21 +14,30 @@
namespace ov { namespace ov {
namespace intel_cpu { namespace intel_cpu {
/** /**
* @brief Generate streams information table according to processors type table * @brief Generate streams information table according to processors type table.
* @param[in] input_streams is target streams set by user via NUM_STREAMS or hints. * @param[in] input_streams is the targeted number of streams set by user via ov::num_streams or hints.
* - input "0" mean function generate the optimal number of streams * - input "0" indicates the optimal number of streams generated by the function.
* - LATENCY hint equals 1 stream. * - When user sets LATENCY hint, OpenVINO runtime generates one stream per CPU node.
* @param[in] input_threads is max threads set by user via INFERNECE_NUM_THREADS. * @param[in] input_threads is the max number of threads set by user via ov::inference_num_threads.
* - input "0" mean function can use all resource in proc_type_table * - input "0" indicates that the function can use all resources in proc_type_table.
* - When user limit max threads, streams in output cannot be more than max threads * - If user limits the max number of threads, the final number of streams output cannot exceed the max
* @param[in] model_prefer_threads is preferred threads per stream based on model generated in previous function * number of threads.
* - input "0" mean function generate the optimal threads per stream based on platform * @param[in] input_infer_requests is max number of infer requests set by user via ov::hint::num_requests.
* @param[in] proc_type_table candidate processors available at this time * - input "0" indicates that the number of streams is not limited by the number of infer requests.
* - candidate processors have benn updated based on properties like "Ecore only" in previous function * - If user limits the max number of infer requests, the final number of streams output cannot exceed the
* @return summary table of streams info will be used by StreamsExecutor * max number of infer requests.
* @param[in] model_prefer_threads is preferred number of threads per stream based on the model generated in previous
* function.
* - input "0" indicates that the function generates the optimal number of threads per stream based on
* processors type information.
* @param[in] proc_type_table is currently available candidate processors.
* - candidate processors have been updated based on user input hints like ov::hint::scheduling_core_type
* in previous function.
* @return streams information table which will be used by StreamsExecutor.
*/ */
std::vector<std::vector<int>> get_streams_info_table(const int input_streams, std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
const int input_threads, const int input_threads,
const int input_infer_requests,
const int model_prefer_threads, const int model_prefer_threads,
const std::vector<std::vector<int>> proc_type_table); const std::vector<std::vector<int>> proc_type_table);
} // namespace intel_cpu } // namespace intel_cpu

View File

@ -18,6 +18,7 @@ namespace {
struct StreamsCalculationTestCase { struct StreamsCalculationTestCase {
int input_streams; int input_streams;
int input_threads; int input_threads;
int input_infer_requests;
int model_prefer_threads; int model_prefer_threads;
std::vector<std::vector<int>> proc_type_table; std::vector<std::vector<int>> proc_type_table;
std::vector<std::vector<int>> stream_info_table; std::vector<std::vector<int>> stream_info_table;
@ -32,6 +33,7 @@ public:
std::vector<std::vector<int>> test_stream_info_table = std::vector<std::vector<int>> test_stream_info_table =
ov::intel_cpu::get_streams_info_table(test_data.input_streams, ov::intel_cpu::get_streams_info_table(test_data.input_streams,
test_data.input_threads, test_data.input_threads,
test_data.input_infer_requests,
test_data.model_prefer_threads, test_data.model_prefer_threads,
test_data.proc_type_table); test_data.proc_type_table);
@ -43,6 +45,7 @@ StreamsCalculationTestCase _2sockets_104cores_latency_1 = {
1, 1,
0, 0,
0, 0,
0,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{1, MAIN_CORE_PROC, 104}}, {{1, MAIN_CORE_PROC, 104}},
}; };
@ -51,6 +54,16 @@ StreamsCalculationTestCase _2sockets_104cores_latency_2 = {
1, 1,
20, 20,
0, 0,
0,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{1, MAIN_CORE_PROC, 20}},
};
StreamsCalculationTestCase _2sockets_104cores_latency_3 = {
1,
20,
5,
0,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{1, MAIN_CORE_PROC, 20}}, {{1, MAIN_CORE_PROC, 20}},
}; };
@ -59,6 +72,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_1 = {
0, 0,
0, 0,
0, 0,
0,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{26, MAIN_CORE_PROC, 4}}, {{26, MAIN_CORE_PROC, 4}},
}; };
@ -67,6 +81,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_2 = {
2, 2,
0, 0,
0, 0,
0,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{2, MAIN_CORE_PROC, 52}}, {{2, MAIN_CORE_PROC, 52}},
}; };
@ -75,6 +90,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_3 = {
0, 0,
20, 20,
0, 0,
0,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{5, MAIN_CORE_PROC, 4}}, {{5, MAIN_CORE_PROC, 4}},
}; };
@ -83,11 +99,13 @@ StreamsCalculationTestCase _2sockets_104cores_tput_4 = {
2, 2,
20, 20,
0, 0,
0,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{2, MAIN_CORE_PROC, 10}}, {{2, MAIN_CORE_PROC, 10}},
}; };
StreamsCalculationTestCase _2sockets_104cores_tput_5 = { StreamsCalculationTestCase _2sockets_104cores_tput_5 = {
0,
0, 0,
0, 0,
1, 1,
@ -96,6 +114,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_5 = {
}; };
StreamsCalculationTestCase _2sockets_104cores_tput_6 = { StreamsCalculationTestCase _2sockets_104cores_tput_6 = {
0,
0, 0,
0, 0,
2, 2,
@ -104,6 +123,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_6 = {
}; };
StreamsCalculationTestCase _2sockets_104cores_tput_7 = { StreamsCalculationTestCase _2sockets_104cores_tput_7 = {
0,
0, 0,
0, 0,
8, 8,
@ -114,15 +134,53 @@ StreamsCalculationTestCase _2sockets_104cores_tput_7 = {
StreamsCalculationTestCase _2sockets_104cores_tput_8 = { StreamsCalculationTestCase _2sockets_104cores_tput_8 = {
0, 0,
40, 40,
0,
8, 8,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{5, MAIN_CORE_PROC, 8}}, {{5, MAIN_CORE_PROC, 8}},
}; };
StreamsCalculationTestCase _2sockets_104cores_tput_9 = {
5,
20,
2,
0,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{2, MAIN_CORE_PROC, 10}},
};
StreamsCalculationTestCase _2sockets_104cores_tput_10 = {
0,
0,
2,
0,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{2, MAIN_CORE_PROC, 52}},
};
StreamsCalculationTestCase _2sockets_104cores_tput_11 = {
2,
0,
5,
0,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{2, MAIN_CORE_PROC, 52}},
};
StreamsCalculationTestCase _2sockets_104cores_tput_12 = {
0,
0,
2,
2,
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
{{2, MAIN_CORE_PROC, 52}},
};
StreamsCalculationTestCase _2sockets_48cores_latency_1 = { StreamsCalculationTestCase _2sockets_48cores_latency_1 = {
1, 1,
0, 0,
0, 0,
0,
{{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}}, {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
{{1, MAIN_CORE_PROC, 48}}, {{1, MAIN_CORE_PROC, 48}},
}; };
@ -131,6 +189,7 @@ StreamsCalculationTestCase _2sockets_48cores_tput_1 = {
0, 0,
0, 0,
0, 0,
0,
{{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}}, {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
{{12, MAIN_CORE_PROC, 4}}, {{12, MAIN_CORE_PROC, 4}},
}; };
@ -139,6 +198,7 @@ StreamsCalculationTestCase _2sockets_48cores_tput_2 = {
100, 100,
0, 0,
0, 0,
0,
{{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}}, {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
{{48, MAIN_CORE_PROC, 1}}, {{48, MAIN_CORE_PROC, 1}},
}; };
@ -147,6 +207,7 @@ StreamsCalculationTestCase _2sockets_48cores_tput_3 = {
0, 0,
100, 100,
0, 0,
0,
{{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}}, {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
{{12, MAIN_CORE_PROC, 4}}, {{12, MAIN_CORE_PROC, 4}},
}; };
@ -154,6 +215,7 @@ StreamsCalculationTestCase _2sockets_48cores_tput_3 = {
StreamsCalculationTestCase _2sockets_48cores_tput_4 = { StreamsCalculationTestCase _2sockets_48cores_tput_4 = {
2, 2,
20, 20,
0,
1, 1,
{{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}}, {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
{{2, MAIN_CORE_PROC, 10}}, {{2, MAIN_CORE_PROC, 10}},
@ -163,6 +225,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_1 = {
1, 1,
0, 0,
0, 0,
0,
{{20, 6, 8, 6}}, {{20, 6, 8, 6}},
{{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}}, {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}},
}; };
@ -171,6 +234,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_2 = {
1, 1,
10, 10,
0, 0,
0,
{{20, 6, 8, 6}}, {{20, 6, 8, 6}},
{{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 4}}, {{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 4}},
}; };
@ -178,6 +242,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_2 = {
StreamsCalculationTestCase _1sockets_14cores_latency_3 = { StreamsCalculationTestCase _1sockets_14cores_latency_3 = {
1, 1,
0, 0,
0,
6, 6,
{{20, 6, 8, 6}}, {{20, 6, 8, 6}},
{{1, MAIN_CORE_PROC, 6}}, {{1, MAIN_CORE_PROC, 6}},
@ -186,6 +251,16 @@ StreamsCalculationTestCase _1sockets_14cores_latency_3 = {
StreamsCalculationTestCase _1sockets_14cores_latency_4 = { StreamsCalculationTestCase _1sockets_14cores_latency_4 = {
1, 1,
0, 0,
0,
14,
{{20, 6, 8, 6}},
{{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}},
};
StreamsCalculationTestCase _1sockets_14cores_latency_5 = {
1,
0,
2,
14, 14,
{{20, 6, 8, 6}}, {{20, 6, 8, 6}},
{{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}}, {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}},
@ -195,6 +270,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_1 = {
0, 0,
0, 0,
0, 0,
0,
{{20, 6, 8, 6}}, {{20, 6, 8, 6}},
{{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}}, {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}},
}; };
@ -203,6 +279,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_2 = {
2, 2,
0, 0,
0, 0,
0,
{{20, 6, 8, 6}}, {{20, 6, 8, 6}},
{{1, MAIN_CORE_PROC, 6}, {1, EFFICIENT_CORE_PROC, 6}}, {{1, MAIN_CORE_PROC, 6}, {1, EFFICIENT_CORE_PROC, 6}},
}; };
@ -211,6 +288,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_3 = {
4, 4,
0, 0,
0, 0,
0,
{{20, 6, 8, 6}}, {{20, 6, 8, 6}},
{{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}}, {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}},
}; };
@ -219,11 +297,13 @@ StreamsCalculationTestCase _1sockets_14cores_tput_4 = {
0, 0,
12, 12,
0, 0,
0,
{{20, 6, 8, 6}}, {{20, 6, 8, 6}},
{{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}}, {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}},
}; };
StreamsCalculationTestCase _1sockets_14cores_tput_5 = { StreamsCalculationTestCase _1sockets_14cores_tput_5 = {
0,
0, 0,
0, 0,
1, 1,
@ -232,6 +312,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_5 = {
}; };
StreamsCalculationTestCase _1sockets_14cores_tput_6 = { StreamsCalculationTestCase _1sockets_14cores_tput_6 = {
0,
0, 0,
0, 0,
2, 2,
@ -243,6 +324,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_7 = {
100, 100,
0, 0,
0, 0,
0,
{{20, 6, 8, 6}}, {{20, 6, 8, 6}},
{{6, MAIN_CORE_PROC, 1}, {8, EFFICIENT_CORE_PROC, 1}, {6, HYPER_THREADING_PROC, 1}}, {{6, MAIN_CORE_PROC, 1}, {8, EFFICIENT_CORE_PROC, 1}, {6, HYPER_THREADING_PROC, 1}},
}; };
@ -251,14 +333,52 @@ StreamsCalculationTestCase _1sockets_14cores_tput_8 = {
0, 0,
100, 100,
0, 0,
0,
{{20, 6, 8, 6}}, {{20, 6, 8, 6}},
{{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}}, {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}},
}; };
StreamsCalculationTestCase _1sockets_14cores_tput_9 = {
4,
0,
8,
0,
{{20, 6, 8, 6}},
{{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}},
};
StreamsCalculationTestCase _1sockets_14cores_tput_10 = {
6,
0,
4,
0,
{{20, 6, 8, 6}},
{{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}},
};
StreamsCalculationTestCase _1sockets_14cores_tput_11 = {
0,
0,
2,
0,
{{20, 6, 8, 6}},
{{1, MAIN_CORE_PROC, 6}, {1, EFFICIENT_CORE_PROC, 6}},
};
StreamsCalculationTestCase _1sockets_14cores_tput_12 = {
0,
0,
2,
2,
{{20, 6, 8, 6}},
{{1, MAIN_CORE_PROC, 6}, {1, EFFICIENT_CORE_PROC, 6}},
};
StreamsCalculationTestCase _1sockets_10cores_latency_1 = { StreamsCalculationTestCase _1sockets_10cores_latency_1 = {
1, 1,
0, 0,
0, 0,
0,
{{12, 2, 8, 2}}, {{12, 2, 8, 2}},
{{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}}, {{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}},
}; };
@ -267,6 +387,7 @@ StreamsCalculationTestCase _1sockets_10cores_latency_2 = {
1, 1,
8, 8,
0, 0,
0,
{{12, 2, 8, 2}}, {{12, 2, 8, 2}},
{{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 6}}, {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 6}},
}; };
@ -274,6 +395,7 @@ StreamsCalculationTestCase _1sockets_10cores_latency_2 = {
StreamsCalculationTestCase _1sockets_10cores_latency_3 = { StreamsCalculationTestCase _1sockets_10cores_latency_3 = {
1, 1,
0, 0,
0,
2, 2,
{{12, 2, 8, 2}}, {{12, 2, 8, 2}},
{{1, MAIN_CORE_PROC, 2}}, {{1, MAIN_CORE_PROC, 2}},
@ -282,6 +404,7 @@ StreamsCalculationTestCase _1sockets_10cores_latency_3 = {
StreamsCalculationTestCase _1sockets_10cores_latency_4 = { StreamsCalculationTestCase _1sockets_10cores_latency_4 = {
1, 1,
0, 0,
0,
10, 10,
{{12, 2, 8, 2}}, {{12, 2, 8, 2}},
{{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}}, {{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}},
@ -291,6 +414,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_1 = {
0, 0,
0, 0,
0, 0,
0,
{{12, 2, 8, 2}}, {{12, 2, 8, 2}},
{{1, MAIN_CORE_PROC, 2}, {4, EFFICIENT_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 2}}, {{1, MAIN_CORE_PROC, 2}, {4, EFFICIENT_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 2}},
}; };
@ -299,6 +423,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_2 = {
2, 2,
0, 0,
0, 0,
0,
{{12, 2, 8, 2}}, {{12, 2, 8, 2}},
{{1, MAIN_CORE_PROC, 2}, {1, EFFICIENT_CORE_PROC, 2}}, {{1, MAIN_CORE_PROC, 2}, {1, EFFICIENT_CORE_PROC, 2}},
}; };
@ -307,6 +432,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_3 = {
4, 4,
0, 0,
0, 0,
0,
{{12, 2, 8, 2}}, {{12, 2, 8, 2}},
{{1, MAIN_CORE_PROC, 2}, {3, EFFICIENT_CORE_PROC, 2}}, {{1, MAIN_CORE_PROC, 2}, {3, EFFICIENT_CORE_PROC, 2}},
}; };
@ -315,11 +441,13 @@ StreamsCalculationTestCase _1sockets_10cores_tput_4 = {
0, 0,
6, 6,
0, 0,
0,
{{12, 2, 8, 2}}, {{12, 2, 8, 2}},
{{1, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}}, {{1, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}},
}; };
StreamsCalculationTestCase _1sockets_10cores_tput_5 = { StreamsCalculationTestCase _1sockets_10cores_tput_5 = {
0,
0, 0,
0, 0,
1, 1,
@ -328,6 +456,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_5 = {
}; };
StreamsCalculationTestCase _1sockets_10cores_tput_6 = { StreamsCalculationTestCase _1sockets_10cores_tput_6 = {
0,
0, 0,
0, 0,
2, 2,
@ -339,6 +468,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_1 = {
1, 1,
0, 0,
0, 0,
0,
{{12, 4, 4, 4}}, {{12, 4, 4, 4}},
{{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}}, {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}},
}; };
@ -347,6 +477,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_2 = {
1, 1,
100, 100,
0, 0,
0,
{{12, 4, 4, 4}}, {{12, 4, 4, 4}},
{{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}}, {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}},
}; };
@ -354,6 +485,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_2 = {
StreamsCalculationTestCase _1sockets_8cores_latency_3 = { StreamsCalculationTestCase _1sockets_8cores_latency_3 = {
1, 1,
0, 0,
0,
4, 4,
{{12, 4, 4, 4}}, {{12, 4, 4, 4}},
{{1, MAIN_CORE_PROC, 4}}, {{1, MAIN_CORE_PROC, 4}},
@ -362,6 +494,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_3 = {
StreamsCalculationTestCase _1sockets_8cores_latency_4 = { StreamsCalculationTestCase _1sockets_8cores_latency_4 = {
1, 1,
0, 0,
0,
8, 8,
{{12, 4, 4, 4}}, {{12, 4, 4, 4}},
{{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}}, {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}},
@ -371,6 +504,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_1 = {
0, 0,
0, 0,
0, 0,
0,
{{12, 4, 4, 4}}, {{12, 4, 4, 4}},
{{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}, {2, HYPER_THREADING_PROC, 2}}, {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}, {2, HYPER_THREADING_PROC, 2}},
}; };
@ -379,6 +513,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_2 = {
2, 2,
0, 0,
0, 0,
0,
{{12, 4, 4, 4}}, {{12, 4, 4, 4}},
{{1, MAIN_CORE_PROC, 4}, {1, EFFICIENT_CORE_PROC, 4}}, {{1, MAIN_CORE_PROC, 4}, {1, EFFICIENT_CORE_PROC, 4}},
}; };
@ -387,6 +522,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_3 = {
4, 4,
0, 0,
0, 0,
0,
{{12, 4, 4, 4}}, {{12, 4, 4, 4}},
{{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}}, {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}},
}; };
@ -395,6 +531,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_4 = {
6, 6,
0, 0,
0, 0,
0,
{{12, 4, 4, 4}}, {{12, 4, 4, 4}},
{{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}, {2, HYPER_THREADING_PROC, 2}}, {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}, {2, HYPER_THREADING_PROC, 2}},
}; };
@ -403,6 +540,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_5 = {
0, 0,
6, 6,
0, 0,
0,
{{12, 4, 4, 4}}, {{12, 4, 4, 4}},
{{2, MAIN_CORE_PROC, 2}, {1, EFFICIENT_CORE_PROC, 2}}, {{2, MAIN_CORE_PROC, 2}, {1, EFFICIENT_CORE_PROC, 2}},
}; };
@ -411,11 +549,13 @@ StreamsCalculationTestCase _1sockets_8cores_tput_6 = {
0, 0,
8, 8,
0, 0,
0,
{{12, 4, 4, 4}}, {{12, 4, 4, 4}},
{{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}}, {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}},
}; };
StreamsCalculationTestCase _1sockets_8cores_tput_7 = { StreamsCalculationTestCase _1sockets_8cores_tput_7 = {
0,
0, 0,
0, 0,
1, 1,
@ -427,6 +567,7 @@ StreamsCalculationTestCase _1sockets_6cores_latency_1 = {
1, 1,
0, 0,
0, 0,
0,
{{12, 6, 0, 6}}, {{12, 6, 0, 6}},
{{1, MAIN_CORE_PROC, 6}}, {{1, MAIN_CORE_PROC, 6}},
}; };
@ -435,6 +576,7 @@ StreamsCalculationTestCase _1sockets_6cores_latency_2 = {
1, 1,
100, 100,
0, 0,
0,
{{12, 6, 0, 6}}, {{12, 6, 0, 6}},
{{1, MAIN_CORE_PROC, 6}}, {{1, MAIN_CORE_PROC, 6}},
}; };
@ -443,6 +585,7 @@ StreamsCalculationTestCase _1sockets_6cores_tput_1 = {
0, 0,
0, 0,
0, 0,
0,
{{12, 6, 0, 6}}, {{12, 6, 0, 6}},
{{2, MAIN_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}}, {{2, MAIN_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}},
}; };
@ -451,6 +594,7 @@ StreamsCalculationTestCase _1sockets_6cores_tput_2 = {
2, 2,
0, 0,
0, 0,
0,
{{12, 6, 0, 6}}, {{12, 6, 0, 6}},
{{1, MAIN_CORE_PROC, 6}, {1, HYPER_THREADING_PROC, 6}}, {{1, MAIN_CORE_PROC, 6}, {1, HYPER_THREADING_PROC, 6}},
}; };
@ -459,11 +603,13 @@ StreamsCalculationTestCase _1sockets_6cores_tput_3 = {
0, 0,
8, 8,
0, 0,
0,
{{12, 6, 0, 6}}, {{12, 6, 0, 6}},
{{3, MAIN_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 2}}, {{3, MAIN_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 2}},
}; };
StreamsCalculationTestCase _1sockets_6cores_tput_4 = { StreamsCalculationTestCase _1sockets_6cores_tput_4 = {
0,
0, 0,
0, 0,
1, 1,
@ -476,7 +622,8 @@ TEST_P(StreamsCalculationTests, StreamsCalculation) {}
INSTANTIATE_TEST_SUITE_P(StreamsInfoTable, INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
StreamsCalculationTests, StreamsCalculationTests,
testing::Values(_2sockets_104cores_latency_1, testing::Values(_2sockets_104cores_latency_1,
_2sockets_104cores_latency_1, _2sockets_104cores_latency_2,
_2sockets_104cores_latency_3,
_2sockets_104cores_tput_1, _2sockets_104cores_tput_1,
_2sockets_104cores_tput_2, _2sockets_104cores_tput_2,
_2sockets_104cores_tput_3, _2sockets_104cores_tput_3,
@ -485,6 +632,10 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
_2sockets_104cores_tput_6, _2sockets_104cores_tput_6,
_2sockets_104cores_tput_7, _2sockets_104cores_tput_7,
_2sockets_104cores_tput_8, _2sockets_104cores_tput_8,
_2sockets_104cores_tput_9,
_2sockets_104cores_tput_10,
_2sockets_104cores_tput_11,
_2sockets_104cores_tput_12,
_2sockets_48cores_latency_1, _2sockets_48cores_latency_1,
_2sockets_48cores_tput_1, _2sockets_48cores_tput_1,
_2sockets_48cores_tput_2, _2sockets_48cores_tput_2,
@ -494,6 +645,7 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
_1sockets_14cores_latency_2, _1sockets_14cores_latency_2,
_1sockets_14cores_latency_3, _1sockets_14cores_latency_3,
_1sockets_14cores_latency_4, _1sockets_14cores_latency_4,
_1sockets_14cores_latency_5,
_1sockets_14cores_tput_1, _1sockets_14cores_tput_1,
_1sockets_14cores_tput_2, _1sockets_14cores_tput_2,
_1sockets_14cores_tput_3, _1sockets_14cores_tput_3,
@ -502,6 +654,10 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
_1sockets_14cores_tput_6, _1sockets_14cores_tput_6,
_1sockets_14cores_tput_7, _1sockets_14cores_tput_7,
_1sockets_14cores_tput_8, _1sockets_14cores_tput_8,
_1sockets_14cores_tput_9,
_1sockets_14cores_tput_10,
_1sockets_14cores_tput_11,
_1sockets_14cores_tput_12,
_1sockets_10cores_latency_1, _1sockets_10cores_latency_1,
_1sockets_10cores_latency_2, _1sockets_10cores_latency_2,
_1sockets_10cores_latency_3, _1sockets_10cores_latency_3,