From 613b66ba35a056215260427e6c3230af0cf82011 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Fri, 24 Mar 2023 15:27:13 +0800 Subject: [PATCH] include nireq during streams calculation (#16378) * include nireq during streams calculation * update description for comments * update description --- .../intel_cpu/src/cpu_streams_calculation.cpp | 38 +++-- .../intel_cpu/src/cpu_streams_calculation.hpp | 33 ++-- .../tests/unit/streams_info_table_test.cpp | 158 +++++++++++++++++- 3 files changed, 201 insertions(+), 28 deletions(-) diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp index b5a22160228..ea5111015d9 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp @@ -19,6 +19,7 @@ namespace intel_cpu { std::vector> get_streams_info_table(const int input_streams, const int input_threads, + const int input_infer_requests, const int model_prefer_threads, const std::vector> proc_type_table) { std::vector stream_info(CPU_STREAMS_TABLE_SIZE); @@ -74,13 +75,12 @@ std::vector> get_streams_info_table(const int input_streams, } if (0 != input_streams) { - if (input_streams >= n_threads) { + n_streams = (input_infer_requests > 0) ? std::min(input_streams, input_infer_requests) : input_streams; + if (n_streams >= n_threads) { n_streams = n_threads; n_threads_per_stream = 1; } else { - n_streams = input_streams; - n_threads_per_stream = - std::min(std::max(1, n_threads / input_streams), proc_type_table[0][MAIN_CORE_PROC]); + n_threads_per_stream = std::min(std::max(1, n_threads / n_streams), proc_type_table[0][MAIN_CORE_PROC]); if (proc_type_table.size() == 1) { if ((n_threads_per_stream > proc_type_table[0][MAIN_CORE_PROC]) && (n_threads_per_stream < proc_type_table[0][MAIN_CORE_PROC] * 2)) { @@ -107,21 +107,29 @@ std::vector> get_streams_info_table(const int input_streams, n_threads_per_stream = (n_proc > 16) ? 
4 : std::max(1, static_cast(n_proc / 4)); } n_streams = static_cast(n_threads / n_threads_per_stream); - - while (n_streams < n_threads_per_stream) { - if (1 == n_threads_per_stream) { - break; - } else { - n_threads_per_stream = static_cast((n_threads_per_stream * 2 - 1) / 2); - n_threads_per_stream = static_cast( - proc_type_table[0][MAIN_CORE_PROC] / - ((proc_type_table[0][MAIN_CORE_PROC] + n_threads_per_stream - 1) / n_threads_per_stream)); - n_streams = static_cast(n_threads / n_threads_per_stream); + if ((input_infer_requests > 0) && (n_streams > input_infer_requests)) { + n_streams = input_infer_requests; + n_threads_per_stream = + std::min(static_cast(n_threads / n_streams), proc_type_table[0][MAIN_CORE_PROC]); + } else { + while (n_streams < n_threads_per_stream) { + if (1 == n_threads_per_stream) { + break; + } else { + n_threads_per_stream = static_cast((n_threads_per_stream * 2 - 1) / 2); + n_threads_per_stream = + static_cast(proc_type_table[0][MAIN_CORE_PROC] / + ((proc_type_table[0][MAIN_CORE_PROC] + n_threads_per_stream - 1) / + n_threads_per_stream)); + n_streams = static_cast(n_threads / n_threads_per_stream); + } } } } else { n_streams = ((n_threads + model_prefer_threads - 1) / model_prefer_threads); - n_threads_per_stream = static_cast(n_threads / n_streams); + n_streams = (input_infer_requests > 0) ? 
std::min(n_streams, input_infer_requests) : n_streams; + n_threads_per_stream = + std::min(static_cast(n_threads / n_streams), proc_type_table[0][MAIN_CORE_PROC]); } } diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp index c43388242ab..53614942bf8 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp @@ -14,21 +14,30 @@ namespace ov { namespace intel_cpu { /** - * @brief Generate streams information table according to processors type table - * @param[in] input_streams is target streams set by user via NUM_STREAMS or hints. - * - input "0" mean function generate the optimal number of streams - * - LATENCY hint equals 1 stream. - * @param[in] input_threads is max threads set by user via INFERNECE_NUM_THREADS. - * - input "0" mean function can use all resource in proc_type_table - * - When user limit max threads, streams in output cannot be more than max threads - * @param[in] model_prefer_threads is preferred threads per stream based on model generated in previous function - * - input "0" mean function generate the optimal threads per stream based on platform - * @param[in] proc_type_table candidate processors available at this time - * - candidate processors have benn updated based on properties like "Ecore only" in previous function - * @return summary table of streams info will be used by StreamsExecutor + * @brief Generate streams information table according to processors type table. + * @param[in] input_streams is the targeted number of streams set by user via ov::num_streams or hints. + * - input "0" indicates the optimal number of streams generated by the function. + * - When user sets LATENCY hint, OpenVINO runtime generate one stream per CPU node. + * @param[in] input_threads is the max number of threads set by user via ov::inference_num_threads. 
+ * - input "0" indicates that the function can use all resources in proc_type_table. + * - If user limits the max number of threads, the final number of streams output cannot exceed the max + * number of threads. + * @param[in] input_infer_requests is the max number of infer requests set by user via ov::hint::num_requests. + * - input "0" indicates that the number of streams is not limited by the number of infer requests. + * - If user limits the max number of infer requests, the final number of streams output cannot exceed the + * max number of infer requests. + * @param[in] model_prefer_threads is preferred number of threads per stream based on the model generated in previous + * function. + * - input "0" indicates that the function generates the optimal number of threads per stream based on + * processors type information. + * @param[in] proc_type_table is currently available candidate processors. + * - candidate processors have been updated based on user input hints like ov::hint::scheduling_core_type + * in previous function. + * @return streams information table which will be used by StreamsExecutor. 
*/ std::vector> get_streams_info_table(const int input_streams, const int input_threads, + const int input_infer_requests, const int model_prefer_threads, const std::vector> proc_type_table); } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp b/src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp index 2fc0c36712a..696c6508c47 100644 --- a/src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp @@ -18,6 +18,7 @@ namespace { struct StreamsCalculationTestCase { int input_streams; int input_threads; + int input_infer_requests; int model_prefer_threads; std::vector> proc_type_table; std::vector> stream_info_table; @@ -32,6 +33,7 @@ public: std::vector> test_stream_info_table = ov::intel_cpu::get_streams_info_table(test_data.input_streams, test_data.input_threads, + test_data.input_infer_requests, test_data.model_prefer_threads, test_data.proc_type_table); @@ -43,6 +45,7 @@ StreamsCalculationTestCase _2sockets_104cores_latency_1 = { 1, 0, 0, + 0, {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, {{1, MAIN_CORE_PROC, 104}}, }; @@ -51,6 +54,16 @@ StreamsCalculationTestCase _2sockets_104cores_latency_2 = { 1, 20, 0, + 0, + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + {{1, MAIN_CORE_PROC, 20}}, +}; + +StreamsCalculationTestCase _2sockets_104cores_latency_3 = { + 1, + 20, + 5, + 0, {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, {{1, MAIN_CORE_PROC, 20}}, }; @@ -59,6 +72,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_1 = { 0, 0, 0, + 0, {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, {{26, MAIN_CORE_PROC, 4}}, }; @@ -67,6 +81,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_2 = { 2, 0, 0, + 0, {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, {{2, MAIN_CORE_PROC, 52}}, }; @@ -75,6 +90,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_3 = { 0, 20, 0, + 0, {{208, 104, 0, 104}, 
{104, 52, 0, 52}, {104, 52, 0, 52}}, {{5, MAIN_CORE_PROC, 4}}, }; @@ -83,11 +99,13 @@ StreamsCalculationTestCase _2sockets_104cores_tput_4 = { 2, 20, 0, + 0, {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, {{2, MAIN_CORE_PROC, 10}}, }; StreamsCalculationTestCase _2sockets_104cores_tput_5 = { + 0, 0, 0, 1, @@ -96,6 +114,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_5 = { }; StreamsCalculationTestCase _2sockets_104cores_tput_6 = { + 0, 0, 0, 2, @@ -104,6 +123,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_6 = { }; StreamsCalculationTestCase _2sockets_104cores_tput_7 = { + 0, 0, 0, 8, @@ -114,15 +134,53 @@ StreamsCalculationTestCase _2sockets_104cores_tput_7 = { StreamsCalculationTestCase _2sockets_104cores_tput_8 = { 0, 40, + 0, 8, {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, {{5, MAIN_CORE_PROC, 8}}, }; +StreamsCalculationTestCase _2sockets_104cores_tput_9 = { + 5, + 20, + 2, + 0, + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + {{2, MAIN_CORE_PROC, 10}}, +}; + +StreamsCalculationTestCase _2sockets_104cores_tput_10 = { + 0, + 0, + 2, + 0, + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + {{2, MAIN_CORE_PROC, 52}}, +}; + +StreamsCalculationTestCase _2sockets_104cores_tput_11 = { + 2, + 0, + 5, + 0, + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + {{2, MAIN_CORE_PROC, 52}}, +}; + +StreamsCalculationTestCase _2sockets_104cores_tput_12 = { + 0, + 0, + 2, + 2, + {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}}, + {{2, MAIN_CORE_PROC, 52}}, +}; + StreamsCalculationTestCase _2sockets_48cores_latency_1 = { 1, 0, 0, + 0, {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}}, {{1, MAIN_CORE_PROC, 48}}, }; @@ -131,6 +189,7 @@ StreamsCalculationTestCase _2sockets_48cores_tput_1 = { 0, 0, 0, + 0, {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}}, {{12, MAIN_CORE_PROC, 4}}, }; @@ -139,6 +198,7 @@ StreamsCalculationTestCase _2sockets_48cores_tput_2 = { 100, 0, 0, + 0, {{48, 48, 0, 0}, {24, 24, 0, 0}, 
{24, 24, 0, 0}}, {{48, MAIN_CORE_PROC, 1}}, }; @@ -147,6 +207,7 @@ StreamsCalculationTestCase _2sockets_48cores_tput_3 = { 0, 100, 0, + 0, {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}}, {{12, MAIN_CORE_PROC, 4}}, }; @@ -154,6 +215,7 @@ StreamsCalculationTestCase _2sockets_48cores_tput_3 = { StreamsCalculationTestCase _2sockets_48cores_tput_4 = { 2, 20, + 0, 1, {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}}, {{2, MAIN_CORE_PROC, 10}}, @@ -163,6 +225,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_1 = { 1, 0, 0, + 0, {{20, 6, 8, 6}}, {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}}, }; @@ -171,6 +234,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_2 = { 1, 10, 0, + 0, {{20, 6, 8, 6}}, {{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 4}}, }; @@ -178,6 +242,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_2 = { StreamsCalculationTestCase _1sockets_14cores_latency_3 = { 1, 0, + 0, 6, {{20, 6, 8, 6}}, {{1, MAIN_CORE_PROC, 6}}, @@ -186,6 +251,16 @@ StreamsCalculationTestCase _1sockets_14cores_latency_3 = { StreamsCalculationTestCase _1sockets_14cores_latency_4 = { 1, 0, + 0, + 14, + {{20, 6, 8, 6}}, + {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}}, +}; + +StreamsCalculationTestCase _1sockets_14cores_latency_5 = { + 1, + 0, + 2, 14, {{20, 6, 8, 6}}, {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}}, @@ -195,6 +270,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_1 = { 0, 0, 0, + 0, {{20, 6, 8, 6}}, {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}}, }; @@ -203,6 +279,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_2 = { 2, 0, 0, + 0, {{20, 6, 8, 6}}, {{1, MAIN_CORE_PROC, 6}, {1, EFFICIENT_CORE_PROC, 6}}, }; @@ -211,6 +288,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_3 = { 4, 0, 0, + 0, {{20, 6, 8, 6}}, {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}}, }; @@ -219,11 +297,13 @@ 
StreamsCalculationTestCase _1sockets_14cores_tput_4 = { 0, 12, 0, + 0, {{20, 6, 8, 6}}, {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}}, }; StreamsCalculationTestCase _1sockets_14cores_tput_5 = { + 0, 0, 0, 1, @@ -232,6 +312,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_5 = { }; StreamsCalculationTestCase _1sockets_14cores_tput_6 = { + 0, 0, 0, 2, @@ -243,6 +324,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_7 = { 100, 0, 0, + 0, {{20, 6, 8, 6}}, {{6, MAIN_CORE_PROC, 1}, {8, EFFICIENT_CORE_PROC, 1}, {6, HYPER_THREADING_PROC, 1}}, }; @@ -251,14 +333,52 @@ StreamsCalculationTestCase _1sockets_14cores_tput_8 = { 0, 100, 0, + 0, {{20, 6, 8, 6}}, {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}}, }; +StreamsCalculationTestCase _1sockets_14cores_tput_9 = { + 4, + 0, + 8, + 0, + {{20, 6, 8, 6}}, + {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}}, +}; + +StreamsCalculationTestCase _1sockets_14cores_tput_10 = { + 6, + 0, + 4, + 0, + {{20, 6, 8, 6}}, + {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}}, +}; + +StreamsCalculationTestCase _1sockets_14cores_tput_11 = { + 0, + 0, + 2, + 0, + {{20, 6, 8, 6}}, + {{1, MAIN_CORE_PROC, 6}, {1, EFFICIENT_CORE_PROC, 6}}, +}; + +StreamsCalculationTestCase _1sockets_14cores_tput_12 = { + 0, + 0, + 2, + 2, + {{20, 6, 8, 6}}, + {{1, MAIN_CORE_PROC, 6}, {1, EFFICIENT_CORE_PROC, 6}}, +}; + StreamsCalculationTestCase _1sockets_10cores_latency_1 = { 1, 0, 0, + 0, {{12, 2, 8, 2}}, {{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}}, }; @@ -267,6 +387,7 @@ StreamsCalculationTestCase _1sockets_10cores_latency_2 = { 1, 8, 0, + 0, {{12, 2, 8, 2}}, {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 6}}, }; @@ -274,6 +395,7 @@ StreamsCalculationTestCase _1sockets_10cores_latency_2 = { StreamsCalculationTestCase _1sockets_10cores_latency_3 = { 1, 0, + 0, 2, {{12, 2, 8, 2}}, {{1, MAIN_CORE_PROC, 2}}, @@ -282,6 +404,7 @@ StreamsCalculationTestCase 
_1sockets_10cores_latency_3 = { StreamsCalculationTestCase _1sockets_10cores_latency_4 = { 1, 0, + 0, 10, {{12, 2, 8, 2}}, {{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}}, @@ -291,6 +414,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_1 = { 0, 0, 0, + 0, {{12, 2, 8, 2}}, {{1, MAIN_CORE_PROC, 2}, {4, EFFICIENT_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 2}}, }; @@ -299,6 +423,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_2 = { 2, 0, 0, + 0, {{12, 2, 8, 2}}, {{1, MAIN_CORE_PROC, 2}, {1, EFFICIENT_CORE_PROC, 2}}, }; @@ -307,6 +432,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_3 = { 4, 0, 0, + 0, {{12, 2, 8, 2}}, {{1, MAIN_CORE_PROC, 2}, {3, EFFICIENT_CORE_PROC, 2}}, }; @@ -315,11 +441,13 @@ StreamsCalculationTestCase _1sockets_10cores_tput_4 = { 0, 6, 0, + 0, {{12, 2, 8, 2}}, {{1, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}}, }; StreamsCalculationTestCase _1sockets_10cores_tput_5 = { + 0, 0, 0, 1, @@ -328,6 +456,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_5 = { }; StreamsCalculationTestCase _1sockets_10cores_tput_6 = { + 0, 0, 0, 2, @@ -339,6 +468,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_1 = { 1, 0, 0, + 0, {{12, 4, 4, 4}}, {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}}, }; @@ -347,6 +477,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_2 = { 1, 100, 0, + 0, {{12, 4, 4, 4}}, {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}}, }; @@ -354,6 +485,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_2 = { StreamsCalculationTestCase _1sockets_8cores_latency_3 = { 1, 0, + 0, 4, {{12, 4, 4, 4}}, {{1, MAIN_CORE_PROC, 4}}, @@ -362,6 +494,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_3 = { StreamsCalculationTestCase _1sockets_8cores_latency_4 = { 1, 0, + 0, 8, {{12, 4, 4, 4}}, {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}}, @@ -371,6 +504,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_1 = { 0, 0, 0, + 
0, {{12, 4, 4, 4}}, {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}, {2, HYPER_THREADING_PROC, 2}}, }; @@ -379,6 +513,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_2 = { 2, 0, 0, + 0, {{12, 4, 4, 4}}, {{1, MAIN_CORE_PROC, 4}, {1, EFFICIENT_CORE_PROC, 4}}, }; @@ -387,6 +522,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_3 = { 4, 0, 0, + 0, {{12, 4, 4, 4}}, {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}}, }; @@ -395,6 +531,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_4 = { 6, 0, 0, + 0, {{12, 4, 4, 4}}, {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}, {2, HYPER_THREADING_PROC, 2}}, }; @@ -403,6 +540,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_5 = { 0, 6, 0, + 0, {{12, 4, 4, 4}}, {{2, MAIN_CORE_PROC, 2}, {1, EFFICIENT_CORE_PROC, 2}}, }; @@ -411,11 +549,13 @@ StreamsCalculationTestCase _1sockets_8cores_tput_6 = { 0, 8, 0, + 0, {{12, 4, 4, 4}}, {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}}, }; StreamsCalculationTestCase _1sockets_8cores_tput_7 = { + 0, 0, 0, 1, @@ -427,6 +567,7 @@ StreamsCalculationTestCase _1sockets_6cores_latency_1 = { 1, 0, 0, + 0, {{12, 6, 0, 6}}, {{1, MAIN_CORE_PROC, 6}}, }; @@ -435,6 +576,7 @@ StreamsCalculationTestCase _1sockets_6cores_latency_2 = { 1, 100, 0, + 0, {{12, 6, 0, 6}}, {{1, MAIN_CORE_PROC, 6}}, }; @@ -443,6 +585,7 @@ StreamsCalculationTestCase _1sockets_6cores_tput_1 = { 0, 0, 0, + 0, {{12, 6, 0, 6}}, {{2, MAIN_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}}, }; @@ -451,6 +594,7 @@ StreamsCalculationTestCase _1sockets_6cores_tput_2 = { 2, 0, 0, + 0, {{12, 6, 0, 6}}, {{1, MAIN_CORE_PROC, 6}, {1, HYPER_THREADING_PROC, 6}}, }; @@ -459,11 +603,13 @@ StreamsCalculationTestCase _1sockets_6cores_tput_3 = { 0, 8, 0, + 0, {{12, 6, 0, 6}}, {{3, MAIN_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 2}}, }; StreamsCalculationTestCase _1sockets_6cores_tput_4 = { + 0, 0, 0, 1, @@ -476,7 +622,8 @@ TEST_P(StreamsCalculationTests, StreamsCalculation) {} INSTANTIATE_TEST_SUITE_P(StreamsInfoTable, 
StreamsCalculationTests, testing::Values(_2sockets_104cores_latency_1, - _2sockets_104cores_latency_1, + _2sockets_104cores_latency_2, + _2sockets_104cores_latency_3, _2sockets_104cores_tput_1, _2sockets_104cores_tput_2, _2sockets_104cores_tput_3, @@ -485,6 +632,10 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable, _2sockets_104cores_tput_6, _2sockets_104cores_tput_7, _2sockets_104cores_tput_8, + _2sockets_104cores_tput_9, + _2sockets_104cores_tput_10, + _2sockets_104cores_tput_11, + _2sockets_104cores_tput_12, _2sockets_48cores_latency_1, _2sockets_48cores_tput_1, _2sockets_48cores_tput_2, @@ -494,6 +645,7 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable, _1sockets_14cores_latency_2, _1sockets_14cores_latency_3, _1sockets_14cores_latency_4, + _1sockets_14cores_latency_5, _1sockets_14cores_tput_1, _1sockets_14cores_tput_2, _1sockets_14cores_tput_3, @@ -502,6 +654,10 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable, _1sockets_14cores_tput_6, _1sockets_14cores_tput_7, _1sockets_14cores_tput_8, + _1sockets_14cores_tput_9, + _1sockets_14cores_tput_10, + _1sockets_14cores_tput_11, + _1sockets_14cores_tput_12, _1sockets_10cores_latency_1, _1sockets_10cores_latency_2, _1sockets_10cores_latency_3,