From 613b66ba35a056215260427e6c3230af0cf82011 Mon Sep 17 00:00:00 2001
From: "Shen, Wanglei" <wanglei.shen@intel.com>
Date: Fri, 24 Mar 2023 15:27:13 +0800
Subject: [PATCH] include nireq during streams calculation (#16378)

* include nireq during streams calculation

* update description for comments

* update description
---
 .../intel_cpu/src/cpu_streams_calculation.cpp |  38 +++--
 .../intel_cpu/src/cpu_streams_calculation.hpp |  33 ++--
 .../tests/unit/streams_info_table_test.cpp    | 158 +++++++++++++++++-
 3 files changed, 201 insertions(+), 28 deletions(-)
diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
index b5a22160228..ea5111015d9 100644
--- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
+++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
@@ -19,6 +19,7 @@ namespace intel_cpu {
 
 std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
                                                      const int input_threads,
+                                                     const int input_infer_requests,
                                                      const int model_prefer_threads,
                                                      const std::vector<std::vector<int>> proc_type_table) {
     std::vector<int> stream_info(CPU_STREAMS_TABLE_SIZE);
@@ -74,13 +75,12 @@ std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
         }
 
         if (0 != input_streams) {
-            if (input_streams >= n_threads) {
+            n_streams = (input_infer_requests > 0) ? std::min(input_streams, input_infer_requests) : input_streams;
+            if (n_streams >= n_threads) {
                 n_streams = n_threads;
                 n_threads_per_stream = 1;
             } else {
-                n_streams = input_streams;
-                n_threads_per_stream =
-                    std::min(std::max(1, n_threads / input_streams), proc_type_table[0][MAIN_CORE_PROC]);
+                n_threads_per_stream = std::min(std::max(1, n_threads / n_streams), proc_type_table[0][MAIN_CORE_PROC]);
                 if (proc_type_table.size() == 1) {
                     if ((n_threads_per_stream > proc_type_table[0][MAIN_CORE_PROC]) &&
                         (n_threads_per_stream < proc_type_table[0][MAIN_CORE_PROC] * 2)) {
@@ -107,21 +107,29 @@ std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
                     n_threads_per_stream = (n_proc > 16) ? 4 : std::max(1, static_cast<int>(n_proc / 4));
                 }
                 n_streams = static_cast<int>(n_threads / n_threads_per_stream);
-
-                while (n_streams < n_threads_per_stream) {
-                    if (1 == n_threads_per_stream) {
-                        break;
-                    } else {
-                        n_threads_per_stream = static_cast<int>((n_threads_per_stream * 2 - 1) / 2);
-                        n_threads_per_stream = static_cast<int>(
-                            proc_type_table[0][MAIN_CORE_PROC] /
-                            ((proc_type_table[0][MAIN_CORE_PROC] + n_threads_per_stream - 1) / n_threads_per_stream));
-                        n_streams = static_cast<int>(n_threads / n_threads_per_stream);
+                if ((input_infer_requests > 0) && (n_streams > input_infer_requests)) {
+                    n_streams = input_infer_requests;
+                    n_threads_per_stream =
+                        std::min(static_cast<int>(n_threads / n_streams), proc_type_table[0][MAIN_CORE_PROC]);
+                } else {
+                    while (n_streams < n_threads_per_stream) {
+                        if (1 == n_threads_per_stream) {
+                            break;
+                        } else {
+                            n_threads_per_stream = static_cast<int>((n_threads_per_stream * 2 - 1) / 2);
+                            n_threads_per_stream =
+                                static_cast<int>(proc_type_table[0][MAIN_CORE_PROC] /
+                                                 ((proc_type_table[0][MAIN_CORE_PROC] + n_threads_per_stream - 1) /
+                                                  n_threads_per_stream));
+                            n_streams = static_cast<int>(n_threads / n_threads_per_stream);
+                        }
                     }
                 }
             } else {
                 n_streams = ((n_threads + model_prefer_threads - 1) / model_prefer_threads);
-                n_threads_per_stream = static_cast<int>(n_threads / n_streams);
+                n_streams = (input_infer_requests > 0) ? std::min(n_streams, input_infer_requests) : n_streams;
+                n_threads_per_stream =
+                    std::min(static_cast<int>(n_threads / n_streams), proc_type_table[0][MAIN_CORE_PROC]);
             }
         }
 
diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp
index c43388242ab..53614942bf8 100644
--- a/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp
+++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp
@@ -14,21 +14,30 @@
 namespace ov {
 namespace intel_cpu {
 /**
- * @brief      Generate streams information table according to processors type table
- * @param[in]  input_streams is target streams set by user via NUM_STREAMS or hints.
- *               - input "0" mean function generate the optimal number of streams
- *               - LATENCY hint equals 1 stream.
- * @param[in]  input_threads is max threads set by user via INFERNECE_NUM_THREADS.
- *               - input "0" mean function can use all resource in proc_type_table
- *               - When user limit max threads, streams in output cannot be more than max threads
- * @param[in]  model_prefer_threads is preferred threads per stream based on model generated in previous function
- *               - input "0" mean function generate the optimal threads per stream based on platform
- * @param[in]  proc_type_table candidate processors available at this time
- *               - candidate processors have benn updated based on properties like "Ecore only" in previous function
- * @return     summary table of streams info will be used by StreamsExecutor
+ * @brief      Generate streams information table according to processors type table.
+ * @param[in]  input_streams is the targeted number of streams set by user via ov::num_streams or hints.
+ *               - input "0" indicates the optimal number of streams generated by the function.
+ *               - When user sets LATENCY hint, OpenVINO runtime generates one stream per CPU node.
+ * @param[in]  input_threads is the max number of threads set by user via ov::inference_num_threads.
+ *               - input "0" indicates that the function can use all resource in proc_type_table.
+ *               - If user limits the max number of threads, the final number of streams output cannot exceed the max
+ * number of threads.
+ * @param[in]  input_infer_requests is the max number of infer requests set by user via ov::hint::num_requests.
+ *               - input "0" indicates that the number of streams is not limited by the number of infer requests.
+ *               - If user limits the max number of infer requests, the final number of streams output cannot exceed the
+ * max number of infer requests.
+ * @param[in]  model_prefer_threads is preferred number of threads per stream based on the model generated in previous
+ * function.
+ *               - input "0" indicates that the function generates the optimal number of threads per stream based on
+ * processors type information.
+ * @param[in]  proc_type_table is currently available candidate processors.
+ *               - candidate processors have been updated based on user input hints like ov::hint::scheduling_core_type
+ * in previous function.
+ * @return     streams information table which will be used by StreamsExecutor.
  */
 std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
                                                      const int input_threads,
+                                                     const int input_infer_requests,
                                                      const int model_prefer_threads,
                                                      const std::vector<std::vector<int>> proc_type_table);
 }  // namespace intel_cpu
diff --git a/src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp b/src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp
index 2fc0c36712a..696c6508c47 100644
--- a/src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp
+++ b/src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp
@@ -18,6 +18,7 @@ namespace {
 struct StreamsCalculationTestCase {
     int input_streams;
     int input_threads;
+    int input_infer_requests;
     int model_prefer_threads;
     std::vector<std::vector<int>> proc_type_table;
     std::vector<std::vector<int>> stream_info_table;
@@ -32,6 +33,7 @@ public:
         std::vector<std::vector<int>> test_stream_info_table =
             ov::intel_cpu::get_streams_info_table(test_data.input_streams,
                                                   test_data.input_threads,
+                                                  test_data.input_infer_requests,
                                                   test_data.model_prefer_threads,
                                                   test_data.proc_type_table);
 
@@ -43,6 +45,7 @@ StreamsCalculationTestCase _2sockets_104cores_latency_1 = {
     1,
     0,
     0,
+    0,
     {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
     {{1, MAIN_CORE_PROC, 104}},
 };
@@ -51,6 +54,16 @@ StreamsCalculationTestCase _2sockets_104cores_latency_2 = {
     1,
     20,
     0,
+    0,
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{1, MAIN_CORE_PROC, 20}},
+};
+
+StreamsCalculationTestCase _2sockets_104cores_latency_3 = {  // infer requests (5) above the requested stream count (1)
+    1,   // input_streams
+    20,  // input_threads
+    5,   // input_infer_requests
+    0,   // model_prefer_threads
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},  // proc_type_table
+    {{1, MAIN_CORE_PROC, 20}},                                 // stream_info_table
+};
@@ -59,6 +72,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_1 = {
     0,
     0,
     0,
+    0,
     {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
     {{26, MAIN_CORE_PROC, 4}},
 };
@@ -67,6 +81,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_2 = {
     2,
     0,
     0,
+    0,
     {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
     {{2, MAIN_CORE_PROC, 52}},
 };
@@ -75,6 +90,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_3 = {
     0,
     20,
     0,
+    0,
     {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
     {{5, MAIN_CORE_PROC, 4}},
 };
@@ -83,11 +99,13 @@ StreamsCalculationTestCase _2sockets_104cores_tput_4 = {
     2,
     20,
     0,
+    0,
     {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
     {{2, MAIN_CORE_PROC, 10}},
 };
 
 StreamsCalculationTestCase _2sockets_104cores_tput_5 = {
+    0,
     0,
     0,
     1,
@@ -96,6 +114,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_5 = {
 };
 
 StreamsCalculationTestCase _2sockets_104cores_tput_6 = {
+    0,
     0,
     0,
     2,
@@ -104,6 +123,7 @@ StreamsCalculationTestCase _2sockets_104cores_tput_6 = {
 };
 
 StreamsCalculationTestCase _2sockets_104cores_tput_7 = {
+    0,
     0,
     0,
     8,
@@ -114,15 +134,53 @@ StreamsCalculationTestCase _2sockets_104cores_tput_7 = {
 StreamsCalculationTestCase _2sockets_104cores_tput_8 = {
     0,
     40,
+    0,
     8,
     {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
     {{5, MAIN_CORE_PROC, 8}},
 };
 
+StreamsCalculationTestCase _2sockets_104cores_tput_9 = {  // infer requests (2) below the requested stream count (5)
+    5,   // input_streams
+    20,  // input_threads
+    2,   // input_infer_requests
+    0,   // model_prefer_threads
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},  // proc_type_table
+    {{2, MAIN_CORE_PROC, 10}},                                 // stream_info_table
+};
+
+StreamsCalculationTestCase _2sockets_104cores_tput_10 = {  // no stream count requested; 2 infer requests
+    0,  // input_streams
+    0,  // input_threads
+    2,  // input_infer_requests
+    0,  // model_prefer_threads
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},  // proc_type_table
+    {{2, MAIN_CORE_PROC, 52}},                                 // stream_info_table
+};
+
+StreamsCalculationTestCase _2sockets_104cores_tput_11 = {  // infer requests (5) above the requested stream count (2)
+    2,  // input_streams
+    0,  // input_threads
+    5,  // input_infer_requests
+    0,  // model_prefer_threads
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},  // proc_type_table
+    {{2, MAIN_CORE_PROC, 52}},                                 // stream_info_table
+};
+
+StreamsCalculationTestCase _2sockets_104cores_tput_12 = {  // no stream count requested; 2 infer requests, prefer 2 threads
+    0,  // input_streams
+    0,  // input_threads
+    2,  // input_infer_requests
+    2,  // model_prefer_threads
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},  // proc_type_table
+    {{2, MAIN_CORE_PROC, 52}},                                 // stream_info_table
+};
+
 StreamsCalculationTestCase _2sockets_48cores_latency_1 = {
     1,
     0,
     0,
+    0,
     {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
     {{1, MAIN_CORE_PROC, 48}},
 };
@@ -131,6 +189,7 @@ StreamsCalculationTestCase _2sockets_48cores_tput_1 = {
     0,
     0,
     0,
+    0,
     {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
     {{12, MAIN_CORE_PROC, 4}},
 };
@@ -139,6 +198,7 @@ StreamsCalculationTestCase _2sockets_48cores_tput_2 = {
     100,
     0,
     0,
+    0,
     {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
     {{48, MAIN_CORE_PROC, 1}},
 };
@@ -147,6 +207,7 @@ StreamsCalculationTestCase _2sockets_48cores_tput_3 = {
     0,
     100,
     0,
+    0,
     {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
     {{12, MAIN_CORE_PROC, 4}},
 };
@@ -154,6 +215,7 @@ StreamsCalculationTestCase _2sockets_48cores_tput_3 = {
 StreamsCalculationTestCase _2sockets_48cores_tput_4 = {
     2,
     20,
+    0,
     1,
     {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
     {{2, MAIN_CORE_PROC, 10}},
@@ -163,6 +225,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_1 = {
     1,
     0,
     0,
+    0,
     {{20, 6, 8, 6}},
     {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}},
 };
@@ -171,6 +234,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_2 = {
     1,
     10,
     0,
+    0,
     {{20, 6, 8, 6}},
     {{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 4}},
 };
@@ -178,6 +242,7 @@ StreamsCalculationTestCase _1sockets_14cores_latency_2 = {
 StreamsCalculationTestCase _1sockets_14cores_latency_3 = {
     1,
     0,
+    0,
     6,
     {{20, 6, 8, 6}},
     {{1, MAIN_CORE_PROC, 6}},
@@ -186,6 +251,16 @@ StreamsCalculationTestCase _1sockets_14cores_latency_3 = {
 StreamsCalculationTestCase _1sockets_14cores_latency_4 = {
     1,
     0,
+    0,
+    14,
+    {{20, 6, 8, 6}},
+    {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}},
+};
+
+StreamsCalculationTestCase _1sockets_14cores_latency_5 = {
+    1,
+    0,
+    2,
     14,
     {{20, 6, 8, 6}},
     {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}},
@@ -195,6 +270,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_1 = {
     0,
     0,
     0,
+    0,
     {{20, 6, 8, 6}},
     {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}},
 };
@@ -203,6 +279,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_2 = {
     2,
     0,
     0,
+    0,
     {{20, 6, 8, 6}},
     {{1, MAIN_CORE_PROC, 6}, {1, EFFICIENT_CORE_PROC, 6}},
 };
@@ -211,6 +288,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_3 = {
     4,
     0,
     0,
+    0,
     {{20, 6, 8, 6}},
     {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}},
 };
@@ -219,11 +297,13 @@ StreamsCalculationTestCase _1sockets_14cores_tput_4 = {
     0,
     12,
     0,
+    0,
     {{20, 6, 8, 6}},
     {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}},
 };
 
 StreamsCalculationTestCase _1sockets_14cores_tput_5 = {
+    0,
     0,
     0,
     1,
@@ -232,6 +312,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_5 = {
 };
 
 StreamsCalculationTestCase _1sockets_14cores_tput_6 = {
+    0,
     0,
     0,
     2,
@@ -243,6 +324,7 @@ StreamsCalculationTestCase _1sockets_14cores_tput_7 = {
     100,
     0,
     0,
+    0,
     {{20, 6, 8, 6}},
     {{6, MAIN_CORE_PROC, 1}, {8, EFFICIENT_CORE_PROC, 1}, {6, HYPER_THREADING_PROC, 1}},
 };
@@ -251,14 +333,52 @@ StreamsCalculationTestCase _1sockets_14cores_tput_8 = {
     0,
     100,
     0,
+    0,
     {{20, 6, 8, 6}},
     {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}},
 };
 
+StreamsCalculationTestCase _1sockets_14cores_tput_9 = {  // infer requests (8) above the requested stream count (4)
+    4,  // input_streams
+    0,  // input_threads
+    8,  // input_infer_requests
+    0,  // model_prefer_threads
+    {{20, 6, 8, 6}},                                        // proc_type_table
+    {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}},  // stream_info_table
+};
+
+StreamsCalculationTestCase _1sockets_14cores_tput_10 = {  // infer requests (4) below the requested stream count (6)
+    6,  // input_streams
+    0,  // input_threads
+    4,  // input_infer_requests
+    0,  // model_prefer_threads
+    {{20, 6, 8, 6}},                                        // proc_type_table
+    {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}},  // stream_info_table
+};
+
+StreamsCalculationTestCase _1sockets_14cores_tput_11 = {  // no stream count requested; 2 infer requests
+    0,  // input_streams
+    0,  // input_threads
+    2,  // input_infer_requests
+    0,  // model_prefer_threads
+    {{20, 6, 8, 6}},                                        // proc_type_table
+    {{1, MAIN_CORE_PROC, 6}, {1, EFFICIENT_CORE_PROC, 6}},  // stream_info_table
+};
+
+StreamsCalculationTestCase _1sockets_14cores_tput_12 = {  // no stream count requested; 2 infer requests, prefer 2 threads
+    0,  // input_streams
+    0,  // input_threads
+    2,  // input_infer_requests
+    2,  // model_prefer_threads
+    {{20, 6, 8, 6}},                                        // proc_type_table
+    {{1, MAIN_CORE_PROC, 6}, {1, EFFICIENT_CORE_PROC, 6}},  // stream_info_table
+};
+
 StreamsCalculationTestCase _1sockets_10cores_latency_1 = {
     1,
     0,
     0,
+    0,
     {{12, 2, 8, 2}},
     {{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}},
 };
@@ -267,6 +387,7 @@ StreamsCalculationTestCase _1sockets_10cores_latency_2 = {
     1,
     8,
     0,
+    0,
     {{12, 2, 8, 2}},
     {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 6}},
 };
@@ -274,6 +395,7 @@ StreamsCalculationTestCase _1sockets_10cores_latency_2 = {
 StreamsCalculationTestCase _1sockets_10cores_latency_3 = {
     1,
     0,
+    0,
     2,
     {{12, 2, 8, 2}},
     {{1, MAIN_CORE_PROC, 2}},
@@ -282,6 +404,7 @@ StreamsCalculationTestCase _1sockets_10cores_latency_3 = {
 StreamsCalculationTestCase _1sockets_10cores_latency_4 = {
     1,
     0,
+    0,
     10,
     {{12, 2, 8, 2}},
     {{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}},
@@ -291,6 +414,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_1 = {
     0,
     0,
     0,
+    0,
     {{12, 2, 8, 2}},
     {{1, MAIN_CORE_PROC, 2}, {4, EFFICIENT_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 2}},
 };
@@ -299,6 +423,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_2 = {
     2,
     0,
     0,
+    0,
     {{12, 2, 8, 2}},
     {{1, MAIN_CORE_PROC, 2}, {1, EFFICIENT_CORE_PROC, 2}},
 };
@@ -307,6 +432,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_3 = {
     4,
     0,
     0,
+    0,
     {{12, 2, 8, 2}},
     {{1, MAIN_CORE_PROC, 2}, {3, EFFICIENT_CORE_PROC, 2}},
 };
@@ -315,11 +441,13 @@ StreamsCalculationTestCase _1sockets_10cores_tput_4 = {
     0,
     6,
     0,
+    0,
     {{12, 2, 8, 2}},
     {{1, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}},
 };
 
 StreamsCalculationTestCase _1sockets_10cores_tput_5 = {
+    0,
     0,
     0,
     1,
@@ -328,6 +456,7 @@ StreamsCalculationTestCase _1sockets_10cores_tput_5 = {
 };
 
 StreamsCalculationTestCase _1sockets_10cores_tput_6 = {
+    0,
     0,
     0,
     2,
@@ -339,6 +468,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_1 = {
     1,
     0,
     0,
+    0,
     {{12, 4, 4, 4}},
     {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}},
 };
@@ -347,6 +477,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_2 = {
     1,
     100,
     0,
+    0,
     {{12, 4, 4, 4}},
     {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}},
 };
@@ -354,6 +485,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_2 = {
 StreamsCalculationTestCase _1sockets_8cores_latency_3 = {
     1,
     0,
+    0,
     4,
     {{12, 4, 4, 4}},
     {{1, MAIN_CORE_PROC, 4}},
@@ -362,6 +494,7 @@ StreamsCalculationTestCase _1sockets_8cores_latency_3 = {
 StreamsCalculationTestCase _1sockets_8cores_latency_4 = {
     1,
     0,
+    0,
     8,
     {{12, 4, 4, 4}},
     {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}},
@@ -371,6 +504,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_1 = {
     0,
     0,
     0,
+    0,
     {{12, 4, 4, 4}},
     {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}, {2, HYPER_THREADING_PROC, 2}},
 };
@@ -379,6 +513,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_2 = {
     2,
     0,
     0,
+    0,
     {{12, 4, 4, 4}},
     {{1, MAIN_CORE_PROC, 4}, {1, EFFICIENT_CORE_PROC, 4}},
 };
@@ -387,6 +522,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_3 = {
     4,
     0,
     0,
+    0,
     {{12, 4, 4, 4}},
     {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}},
 };
@@ -395,6 +531,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_4 = {
     6,
     0,
     0,
+    0,
     {{12, 4, 4, 4}},
     {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}, {2, HYPER_THREADING_PROC, 2}},
 };
@@ -403,6 +540,7 @@ StreamsCalculationTestCase _1sockets_8cores_tput_5 = {
     0,
     6,
     0,
+    0,
     {{12, 4, 4, 4}},
     {{2, MAIN_CORE_PROC, 2}, {1, EFFICIENT_CORE_PROC, 2}},
 };
@@ -411,11 +549,13 @@ StreamsCalculationTestCase _1sockets_8cores_tput_6 = {
     0,
     8,
     0,
+    0,
     {{12, 4, 4, 4}},
     {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}},
 };
 
 StreamsCalculationTestCase _1sockets_8cores_tput_7 = {
+    0,
     0,
     0,
     1,
@@ -427,6 +567,7 @@ StreamsCalculationTestCase _1sockets_6cores_latency_1 = {
     1,
     0,
     0,
+    0,
     {{12, 6, 0, 6}},
     {{1, MAIN_CORE_PROC, 6}},
 };
@@ -435,6 +576,7 @@ StreamsCalculationTestCase _1sockets_6cores_latency_2 = {
     1,
     100,
     0,
+    0,
     {{12, 6, 0, 6}},
     {{1, MAIN_CORE_PROC, 6}},
 };
@@ -443,6 +585,7 @@ StreamsCalculationTestCase _1sockets_6cores_tput_1 = {
     0,
     0,
     0,
+    0,
     {{12, 6, 0, 6}},
     {{2, MAIN_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}},
 };
@@ -451,6 +594,7 @@ StreamsCalculationTestCase _1sockets_6cores_tput_2 = {
     2,
     0,
     0,
+    0,
     {{12, 6, 0, 6}},
     {{1, MAIN_CORE_PROC, 6}, {1, HYPER_THREADING_PROC, 6}},
 };
@@ -459,11 +603,13 @@ StreamsCalculationTestCase _1sockets_6cores_tput_3 = {
     0,
     8,
     0,
+    0,
     {{12, 6, 0, 6}},
     {{3, MAIN_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 2}},
 };
 
 StreamsCalculationTestCase _1sockets_6cores_tput_4 = {
+    0,
     0,
     0,
     1,
@@ -476,7 +622,8 @@ TEST_P(StreamsCalculationTests, StreamsCalculation) {}
 INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
                          StreamsCalculationTests,
                          testing::Values(_2sockets_104cores_latency_1,
-                                         _2sockets_104cores_latency_1,
+                                         _2sockets_104cores_latency_2,
+                                         _2sockets_104cores_latency_3,
                                          _2sockets_104cores_tput_1,
                                          _2sockets_104cores_tput_2,
                                          _2sockets_104cores_tput_3,
@@ -485,6 +632,10 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
                                          _2sockets_104cores_tput_6,
                                          _2sockets_104cores_tput_7,
                                          _2sockets_104cores_tput_8,
+                                         _2sockets_104cores_tput_9,
+                                         _2sockets_104cores_tput_10,
+                                         _2sockets_104cores_tput_11,
+                                         _2sockets_104cores_tput_12,
                                          _2sockets_48cores_latency_1,
                                          _2sockets_48cores_tput_1,
                                          _2sockets_48cores_tput_2,
@@ -494,6 +645,7 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
                                          _1sockets_14cores_latency_2,
                                          _1sockets_14cores_latency_3,
                                          _1sockets_14cores_latency_4,
+                                         _1sockets_14cores_latency_5,
                                          _1sockets_14cores_tput_1,
                                          _1sockets_14cores_tput_2,
                                          _1sockets_14cores_tput_3,
@@ -502,6 +654,10 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
                                          _1sockets_14cores_tput_6,
                                          _1sockets_14cores_tput_7,
                                          _1sockets_14cores_tput_8,
+                                         _1sockets_14cores_tput_9,
+                                         _1sockets_14cores_tput_10,
+                                         _1sockets_14cores_tput_11,
+                                         _1sockets_14cores_tput_12,
                                          _1sockets_10cores_latency_1,
                                          _1sockets_10cores_latency_2,
                                          _1sockets_10cores_latency_3,