Create streams info table based on processor type table (#15571)

* enable streams info table based on CPU mapping * add detail processor info for mix stream * fix code style issue * fix typo * fix code style issue for Android build * update description of streams info table * move streams info related function to new file * remove duplicated definition * add description for parameters of get_streams_info_table() * update test case file * fix windows build issue * fix windows build issue * fix windows build issue * fix typo * update latency mode for hybrid platform * update limit threads for latency * update latency mode for 2 sockets platform
2023-03-06 06:06:41 +00:00
parent 0860db0dc3
commit e605a4c344
4 changed files with 772 additions and 0 deletions
--- a/src/inference/dev_api/threading/ie_cpu_streams_info.hpp
+++ b/src/inference/dev_api/threading/ie_cpu_streams_info.hpp
@@ -0,0 +1,40 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @file ie_cpu_streams_info.hpp
+ * @brief A header file for Inference Engine CPU streams info table implementation.
+ */
+
+#pragma once
+
+namespace InferenceEngine {
+
+/**
+ * @enum       column_of_cpu_streams_info_table
+ * @brief      This enum contains the definition of each column in the cpu streams information table.
+ *
+ * The following are two examples of the cpu streams information table.
+ *  1. 8 streams on hybrid platform which has 4 threads per stream (TPS).
+ *
+ *  NUMBER_OF_STREAMS | PROC_TYPE | THREADS_PER_STREAM
+ *          2               1                4          // 2 streams (4 TPS) on physical core of Intel Performance-cores
+ *          4               2                4          // 4 streams (4 TPS) on Intel Efficient-cores
+ *          2               3                4          // 2 streams (4 TPS) on logic core of Intel Performance-cores
+ *
+ * 2. 1 stream (10 TPS) on hybrid platform which has 2 threads on physical core and 8 threads on Ecore.
+ *
+ *  NUMBER_OF_STREAMS | PROC_TYPE | THREADS_PER_STREAM
+ *          1               0               10          // 1 stream (10 TPS) on multiple types of processors
+ *          0               1                2          // 2 threads on physical core of Intel Performance-cores
+ *          0               2                8          // 8 threads on Intel Efficient-cores
+ */
+typedef enum {
+    NUMBER_OF_STREAMS = 0,      //!< Number of streams on specific CPU core type
+    PROC_TYPE = 1,              //!< Core type of current streams
+    THREADS_PER_STREAM = 2,     //!< Number of threads per stream of current streams
+    CPU_STREAMS_TABLE_SIZE = 3  //!< Size of streams info table (number of columns in one row)
+} column_of_cpu_streams_info_table;
+
+}  // namespace InferenceEngine
--- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
+++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
@@ -0,0 +1,164 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cpu_streams_calculation.hpp"
+
+#include <algorithm>
+#include <cstdio>
+#include <iostream>
+#include <numeric>
+
+#include "ie_system_conf.h"
+#include "threading/ie_cpu_streams_info.hpp"
+
+using namespace InferenceEngine;
+
+namespace ov {
+namespace intel_cpu {
+
+/**
+ * Builds the streams info table (one row per entry, columns NUMBER_OF_STREAMS / PROC_TYPE / THREADS_PER_STREAM)
+ * from the first row of proc_type_table.
+ *
+ * - input_streams == 1 (latency): exactly one stream is described. When efficient cores are present and the thread
+ *   limit (input_threads, or model_prefer_threads when input_threads is 0) is 0 or larger than the MAIN_CORE_PROC
+ *   count, the stream is reported as an ALL_PROC row followed by rows with NUMBER_OF_STREAMS == 0 that split its
+ *   threads over the individual processor types. Otherwise a single MAIN_CORE_PROC row is returned.
+ * - any other input_streams (throughput): the number of streams and the threads per stream are derived from
+ *   input_streams, input_threads and model_prefer_threads. With a single-row proc_type_table the streams are then
+ *   distributed over the processor types; with several rows one MAIN_CORE_PROC row is returned.
+ *
+ * Stream sizing in the throughput branch is done against a base processor type: MAIN_CORE_PROC, or
+ * EFFICIENT_CORE_PROC when the table holds no main cores, so that a table limited to efficient cores does not lead
+ * to a division by zero. When there is no thread or no base core to schedule on, an empty table is returned.
+ *
+ * NOTE(review): proc_type_table is taken by value to stay in sync with the declaration in the header; switching
+ * both to a const reference would avoid a copy per call.
+ */
+std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
+                                                     const int input_threads,
+                                                     const int model_prefer_threads,
+                                                     const std::vector<std::vector<int>> proc_type_table) {
+    std::vector<int> stream_info(CPU_STREAMS_TABLE_SIZE);
+    std::vector<std::vector<int>> streams_info_table;
+
+    if (1 == input_streams) {
+        stream_info[NUMBER_OF_STREAMS] = 1;
+        // An explicit thread limit wins over the model preference; 0 means "no limit".
+        int limit_threads = (input_threads == 0) ? model_prefer_threads : input_threads;
+        if ((proc_type_table[0][EFFICIENT_CORE_PROC] > 0) &&
+            ((limit_threads == 0) || (limit_threads > proc_type_table[0][MAIN_CORE_PROC]))) {
+            // The stream does not fit on the main cores alone: describe it as one ALL_PROC stream ...
+            stream_info[PROC_TYPE] = ALL_PROC;
+            // Sum of the entries from MAIN_CORE_PROC up to, but excluding, HYPER_THREADING_PROC.
+            int n_threads = std::accumulate(proc_type_table[0].begin() + MAIN_CORE_PROC,
+                                            proc_type_table[0].begin() + HYPER_THREADING_PROC,
+                                            0);
+            stream_info[THREADS_PER_STREAM] = (limit_threads == 0) ? n_threads : std::min(n_threads, limit_threads);
+            streams_info_table.push_back(stream_info);
+            // ... followed by detail rows (NUMBER_OF_STREAMS == 0) giving the threads taken from each type.
+            stream_info[NUMBER_OF_STREAMS] = 0;
+            n_threads = stream_info[THREADS_PER_STREAM];
+            for (int n = MAIN_CORE_PROC; n < HYPER_THREADING_PROC; n++) {
+                if (0 != proc_type_table[0][n]) {
+                    stream_info[PROC_TYPE] = n;
+                    if (n_threads <= proc_type_table[0][n]) {
+                        // The remaining threads fit on this processor type: last detail row.
+                        stream_info[THREADS_PER_STREAM] = n_threads;
+                        streams_info_table.push_back(stream_info);
+                        break;
+                    } else {
+                        // Use this processor type completely and carry the rest over to the next one.
+                        stream_info[THREADS_PER_STREAM] = proc_type_table[0][n];
+                        streams_info_table.push_back(stream_info);
+                        n_threads -= proc_type_table[0][n];
+                    }
+                }
+            }
+        } else {
+            stream_info[PROC_TYPE] = MAIN_CORE_PROC;
+            stream_info[THREADS_PER_STREAM] = (limit_threads == 0)
+                                                  ? proc_type_table[0][MAIN_CORE_PROC]
+                                                  : std::min(proc_type_table[0][MAIN_CORE_PROC], limit_threads);
+            streams_info_table.push_back(stream_info);
+        }
+        return streams_info_table;
+
+    } else {
+        int n_streams = 0;
+        int n_threads = 0;
+        int n_threads_per_stream = 0;
+
+        // Processor type used as sizing reference: main cores, or efficient cores when there are no main cores.
+        const int base_type = (0 == proc_type_table[0][MAIN_CORE_PROC]) ? EFFICIENT_CORE_PROC : MAIN_CORE_PROC;
+        const int n_base_proc = proc_type_table[0][base_type];
+
+        // Threads available for all streams, capped by input_threads when it is set.
+        if (proc_type_table.size() == 1) {
+            n_threads = (0 == input_threads) ? proc_type_table[0][ALL_PROC]
+                                             : std::min(proc_type_table[0][ALL_PROC], input_threads);
+        } else {
+            n_threads = (0 == input_threads) ? proc_type_table[0][MAIN_CORE_PROC]
+                                             : std::min(proc_type_table[0][MAIN_CORE_PROC], input_threads);
+        }
+
+        // Nothing to schedule on; the arithmetic below would otherwise divide by zero.
+        if ((n_threads <= 0) || (n_base_proc <= 0)) {
+            return streams_info_table;
+        }
+
+        if (0 != input_streams) {
+            if (input_streams >= n_threads) {
+                // More streams requested than threads available: one thread per stream.
+                n_streams = n_threads;
+                n_threads_per_stream = 1;
+            } else {
+                n_streams = input_streams;
+                n_threads_per_stream = std::min(std::max(1, n_threads / input_streams), n_base_proc);
+                if ((proc_type_table.size() == 1) && (n_threads_per_stream < n_base_proc)) {
+                    // Re-balance so that the base cores are split into equally sized streams.
+                    n_threads_per_stream =
+                        n_base_proc / ((n_base_proc + n_threads_per_stream - 1) / n_threads_per_stream);
+                }
+            }
+        } else {
+            if (0 == model_prefer_threads) {
+                // No hint at all: pick a threads-per-stream value that divides the base core count.
+                int n_proc = std::min(n_threads, n_base_proc);
+                if (0 == n_proc % 4) {
+                    n_threads_per_stream = 4;
+                } else if (0 == n_proc % 5) {
+                    n_threads_per_stream = 5;
+                } else if (0 == n_proc % 3) {
+                    n_threads_per_stream = 3;
+                } else if (proc_type_table.size() == 1) {
+                    n_threads_per_stream = n_proc;
+                } else {
+                    n_threads_per_stream = (n_proc > 16) ? 4 : std::max(1, static_cast<int>(n_proc / 4));
+                }
+                n_streams = static_cast<int>(n_threads / n_threads_per_stream);
+
+                // Shrink the streams until there are at least as many streams as threads per stream.
+                while (n_streams < n_threads_per_stream) {
+                    if (1 == n_threads_per_stream) {
+                        break;
+                    } else {
+                        n_threads_per_stream = static_cast<int>((n_threads_per_stream * 2 - 1) / 2);
+                        n_threads_per_stream =
+                            static_cast<int>(n_base_proc / ((n_base_proc + n_threads_per_stream - 1) / n_threads_per_stream));
+                        n_streams = static_cast<int>(n_threads / n_threads_per_stream);
+                    }
+                }
+            } else {
+                // Model preference given: enough streams of (at most) model_prefer_threads threads each.
+                n_streams = ((n_threads + model_prefer_threads - 1) / model_prefer_threads);
+                n_threads_per_stream = static_cast<int>(n_threads / n_streams);
+            }
+        }
+
+        stream_info[THREADS_PER_STREAM] = n_threads_per_stream;
+
+        if (proc_type_table.size() == 1) {
+            // Distribute the streams over the processor types. If they do not all fit with the current
+            // threads-per-stream value, retry from scratch with one thread less per stream.
+            while (1) {
+                for (int n = MAIN_CORE_PROC; n < PROC_TYPE_TABLE_SIZE; n++) {
+                    if (0 != proc_type_table[0][n]) {
+                        stream_info[PROC_TYPE] = n;
+                        stream_info[NUMBER_OF_STREAMS] =
+                            static_cast<int>(proc_type_table[0][n] / stream_info[THREADS_PER_STREAM]);
+                        if (n_streams <= stream_info[NUMBER_OF_STREAMS]) {
+                            // All remaining streams fit on this processor type: done.
+                            stream_info[NUMBER_OF_STREAMS] = n_streams;
+                            streams_info_table.push_back(stream_info);
+                            return streams_info_table;
+                        } else {
+                            streams_info_table.push_back(stream_info);
+                            n_streams -= stream_info[NUMBER_OF_STREAMS];
+                        }
+                    }
+                }
+                if (1 == stream_info[THREADS_PER_STREAM]) {
+                    // Cannot shrink further: return what could be placed.
+                    return streams_info_table;
+                } else {
+                    stream_info[THREADS_PER_STREAM] -= 1;
+                    streams_info_table.clear();
+                }
+            }
+        } else {
+            stream_info[NUMBER_OF_STREAMS] = n_streams;
+            stream_info[PROC_TYPE] = MAIN_CORE_PROC;
+            stream_info[THREADS_PER_STREAM] = n_threads_per_stream;
+            streams_info_table.push_back(stream_info);
+            return streams_info_table;
+        }
+    }
+}
+}  // namespace intel_cpu
+}  // namespace ov
--- a/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp
+++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp
@@ -0,0 +1,35 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @file cpu_streams_calculation.hpp
+ * @brief A header file for CPU streams calculation implementation.
+ */
+
+#pragma once
+
+#include <vector>
+
+namespace ov {
+namespace intel_cpu {
+/**
+ * @brief      Generate streams information table according to processors type table
+ * @param[in]  input_streams is target streams set by user via NUM_STREAMS or hints.
+ *               - input "0" means the function generates the optimal number of streams
+ *               - LATENCY hint equals 1 stream.
+ * @param[in]  input_threads is max threads set by user via INFERENCE_NUM_THREADS.
+ *               - input "0" means the function can use all resources in proc_type_table
+ *               - When the user limits max threads, streams in output cannot be more than max threads
+ * @param[in]  model_prefer_threads is preferred threads per stream based on model generated in previous function
+ *               - input "0" means the function generates the optimal threads per stream based on platform
+ * @param[in]  proc_type_table candidate processors available at this time
+ *               - candidate processors have been updated based on properties like "Ecore only" in previous function
+ * @return     summary table of streams info will be used by StreamsExecutor
+ */
+std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
+                                                     const int input_threads,
+                                                     const int model_prefer_threads,
+                                                     const std::vector<std::vector<int>> proc_type_table);
+}  // namespace intel_cpu
+}  // namespace ov
--- a/src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp
+++ b/src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp
@@ -0,0 +1,533 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <ie_system_conf.h>
+
+#include <common_test_utils/test_common.hpp>
+
+#include "cpu_streams_calculation.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+using namespace ov;
+
+namespace {
+
+// One parameter set for get_streams_info_table() together with the table it is expected to return.
+struct StreamsCalculationTestCase {
+    int input_streams;                                // passed as input_streams
+    int input_threads;                                // passed as input_threads
+    int model_prefer_threads;                         // passed as model_prefer_threads
+    std::vector<std::vector<int>> proc_type_table;    // passed as proc_type_table
+    std::vector<std::vector<int>> stream_info_table;  // expected return value
+};
+
+class StreamsCalculationTests : public CommonTestUtils::TestsCommon,
+                                public testing::WithParamInterface<std::tuple<StreamsCalculationTestCase>> {
+public:
+    // Feeds the inputs of the current parameter to get_streams_info_table() and asserts that the returned table
+    // equals the expected one stored in the parameter.
+    void SetUp() override {
+        const StreamsCalculationTestCase& test_case = std::get<0>(GetParam());
+        const auto& expected_table = test_case.stream_info_table;
+
+        const std::vector<std::vector<int>> actual_table =
+            ov::intel_cpu::get_streams_info_table(test_case.input_streams,
+                                                  test_case.input_threads,
+                                                  test_case.model_prefer_threads,
+                                                  test_case.proc_type_table);
+
+        ASSERT_EQ(expected_table, actual_table);
+    }
+};
+
+// Cases for a proc_type_table with three rows whose first row is {208, 104, 0, 104}.
+// Initializer order: input_streams, input_threads, model_prefer_threads, proc_type_table, expected table.
+// NOTE(review): presumably row 0 is the platform total and the following rows are the two sockets - confirm
+// against the producer of proc_type_table.
+StreamsCalculationTestCase _2sockets_104cores_latency_1 = {
+    1,
+    0,
+    0,
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{1, MAIN_CORE_PROC, 104}},
+};
+
+StreamsCalculationTestCase _2sockets_104cores_latency_2 = {
+    1,
+    20,
+    0,
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{1, MAIN_CORE_PROC, 20}},
+};
+
+StreamsCalculationTestCase _2sockets_104cores_tput_1 = {
+    0,
+    0,
+    0,
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{26, MAIN_CORE_PROC, 4}},
+};
+
+StreamsCalculationTestCase _2sockets_104cores_tput_2 = {
+    2,
+    0,
+    0,
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{2, MAIN_CORE_PROC, 52}},
+};
+
+StreamsCalculationTestCase _2sockets_104cores_tput_3 = {
+    0,
+    20,
+    0,
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{5, MAIN_CORE_PROC, 4}},
+};
+
+StreamsCalculationTestCase _2sockets_104cores_tput_4 = {
+    2,
+    20,
+    0,
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{2, MAIN_CORE_PROC, 10}},
+};
+
+StreamsCalculationTestCase _2sockets_104cores_tput_5 = {
+    0,
+    0,
+    1,
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{104, MAIN_CORE_PROC, 1}},
+};
+
+StreamsCalculationTestCase _2sockets_104cores_tput_6 = {
+    0,
+    0,
+    2,
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{52, MAIN_CORE_PROC, 2}},
+};
+
+StreamsCalculationTestCase _2sockets_104cores_tput_7 = {
+    0,
+    0,
+    8,
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{13, MAIN_CORE_PROC, 8}},
+};
+
+StreamsCalculationTestCase _2sockets_104cores_tput_8 = {
+    0,
+    40,
+    8,
+    {{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
+    {{5, MAIN_CORE_PROC, 8}},
+};
+
+// Cases for a proc_type_table with three rows whose first row is {48, 48, 0, 0}.
+// Initializer order: input_streams, input_threads, model_prefer_threads, proc_type_table, expected table.
+StreamsCalculationTestCase _2sockets_48cores_latency_1 = {
+    1,
+    0,
+    0,
+    {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
+    {{1, MAIN_CORE_PROC, 48}},
+};
+
+StreamsCalculationTestCase _2sockets_48cores_tput_1 = {
+    0,
+    0,
+    0,
+    {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
+    {{12, MAIN_CORE_PROC, 4}},
+};
+
+StreamsCalculationTestCase _2sockets_48cores_tput_2 = {
+    100,
+    0,
+    0,
+    {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
+    {{48, MAIN_CORE_PROC, 1}},
+};
+
+StreamsCalculationTestCase _2sockets_48cores_tput_3 = {
+    0,
+    100,
+    0,
+    {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
+    {{12, MAIN_CORE_PROC, 4}},
+};
+
+StreamsCalculationTestCase _2sockets_48cores_tput_4 = {
+    2,
+    20,
+    1,
+    {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
+    {{2, MAIN_CORE_PROC, 10}},
+};
+
+// Cases for the single-row proc_type_table {20, 6, 8, 6}.
+// Initializer order: input_streams, input_threads, model_prefer_threads, proc_type_table, expected table.
+StreamsCalculationTestCase _1sockets_14cores_latency_1 = {
+    1,
+    0,
+    0,
+    {{20, 6, 8, 6}},
+    {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}},
+};
+
+StreamsCalculationTestCase _1sockets_14cores_latency_2 = {
+    1,
+    10,
+    0,
+    {{20, 6, 8, 6}},
+    {{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 4}},
+};
+
+StreamsCalculationTestCase _1sockets_14cores_latency_3 = {
+    1,
+    0,
+    6,
+    {{20, 6, 8, 6}},
+    {{1, MAIN_CORE_PROC, 6}},
+};
+
+StreamsCalculationTestCase _1sockets_14cores_latency_4 = {
+    1,
+    0,
+    14,
+    {{20, 6, 8, 6}},
+    {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}},
+};
+
+StreamsCalculationTestCase _1sockets_14cores_tput_1 = {
+    0,
+    0,
+    0,
+    {{20, 6, 8, 6}},
+    {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}},
+};
+
+StreamsCalculationTestCase _1sockets_14cores_tput_2 = {
+    2,
+    0,
+    0,
+    {{20, 6, 8, 6}},
+    {{1, MAIN_CORE_PROC, 6}, {1, EFFICIENT_CORE_PROC, 6}},
+};
+
+StreamsCalculationTestCase _1sockets_14cores_tput_3 = {
+    4,
+    0,
+    0,
+    {{20, 6, 8, 6}},
+    {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}},
+};
+
+StreamsCalculationTestCase _1sockets_14cores_tput_4 = {
+    0,
+    12,
+    0,
+    {{20, 6, 8, 6}},
+    {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}},
+};
+
+StreamsCalculationTestCase _1sockets_14cores_tput_5 = {
+    0,
+    0,
+    1,
+    {{20, 6, 8, 6}},
+    {{6, MAIN_CORE_PROC, 1}, {8, EFFICIENT_CORE_PROC, 1}, {6, HYPER_THREADING_PROC, 1}},
+};
+
+StreamsCalculationTestCase _1sockets_14cores_tput_6 = {
+    0,
+    0,
+    2,
+    {{20, 6, 8, 6}},
+    {{3, MAIN_CORE_PROC, 2}, {4, EFFICIENT_CORE_PROC, 2}, {3, HYPER_THREADING_PROC, 2}},
+};
+
+StreamsCalculationTestCase _1sockets_14cores_tput_7 = {
+    100,
+    0,
+    0,
+    {{20, 6, 8, 6}},
+    {{6, MAIN_CORE_PROC, 1}, {8, EFFICIENT_CORE_PROC, 1}, {6, HYPER_THREADING_PROC, 1}},
+};
+
+StreamsCalculationTestCase _1sockets_14cores_tput_8 = {
+    0,
+    100,
+    0,
+    {{20, 6, 8, 6}},
+    {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}},
+};
+
+// Cases for the single-row proc_type_table {12, 2, 8, 2}.
+// Initializer order: input_streams, input_threads, model_prefer_threads, proc_type_table, expected table.
+StreamsCalculationTestCase _1sockets_10cores_latency_1 = {
+    1,
+    0,
+    0,
+    {{12, 2, 8, 2}},
+    {{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}},
+};
+
+StreamsCalculationTestCase _1sockets_10cores_latency_2 = {
+    1,
+    8,
+    0,
+    {{12, 2, 8, 2}},
+    {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 6}},
+};
+
+StreamsCalculationTestCase _1sockets_10cores_latency_3 = {
+    1,
+    0,
+    2,
+    {{12, 2, 8, 2}},
+    {{1, MAIN_CORE_PROC, 2}},
+};
+
+StreamsCalculationTestCase _1sockets_10cores_latency_4 = {
+    1,
+    0,
+    10,
+    {{12, 2, 8, 2}},
+    {{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}},
+};
+
+StreamsCalculationTestCase _1sockets_10cores_tput_1 = {
+    0,
+    0,
+    0,
+    {{12, 2, 8, 2}},
+    {{1, MAIN_CORE_PROC, 2}, {4, EFFICIENT_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 2}},
+};
+
+StreamsCalculationTestCase _1sockets_10cores_tput_2 = {
+    2,
+    0,
+    0,
+    {{12, 2, 8, 2}},
+    {{1, MAIN_CORE_PROC, 2}, {1, EFFICIENT_CORE_PROC, 2}},
+};
+
+StreamsCalculationTestCase _1sockets_10cores_tput_3 = {
+    4,
+    0,
+    0,
+    {{12, 2, 8, 2}},
+    {{1, MAIN_CORE_PROC, 2}, {3, EFFICIENT_CORE_PROC, 2}},
+};
+
+StreamsCalculationTestCase _1sockets_10cores_tput_4 = {
+    0,
+    6,
+    0,
+    {{12, 2, 8, 2}},
+    {{1, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}},
+};
+
+StreamsCalculationTestCase _1sockets_10cores_tput_5 = {
+    0,
+    0,
+    1,
+    {{12, 2, 8, 2}},
+    {{2, MAIN_CORE_PROC, 1}, {8, EFFICIENT_CORE_PROC, 1}, {2, HYPER_THREADING_PROC, 1}},
+};
+
+StreamsCalculationTestCase _1sockets_10cores_tput_6 = {
+    0,
+    0,
+    2,
+    {{12, 2, 8, 2}},
+    {{1, MAIN_CORE_PROC, 2}, {4, EFFICIENT_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 2}},
+};
+
+// Cases for the single-row proc_type_table {12, 4, 4, 4}.
+// Initializer order: input_streams, input_threads, model_prefer_threads, proc_type_table, expected table.
+StreamsCalculationTestCase _1sockets_8cores_latency_1 = {
+    1,
+    0,
+    0,
+    {{12, 4, 4, 4}},
+    {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}},
+};
+
+StreamsCalculationTestCase _1sockets_8cores_latency_2 = {
+    1,
+    100,
+    0,
+    {{12, 4, 4, 4}},
+    {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}},
+};
+
+StreamsCalculationTestCase _1sockets_8cores_latency_3 = {
+    1,
+    0,
+    4,
+    {{12, 4, 4, 4}},
+    {{1, MAIN_CORE_PROC, 4}},
+};
+
+StreamsCalculationTestCase _1sockets_8cores_latency_4 = {
+    1,
+    0,
+    8,
+    {{12, 4, 4, 4}},
+    {{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}},
+};
+
+StreamsCalculationTestCase _1sockets_8cores_tput_1 = {
+    0,
+    0,
+    0,
+    {{12, 4, 4, 4}},
+    {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}, {2, HYPER_THREADING_PROC, 2}},
+};
+
+StreamsCalculationTestCase _1sockets_8cores_tput_2 = {
+    2,
+    0,
+    0,
+    {{12, 4, 4, 4}},
+    {{1, MAIN_CORE_PROC, 4}, {1, EFFICIENT_CORE_PROC, 4}},
+};
+
+StreamsCalculationTestCase _1sockets_8cores_tput_3 = {
+    4,
+    0,
+    0,
+    {{12, 4, 4, 4}},
+    {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}},
+};
+
+StreamsCalculationTestCase _1sockets_8cores_tput_4 = {
+    6,
+    0,
+    0,
+    {{12, 4, 4, 4}},
+    {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}, {2, HYPER_THREADING_PROC, 2}},
+};
+
+StreamsCalculationTestCase _1sockets_8cores_tput_5 = {
+    0,
+    6,
+    0,
+    {{12, 4, 4, 4}},
+    {{2, MAIN_CORE_PROC, 2}, {1, EFFICIENT_CORE_PROC, 2}},
+};
+
+StreamsCalculationTestCase _1sockets_8cores_tput_6 = {
+    0,
+    8,
+    0,
+    {{12, 4, 4, 4}},
+    {{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}},
+};
+
+StreamsCalculationTestCase _1sockets_8cores_tput_7 = {
+    0,
+    0,
+    1,
+    {{12, 4, 4, 4}},
+    {{4, MAIN_CORE_PROC, 1}, {4, EFFICIENT_CORE_PROC, 1}, {4, HYPER_THREADING_PROC, 1}},
+};
+
+// Cases for the single-row proc_type_table {12, 6, 0, 6}, i.e. a table without efficient cores.
+// Initializer order: input_streams, input_threads, model_prefer_threads, proc_type_table, expected table.
+StreamsCalculationTestCase _1sockets_6cores_latency_1 = {
+    1,
+    0,
+    0,
+    {{12, 6, 0, 6}},
+    {{1, MAIN_CORE_PROC, 6}},
+};
+
+StreamsCalculationTestCase _1sockets_6cores_latency_2 = {
+    1,
+    100,
+    0,
+    {{12, 6, 0, 6}},
+    {{1, MAIN_CORE_PROC, 6}},
+};
+
+StreamsCalculationTestCase _1sockets_6cores_tput_1 = {
+    0,
+    0,
+    0,
+    {{12, 6, 0, 6}},
+    {{2, MAIN_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}},
+};
+
+StreamsCalculationTestCase _1sockets_6cores_tput_2 = {
+    2,
+    0,
+    0,
+    {{12, 6, 0, 6}},
+    {{1, MAIN_CORE_PROC, 6}, {1, HYPER_THREADING_PROC, 6}},
+};
+
+StreamsCalculationTestCase _1sockets_6cores_tput_3 = {
+    0,
+    8,
+    0,
+    {{12, 6, 0, 6}},
+    {{3, MAIN_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 2}},
+};
+
+StreamsCalculationTestCase _1sockets_6cores_tput_4 = {
+    0,
+    0,
+    1,
+    {{12, 6, 0, 6}},
+    {{6, MAIN_CORE_PROC, 1}, {6, HYPER_THREADING_PROC, 1}},
+};
+
+// The body is intentionally empty: the comparison is done in StreamsCalculationTests::SetUp().
+TEST_P(StreamsCalculationTests, StreamsCalculation) {}
+
+// Runs StreamsCalculationTests once for every test case defined in this file. Each case must be listed exactly
+// once; a case that is defined but not listed here is silently never executed.
+INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
+                         StreamsCalculationTests,
+                         testing::Values(_2sockets_104cores_latency_1,
+                                         _2sockets_104cores_latency_2,
+                                         _2sockets_104cores_tput_1,
+                                         _2sockets_104cores_tput_2,
+                                         _2sockets_104cores_tput_3,
+                                         _2sockets_104cores_tput_4,
+                                         _2sockets_104cores_tput_5,
+                                         _2sockets_104cores_tput_6,
+                                         _2sockets_104cores_tput_7,
+                                         _2sockets_104cores_tput_8,
+                                         _2sockets_48cores_latency_1,
+                                         _2sockets_48cores_tput_1,
+                                         _2sockets_48cores_tput_2,
+                                         _2sockets_48cores_tput_3,
+                                         _2sockets_48cores_tput_4,
+                                         _1sockets_14cores_latency_1,
+                                         _1sockets_14cores_latency_2,
+                                         _1sockets_14cores_latency_3,
+                                         _1sockets_14cores_latency_4,
+                                         _1sockets_14cores_tput_1,
+                                         _1sockets_14cores_tput_2,
+                                         _1sockets_14cores_tput_3,
+                                         _1sockets_14cores_tput_4,
+                                         _1sockets_14cores_tput_5,
+                                         _1sockets_14cores_tput_6,
+                                         _1sockets_14cores_tput_7,
+                                         _1sockets_14cores_tput_8,
+                                         _1sockets_10cores_latency_1,
+                                         _1sockets_10cores_latency_2,
+                                         _1sockets_10cores_latency_3,
+                                         _1sockets_10cores_latency_4,
+                                         _1sockets_10cores_tput_1,
+                                         _1sockets_10cores_tput_2,
+                                         _1sockets_10cores_tput_3,
+                                         _1sockets_10cores_tput_4,
+                                         _1sockets_10cores_tput_5,
+                                         _1sockets_10cores_tput_6,
+                                         _1sockets_8cores_latency_1,
+                                         _1sockets_8cores_latency_2,
+                                         _1sockets_8cores_latency_3,
+                                         _1sockets_8cores_latency_4,
+                                         _1sockets_8cores_tput_1,
+                                         _1sockets_8cores_tput_2,
+                                         _1sockets_8cores_tput_3,
+                                         _1sockets_8cores_tput_4,
+                                         _1sockets_8cores_tput_5,
+                                         _1sockets_8cores_tput_6,
+                                         _1sockets_8cores_tput_7,
+                                         _1sockets_6cores_latency_1,
+                                         _1sockets_6cores_latency_2,
+                                         _1sockets_6cores_tput_1,
+                                         _1sockets_6cores_tput_2,
+                                         _1sockets_6cores_tput_3,
+                                         _1sockets_6cores_tput_4));
+
+}  // namespace