Create streams info table based on processor type table (#15571)
* enable streams info table based on CPU mapping * add detail processor info for mix stream * fix code style issue * fix typo * fix code style issue for Android build * update description of streams info table * move streams info related function to new file * remove duplicated definition * add description for parameters of get_streams_info_table() * update test case file * fix windows build issue * fix windows build issue * fix windows build issue * fix typo * update latency mode for hybrid platform * update limit threads for latency * update latency mode for 2 sockets platform
This commit is contained in:
40
src/inference/dev_api/threading/ie_cpu_streams_info.hpp
Normal file
40
src/inference/dev_api/threading/ie_cpu_streams_info.hpp
Normal file
@@ -0,0 +1,40 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @file ie_cpu_streams_info.hpp
|
||||
* @brief A header file for Inference Engine CPU streams info table implementation.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
/**
 * @enum column_of_cpu_streams_info_table
 * @brief Column indices of one row of the CPU streams information table.
 *
 * Each row of the table is a vector of CPU_STREAMS_TABLE_SIZE integers. Two examples of the
 * streams info table follow.
 *
 * 1. 8 streams on a hybrid platform, 4 threads per stream (TPS):
 *
 *      NUMBER_OF_STREAMS | PROC_TYPE | THREADS_PER_STREAM
 *              2               1               4          // 2 streams (4 TPS) on physical cores of Intel Performance-cores
 *              4               2               4          // 4 streams (4 TPS) on Intel Efficient-cores
 *              2               3               4          // 2 streams (4 TPS) on logical cores of Intel Performance-cores
 *
 * 2. 1 stream (10 TPS) on a hybrid platform, using 2 threads on physical cores and 8 threads on Efficient-cores:
 *
 *      NUMBER_OF_STREAMS | PROC_TYPE | THREADS_PER_STREAM
 *              1               0              10          // 1 stream (10 TPS) spanning multiple processor types
 *              0               1               2          // 2 of its threads run on physical cores of Intel Performance-cores
 *              0               2               8          // 8 of its threads run on Intel Efficient-cores
 */
enum column_of_cpu_streams_info_table {
    NUMBER_OF_STREAMS = 0,      //!< Number of streams on a specific CPU core type
    PROC_TYPE = 1,              //!< Core type the streams of this row run on
    THREADS_PER_STREAM = 2,     //!< Number of threads used by each stream of this row
    CPU_STREAMS_TABLE_SIZE = 3  //!< Number of columns in one row of the streams info table
};
|
||||
|
||||
} // namespace InferenceEngine
|
||||
164
src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
Normal file
164
src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
Normal file
@@ -0,0 +1,164 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "cpu_streams_calculation.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
#include <numeric>
|
||||
|
||||
#include "ie_system_conf.h"
|
||||
#include "threading/ie_cpu_streams_info.hpp"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
|
||||
std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
|
||||
const int input_threads,
|
||||
const int model_prefer_threads,
|
||||
const std::vector<std::vector<int>> proc_type_table) {
|
||||
std::vector<int> stream_info(CPU_STREAMS_TABLE_SIZE);
|
||||
std::vector<std::vector<int>> streams_info_table;
|
||||
|
||||
if (1 == input_streams) {
|
||||
stream_info[NUMBER_OF_STREAMS] = 1;
|
||||
int limit_threads = (input_threads == 0) ? model_prefer_threads : input_threads;
|
||||
if ((proc_type_table[0][EFFICIENT_CORE_PROC] > 0) &&
|
||||
((limit_threads == 0) || (limit_threads > proc_type_table[0][MAIN_CORE_PROC]))) {
|
||||
stream_info[PROC_TYPE] = ALL_PROC;
|
||||
int n_threads = std::accumulate(proc_type_table[0].begin() + MAIN_CORE_PROC,
|
||||
proc_type_table[0].begin() + HYPER_THREADING_PROC,
|
||||
0);
|
||||
stream_info[THREADS_PER_STREAM] = (limit_threads == 0) ? n_threads : std::min(n_threads, limit_threads);
|
||||
streams_info_table.push_back(stream_info);
|
||||
stream_info[NUMBER_OF_STREAMS] = 0;
|
||||
n_threads = stream_info[THREADS_PER_STREAM];
|
||||
for (int n = MAIN_CORE_PROC; n < HYPER_THREADING_PROC; n++) {
|
||||
if (0 != proc_type_table[0][n]) {
|
||||
stream_info[PROC_TYPE] = n;
|
||||
if (n_threads <= proc_type_table[0][n]) {
|
||||
stream_info[THREADS_PER_STREAM] = n_threads;
|
||||
streams_info_table.push_back(stream_info);
|
||||
break;
|
||||
} else {
|
||||
stream_info[THREADS_PER_STREAM] = proc_type_table[0][n];
|
||||
streams_info_table.push_back(stream_info);
|
||||
n_threads -= proc_type_table[0][n];
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
stream_info[PROC_TYPE] = MAIN_CORE_PROC;
|
||||
stream_info[THREADS_PER_STREAM] = (limit_threads == 0)
|
||||
? proc_type_table[0][MAIN_CORE_PROC]
|
||||
: std::min(proc_type_table[0][MAIN_CORE_PROC], limit_threads);
|
||||
streams_info_table.push_back(stream_info);
|
||||
}
|
||||
return streams_info_table;
|
||||
|
||||
} else {
|
||||
int n_streams = 0;
|
||||
int n_threads = 0;
|
||||
int n_threads_per_stream = 0;
|
||||
|
||||
if (proc_type_table.size() == 1) {
|
||||
n_threads = (0 == input_threads) ? proc_type_table[0][ALL_PROC]
|
||||
: std::min(proc_type_table[0][ALL_PROC], input_threads);
|
||||
} else {
|
||||
n_threads = (0 == input_threads) ? proc_type_table[0][MAIN_CORE_PROC]
|
||||
: std::min(proc_type_table[0][MAIN_CORE_PROC], input_threads);
|
||||
}
|
||||
|
||||
if (0 != input_streams) {
|
||||
if (input_streams >= n_threads) {
|
||||
n_streams = n_threads;
|
||||
n_threads_per_stream = 1;
|
||||
} else {
|
||||
n_streams = input_streams;
|
||||
n_threads_per_stream =
|
||||
std::min(std::max(1, n_threads / input_streams), proc_type_table[0][MAIN_CORE_PROC]);
|
||||
if (proc_type_table.size() == 1) {
|
||||
if ((n_threads_per_stream > proc_type_table[0][MAIN_CORE_PROC]) &&
|
||||
(n_threads_per_stream < proc_type_table[0][MAIN_CORE_PROC] * 2)) {
|
||||
n_threads_per_stream = proc_type_table[0][MAIN_CORE_PROC];
|
||||
} else if (n_threads_per_stream < proc_type_table[0][MAIN_CORE_PROC]) {
|
||||
n_threads_per_stream = int(
|
||||
proc_type_table[0][MAIN_CORE_PROC] /
|
||||
((proc_type_table[0][MAIN_CORE_PROC] + n_threads_per_stream - 1) / n_threads_per_stream));
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (0 == model_prefer_threads) {
|
||||
int n_proc = std::min(n_threads, proc_type_table[0][MAIN_CORE_PROC]);
|
||||
if (0 == n_proc % 4) {
|
||||
n_threads_per_stream = 4;
|
||||
} else if (0 == n_proc % 5) {
|
||||
n_threads_per_stream = 5;
|
||||
} else if (0 == n_proc % 3) {
|
||||
n_threads_per_stream = 3;
|
||||
} else if (proc_type_table.size() == 1) {
|
||||
n_threads_per_stream = n_proc;
|
||||
} else {
|
||||
n_threads_per_stream = (n_proc > 16) ? 4 : std::max(1, static_cast<int>(n_proc / 4));
|
||||
}
|
||||
n_streams = static_cast<int>(n_threads / n_threads_per_stream);
|
||||
|
||||
while (n_streams < n_threads_per_stream) {
|
||||
if (1 == n_threads_per_stream) {
|
||||
break;
|
||||
} else {
|
||||
n_threads_per_stream = static_cast<int>((n_threads_per_stream * 2 - 1) / 2);
|
||||
n_threads_per_stream = static_cast<int>(
|
||||
proc_type_table[0][MAIN_CORE_PROC] /
|
||||
((proc_type_table[0][MAIN_CORE_PROC] + n_threads_per_stream - 1) / n_threads_per_stream));
|
||||
n_streams = static_cast<int>(n_threads / n_threads_per_stream);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
n_streams = ((n_threads + model_prefer_threads - 1) / model_prefer_threads);
|
||||
n_threads_per_stream = static_cast<int>(n_threads / n_streams);
|
||||
}
|
||||
}
|
||||
|
||||
stream_info[THREADS_PER_STREAM] = n_threads_per_stream;
|
||||
|
||||
if (proc_type_table.size() == 1) {
|
||||
while (1) {
|
||||
for (int n = MAIN_CORE_PROC; n < PROC_TYPE_TABLE_SIZE; n++) {
|
||||
if (0 != proc_type_table[0][n]) {
|
||||
stream_info[PROC_TYPE] = n;
|
||||
stream_info[NUMBER_OF_STREAMS] =
|
||||
static_cast<int>(proc_type_table[0][n] / stream_info[THREADS_PER_STREAM]);
|
||||
if (n_streams <= stream_info[NUMBER_OF_STREAMS]) {
|
||||
stream_info[NUMBER_OF_STREAMS] = n_streams;
|
||||
streams_info_table.push_back(stream_info);
|
||||
return streams_info_table;
|
||||
} else {
|
||||
streams_info_table.push_back(stream_info);
|
||||
n_streams -= stream_info[NUMBER_OF_STREAMS];
|
||||
}
|
||||
}
|
||||
}
|
||||
if (1 == stream_info[THREADS_PER_STREAM]) {
|
||||
return streams_info_table;
|
||||
} else {
|
||||
stream_info[THREADS_PER_STREAM] -= 1;
|
||||
std::vector<std::vector<int>>().swap(streams_info_table);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
stream_info[NUMBER_OF_STREAMS] = n_streams;
|
||||
stream_info[PROC_TYPE] = MAIN_CORE_PROC;
|
||||
stream_info[THREADS_PER_STREAM] = n_threads_per_stream;
|
||||
streams_info_table.push_back(stream_info);
|
||||
return streams_info_table;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
||||
35
src/plugins/intel_cpu/src/cpu_streams_calculation.hpp
Normal file
35
src/plugins/intel_cpu/src/cpu_streams_calculation.hpp
Normal file
@@ -0,0 +1,35 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @file cpu_streams_calculation.hpp
|
||||
* @brief A header file for CPU streams calculation implementation.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
/**
|
||||
* @brief Generate streams information table according to processors type table
|
||||
* @param[in] input_streams is target streams set by user via NUM_STREAMS or hints.
|
||||
* - input "0" means the function generates the optimal number of streams
|
||||
* - LATENCY hint equals 1 stream.
|
||||
* @param[in] input_threads is max threads set by user via INFERENCE_NUM_THREADS.
|
||||
* - input "0" means the function can use all resources in proc_type_table
|
||||
* - When user limit max threads, streams in output cannot be more than max threads
|
||||
* @param[in] model_prefer_threads is preferred threads per stream based on model generated in previous function
|
||||
* - input "0" means the function generates the optimal threads per stream based on the platform
|
||||
* @param[in] proc_type_table candidate processors available at this time
|
||||
* - candidate processors have been updated based on properties like "Ecore only" in previous function
|
||||
* @return summary table of streams info will be used by StreamsExecutor
|
||||
*/
|
||||
std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
|
||||
const int input_threads,
|
||||
const int model_prefer_threads,
|
||||
const std::vector<std::vector<int>> proc_type_table);
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
||||
533
src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp
Normal file
533
src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp
Normal file
@@ -0,0 +1,533 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <ie_system_conf.h>
|
||||
|
||||
#include <common_test_utils/test_common.hpp>
|
||||
|
||||
#include "cpu_streams_calculation.hpp"
|
||||
|
||||
using namespace testing;
|
||||
using namespace InferenceEngine;
|
||||
using namespace ov;
|
||||
|
||||
namespace {
|
||||
|
||||
// One parameter set for get_streams_info_table() together with the table it is expected to return.
// Field order matters: the fixtures below rely on positional aggregate initialization.
struct StreamsCalculationTestCase {
    int input_streams;                                // passed as the input_streams argument
    int input_threads;                                // passed as the input_threads argument
    int model_prefer_threads;                         // passed as the model_prefer_threads argument
    std::vector<std::vector<int>> proc_type_table;    // passed as the proc_type_table argument
    std::vector<std::vector<int>> stream_info_table;  // expected return value
};
|
||||
|
||||
class StreamsCalculationTests : public CommonTestUtils::TestsCommon,
|
||||
public testing::WithParamInterface<std::tuple<StreamsCalculationTestCase>> {
|
||||
public:
|
||||
void SetUp() override {
|
||||
const auto& test_data = std::get<0>(GetParam());
|
||||
|
||||
std::vector<std::vector<int>> test_stream_info_table =
|
||||
ov::intel_cpu::get_streams_info_table(test_data.input_streams,
|
||||
test_data.input_threads,
|
||||
test_data.model_prefer_threads,
|
||||
test_data.proc_type_table);
|
||||
|
||||
ASSERT_EQ(test_data.stream_info_table, test_stream_info_table);
|
||||
}
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _2sockets_104cores_latency_1 = {
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
|
||||
{{1, MAIN_CORE_PROC, 104}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _2sockets_104cores_latency_2 = {
|
||||
1,
|
||||
20,
|
||||
0,
|
||||
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
|
||||
{{1, MAIN_CORE_PROC, 20}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _2sockets_104cores_tput_1 = {
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
|
||||
{{26, MAIN_CORE_PROC, 4}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _2sockets_104cores_tput_2 = {
|
||||
2,
|
||||
0,
|
||||
0,
|
||||
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
|
||||
{{2, MAIN_CORE_PROC, 52}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _2sockets_104cores_tput_3 = {
|
||||
0,
|
||||
20,
|
||||
0,
|
||||
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
|
||||
{{5, MAIN_CORE_PROC, 4}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _2sockets_104cores_tput_4 = {
|
||||
2,
|
||||
20,
|
||||
0,
|
||||
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
|
||||
{{2, MAIN_CORE_PROC, 10}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _2sockets_104cores_tput_5 = {
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
|
||||
{{104, MAIN_CORE_PROC, 1}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _2sockets_104cores_tput_6 = {
|
||||
0,
|
||||
0,
|
||||
2,
|
||||
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
|
||||
{{52, MAIN_CORE_PROC, 2}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _2sockets_104cores_tput_7 = {
|
||||
0,
|
||||
0,
|
||||
8,
|
||||
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
|
||||
{{13, MAIN_CORE_PROC, 8}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _2sockets_104cores_tput_8 = {
|
||||
0,
|
||||
40,
|
||||
8,
|
||||
{{208, 104, 0, 104}, {104, 52, 0, 52}, {104, 52, 0, 52}},
|
||||
{{5, MAIN_CORE_PROC, 8}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _2sockets_48cores_latency_1 = {
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
{{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
|
||||
{{1, MAIN_CORE_PROC, 48}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _2sockets_48cores_tput_1 = {
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
{{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
|
||||
{{12, MAIN_CORE_PROC, 4}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _2sockets_48cores_tput_2 = {
|
||||
100,
|
||||
0,
|
||||
0,
|
||||
{{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
|
||||
{{48, MAIN_CORE_PROC, 1}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _2sockets_48cores_tput_3 = {
|
||||
0,
|
||||
100,
|
||||
0,
|
||||
{{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
|
||||
{{12, MAIN_CORE_PROC, 4}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _2sockets_48cores_tput_4 = {
|
||||
2,
|
||||
20,
|
||||
1,
|
||||
{{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}},
|
||||
{{2, MAIN_CORE_PROC, 10}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_14cores_latency_1 = {
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
{{20, 6, 8, 6}},
|
||||
{{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_14cores_latency_2 = {
|
||||
1,
|
||||
10,
|
||||
0,
|
||||
{{20, 6, 8, 6}},
|
||||
{{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 4}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_14cores_latency_3 = {
|
||||
1,
|
||||
0,
|
||||
6,
|
||||
{{20, 6, 8, 6}},
|
||||
{{1, MAIN_CORE_PROC, 6}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_14cores_latency_4 = {
|
||||
1,
|
||||
0,
|
||||
14,
|
||||
{{20, 6, 8, 6}},
|
||||
{{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_14cores_tput_1 = {
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
{{20, 6, 8, 6}},
|
||||
{{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_14cores_tput_2 = {
|
||||
2,
|
||||
0,
|
||||
0,
|
||||
{{20, 6, 8, 6}},
|
||||
{{1, MAIN_CORE_PROC, 6}, {1, EFFICIENT_CORE_PROC, 6}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_14cores_tput_3 = {
|
||||
4,
|
||||
0,
|
||||
0,
|
||||
{{20, 6, 8, 6}},
|
||||
{{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_14cores_tput_4 = {
|
||||
0,
|
||||
12,
|
||||
0,
|
||||
{{20, 6, 8, 6}},
|
||||
{{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_14cores_tput_5 = {
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
{{20, 6, 8, 6}},
|
||||
{{6, MAIN_CORE_PROC, 1}, {8, EFFICIENT_CORE_PROC, 1}, {6, HYPER_THREADING_PROC, 1}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_14cores_tput_6 = {
|
||||
0,
|
||||
0,
|
||||
2,
|
||||
{{20, 6, 8, 6}},
|
||||
{{3, MAIN_CORE_PROC, 2}, {4, EFFICIENT_CORE_PROC, 2}, {3, HYPER_THREADING_PROC, 2}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_14cores_tput_7 = {
|
||||
100,
|
||||
0,
|
||||
0,
|
||||
{{20, 6, 8, 6}},
|
||||
{{6, MAIN_CORE_PROC, 1}, {8, EFFICIENT_CORE_PROC, 1}, {6, HYPER_THREADING_PROC, 1}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_14cores_tput_8 = {
|
||||
0,
|
||||
100,
|
||||
0,
|
||||
{{20, 6, 8, 6}},
|
||||
{{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_10cores_latency_1 = {
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
{{12, 2, 8, 2}},
|
||||
{{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_10cores_latency_2 = {
|
||||
1,
|
||||
8,
|
||||
0,
|
||||
{{12, 2, 8, 2}},
|
||||
{{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 6}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_10cores_latency_3 = {
|
||||
1,
|
||||
0,
|
||||
2,
|
||||
{{12, 2, 8, 2}},
|
||||
{{1, MAIN_CORE_PROC, 2}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_10cores_latency_4 = {
|
||||
1,
|
||||
0,
|
||||
10,
|
||||
{{12, 2, 8, 2}},
|
||||
{{1, ALL_PROC, 10}, {0, MAIN_CORE_PROC, 2}, {0, EFFICIENT_CORE_PROC, 8}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_10cores_tput_1 = {
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
{{12, 2, 8, 2}},
|
||||
{{1, MAIN_CORE_PROC, 2}, {4, EFFICIENT_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 2}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_10cores_tput_2 = {
|
||||
2,
|
||||
0,
|
||||
0,
|
||||
{{12, 2, 8, 2}},
|
||||
{{1, MAIN_CORE_PROC, 2}, {1, EFFICIENT_CORE_PROC, 2}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_10cores_tput_3 = {
|
||||
4,
|
||||
0,
|
||||
0,
|
||||
{{12, 2, 8, 2}},
|
||||
{{1, MAIN_CORE_PROC, 2}, {3, EFFICIENT_CORE_PROC, 2}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_10cores_tput_4 = {
|
||||
0,
|
||||
6,
|
||||
0,
|
||||
{{12, 2, 8, 2}},
|
||||
{{1, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_10cores_tput_5 = {
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
{{12, 2, 8, 2}},
|
||||
{{2, MAIN_CORE_PROC, 1}, {8, EFFICIENT_CORE_PROC, 1}, {2, HYPER_THREADING_PROC, 1}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_10cores_tput_6 = {
|
||||
0,
|
||||
0,
|
||||
2,
|
||||
{{12, 2, 8, 2}},
|
||||
{{1, MAIN_CORE_PROC, 2}, {4, EFFICIENT_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 2}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_8cores_latency_1 = {
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
{{12, 4, 4, 4}},
|
||||
{{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_8cores_latency_2 = {
|
||||
1,
|
||||
100,
|
||||
0,
|
||||
{{12, 4, 4, 4}},
|
||||
{{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_8cores_latency_3 = {
|
||||
1,
|
||||
0,
|
||||
4,
|
||||
{{12, 4, 4, 4}},
|
||||
{{1, MAIN_CORE_PROC, 4}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_8cores_latency_4 = {
|
||||
1,
|
||||
0,
|
||||
8,
|
||||
{{12, 4, 4, 4}},
|
||||
{{1, ALL_PROC, 8}, {0, MAIN_CORE_PROC, 4}, {0, EFFICIENT_CORE_PROC, 4}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_8cores_tput_1 = {
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
{{12, 4, 4, 4}},
|
||||
{{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}, {2, HYPER_THREADING_PROC, 2}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_8cores_tput_2 = {
|
||||
2,
|
||||
0,
|
||||
0,
|
||||
{{12, 4, 4, 4}},
|
||||
{{1, MAIN_CORE_PROC, 4}, {1, EFFICIENT_CORE_PROC, 4}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_8cores_tput_3 = {
|
||||
4,
|
||||
0,
|
||||
0,
|
||||
{{12, 4, 4, 4}},
|
||||
{{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_8cores_tput_4 = {
|
||||
6,
|
||||
0,
|
||||
0,
|
||||
{{12, 4, 4, 4}},
|
||||
{{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}, {2, HYPER_THREADING_PROC, 2}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_8cores_tput_5 = {
|
||||
0,
|
||||
6,
|
||||
0,
|
||||
{{12, 4, 4, 4}},
|
||||
{{2, MAIN_CORE_PROC, 2}, {1, EFFICIENT_CORE_PROC, 2}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_8cores_tput_6 = {
|
||||
0,
|
||||
8,
|
||||
0,
|
||||
{{12, 4, 4, 4}},
|
||||
{{2, MAIN_CORE_PROC, 2}, {2, EFFICIENT_CORE_PROC, 2}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_8cores_tput_7 = {
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
{{12, 4, 4, 4}},
|
||||
{{4, MAIN_CORE_PROC, 1}, {4, EFFICIENT_CORE_PROC, 1}, {4, HYPER_THREADING_PROC, 1}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_6cores_latency_1 = {
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
{{12, 6, 0, 6}},
|
||||
{{1, MAIN_CORE_PROC, 6}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_6cores_latency_2 = {
|
||||
1,
|
||||
100,
|
||||
0,
|
||||
{{12, 6, 0, 6}},
|
||||
{{1, MAIN_CORE_PROC, 6}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_6cores_tput_1 = {
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
{{12, 6, 0, 6}},
|
||||
{{2, MAIN_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_6cores_tput_2 = {
|
||||
2,
|
||||
0,
|
||||
0,
|
||||
{{12, 6, 0, 6}},
|
||||
{{1, MAIN_CORE_PROC, 6}, {1, HYPER_THREADING_PROC, 6}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_6cores_tput_3 = {
|
||||
0,
|
||||
8,
|
||||
0,
|
||||
{{12, 6, 0, 6}},
|
||||
{{3, MAIN_CORE_PROC, 2}, {1, HYPER_THREADING_PROC, 2}},
|
||||
};
|
||||
|
||||
StreamsCalculationTestCase _1sockets_6cores_tput_4 = {
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
{{12, 6, 0, 6}},
|
||||
{{6, MAIN_CORE_PROC, 1}, {6, HYPER_THREADING_PROC, 1}},
|
||||
};
|
||||
|
||||
TEST_P(StreamsCalculationTests, StreamsCalculation) {}
|
||||
|
||||
// Every fixture defined above must appear here exactly once; a fixture that is not listed never runs.
INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
                         StreamsCalculationTests,
                         testing::Values(_2sockets_104cores_latency_1,
                                         _2sockets_104cores_latency_2,
                                         _2sockets_104cores_tput_1,
                                         _2sockets_104cores_tput_2,
                                         _2sockets_104cores_tput_3,
                                         _2sockets_104cores_tput_4,
                                         _2sockets_104cores_tput_5,
                                         _2sockets_104cores_tput_6,
                                         _2sockets_104cores_tput_7,
                                         _2sockets_104cores_tput_8,
                                         _2sockets_48cores_latency_1,
                                         _2sockets_48cores_tput_1,
                                         _2sockets_48cores_tput_2,
                                         _2sockets_48cores_tput_3,
                                         _2sockets_48cores_tput_4,
                                         _1sockets_14cores_latency_1,
                                         _1sockets_14cores_latency_2,
                                         _1sockets_14cores_latency_3,
                                         _1sockets_14cores_latency_4,
                                         _1sockets_14cores_tput_1,
                                         _1sockets_14cores_tput_2,
                                         _1sockets_14cores_tput_3,
                                         _1sockets_14cores_tput_4,
                                         _1sockets_14cores_tput_5,
                                         _1sockets_14cores_tput_6,
                                         _1sockets_14cores_tput_7,
                                         _1sockets_14cores_tput_8,
                                         _1sockets_10cores_latency_1,
                                         _1sockets_10cores_latency_2,
                                         _1sockets_10cores_latency_3,
                                         _1sockets_10cores_latency_4,
                                         _1sockets_10cores_tput_1,
                                         _1sockets_10cores_tput_2,
                                         _1sockets_10cores_tput_3,
                                         _1sockets_10cores_tput_4,
                                         _1sockets_10cores_tput_5,
                                         _1sockets_10cores_tput_6,
                                         _1sockets_8cores_latency_1,
                                         _1sockets_8cores_latency_2,
                                         _1sockets_8cores_latency_3,
                                         _1sockets_8cores_latency_4,
                                         _1sockets_8cores_tput_1,
                                         _1sockets_8cores_tput_2,
                                         _1sockets_8cores_tput_3,
                                         _1sockets_8cores_tput_4,
                                         _1sockets_8cores_tput_5,
                                         _1sockets_8cores_tput_6,
                                         _1sockets_8cores_tput_7,
                                         _1sockets_6cores_latency_1,
                                         _1sockets_6cores_latency_2,
                                         _1sockets_6cores_tput_1,
                                         _1sockets_6cores_tput_2,
                                         _1sockets_6cores_tput_3,
                                         _1sockets_6cores_tput_4));
|
||||
|
||||
} // namespace
|
||||
Reference in New Issue
Block a user