mirror of
https://github.com/memtest86plus/memtest86plus.git
synced 2025-02-25 18:55:23 -06:00
Faster barrier implementation.
The old barrier implementation was very slow when running on a multi-socket machine (pcmemtest issue 16). The new implementation provides two ways for a blocked CPU core to wait: (1) spin on a thread-local flag, or (2) execute a HLT instruction and wait for an NMI. The first option might be faster, but we need to measure it to find out. A new boot command line option, "powersave", is provided to select between the two: "off" always spins, "high" always halts, and a third setting, "low", uses a mixture of the two (spinning at short barriers and halting at long ones).
This commit is contained in:
parent
311a597766
commit
4078b7760e
15
app/config.c
15
app/config.c
@ -92,10 +92,11 @@ cpu_state_t cpu_state[MAX_CPUS];
|
|||||||
|
|
||||||
bool enable_temperature = false;
|
bool enable_temperature = false;
|
||||||
bool enable_trace = false;
|
bool enable_trace = false;
|
||||||
bool enable_halt = true;
|
|
||||||
|
|
||||||
bool pause_at_start = true;
|
bool pause_at_start = true;
|
||||||
|
|
||||||
|
power_save_t power_save = POWER_SAVE_HIGH;
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Private Functions
|
// Private Functions
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
@ -115,8 +116,14 @@ static void parse_option(const char *option, const char *params)
|
|||||||
}
|
}
|
||||||
} else if (strncmp(option, "nopause", 8) == 0) {
|
} else if (strncmp(option, "nopause", 8) == 0) {
|
||||||
pause_at_start = false;
|
pause_at_start = false;
|
||||||
} else if (strncmp(option, "nohalt", 7) == 0) {
|
} else if (strncmp(option, "powersave", 10) == 0) {
|
||||||
enable_halt = false;
|
if (strncmp(params, "off", 4) == 0) {
|
||||||
|
power_save = POWER_SAVE_OFF;
|
||||||
|
} else if (strncmp(params, "low", 4) == 0) {
|
||||||
|
power_save = POWER_SAVE_LOW;
|
||||||
|
} else if (strncmp(params, "high", 5) == 0) {
|
||||||
|
power_save = POWER_SAVE_HIGH;
|
||||||
|
}
|
||||||
} else if (strncmp(option, "smp", 4) == 0) {
|
} else if (strncmp(option, "smp", 4) == 0) {
|
||||||
smp_enabled = true;
|
smp_enabled = true;
|
||||||
} else if (strncmp(option, "trace", 6) == 0) {
|
} else if (strncmp(option, "trace", 6) == 0) {
|
||||||
@ -653,6 +660,8 @@ void config_init(void)
|
|||||||
|
|
||||||
enable_temperature = !no_temperature;
|
enable_temperature = !no_temperature;
|
||||||
|
|
||||||
|
power_save = POWER_SAVE_HIGH;
|
||||||
|
|
||||||
const boot_params_t *boot_params = (boot_params_t *)boot_params_addr;
|
const boot_params_t *boot_params = (boot_params_t *)boot_params_addr;
|
||||||
|
|
||||||
uintptr_t cmd_line_addr = boot_params->cmd_line_ptr;
|
uintptr_t cmd_line_addr = boot_params->cmd_line_ptr;
|
||||||
|
@ -28,6 +28,12 @@ typedef enum {
|
|||||||
ERROR_MODE_BADRAM
|
ERROR_MODE_BADRAM
|
||||||
} error_mode_t;
|
} error_mode_t;
|
||||||
|
|
||||||
|
typedef enum {
|
||||||
|
POWER_SAVE_OFF,
|
||||||
|
POWER_SAVE_LOW,
|
||||||
|
POWER_SAVE_HIGH
|
||||||
|
} power_save_t;
|
||||||
|
|
||||||
extern uintptr_t pm_limit_lower;
|
extern uintptr_t pm_limit_lower;
|
||||||
extern uintptr_t pm_limit_upper;
|
extern uintptr_t pm_limit_upper;
|
||||||
|
|
||||||
@ -41,10 +47,11 @@ extern cpu_state_t cpu_state[MAX_CPUS];
|
|||||||
|
|
||||||
extern bool enable_temperature;
|
extern bool enable_temperature;
|
||||||
extern bool enable_trace;
|
extern bool enable_trace;
|
||||||
extern bool enable_halt;
|
|
||||||
|
|
||||||
extern bool pause_at_start;
|
extern bool pause_at_start;
|
||||||
|
|
||||||
|
extern power_save_t power_save;
|
||||||
|
|
||||||
void config_init(void);
|
void config_init(void);
|
||||||
|
|
||||||
void config_menu(bool initial);
|
void config_menu(bool initial);
|
||||||
|
@ -231,12 +231,21 @@ void scroll(void)
|
|||||||
|
|
||||||
void do_tick(int my_cpu)
|
void do_tick(int my_cpu)
|
||||||
{
|
{
|
||||||
barrier_wait(run_barrier);
|
bool use_spin_wait = (power_save < POWER_SAVE_HIGH);
|
||||||
|
if (use_spin_wait) {
|
||||||
|
barrier_spin_wait(run_barrier);
|
||||||
|
} else {
|
||||||
|
barrier_halt_wait(run_barrier);
|
||||||
|
}
|
||||||
if (master_cpu == my_cpu) {
|
if (master_cpu == my_cpu) {
|
||||||
check_input();
|
check_input();
|
||||||
error_update();
|
error_update();
|
||||||
}
|
}
|
||||||
barrier_wait(run_barrier);
|
if (use_spin_wait) {
|
||||||
|
barrier_spin_wait(run_barrier);
|
||||||
|
} else {
|
||||||
|
barrier_halt_wait(run_barrier);
|
||||||
|
}
|
||||||
|
|
||||||
// Only the master CPU does the update.
|
// Only the master CPU does the update.
|
||||||
if (master_cpu != my_cpu) {
|
if (master_cpu != my_cpu) {
|
||||||
|
75
app/main.c
75
app/main.c
@ -118,21 +118,42 @@ uintptr_t test_addr[MAX_CPUS];
|
|||||||
// Private Functions
|
// Private Functions
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
#define BARRIER \
|
#define SHORT_BARRIER \
|
||||||
if (TRACE_BARRIERS) { \
|
if (TRACE_BARRIERS) { \
|
||||||
trace(my_cpu, "Start barrier wait at %s line %i", __FILE__, __LINE__); \
|
trace(my_cpu, "Start barrier wait at %s line %i", __FILE__, __LINE__); \
|
||||||
} \
|
} \
|
||||||
barrier_wait(start_barrier);
|
if (power_save < POWER_SAVE_HIGH) { \
|
||||||
|
barrier_spin_wait(start_barrier); \
|
||||||
|
} else { \
|
||||||
|
barrier_halt_wait(start_barrier); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define LONG_BARRIER \
|
||||||
|
if (TRACE_BARRIERS) { \
|
||||||
|
trace(my_cpu, "Start barrier wait at %s line %i", __FILE__, __LINE__); \
|
||||||
|
} \
|
||||||
|
if (power_save > POWER_SAVE_OFF) { \
|
||||||
|
barrier_halt_wait(start_barrier); \
|
||||||
|
} else { \
|
||||||
|
barrier_spin_wait(start_barrier); \
|
||||||
|
}
|
||||||
|
|
||||||
static void run_at(uintptr_t addr, int my_cpu)
|
static void run_at(uintptr_t addr, int my_cpu)
|
||||||
{
|
{
|
||||||
uintptr_t *new_start_addr = (uintptr_t *)(addr + startup - _start);
|
uintptr_t *new_start_addr = (uintptr_t *)(addr + startup - _start);
|
||||||
|
|
||||||
|
|
||||||
if (my_cpu == 0) {
|
if (my_cpu == 0) {
|
||||||
// Copy the program code and all data except the stacks.
|
// Copy the program code and all data except the stacks.
|
||||||
memcpy((void *)addr, &_start, _stacks - _start);
|
memcpy((void *)addr, (void *)_start, _stacks - _start);
|
||||||
|
// Copy the thread-local storage.
|
||||||
|
size_t locals_offset = _stacks - _start + BSP_STACK_SIZE - LOCALS_SIZE;
|
||||||
|
for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) {
|
||||||
|
memcpy((void *)(addr + locals_offset), (void *)(_start + locals_offset), LOCALS_SIZE);
|
||||||
|
locals_offset += AP_STACK_SIZE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
BARRIER;
|
LONG_BARRIER;
|
||||||
|
|
||||||
#ifndef __x86_64__
|
#ifndef __x86_64__
|
||||||
// The 32-bit startup code needs to know where it is located.
|
// The 32-bit startup code needs to know where it is located.
|
||||||
@ -317,7 +338,7 @@ static void test_all_windows(int my_cpu)
|
|||||||
display_active_cpu(my_cpu);
|
display_active_cpu(my_cpu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
barrier_init(run_barrier, num_active_cpus);
|
barrier_reset(run_barrier, num_active_cpus);
|
||||||
}
|
}
|
||||||
|
|
||||||
int iterations = test_list[test_num].iterations;
|
int iterations = test_list[test_num].iterations;
|
||||||
@ -328,7 +349,7 @@ static void test_all_windows(int my_cpu)
|
|||||||
|
|
||||||
// Loop through all possible windows.
|
// Loop through all possible windows.
|
||||||
do {
|
do {
|
||||||
BARRIER;
|
LONG_BARRIER;
|
||||||
if (bail) {
|
if (bail) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -344,7 +365,7 @@ static void test_all_windows(int my_cpu)
|
|||||||
window_num = 1;
|
window_num = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
BARRIER;
|
SHORT_BARRIER;
|
||||||
|
|
||||||
// Relocate if necessary.
|
// Relocate if necessary.
|
||||||
if (window_num > 0) {
|
if (window_num > 0) {
|
||||||
@ -374,16 +395,9 @@ static void test_all_windows(int my_cpu)
|
|||||||
}
|
}
|
||||||
setup_vm_map(window_start, window_end);
|
setup_vm_map(window_start, window_end);
|
||||||
}
|
}
|
||||||
BARRIER;
|
SHORT_BARRIER;
|
||||||
|
|
||||||
// There is a significant overhead in restarting halted CPU cores, so only enable
|
|
||||||
// halting if the memory present in the window is a reasonable size.
|
|
||||||
bool halt_if_inactive = enable_halt && num_enabled_cpus > num_active_cpus && num_mapped_pages > PAGE_C(16,MB);
|
|
||||||
if (!i_am_active) {
|
if (!i_am_active) {
|
||||||
if (!dummy_run && halt_if_inactive) {
|
|
||||||
cpu_state[my_cpu] = CPU_STATE_HALTED;
|
|
||||||
__asm__ __volatile__ ("hlt");
|
|
||||||
}
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -408,29 +422,6 @@ static void test_all_windows(int my_cpu)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (i_am_master) {
|
if (i_am_master) {
|
||||||
if (!dummy_run && halt_if_inactive) {
|
|
||||||
int cpu_num = 0;
|
|
||||||
int retries = 0;
|
|
||||||
while (cpu_num < num_available_cpus) {
|
|
||||||
if (cpu_num == my_cpu) {
|
|
||||||
cpu_num++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (cpu_state[cpu_num] == CPU_STATE_ENABLED) {
|
|
||||||
// This catches a potential race between the inactive CPU halting and the master CPU waking
|
|
||||||
// it up. This should be an unlikely event, so just spin until the inactive CPU catches up.
|
|
||||||
usleep(10);
|
|
||||||
if (++retries < 1000) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (cpu_state[cpu_num] == CPU_STATE_HALTED) {
|
|
||||||
smp_send_nmi(cpu_num);
|
|
||||||
}
|
|
||||||
retries = 0;
|
|
||||||
cpu_num++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
window_num++;
|
window_num++;
|
||||||
}
|
}
|
||||||
} while (window_end < pm_map[pm_map_size - 1].end);
|
} while (window_end < pm_map[pm_map_size - 1].end);
|
||||||
@ -467,7 +458,7 @@ void main(void)
|
|||||||
set_scroll_lock(false);
|
set_scroll_lock(false);
|
||||||
trace(0, "starting other CPUs");
|
trace(0, "starting other CPUs");
|
||||||
}
|
}
|
||||||
barrier_init(start_barrier, num_enabled_cpus);
|
barrier_reset(start_barrier, num_enabled_cpus);
|
||||||
int failed = smp_start(cpu_state);
|
int failed = smp_start(cpu_state);
|
||||||
if (failed) {
|
if (failed) {
|
||||||
const char *message = "Failed to start CPU core %i. Press any key to reboot...";
|
const char *message = "Failed to start CPU core %i. Press any key to reboot...";
|
||||||
@ -501,7 +492,7 @@ void main(void)
|
|||||||
// where we left off after each relocation.
|
// where we left off after each relocation.
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
BARRIER;
|
SHORT_BARRIER;
|
||||||
if (my_cpu == 0) {
|
if (my_cpu == 0) {
|
||||||
if (start_run) {
|
if (start_run) {
|
||||||
pass_num = 0;
|
pass_num = 0;
|
||||||
@ -542,11 +533,11 @@ void main(void)
|
|||||||
start_test = false;
|
start_test = false;
|
||||||
rerun_test = false;
|
rerun_test = false;
|
||||||
}
|
}
|
||||||
BARRIER;
|
SHORT_BARRIER;
|
||||||
if (test_list[test_num].enabled) {
|
if (test_list[test_num].enabled) {
|
||||||
test_all_windows(my_cpu);
|
test_all_windows(my_cpu);
|
||||||
}
|
}
|
||||||
BARRIER;
|
SHORT_BARRIER;
|
||||||
if (my_cpu != 0) {
|
if (my_cpu != 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -24,6 +24,8 @@
|
|||||||
|
|
||||||
#define STACKS_SIZE (BSP_STACK_SIZE + MAX_APS * AP_STACK_SIZE)
|
#define STACKS_SIZE (BSP_STACK_SIZE + MAX_APS * AP_STACK_SIZE)
|
||||||
|
|
||||||
|
#define LOCALS_SIZE 16 /* Stack region reserved for thread-local storage */
|
||||||
|
|
||||||
#define LOW_LOAD_ADDR 0x00010000 /* The low load address for the main program */
|
#define LOW_LOAD_ADDR 0x00010000 /* The low load address for the main program */
|
||||||
#define HIGH_LOAD_ADDR 0x00100000 /* The high load address for the main program */
|
#define HIGH_LOAD_ADDR 0x00100000 /* The high load address for the main program */
|
||||||
|
|
||||||
|
@ -122,7 +122,7 @@ startup:
|
|||||||
call smp_my_cpu_num
|
call smp_my_cpu_num
|
||||||
movl $AP_STACK_SIZE, %edx
|
movl $AP_STACK_SIZE, %edx
|
||||||
mul %edx
|
mul %edx
|
||||||
addl $BSP_STACK_SIZE, %eax
|
addl $(BSP_STACK_SIZE - LOCALS_SIZE), %eax
|
||||||
leal _stacks@GOTOFF(%ebx), %esp
|
leal _stacks@GOTOFF(%ebx), %esp
|
||||||
addl %eax, %esp
|
addl %eax, %esp
|
||||||
|
|
||||||
|
@ -158,7 +158,7 @@ startup:
|
|||||||
call smp_my_cpu_num
|
call smp_my_cpu_num
|
||||||
movl $AP_STACK_SIZE, %edx
|
movl $AP_STACK_SIZE, %edx
|
||||||
mul %edx
|
mul %edx
|
||||||
addq $BSP_STACK_SIZE, %rax
|
addq $(BSP_STACK_SIZE - LOCALS_SIZE), %rax
|
||||||
leaq _stacks(%rip), %rsp
|
leaq _stacks(%rip), %rsp
|
||||||
addq %rax, %rsp
|
addq %rax, %rsp
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ INC_DIRS = -I../boot -I../system -I../lib -I../tests -I../app
|
|||||||
|
|
||||||
SYS_OBJS = system/cpuid.o \
|
SYS_OBJS = system/cpuid.o \
|
||||||
system/cpuinfo.o \
|
system/cpuinfo.o \
|
||||||
|
system/cpulocal.o \
|
||||||
system/ehci.o \
|
system/ehci.o \
|
||||||
system/font.o \
|
system/font.o \
|
||||||
system/hwctrl.o \
|
system/hwctrl.o \
|
||||||
|
@ -8,6 +8,7 @@ INC_DIRS = -I../boot -I../system -I../lib -I../tests -I../app
|
|||||||
|
|
||||||
SYS_OBJS = system/cpuid.o \
|
SYS_OBJS = system/cpuid.o \
|
||||||
system/cpuinfo.o \
|
system/cpuinfo.o \
|
||||||
|
system/cpulocal.o \
|
||||||
system/ehci.o \
|
system/ehci.o \
|
||||||
system/font.o \
|
system/font.o \
|
||||||
system/hwctrl.o \
|
system/hwctrl.o \
|
||||||
|
25
lib/assert.h
Normal file
25
lib/assert.h
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
#ifndef ASSERT_H
|
||||||
|
#define ASSERT_H
|
||||||
|
/**
|
||||||
|
* \file
|
||||||
|
*
|
||||||
|
* Provides a function to terminate the program if an unexpected and fatal
|
||||||
|
* error is detected.
|
||||||
|
*
|
||||||
|
*//*
|
||||||
|
* Copyright (C) 2022 Martin Whitaker.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Terminates the program (using a breakpoint exception) if expr is equal
|
||||||
|
* to zero.
|
||||||
|
*/
|
||||||
|
static inline void assert(int expr)
{
    // Assertion holds: nothing to do.
    if (expr) {
        return;
    }
    // Assertion failed: raise the breakpoint exception.
    __asm__ __volatile__ ("int $3");
}
|
||||||
|
|
||||||
|
#endif // ASSERT_H
|
@ -1,19 +1,14 @@
|
|||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
// Copyright (C) 2020 Martin Whitaker.
|
// Copyright (C) 2020-2022 Martin Whitaker.
|
||||||
//
|
|
||||||
// Derived from an extract of memtest86+ smp.c:
|
|
||||||
//
|
|
||||||
// MemTest86+ V5 Specific code (GPL V2.0)
|
|
||||||
// By Samuel DEMEULEMEESTER, sdemeule@memtest.org
|
|
||||||
// http://www.canardpc.com - http://www.memtest.org
|
|
||||||
// ------------------------------------------------
|
|
||||||
// smp.c - MemTest-86 Version 3.5
|
|
||||||
//
|
|
||||||
// Released under version 2 of the Gnu Public License.
|
|
||||||
// By Chris Brady
|
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|
||||||
|
#include "cpulocal.h"
|
||||||
|
#include "smp.h"
|
||||||
|
|
||||||
|
#include "assert.h"
|
||||||
|
|
||||||
#include "barrier.h"
|
#include "barrier.h"
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
@ -22,34 +17,67 @@
|
|||||||
|
|
||||||
void barrier_init(barrier_t *barrier, int num_threads)
|
void barrier_init(barrier_t *barrier, int num_threads)
|
||||||
{
|
{
|
||||||
barrier->num_threads = num_threads;
|
barrier->flag_num = allocate_local_flag();
|
||||||
barrier->count = num_threads;
|
assert(barrier->flag_num >= 0);
|
||||||
spin_unlock(&barrier->lock);
|
|
||||||
spin_unlock(&barrier->st1);
|
barrier_reset(barrier, num_threads);
|
||||||
spin_unlock(&barrier->st2);
|
|
||||||
spin_lock(&barrier->st2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void barrier_wait(barrier_t *barrier)
|
void barrier_reset(barrier_t *barrier, int num_threads)
|
||||||
|
{
|
||||||
|
barrier->num_threads = num_threads;
|
||||||
|
barrier->count = num_threads;
|
||||||
|
|
||||||
|
local_flag_t *waiting_flags = local_flags(barrier->flag_num);
|
||||||
|
for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) {
|
||||||
|
waiting_flags[cpu_num].flag = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void barrier_spin_wait(barrier_t *barrier)
|
||||||
{
|
{
|
||||||
if (barrier == NULL || barrier->num_threads < 2) {
|
if (barrier == NULL || barrier->num_threads < 2) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
spin_wait(&barrier->st1); // Wait if the barrier is active.
|
local_flag_t *waiting_flags = local_flags(barrier->flag_num);
|
||||||
spin_lock(&barrier->lock); // Get lock for barrier struct.
|
int my_cpu = smp_my_cpu_num();
|
||||||
if (--barrier->count == 0) { // Last process?
|
waiting_flags[my_cpu].flag = true;
|
||||||
spin_lock(&barrier->st1); // Hold up any processes re-entering.
|
if (__sync_fetch_and_sub(&barrier->count, 1) > 1) {
|
||||||
spin_unlock(&barrier->st2); // Release the other processes.
|
volatile bool *i_am_blocked = &waiting_flags[my_cpu].flag;
|
||||||
barrier->count++;
|
while (*i_am_blocked) {
|
||||||
spin_unlock(&barrier->lock);
|
__builtin_ia32_pause();
|
||||||
} else {
|
}
|
||||||
spin_unlock(&barrier->lock);
|
return;
|
||||||
spin_wait(&barrier->st2); // Wait for peers to arrive.
|
}
|
||||||
spin_lock(&barrier->lock);
|
// Last one here, so reset the barrier and wake the others. No need to
|
||||||
if (++barrier->count == barrier->num_threads) {
|
// check if a CPU core is actually waiting - just clear all the flags.
|
||||||
spin_unlock(&barrier->st1);
|
barrier->count = barrier->num_threads;
|
||||||
spin_lock(&barrier->st2);
|
__sync_synchronize();
|
||||||
|
for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) {
|
||||||
|
waiting_flags[cpu_num].flag = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void barrier_halt_wait(barrier_t *barrier)
|
||||||
|
{
|
||||||
|
if (barrier == NULL || barrier->num_threads < 2) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
local_flag_t *waiting_flags = local_flags(barrier->flag_num);
|
||||||
|
int my_cpu = smp_my_cpu_num();
|
||||||
|
waiting_flags[my_cpu].flag = true;
|
||||||
|
if (__sync_fetch_and_sub(&barrier->count, 1) > 1) {
|
||||||
|
__asm__ __volatile__ ("hlt");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Last one here, so reset the barrier and wake the others.
|
||||||
|
barrier->count = barrier->num_threads;
|
||||||
|
__sync_synchronize();
|
||||||
|
waiting_flags[my_cpu].flag = false;
|
||||||
|
for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) {
|
||||||
|
if (waiting_flags[cpu_num].flag) {
|
||||||
|
waiting_flags[cpu_num].flag = false;
|
||||||
|
smp_send_nmi(cpu_num);
|
||||||
}
|
}
|
||||||
spin_unlock(&barrier->lock);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -10,6 +10,8 @@
|
|||||||
* Copyright (C) 2020-2022 Martin Whitaker.
|
* Copyright (C) 2020-2022 Martin Whitaker.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "cpulocal.h"
|
||||||
|
|
||||||
#include "spinlock.h"
|
#include "spinlock.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -17,21 +19,31 @@
|
|||||||
*/
|
*/
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
int num_threads;
|
int flag_num;
|
||||||
volatile int count;
|
int num_threads;
|
||||||
spinlock_t lock;
|
int count;
|
||||||
spinlock_t st1;
|
|
||||||
spinlock_t st2;
|
|
||||||
} barrier_t;
|
} barrier_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialises the barrier to block the specified number of threads.
|
* Initialises a new barrier to block the specified number of threads.
|
||||||
*/
|
*/
|
||||||
void barrier_init(barrier_t *barrier, int num_threads);
|
void barrier_init(barrier_t *barrier, int num_threads);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Waits for all threads to arrive at the barrier.
|
* Resets an existing barrier to block the specified number of threads.
|
||||||
*/
|
*/
|
||||||
void barrier_wait(barrier_t *barrier);
|
void barrier_reset(barrier_t *barrier, int num_threads);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Waits for all threads to arrive at the barrier. A CPU core spins in an
|
||||||
|
* idle loop when waiting.
|
||||||
|
*/
|
||||||
|
void barrier_spin_wait(barrier_t *barrier);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Waits for all threads to arrive at the barrier. A CPU core halts when
|
||||||
|
* waiting.
|
||||||
|
*/
|
||||||
|
void barrier_halt_wait(barrier_t *barrier);
|
||||||
|
|
||||||
#endif // BARRIER_H
|
#endif // BARRIER_H
|
||||||
|
26
system/cpulocal.c
Normal file
26
system/cpulocal.c
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
// Copyright (C) 2022 Martin Whitaker.
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
|
#include "boot.h"
|
||||||
|
|
||||||
|
#include "cpulocal.h"
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
// Variables
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
int local_bytes_used = 0;
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
// Public Functions
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
int allocate_local_flag(void)
|
||||||
|
{
|
||||||
|
if (local_bytes_used == LOCALS_SIZE) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return local_bytes_used += sizeof(bool);
|
||||||
|
}
|
46
system/cpulocal.h
Normal file
46
system/cpulocal.h
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
#ifndef CPULOCAL_H
|
||||||
|
#define CPULOCAL_H
|
||||||
|
/**
|
||||||
|
* \file
|
||||||
|
*
|
||||||
|
* Provides functions to allocate and access thread-local flags.
|
||||||
|
*
|
||||||
|
*//*
|
||||||
|
* Copyright (C) 2022 Martin Whitaker.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "boot.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A single thread-local flag. These are spaced out in memory to ensure each
|
||||||
|
* flag occupies a different cache line.
|
||||||
|
*/
|
||||||
|
typedef struct __attribute__((packed)) {
|
||||||
|
bool flag;
|
||||||
|
uint8_t spacing[AP_STACK_SIZE - sizeof(bool)];
|
||||||
|
} local_flag_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocates an array of thread-local flags, one per CPU core, and returns
|
||||||
|
* an ID number that identifies the allocated array. Returns -1 if there is
|
||||||
|
* insufficient thread local storage remaining to allocate a new array of
|
||||||
|
* flags.
|
||||||
|
*/
|
||||||
|
int allocate_local_flag(void);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a pointer to the previously allocated array of thread-local flags
|
||||||
|
* identified by flag_num.
|
||||||
|
*/
|
||||||
|
static inline local_flag_t *local_flags(int flag_num)
|
||||||
|
{
|
||||||
|
// The number returned by allocate_local_flag is the byte offset of the
|
||||||
|
// flag from the start of the thread-local storage.
|
||||||
|
return (local_flag_t *)(_stacks + BSP_STACK_SIZE - LOCALS_SIZE + flag_num);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CPULOCAL_H
|
@ -1,5 +1,5 @@
|
|||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
// Copyright (C) 2020 Martin Whitaker.
|
// Copyright (C) 2020-2022 Martin Whitaker.
|
||||||
//
|
//
|
||||||
// Derived from memtest86+ reloc.c:
|
// Derived from memtest86+ reloc.c:
|
||||||
//
|
//
|
||||||
@ -11,6 +11,8 @@
|
|||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "assert.h"
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Constants
|
// Constants
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
@ -61,13 +63,6 @@ typedef struct
|
|||||||
|
|
||||||
#define ELF32_R_TYPE(r_info) ((r_info) & 0xff)
|
#define ELF32_R_TYPE(r_info) ((r_info) & 0xff)
|
||||||
|
|
||||||
static inline void assert(int expr)
|
|
||||||
{
|
|
||||||
if (!expr) {
|
|
||||||
__asm__ __volatile__ ("int $3");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return the run-time load address of the shared object. This must be inlined
|
* Return the run-time load address of the shared object. This must be inlined
|
||||||
* in a function which uses global data.
|
* in a function which uses global data.
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
// Copyright (C) 2020 Martin Whitaker.
|
// Copyright (C) 2020-2022 Martin Whitaker.
|
||||||
//
|
//
|
||||||
// Derived from memtest86+ reloc.c:
|
// Derived from memtest86+ reloc.c:
|
||||||
//
|
//
|
||||||
@ -11,6 +11,8 @@
|
|||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "assert.h"
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Constants
|
// Constants
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
@ -62,13 +64,6 @@ typedef struct
|
|||||||
|
|
||||||
#define ELF64_R_TYPE(r_info) ((r_info) & 0xffffffff)
|
#define ELF64_R_TYPE(r_info) ((r_info) & 0xffffffff)
|
||||||
|
|
||||||
static inline void assert(int expr)
|
|
||||||
{
|
|
||||||
if (!expr) {
|
|
||||||
__asm__ __volatile__ ("int $3");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return the run-time load address of the shared object.
|
* Return the run-time load address of the shared object.
|
||||||
*/
|
*/
|
||||||
|
26
system/smp.c
26
system/smp.c
@ -63,6 +63,10 @@
|
|||||||
#define APIC_DELMODE_STARTUP 6
|
#define APIC_DELMODE_STARTUP 6
|
||||||
#define APIC_DELMODE_EXTINT 7
|
#define APIC_DELMODE_EXTINT 7
|
||||||
|
|
||||||
|
// APIC ICR busy flag
|
||||||
|
|
||||||
|
#define APIC_ICR_BUSY (1 << 12)
|
||||||
|
|
||||||
// IA32_APIC_BASE MSR bits
|
// IA32_APIC_BASE MSR bits
|
||||||
|
|
||||||
#define IA32_APIC_ENABLED (1 << 11)
|
#define IA32_APIC_ENABLED (1 << 11)
|
||||||
@ -614,18 +618,23 @@ static bool find_cpus_in_rsdp(void)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool send_ipi(int apic_id, int trigger, int level, int mode, uint8_t vector, int delay_before_poll)
|
static inline void send_ipi(int apic_id, int trigger, int level, int mode, uint8_t vector)
|
||||||
{
|
{
|
||||||
apic_write(APIC_REG_ICRHI, apic_id << 24);
|
apic_write(APIC_REG_ICRHI, apic_id << 24);
|
||||||
|
|
||||||
apic_write(APIC_REG_ICRLO, trigger << 15 | level << 14 | mode << 8 | vector);
|
apic_write(APIC_REG_ICRLO, trigger << 15 | level << 14 | mode << 8 | vector);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool send_ipi_and_wait(int apic_id, int trigger, int level, int mode, uint8_t vector, int delay_before_poll)
|
||||||
|
{
|
||||||
|
send_ipi(apic_id, trigger, level, mode, vector);
|
||||||
|
|
||||||
usleep(delay_before_poll);
|
usleep(delay_before_poll);
|
||||||
|
|
||||||
// Wait for send complete or timeout after 100ms.
|
// Wait for send complete or timeout after 100ms.
|
||||||
int timeout = 1000;
|
int timeout = 1000;
|
||||||
while (timeout > 0) {
|
while (timeout > 0) {
|
||||||
bool send_pending = (apic_read(APIC_REG_ICRLO) & 0x00001000);
|
bool send_pending = (apic_read(APIC_REG_ICRLO) & APIC_ICR_BUSY);
|
||||||
if (!send_pending) {
|
if (!send_pending) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -663,13 +672,13 @@ static bool start_cpu(int cpu_num)
|
|||||||
(void)read_apic_esr(is_p5);
|
(void)read_apic_esr(is_p5);
|
||||||
|
|
||||||
// Pulse the INIT IPI.
|
// Pulse the INIT IPI.
|
||||||
if (!send_ipi(apic_id, APIC_TRIGGER_LEVEL, 1, APIC_DELMODE_INIT, 0, 0)) {
|
if (!send_ipi_and_wait(apic_id, APIC_TRIGGER_LEVEL, 1, APIC_DELMODE_INIT, 0, 0)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (use_long_delays) {
|
if (use_long_delays) {
|
||||||
usleep(10*1000); // 10ms
|
usleep(10*1000); // 10ms
|
||||||
}
|
}
|
||||||
if (!send_ipi(apic_id, APIC_TRIGGER_LEVEL, 0, APIC_DELMODE_INIT, 0, 0)) {
|
if (!send_ipi_and_wait(apic_id, APIC_TRIGGER_LEVEL, 0, APIC_DELMODE_INIT, 0, 0)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -679,7 +688,7 @@ static bool start_cpu(int cpu_num)
|
|||||||
(void)read_apic_esr(is_p5);
|
(void)read_apic_esr(is_p5);
|
||||||
|
|
||||||
// Send the STARTUP IPI.
|
// Send the STARTUP IPI.
|
||||||
if (!send_ipi(apic_id, 0, 0, APIC_DELMODE_STARTUP, AP_TRAMPOLINE_PAGE, use_long_delays ? 300 : 10)) {
|
if (!send_ipi_and_wait(apic_id, 0, 0, APIC_DELMODE_STARTUP, AP_TRAMPOLINE_PAGE, use_long_delays ? 300 : 10)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -785,9 +794,12 @@ int smp_start(cpu_state_t cpu_state[MAX_CPUS])
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
bool smp_send_nmi(int cpu_num)
|
void smp_send_nmi(int cpu_num)
|
||||||
{
|
{
|
||||||
return send_ipi(cpu_num_to_apic_id[cpu_num], 0, 0, APIC_DELMODE_NMI, 0, 200);
|
while (apic_read(APIC_REG_ICRLO) & APIC_ICR_BUSY) {
|
||||||
|
__builtin_ia32_pause();
|
||||||
|
}
|
||||||
|
send_ipi(cpu_num_to_apic_id[cpu_num], 0, 0, APIC_DELMODE_NMI, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int smp_my_cpu_num(void)
|
int smp_my_cpu_num(void)
|
||||||
|
@ -29,8 +29,7 @@
|
|||||||
typedef enum __attribute__ ((packed)) {
|
typedef enum __attribute__ ((packed)) {
|
||||||
CPU_STATE_DISABLED = 0,
|
CPU_STATE_DISABLED = 0,
|
||||||
CPU_STATE_ENABLED = 1,
|
CPU_STATE_ENABLED = 1,
|
||||||
CPU_STATE_RUNNING = 2,
|
CPU_STATE_RUNNING = 2
|
||||||
CPU_STATE_HALTED = 3
|
|
||||||
} cpu_state_t;
|
} cpu_state_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -63,7 +62,7 @@ int smp_start(cpu_state_t cpu_state[MAX_CPUS]);
|
|||||||
* Sends a non-maskable interrupt to the CPU core whose ordinal number
|
* Sends a non-maskable interrupt to the CPU core whose ordinal number
|
||||||
* is cpu_num.
|
* is cpu_num.
|
||||||
*/
|
*/
|
||||||
bool smp_send_nmi(int cpu_num);
|
void smp_send_nmi(int cpu_num);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the ordinal number of the calling CPU core.
|
* Returns the ordinal number of the calling CPU core.
|
||||||
|
@ -117,10 +117,19 @@ void calculate_chunk(testword_t **start, testword_t **end, int my_cpu, int segme
|
|||||||
void flush_caches(int my_cpu)
|
void flush_caches(int my_cpu)
|
||||||
{
|
{
|
||||||
if (my_cpu >= 0) {
|
if (my_cpu >= 0) {
|
||||||
barrier_wait(run_barrier);
|
bool use_spin_wait = (power_save < POWER_SAVE_HIGH);
|
||||||
|
if (use_spin_wait) {
|
||||||
|
barrier_spin_wait(run_barrier);
|
||||||
|
} else {
|
||||||
|
barrier_halt_wait(run_barrier);
|
||||||
|
}
|
||||||
if (my_cpu == master_cpu) {
|
if (my_cpu == master_cpu) {
|
||||||
cache_flush();
|
cache_flush();
|
||||||
}
|
}
|
||||||
barrier_wait(run_barrier);
|
if (use_spin_wait) {
|
||||||
|
barrier_spin_wait(run_barrier);
|
||||||
|
} else {
|
||||||
|
barrier_halt_wait(run_barrier);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -79,7 +79,11 @@ int ticks_per_test[NUM_PASS_TYPES][NUM_TEST_PATTERNS];
|
|||||||
if (TRACE_BARRIERS) { \
|
if (TRACE_BARRIERS) { \
|
||||||
trace(my_cpu, "Run barrier wait at %s line %i", __FILE__, __LINE__); \
|
trace(my_cpu, "Run barrier wait at %s line %i", __FILE__, __LINE__); \
|
||||||
} \
|
} \
|
||||||
barrier_wait(run_barrier); \
|
if (power_save < POWER_SAVE_HIGH) { \
|
||||||
|
barrier_spin_wait(run_barrier); \
|
||||||
|
} else { \
|
||||||
|
barrier_halt_wait(run_barrier); \
|
||||||
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
int run_test(int my_cpu, int test, int stage, int iterations)
|
int run_test(int my_cpu, int test, int stage, int iterations)
|
||||||
|
Loading…
Reference in New Issue
Block a user