diff --git a/app/config.c b/app/config.c index 591f5f2..ff9937f 100644 --- a/app/config.c +++ b/app/config.c @@ -92,10 +92,11 @@ cpu_state_t cpu_state[MAX_CPUS]; bool enable_temperature = false; bool enable_trace = false; -bool enable_halt = true; bool pause_at_start = true; +power_save_t power_save = POWER_SAVE_HIGH; + //------------------------------------------------------------------------------ // Private Functions //------------------------------------------------------------------------------ @@ -115,8 +116,14 @@ static void parse_option(const char *option, const char *params) } } else if (strncmp(option, "nopause", 8) == 0) { pause_at_start = false; - } else if (strncmp(option, "nohalt", 7) == 0) { - enable_halt = false; + } else if (strncmp(option, "powersave", 10) == 0) { + if (strncmp(params, "off", 4) == 0) { + power_save = POWER_SAVE_OFF; + } else if (strncmp(params, "low", 4) == 0) { + power_save = POWER_SAVE_LOW; + } else if (strncmp(params, "high", 5) == 0) { + power_save = POWER_SAVE_HIGH; + } } else if (strncmp(option, "smp", 4) == 0) { smp_enabled = true; } else if (strncmp(option, "trace", 6) == 0) { @@ -653,6 +660,8 @@ void config_init(void) enable_temperature = !no_temperature; + power_save = POWER_SAVE_HIGH; + const boot_params_t *boot_params = (boot_params_t *)boot_params_addr; uintptr_t cmd_line_addr = boot_params->cmd_line_ptr; diff --git a/app/config.h b/app/config.h index 5619cb5..9e504fa 100644 --- a/app/config.h +++ b/app/config.h @@ -28,6 +28,12 @@ typedef enum { ERROR_MODE_BADRAM } error_mode_t; +typedef enum { + POWER_SAVE_OFF, + POWER_SAVE_LOW, + POWER_SAVE_HIGH +} power_save_t; + extern uintptr_t pm_limit_lower; extern uintptr_t pm_limit_upper; @@ -41,10 +47,11 @@ extern cpu_state_t cpu_state[MAX_CPUS]; extern bool enable_temperature; extern bool enable_trace; -extern bool enable_halt; extern bool pause_at_start; +extern power_save_t power_save; + void config_init(void); void config_menu(bool initial); diff --git a/app/display.c 
b/app/display.c index 3a17865..98ae263 100644 --- a/app/display.c +++ b/app/display.c @@ -231,12 +231,21 @@ void scroll(void) void do_tick(int my_cpu) { - barrier_wait(run_barrier); + bool use_spin_wait = (power_save < POWER_SAVE_HIGH); + if (use_spin_wait) { + barrier_spin_wait(run_barrier); + } else { + barrier_halt_wait(run_barrier); + } if (master_cpu == my_cpu) { check_input(); error_update(); } - barrier_wait(run_barrier); + if (use_spin_wait) { + barrier_spin_wait(run_barrier); + } else { + barrier_halt_wait(run_barrier); + } // Only the master CPU does the update. if (master_cpu != my_cpu) { diff --git a/app/main.c b/app/main.c index 292f42b..73f15e3 100644 --- a/app/main.c +++ b/app/main.c @@ -118,21 +118,42 @@ uintptr_t test_addr[MAX_CPUS]; // Private Functions //------------------------------------------------------------------------------ -#define BARRIER \ +#define SHORT_BARRIER \ if (TRACE_BARRIERS) { \ trace(my_cpu, "Start barrier wait at %s line %i", __FILE__, __LINE__); \ } \ - barrier_wait(start_barrier); + if (power_save < POWER_SAVE_HIGH) { \ + barrier_spin_wait(start_barrier); \ + } else { \ + barrier_halt_wait(start_barrier); \ + } + +#define LONG_BARRIER \ + if (TRACE_BARRIERS) { \ + trace(my_cpu, "Start barrier wait at %s line %i", __FILE__, __LINE__); \ + } \ + if (power_save > POWER_SAVE_OFF) { \ + barrier_halt_wait(start_barrier); \ + } else { \ + barrier_spin_wait(start_barrier); \ + } static void run_at(uintptr_t addr, int my_cpu) { uintptr_t *new_start_addr = (uintptr_t *)(addr + startup - _start); + if (my_cpu == 0) { // Copy the program code and all data except the stacks. - memcpy((void *)addr, &_start, _stacks - _start); + memcpy((void *)addr, (void *)_start, _stacks - _start); + // Copy the thread-local storage. 
+ size_t locals_offset = _stacks - _start + BSP_STACK_SIZE - LOCALS_SIZE; + for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) { + memcpy((void *)(addr + locals_offset), (void *)(_start + locals_offset), LOCALS_SIZE); + locals_offset += AP_STACK_SIZE; + } } - BARRIER; + LONG_BARRIER; #ifndef __x86_64__ // The 32-bit startup code needs to know where it is located. @@ -317,7 +338,7 @@ static void test_all_windows(int my_cpu) display_active_cpu(my_cpu); } } - barrier_init(run_barrier, num_active_cpus); + barrier_reset(run_barrier, num_active_cpus); } int iterations = test_list[test_num].iterations; @@ -328,7 +349,7 @@ static void test_all_windows(int my_cpu) // Loop through all possible windows. do { - BARRIER; + LONG_BARRIER; if (bail) { break; } @@ -344,7 +365,7 @@ static void test_all_windows(int my_cpu) window_num = 1; } } - BARRIER; + SHORT_BARRIER; // Relocate if necessary. if (window_num > 0) { @@ -374,16 +395,9 @@ static void test_all_windows(int my_cpu) } setup_vm_map(window_start, window_end); } - BARRIER; + SHORT_BARRIER; - // There is a significant overhead in restarting halted CPU cores, so only enable - // halting if the memory present in the window is a reasonable size. - bool halt_if_inactive = enable_halt && num_enabled_cpus > num_active_cpus && num_mapped_pages > PAGE_C(16,MB); if (!i_am_active) { - if (!dummy_run && halt_if_inactive) { - cpu_state[my_cpu] = CPU_STATE_HALTED; - __asm__ __volatile__ ("hlt"); - } continue; } @@ -408,29 +422,6 @@ static void test_all_windows(int my_cpu) } if (i_am_master) { - if (!dummy_run && halt_if_inactive) { - int cpu_num = 0; - int retries = 0; - while (cpu_num < num_available_cpus) { - if (cpu_num == my_cpu) { - cpu_num++; - continue; - } - if (cpu_state[cpu_num] == CPU_STATE_ENABLED) { - // This catches a potential race between the inactive CPU halting and the master CPU waking - // it up. This should be an unlikely event, so just spin until the inactive CPU catches up. 
- usleep(10); - if (++retries < 1000) { - continue; - } - } - if (cpu_state[cpu_num] == CPU_STATE_HALTED) { - smp_send_nmi(cpu_num); - } - retries = 0; - cpu_num++; - } - } window_num++; } } while (window_end < pm_map[pm_map_size - 1].end); @@ -467,7 +458,7 @@ void main(void) set_scroll_lock(false); trace(0, "starting other CPUs"); } - barrier_init(start_barrier, num_enabled_cpus); + barrier_reset(start_barrier, num_enabled_cpus); int failed = smp_start(cpu_state); if (failed) { const char *message = "Failed to start CPU core %i. Press any key to reboot..."; @@ -501,7 +492,7 @@ void main(void) // where we left off after each relocation. while (1) { - BARRIER; + SHORT_BARRIER; if (my_cpu == 0) { if (start_run) { pass_num = 0; @@ -542,11 +533,11 @@ void main(void) start_test = false; rerun_test = false; } - BARRIER; + SHORT_BARRIER; if (test_list[test_num].enabled) { test_all_windows(my_cpu); } - BARRIER; + SHORT_BARRIER; if (my_cpu != 0) { continue; } diff --git a/boot/boot.h b/boot/boot.h index 702f3ac..9a65aef 100644 --- a/boot/boot.h +++ b/boot/boot.h @@ -24,6 +24,8 @@ #define STACKS_SIZE (BSP_STACK_SIZE + MAX_APS * AP_STACK_SIZE) +#define LOCALS_SIZE 16 /* Stack region reserved for thread-local storage */ + #define LOW_LOAD_ADDR 0x00010000 /* The low load address for the main program */ #define HIGH_LOAD_ADDR 0x00100000 /* The high load address for the main program */ diff --git a/boot/startup32.S b/boot/startup32.S index 9fd2a65..5f65f49 100644 --- a/boot/startup32.S +++ b/boot/startup32.S @@ -122,7 +122,7 @@ startup: call smp_my_cpu_num movl $AP_STACK_SIZE, %edx mul %edx - addl $BSP_STACK_SIZE, %eax + addl $(BSP_STACK_SIZE - LOCALS_SIZE), %eax leal _stacks@GOTOFF(%ebx), %esp addl %eax, %esp diff --git a/boot/startup64.S b/boot/startup64.S index c94c0e6..d0cd704 100644 --- a/boot/startup64.S +++ b/boot/startup64.S @@ -158,7 +158,7 @@ startup: call smp_my_cpu_num movl $AP_STACK_SIZE, %edx mul %edx - addq $BSP_STACK_SIZE, %rax + addq $(BSP_STACK_SIZE - 
LOCALS_SIZE), %rax leaq _stacks(%rip), %rsp addq %rax, %rsp diff --git a/build32/Makefile b/build32/Makefile index 498616b..86cd8b4 100644 --- a/build32/Makefile +++ b/build32/Makefile @@ -8,6 +8,7 @@ INC_DIRS = -I../boot -I../system -I../lib -I../tests -I../app SYS_OBJS = system/cpuid.o \ system/cpuinfo.o \ + system/cpulocal.o \ system/ehci.o \ system/font.o \ system/hwctrl.o \ diff --git a/build64/Makefile b/build64/Makefile index 50380cb..58b05c4 100644 --- a/build64/Makefile +++ b/build64/Makefile @@ -8,6 +8,7 @@ INC_DIRS = -I../boot -I../system -I../lib -I../tests -I../app SYS_OBJS = system/cpuid.o \ system/cpuinfo.o \ + system/cpulocal.o \ system/ehci.o \ system/font.o \ system/hwctrl.o \ diff --git a/lib/assert.h b/lib/assert.h new file mode 100644 index 0000000..4138969 --- /dev/null +++ b/lib/assert.h @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef ASSERT_H +#define ASSERT_H +/** + * \file + * + * Provides a function to terminate the program if an unexpected and fatal + * error is detected. + * + *//* + * Copyright (C) 2022 Martin Whitaker. + */ + +/* + * Terminates the program (using a breakpoint exception) if expr is equal + * to zero. + */ +static inline void assert(int expr) +{ + if (!expr) { + __asm__ __volatile__ ("int $3"); + } +} + +#endif // ASSERT_H diff --git a/lib/barrier.c b/lib/barrier.c index bd578aa..a4a7248 100644 --- a/lib/barrier.c +++ b/lib/barrier.c @@ -1,19 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2020 Martin Whitaker. -// -// Derived from an extract of memtest86+ smp.c: -// -// MemTest86+ V5 Specific code (GPL V2.0) -// By Samuel DEMEULEMEESTER, sdemeule@memtest.org -// http://www.canardpc.com - http://www.memtest.org -// ------------------------------------------------ -// smp.c - MemTest-86 Version 3.5 -// -// Released under version 2 of the Gnu Public License. -// By Chris Brady +// Copyright (C) 2020-2022 Martin Whitaker. 
+#include <stdbool.h> #include <stddef.h> +#include "cpulocal.h" +#include "smp.h" + +#include "assert.h" + +#include "barrier.h" //------------------------------------------------------------------------------ @@ -22,34 +17,67 @@ void barrier_init(barrier_t *barrier, int num_threads) { - barrier->num_threads = num_threads; - barrier->count = num_threads; - spin_unlock(&barrier->lock); - spin_unlock(&barrier->st1); - spin_unlock(&barrier->st2); - spin_lock(&barrier->st2); + barrier->flag_num = allocate_local_flag(); + assert(barrier->flag_num >= 0); + + barrier_reset(barrier, num_threads); } -void barrier_wait(barrier_t *barrier) +void barrier_reset(barrier_t *barrier, int num_threads) +{ + barrier->num_threads = num_threads; + barrier->count = num_threads; + + local_flag_t *waiting_flags = local_flags(barrier->flag_num); + for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) { + waiting_flags[cpu_num].flag = false; + } +} + +void barrier_spin_wait(barrier_t *barrier) { if (barrier == NULL || barrier->num_threads < 2) { return; } - spin_wait(&barrier->st1); // Wait if the barrier is active. - spin_lock(&barrier->lock); // Get lock for barrier struct. - if (--barrier->count == 0) { // Last process? - spin_lock(&barrier->st1); // Hold up any processes re-entering. - spin_unlock(&barrier->st2); // Release the other processes. - barrier->count++; - spin_unlock(&barrier->lock); - } else { - spin_unlock(&barrier->lock); - spin_wait(&barrier->st2); // Wait for peers to arrive. - spin_lock(&barrier->lock); - if (++barrier->count == barrier->num_threads) { - spin_unlock(&barrier->st1); - spin_lock(&barrier->st2); + local_flag_t *waiting_flags = local_flags(barrier->flag_num); + int my_cpu = smp_my_cpu_num(); + waiting_flags[my_cpu].flag = true; + if (__sync_fetch_and_sub(&barrier->count, 1) > 1) { + volatile bool *i_am_blocked = &waiting_flags[my_cpu].flag; + while (*i_am_blocked) { + __builtin_ia32_pause(); + } + return; + } + // Last one here, so reset the barrier and wake the others.
No need to + // check if a CPU core is actually waiting - just clear all the flags. + barrier->count = barrier->num_threads; + __sync_synchronize(); + for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) { + waiting_flags[cpu_num].flag = false; + } +} + +void barrier_halt_wait(barrier_t *barrier) +{ + if (barrier == NULL || barrier->num_threads < 2) { + return; + } + local_flag_t *waiting_flags = local_flags(barrier->flag_num); + int my_cpu = smp_my_cpu_num(); + waiting_flags[my_cpu].flag = true; + if (__sync_fetch_and_sub(&barrier->count, 1) > 1) { + __asm__ __volatile__ ("hlt"); + return; + } + // Last one here, so reset the barrier and wake the others. + barrier->count = barrier->num_threads; + __sync_synchronize(); + waiting_flags[my_cpu].flag = false; + for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) { + if (waiting_flags[cpu_num].flag) { + waiting_flags[cpu_num].flag = false; + smp_send_nmi(cpu_num); } - spin_unlock(&barrier->lock); } } diff --git a/lib/barrier.h b/lib/barrier.h index a0c3d2d..eed8583 100644 --- a/lib/barrier.h +++ b/lib/barrier.h @@ -10,6 +10,8 @@ * Copyright (C) 2020-2022 Martin Whitaker. */ +#include "cpulocal.h" + #include "spinlock.h" /** @@ -17,21 +19,31 @@ */ typedef struct { - int num_threads; - volatile int count; - spinlock_t lock; - spinlock_t st1; - spinlock_t st2; + int flag_num; + int num_threads; + int count; } barrier_t; /** - * Initialises the barrier to block the specified number of threads. + * Initialises a new barrier to block the specified number of threads. */ void barrier_init(barrier_t *barrier, int num_threads); /** - * Waits for all threads to arrive at the barrier. + * Resets an existing barrier to block the specified number of threads. */ -void barrier_wait(barrier_t *barrier); +void barrier_reset(barrier_t *barrier, int num_threads); + +/** + * Waits for all threads to arrive at the barrier. A CPU core spins in an + * idle loop when waiting. 
+ */ +void barrier_spin_wait(barrier_t *barrier); + +/** + * Waits for all threads to arrive at the barrier. A CPU core halts when + * waiting. + */ +void barrier_halt_wait(barrier_t *barrier); #endif // BARRIER_H diff --git a/system/cpulocal.c b/system/cpulocal.c new file mode 100644 index 0000000..ceea148 --- /dev/null +++ b/system/cpulocal.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2022 Martin Whitaker. + +#include <stdbool.h> + +#include "boot.h" + +#include "cpulocal.h" + +//------------------------------------------------------------------------------ +// Variables +//------------------------------------------------------------------------------ + +int local_bytes_used = 0; + +//------------------------------------------------------------------------------ +// Public Functions +//------------------------------------------------------------------------------ + +int allocate_local_flag(void) +{ + if (local_bytes_used == LOCALS_SIZE) { + return -1; + } + int offset = local_bytes_used; + local_bytes_used += sizeof(bool); + return offset; +} diff --git a/system/cpulocal.h b/system/cpulocal.h new file mode 100644 index 0000000..747c69f --- /dev/null +++ b/system/cpulocal.h @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef CPULOCAL_H +#define CPULOCAL_H +/** + * \file + * + * Provides functions to allocate and access thread-local flags. + * + *//* + * Copyright (C) 2022 Martin Whitaker. + */ + +#include <stdbool.h> +#include <stdint.h> + +#include "boot.h" + +/** + * A single thread-local flag. These are spaced out in memory to ensure each + * flag occupies a different cache line. + */ +typedef struct __attribute__((packed)) { + bool flag; + uint8_t spacing[AP_STACK_SIZE - sizeof(bool)]; +} local_flag_t; + +/** + * Allocates an array of thread-local flags, one per CPU core, and returns + * an ID number that identifies the allocated array. Returns -1 if there is + * insufficient thread local storage remaining to allocate a new array of + * flags.
+ */ +int allocate_local_flag(void); + +/** + * Returns a pointer to the previously allocated array of thread-local flags + * identified by flag_num. + */ +static inline local_flag_t *local_flags(int flag_num) +{ + // The number returned by allocate_local_flag is the byte offset of the + // flag from the start of the thread-local storage. + return (local_flag_t *)(_stacks + BSP_STACK_SIZE - LOCALS_SIZE + flag_num); +} + +#endif // CPULOCAL_H diff --git a/system/reloc32.c b/system/reloc32.c index c3a80e8..b00c784 100644 --- a/system/reloc32.c +++ b/system/reloc32.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2020 Martin Whitaker. +// Copyright (C) 2020-2022 Martin Whitaker. // // Derived from memtest86+ reloc.c: // @@ -11,6 +11,8 @@ #include #include +#include "assert.h" + //------------------------------------------------------------------------------ // Constants //------------------------------------------------------------------------------ @@ -61,13 +63,6 @@ typedef struct #define ELF32_R_TYPE(r_info) ((r_info) & 0xff) -static inline void assert(int expr) -{ - if (!expr) { - __asm__ __volatile__ ("int $3"); - } -} - /* * Return the run-time load address of the shared object. This must be inlined * in a function which uses global data. diff --git a/system/reloc64.c b/system/reloc64.c index 22a273e..7cc251e 100644 --- a/system/reloc64.c +++ b/system/reloc64.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2020 Martin Whitaker. +// Copyright (C) 2020-2022 Martin Whitaker. 
// // Derived from memtest86+ reloc.c: // @@ -11,6 +11,8 @@ #include #include +#include "assert.h" + //------------------------------------------------------------------------------ // Constants //------------------------------------------------------------------------------ @@ -62,13 +64,6 @@ typedef struct #define ELF64_R_TYPE(r_info) ((r_info) & 0xffffffff) -static inline void assert(int expr) -{ - if (!expr) { - __asm__ __volatile__ ("int $3"); - } -} - /* * Return the run-time load address of the shared object. */ diff --git a/system/smp.c b/system/smp.c index 293234a..11433a6 100644 --- a/system/smp.c +++ b/system/smp.c @@ -63,6 +63,10 @@ #define APIC_DELMODE_STARTUP 6 #define APIC_DELMODE_EXTINT 7 +// APIC ICR busy flag + +#define APIC_ICR_BUSY (1 << 12) + // IA32_APIC_BASE MSR bits #define IA32_APIC_ENABLED (1 << 11) @@ -614,18 +618,23 @@ static bool find_cpus_in_rsdp(void) return false; } -static bool send_ipi(int apic_id, int trigger, int level, int mode, uint8_t vector, int delay_before_poll) +static inline void send_ipi(int apic_id, int trigger, int level, int mode, uint8_t vector) { apic_write(APIC_REG_ICRHI, apic_id << 24); apic_write(APIC_REG_ICRLO, trigger << 15 | level << 14 | mode << 8 | vector); +} + +static bool send_ipi_and_wait(int apic_id, int trigger, int level, int mode, uint8_t vector, int delay_before_poll) +{ + send_ipi(apic_id, trigger, level, mode, vector); usleep(delay_before_poll); // Wait for send complete or timeout after 100ms. int timeout = 1000; while (timeout > 0) { - bool send_pending = (apic_read(APIC_REG_ICRLO) & 0x00001000); + bool send_pending = (apic_read(APIC_REG_ICRLO) & APIC_ICR_BUSY); if (!send_pending) { return true; } @@ -663,13 +672,13 @@ static bool start_cpu(int cpu_num) (void)read_apic_esr(is_p5); // Pulse the INIT IPI. 
- if (!send_ipi(apic_id, APIC_TRIGGER_LEVEL, 1, APIC_DELMODE_INIT, 0, 0)) { + if (!send_ipi_and_wait(apic_id, APIC_TRIGGER_LEVEL, 1, APIC_DELMODE_INIT, 0, 0)) { return false; } if (use_long_delays) { usleep(10*1000); // 10ms } - if (!send_ipi(apic_id, APIC_TRIGGER_LEVEL, 0, APIC_DELMODE_INIT, 0, 0)) { + if (!send_ipi_and_wait(apic_id, APIC_TRIGGER_LEVEL, 0, APIC_DELMODE_INIT, 0, 0)) { return false; } @@ -679,7 +688,7 @@ static bool start_cpu(int cpu_num) (void)read_apic_esr(is_p5); // Send the STARTUP IPI. - if (!send_ipi(apic_id, 0, 0, APIC_DELMODE_STARTUP, AP_TRAMPOLINE_PAGE, use_long_delays ? 300 : 10)) { + if (!send_ipi_and_wait(apic_id, 0, 0, APIC_DELMODE_STARTUP, AP_TRAMPOLINE_PAGE, use_long_delays ? 300 : 10)) { return false; } @@ -785,9 +794,12 @@ int smp_start(cpu_state_t cpu_state[MAX_CPUS]) #endif } -bool smp_send_nmi(int cpu_num) +void smp_send_nmi(int cpu_num) { - return send_ipi(cpu_num_to_apic_id[cpu_num], 0, 0, APIC_DELMODE_NMI, 0, 200); + while (apic_read(APIC_REG_ICRLO) & APIC_ICR_BUSY) { + __builtin_ia32_pause(); + } + send_ipi(cpu_num_to_apic_id[cpu_num], 0, 0, APIC_DELMODE_NMI, 0); } int smp_my_cpu_num(void) diff --git a/system/smp.h b/system/smp.h index f18bc17..9cf608c 100644 --- a/system/smp.h +++ b/system/smp.h @@ -29,8 +29,7 @@ typedef enum __attribute__ ((packed)) { CPU_STATE_DISABLED = 0, CPU_STATE_ENABLED = 1, - CPU_STATE_RUNNING = 2, - CPU_STATE_HALTED = 3 + CPU_STATE_RUNNING = 2 } cpu_state_t; /** @@ -63,7 +62,7 @@ int smp_start(cpu_state_t cpu_state[MAX_CPUS]); * Sends a non-maskable interrupt to the CPU core whose ordinal number * is cpu_num. */ -bool smp_send_nmi(int cpu_num); +void smp_send_nmi(int cpu_num); /** * Returns the ordinal number of the calling CPU core. 
diff --git a/tests/test_helper.c b/tests/test_helper.c index ebd0903..cb15abb 100644 --- a/tests/test_helper.c +++ b/tests/test_helper.c @@ -117,10 +117,19 @@ void calculate_chunk(testword_t **start, testword_t **end, int my_cpu, int segme void flush_caches(int my_cpu) { if (my_cpu >= 0) { - barrier_wait(run_barrier); + bool use_spin_wait = (power_save < POWER_SAVE_HIGH); + if (use_spin_wait) { + barrier_spin_wait(run_barrier); + } else { + barrier_halt_wait(run_barrier); + } if (my_cpu == master_cpu) { cache_flush(); } - barrier_wait(run_barrier); + if (use_spin_wait) { + barrier_spin_wait(run_barrier); + } else { + barrier_halt_wait(run_barrier); + } } } diff --git a/tests/tests.c b/tests/tests.c index 8983d8c..1969204 100644 --- a/tests/tests.c +++ b/tests/tests.c @@ -79,7 +79,11 @@ int ticks_per_test[NUM_PASS_TYPES][NUM_TEST_PATTERNS]; if (TRACE_BARRIERS) { \ trace(my_cpu, "Run barrier wait at %s line %i", __FILE__, __LINE__); \ } \ - barrier_wait(run_barrier); \ + if (power_save < POWER_SAVE_HIGH) { \ + barrier_spin_wait(run_barrier); \ + } else { \ + barrier_halt_wait(run_barrier); \ + } \ } int run_test(int my_cpu, int test, int stage, int iterations)