Optimise the barrier wait code.

This reduces the number of instructions between decrementing the count
and halting in the halt wait case. Use the same code for the spin wait
case for consistency.
Martin Whitaker 2022-03-08 23:07:23 +00:00
parent ed0fd7830f
commit 0076e63885


@@ -42,7 +42,7 @@ void barrier_spin_wait(barrier_t *barrier)
     local_flag_t *waiting_flags = local_flags(barrier->flag_num);
     int my_cpu = smp_my_cpu_num();
     waiting_flags[my_cpu].flag = true;
-    if (__sync_fetch_and_sub(&barrier->count, 1) > 1) {
+    if (__sync_sub_and_fetch(&barrier->count, 1) != 0) {
         volatile bool *i_am_blocked = &waiting_flags[my_cpu].flag;
         while (*i_am_blocked) {
             __builtin_ia32_pause();
@@ -66,7 +66,7 @@ void barrier_halt_wait(barrier_t *barrier)
     local_flag_t *waiting_flags = local_flags(barrier->flag_num);
     int my_cpu = smp_my_cpu_num();
     waiting_flags[my_cpu].flag = true;
-    if (__sync_fetch_and_sub(&barrier->count, 1) > 1) {
+    if (__sync_sub_and_fetch(&barrier->count, 1) != 0) {
         __asm__ __volatile__ ("hlt");
         return;
     }
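
A note on where the saving comes from: __sync_fetch_and_sub has to hand back the pre-decrement value, which on x86 typically means a locked xadd followed by a separate compare, whereas __sync_sub_and_fetch tested against zero can usually be lowered to a single locked sub/dec whose zero flag is checked directly, leaving fewer instructions between the decrement and the hlt. The two conditions are equivalent for a positive count. A minimal standalone sketch of the two forms, using a plain shared counter rather than the barrier struct from this commit:

/*
 * Illustrative sketch only; the counter and function names are made up.
 * Both functions return true while other CPUs have yet to arrive.
 */
#include <stdbool.h>

static int count = 4;   /* number of CPUs expected at the barrier */

bool more_cpus_pending_old(void)
{
    /* old form: needs the pre-decrement value back (typically lock xadd, then cmp) */
    return __sync_fetch_and_sub(&count, 1) > 1;
}

bool more_cpus_pending_new(void)
{
    /* new form: only the post-decrement result vs zero (typically lock sub/dec, flags tested) */
    return __sync_sub_and_fetch(&count, 1) != 0;
}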