// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2020-2022 Martin Whitaker.

#include <stdbool.h>
#include <stddef.h>

#include "cpulocal.h"
#include "smp.h"

#include "assert.h"
#include "barrier.h"

//------------------------------------------------------------------------------
// Public Functions
//------------------------------------------------------------------------------

void barrier_init(barrier_t *barrier, int num_threads)
{
    barrier->flag_num = allocate_local_flag();
    assert(barrier->flag_num >= 0);
    barrier_reset(barrier, num_threads);
}

void barrier_reset(barrier_t *barrier, int num_threads)
{
    barrier->num_threads = num_threads;
    barrier->count       = num_threads;

    local_flag_t *waiting_flags = local_flags(barrier->flag_num);
    for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) {
        waiting_flags[cpu_num].flag = false;
    }
}

void barrier_spin_wait(barrier_t *barrier)
{
    if (barrier == NULL || barrier->num_threads < 2) {
        return;
    }
    local_flag_t *waiting_flags = local_flags(barrier->flag_num);

    int my_cpu = smp_my_cpu_num();
    waiting_flags[my_cpu].flag = true;

    if (__sync_sub_and_fetch(&barrier->count, 1) != 0) {
        // Not the last to arrive, so spin until the last arrival clears our flag.
        volatile bool *i_am_blocked = &waiting_flags[my_cpu].flag;
        while (*i_am_blocked) {
#if defined(__x86_64__) || defined(__i386__)
            __builtin_ia32_pause();
#elif defined(__loongarch_lp64)
            __asm__ __volatile__ (
                "nop \n\t"
                "nop \n\t"
                "nop \n\t"
                "nop \n\t"
                "nop \n\t"
                "nop \n\t"
                "nop \n\t"
                "nop \n\t"
            );
#endif
        }
        return;
    }

    // Last one here, so reset the barrier and wake the others. No need to
    // check if a CPU core is actually waiting - just clear all the flags.
    barrier->count = barrier->num_threads;
    __sync_synchronize();
    for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) {
        waiting_flags[cpu_num].flag = false;
    }
}

void barrier_halt_wait(barrier_t *barrier)
{
    if (barrier == NULL || barrier->num_threads < 2) {
        return;
    }
    local_flag_t *waiting_flags = local_flags(barrier->flag_num);

    int my_cpu = smp_my_cpu_num();
    waiting_flags[my_cpu].flag = true;

    // There is a small window of opportunity for the wakeup signal to arrive
    // between us decrementing the barrier count and halting. So code the
    // following in assembler, both to ensure the window of opportunity is as
    // small as possible, and also to allow us to detect and skip over the
    // halt in the interrupt handler.
    //
    //  if (__sync_sub_and_fetch(&barrier->count, 1) != 0) {
    //      __asm__ __volatile__ ("hlt");
    //      return;
    //  }
    //
#if defined(__i386__) || defined(__x86_64__)
    __asm__ goto ("\t"
        "lock decl %0   \n\t"
        "je   0f        \n\t"
        "hlt            \n\t"
        "jmp  %l[end]   \n"
        "0:             \n"
        : /* no outputs */
        : "m" (barrier->count)
        : /* no clobbers */
        : end
    );
#elif defined(__loongarch_lp64)
    __asm__ goto ("\t"
        "li.w       $t0, -1      \n\t"
        "li.w       $t2, 1       \n\t"
        "amadd_db.w $t1, $t0, %0 \n\t"
        "bge        $t2, $t1, 0f \n\t"
        "1:                      \n\t"
        "idle       0x0          \n\t"
        "b          1b           \n\t"
        "b          %l[end]      \n"
        "0:                      \n"
        : /* no outputs */
        : "r" (&barrier->count)
        : "$t0", "$t1", "$t2"
        : end
    );
#endif

    // Last one here, so reset the barrier and wake the others.
    barrier->count = barrier->num_threads;
    __sync_synchronize();
    waiting_flags[my_cpu].flag = false;
    for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) {
        if (waiting_flags[cpu_num].flag) {
            waiting_flags[cpu_num].flag = false;
            smp_send_nmi(cpu_num);
        }
    }
end:
    return;
}
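
// ----------------------------------------------------------------------------
// For reference: the interrupt-handler side of barrier_halt_wait(). If the
// wakeup NMI arrives in the window between the 'lock decl' and the 'hlt', the
// CPU would otherwise halt forever, so the handler must check whether the
// interrupted instruction is the 'hlt' and step over it. This is only a
// minimal sketch of that idea for x86 - the trap frame type and field names
// here are hypothetical, and the real logic lives in the project's interrupt
// handler, not in this file.
//
//  #define OPC_HLT 0xf4
//
//  void nmi_wakeup_check(trap_frame_t *frame)  // hypothetical frame type
//  {
//      if (*(uint8_t *)frame->ip == OPC_HLT) {
//          frame->ip += 1;  // resume at the 'jmp %l[end]' after the hlt
//      }
//  }
// ----------------------------------------------------------------------------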
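
// ----------------------------------------------------------------------------
// Usage sketch (illustrative, not part of the build): how a caller might
// synchronise all active CPUs at a test-stage boundary. The names
// start_barrier and run_test_stage() are hypothetical; the real callers live
// elsewhere in the code base.
//
//  static barrier_t start_barrier;
//
//  void setup(int num_active_cpus)
//  {
//      barrier_init(&start_barrier, num_active_cpus);  // once, on the boot CPU
//  }
//
//  void run_test_stage(void)
//  {
//      // ... per-CPU work for this stage ...
//
//      barrier_spin_wait(&start_barrier);  // busy-wait: lowest wakeup latency
//      // or: barrier_halt_wait(&start_barrier) to halt idle CPUs until the
//      // last arrival wakes them with an NMI.
//
//      // All CPUs have now arrived; the last arrival has already reset the
//      // barrier, so it can be reused for the next stage.
//  }
// ----------------------------------------------------------------------------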