diff --git a/app/interrupt.c b/app/interrupt.c index ab26782..2266a85 100644 --- a/app/interrupt.c +++ b/app/interrupt.c @@ -10,6 +10,7 @@ #include +#include "cpuid.h" #include "hwctrl.h" #include "keyboard.h" #include "screen.h" @@ -25,6 +26,7 @@ //------------------------------------------------------------------------------ #define HLT_OPCODE 0xf4 +#define JE_OPCODE 0x74 #ifdef __x86_64__ #define REG_PREFIX "r" @@ -120,7 +122,27 @@ void interrupt(struct trap_regs *trap_regs) if (trap_regs->vect == 2) { uint8_t *pc = (uint8_t *)trap_regs->ip; if (pc[-1] == HLT_OPCODE) { - // Assume this is a wakeup signal sent via IPI. + // Assume this is a barrier wakeup signal sent via IPI. + return; + } + // Catch the rare case that a core will fail to reach the HLT instruction before + // its wakeup signal arrives. The barrier code contains an atomic decrement, a JE + // instruction (two bytes), and a HLT instruction (one byte). The atomic decrement + // must have completed if another core has reached the point of sending the wakeup + // signals, so we should find the HLT opcode either at pc[0] or at pc[2]. If we find + // it, adjust the interrupt return address to point to the following instruction. + if (pc[0] == HLT_OPCODE || (pc[0] == JE_OPCODE && pc[2] == HLT_OPCODE)) { + uintptr_t *return_addr; + if (cpuid_info.flags.lm == 1) { + return_addr = (uintptr_t *)(trap_regs->sp - 40); + } else { + return_addr = (uintptr_t *)(trap_regs->sp - 12); + } + if (pc[2] == HLT_OPCODE) { + *return_addr += 3; + } else { + *return_addr += 1; + } return; } #if REPORT_PARITY_ERRORS diff --git a/lib/barrier.c b/lib/barrier.c index 9a4983a..17b8eb8 100644 --- a/lib/barrier.c +++ b/lib/barrier.c @@ -66,10 +66,29 @@ void barrier_halt_wait(barrier_t *barrier) local_flag_t *waiting_flags = local_flags(barrier->flag_num); int my_cpu = smp_my_cpu_num(); waiting_flags[my_cpu].flag = true; - if (__sync_sub_and_fetch(&barrier->count, 1) != 0) { - __asm__ __volatile__ ("hlt"); - return; - } + // + // There is a small window of opportunity for the wakeup signal to arrive + // between us decrementing the barrier count and halting. So code the + // following in assembler, both to ensure the window of opportunity is as + // small as possible, and also to allow us to detect and skip over the + // halt in the interrupt handler. + // + // if (__sync_sub_and_fetch(&barrier->count, 1) != 0) { + // __asm__ __volatile__ ("hlt"); + // return; + // } + // + __asm__ goto ("\t" + "lock decl %0 \n\t" + "je 0f \n\t" + "hlt \n\t" + "jmp %l[end] \n" + "0: \n" + : /* no outputs */ + : "m" (barrier->count) + : /* no clobbers */ + : end + ); // Last one here, so reset the barrier and wake the others. barrier->count = barrier->num_threads; __sync_synchronize(); @@ -80,4 +99,6 @@ void barrier_halt_wait(barrier_t *barrier) smp_send_nmi(cpu_num); } } +end: + return; }