From dcac5270687b5cd991f69ac44b9fbc78d6a5fbca Mon Sep 17 00:00:00 2001
From: Martin Whitaker
Date: Wed, 2 Feb 2022 12:20:39 +0000
Subject: [PATCH] Don't make assumptions about usable memory.

When using a legacy BIOS, the memory regions used by the BIOS are well
defined. This is not the case when using a UEFI BIOS. So include the
stack area in the BSS so the loader knows how much memory to allocate,
and check we have space to relocate the program to either low or high
memory.

There are still some assumptions in the USB driver code that need to
be fixed.
---
 app/main.c                           | 60 ++++++++++++++++++++++++----
 boot/boot.h                          | 12 ++++++
 boot/startup32.S                     | 14 +++++--
 boot/startup64.S                     | 14 +++++--
 build32/ldscripts/memtest_shared.lds |  2 +
 build64/ldscripts/memtest_shared.lds |  2 +
 system/smp.h                         |  6 +--
 7 files changed, 92 insertions(+), 18 deletions(-)

diff --git a/app/main.c b/app/main.c
index a6d97bb..1e63e85 100644
--- a/app/main.c
+++ b/app/main.c
@@ -64,6 +64,9 @@ static volatile int init_state = 0;
 
 static int num_enabled_cpus = 1;
 
+static uintptr_t low_load_addr;
+static uintptr_t high_load_addr;
+
 static barrier_t *start_barrier = NULL;
 static spinlock_t *start_mutex = NULL;
 
@@ -122,7 +125,8 @@ static void run_at(uintptr_t addr, int my_cpu)
     uintptr_t *new_start_addr = (uintptr_t *)(addr + startup - _start);
 
     if (my_cpu == 0) {
-        memmove((void *)addr, &_start, _end - _start);
+        // Copy the program code and all data except the stacks.
+        memcpy((void *)addr, &_start, _stacks - _start);
     }
     BARRIER;
 
@@ -134,6 +138,33 @@ static void run_at(uintptr_t addr, int my_cpu)
     goto *new_start_addr;
 }
 
+static bool set_load_addr(uintptr_t *load_addr, size_t program_size, uintptr_t lower_limit, uintptr_t upper_limit)
+{
+    uintptr_t current_start = (uintptr_t)_start;
+    if (current_start >= lower_limit && current_start < upper_limit) {
+        *load_addr = current_start;
+        return true;
+    }
+
+    for (int i = 0; i < pm_map_size; i++) {
+        uintptr_t try_start = pm_map[i].start << PAGE_SHIFT;
+        uintptr_t try_limit = pm_map[i].end << PAGE_SHIFT;
+        if (try_start == 0) try_start = 0x1000;
+        uintptr_t try_end = try_start + program_size;
+        if (try_end > try_limit) continue;
+
+        if (try_start >= upper_limit) break;
+        if (try_end < lower_limit) continue;
+
+        *load_addr = try_start;
+        return true;
+    }
+
+    enable_trace = true;
+    trace(0, "Insufficient free space in range 0x%x to 0x%x", lower_limit, upper_limit - 1);
+    return false;
+}
+
 static void global_init(void)
 {
     floppy_off();
@@ -192,12 +223,25 @@ static void global_init(void)
         set_scroll_lock(true);
     }
 
+    size_t program_size = (_stacks - _start) + BSP_STACK_SIZE + (num_enabled_cpus - 1) * AP_STACK_SIZE;
+
+    bool load_addr_ok = set_load_addr(& low_load_addr, program_size, 0, SIZE_C(1,MB))
+                     && set_load_addr(&high_load_addr, program_size, SIZE_C(1,MB), SIZE_C(2,GB));
+
+    trace(0, "program size %ikB", (int)(program_size / 1024));
+    trace(0, " low_load_addr %0*x", 2*sizeof(uintptr_t), low_load_addr);
+    trace(0, "high_load_addr %0*x", 2*sizeof(uintptr_t), high_load_addr);
     for (int i = 0; i < pm_map_size; i++) {
         trace(0, "pm %0*x - %0*x", 2*sizeof(uintptr_t), pm_map[i].start, 2*sizeof(uintptr_t), pm_map[i].end);
     }
     if (rsdp_addr != 0) {
         trace(0, "ACPI RSDP found in %s at %0*x", rsdp_source, 2*sizeof(uintptr_t), rsdp_addr);
     }
+    if (!load_addr_ok) {
+        trace(0, "Cannot relocate program. Press any key to reboot...");
+        while (get_key() == 0) { }
+        reboot();
+    }
 
     start_barrier = smp_alloc_barrier(1);
     run_barrier = smp_alloc_barrier(1);
@@ -288,7 +332,7 @@ static void test_all_windows(int my_cpu)
                 // Relocation may disrupt the test.
                 window_num = 1;
             }
-            if (window_num == 0 && pm_limit_lower >= HIGH_LOAD_ADDR) {
+            if (window_num == 0 && pm_limit_lower >= high_load_addr) {
                 // Avoid unnecessary relocation.
                 window_num = 1;
             }
@@ -297,12 +341,12 @@ static void test_all_windows(int my_cpu)
 
         // Relocate if necessary.
         if (window_num > 0) {
-            if (!dummy_run && (uintptr_t)&_start != LOW_LOAD_ADDR) {
-                run_at(LOW_LOAD_ADDR, my_cpu);
+            if (!dummy_run && (uintptr_t)&_start != low_load_addr) {
+                run_at(low_load_addr, my_cpu);
             }
         } else {
-            if (!dummy_run && (uintptr_t)&_start != HIGH_LOAD_ADDR) {
-                run_at(HIGH_LOAD_ADDR, my_cpu);
+            if (!dummy_run && (uintptr_t)&_start != high_load_addr) {
+                run_at(high_load_addr, my_cpu);
             }
         }
 
@@ -311,10 +355,10 @@ static void test_all_windows(int my_cpu)
             switch (window_num) {
               case 0:
                 window_start = 0;
-                window_end = (HIGH_LOAD_ADDR >> PAGE_SHIFT);
+                window_end = (high_load_addr >> PAGE_SHIFT);
                 break;
               case 1:
-                window_start = (HIGH_LOAD_ADDR >> PAGE_SHIFT);
+                window_start = (high_load_addr >> PAGE_SHIFT);
                 window_end = VM_WINDOW_SIZE;
                 break;
               default:
diff --git a/boot/boot.h b/boot/boot.h
index 7bdaa23..321b9ad 100644
--- a/boot/boot.h
+++ b/boot/boot.h
@@ -8,9 +8,19 @@
  * Copyright (C) 2020-2022 Martin Whitaker.
*/ +/* + * NOTE: Increasing the value of MAX_APS would require: + * - relocating the stacks when the program is loaded in low memory + * - modifying smp.c to support the x2APIC architecture + * - adjusting the display if more than 3 digits are needed for CPU IDs + */ +#define MAX_APS 255 /* Maximum number of active APs */ + #define BSP_STACK_SIZE 16384 /* Stack size for the BSP */ #define AP_STACK_SIZE 1024 /* Stack size for each AP */ +#define STACKS_SIZE (BSP_STACK_SIZE + MAX_APS * AP_STACK_SIZE) + #define LOW_LOAD_ADDR 0x00010000 /* The low load address for the main program */ #define HIGH_LOAD_ADDR 0x00100000 /* The high load address for the main program */ @@ -70,6 +80,8 @@ extern uint32_t ap_startup_addr; extern uint8_t ap_trampoline_end[]; +extern uint8_t _stacks[]; + extern uint8_t _end[]; #endif /* ! __ASSEMBLY__ */ diff --git a/boot/startup32.S b/boot/startup32.S index 177dd2d..801096d 100644 --- a/boot/startup32.S +++ b/boot/startup32.S @@ -98,14 +98,13 @@ startup: movl %esi, boot_params_addr@GOTOFF(%ebx) 1: - # Pick the correct stack. The stacks are allocated immediately - # after the end of the loaded program, BSP first, then APs. + # Pick the correct stack. call smp_my_cpu_num movl $AP_STACK_SIZE, %edx mul %edx addl $BSP_STACK_SIZE, %eax - leal _end@GOTOFF(%ebx), %esp + leal _stacks@GOTOFF(%ebx), %esp addl %eax, %esp # Release the mutex that protects the startup stack. @@ -582,3 +581,12 @@ startup_stack_base: startup_stack_top: .previous + +# Main stack area. + + .section "stacks", "aw", @progbits + .align 256 + + . = . + STACKS_SIZE + + .previous diff --git a/boot/startup64.S b/boot/startup64.S index 69656a5..b721c2c 100644 --- a/boot/startup64.S +++ b/boot/startup64.S @@ -149,15 +149,14 @@ startup: jc 0b leaq startup_stack_top(%rip), %rsp - # Pick the correct stack. The stacks are allocated immediately - # after the end of the loaded program, BSP first, then APs. + # Pick the correct stack. 
xorq %rax, %rax call smp_my_cpu_num movl $AP_STACK_SIZE, %edx mul %edx addq $BSP_STACK_SIZE, %rax - leaq _end(%rip), %rsp + leaq _stacks(%rip), %rsp addq %rax, %rsp # Release the mutex that protects the startup stack. @@ -618,3 +617,12 @@ startup_stack_base: startup_stack_top: .previous + +# Main stack area. + + .section ".stacks", "aw", @nobits + .align 256 + + . = . + STACKS_SIZE + + .previous diff --git a/build32/ldscripts/memtest_shared.lds b/build32/ldscripts/memtest_shared.lds index 3a26a45..5cfcc48 100644 --- a/build32/ldscripts/memtest_shared.lds +++ b/build32/ldscripts/memtest_shared.lds @@ -45,6 +45,8 @@ SECTIONS { *(.bss) *(.bss.*) *(COMMON) + _stacks = .; + *(.stacks) /* _end must be at least 256 byte aligned */ . = ALIGN(256); _end = .; diff --git a/build64/ldscripts/memtest_shared.lds b/build64/ldscripts/memtest_shared.lds index 031e3d1..ef8614c 100644 --- a/build64/ldscripts/memtest_shared.lds +++ b/build64/ldscripts/memtest_shared.lds @@ -45,6 +45,8 @@ SECTIONS { *(.bss) *(.bss.*) *(COMMON) + _stacks = .; + *(.stacks) /* _end must be at least 256 byte aligned */ . = ALIGN(256); _end = .; diff --git a/system/smp.h b/system/smp.h index f5df8bc..cc495b5 100644 --- a/system/smp.h +++ b/system/smp.h @@ -16,11 +16,9 @@ #include "spinlock.h" /* - * The maximum number of CPU cores that can be used. Currently this is limited - * to 256 both by the number of available APIC IDs and the need to fit both - * the program and the CPU stacks in low memory. + * The maximum number of CPU cores that can be used. */ -#define MAX_CPUS 256 +#define MAX_CPUS (1 + MAX_APS) /* * The current state of a CPU core.