mirror of
https://github.com/memtest86plus/memtest86plus.git
synced 2024-11-30 11:03:48 -06:00
4100a44b12
Because we start the APs sequentially, it is unlikely they will coincide for the brief period that they use the temporary startup stack, but we should guard against it. This allows us to remove the mutex around the restart of each AP when relocating, which should improve test times.
621 lines
11 KiB
ArmAsm
621 lines
11 KiB
ArmAsm
// SPDX-License-Identifier: GPL-2.0
|
|
//
|
|
// startup64.S contains the 64-bit startup code for both the BSP and APs.
|
|
// It initialises stacks, memory management, and exception handling, clears
|
|
// the BSS, completes relocation, and finally calls the main application.
|
|
// It supports both the 32-bit and 64-bit Linux boot protocols and EFI boot
|
|
// for the first boot of the BSP.
|
|
//
|
|
// Copyright (C) 2020-2022 Martin Whitaker.
|
|
//
|
|
// Derived from memtest86+ head.S:
|
|
//
|
|
// linux/boot/head.S
|
|
// Copyright (C) 1991, 1992 Linus Torvalds
|
|
// 1-Jan-96 Modified by Chris Brady for use as a boot/loader for MemTest-86.
|
|
// Set up the memory management for flat non-paged linear addressing.
|
|
// 17 May 2004 : Added X86_PWRCAP for AMD64 (Memtest86+ - Samuel D.)
|
|
|
|
#define __ASSEMBLY__
|
|
|
|
#include "boot.h"
|
|
|
|
#define NUM_INT_VEC 20
|
|
|
|
.text
|
|
.code32
|
|
|
|
# startup32 - the Linux 32-bit boot protocol entry point.
#
# On entry %esi holds the boot params pointer. The load address is read
# from the code32_start field of the boot params into %ebx, and every
# symbol is then addressed as (symbol - startup32)(%ebx). The code links
# the top levels of the page tables, turns on PAE, long mode and paging,
# loads the GDT, and far jumps to startup through KERNEL_CS.

# Helper: store (address of \table) + 0x3 in the low 32 bits of the entry
# at byte offset \slot from %edx. The high 32 bits of the entry are not
# written. Expects %ebx = load address. Clobbers %eax and the flags.

	.macro	pdp_entry32 table, slot
	leal	(\table - startup32)(%ebx), %eax
	addl	$0x3, %eax
	movl	%eax, \slot(%edx)
	.endm

	.globl	startup32
startup32:
	cld
	cli

	# Fetch the load address.

	movl	0x214(%esi), %ebx	# bootparams.code32_start

	# Keep the boot params pointer for later.

	movl	%esi, (boot_params_addr - startup32)(%ebx)

	# Run on the startup stack until the correct one is picked.

	leal	(startup_stack_top - startup32)(%ebx), %esp

	# Link the page tables: pml4[0] -> pdp, pdp[0..3] -> pd0..pd3.

	leal	(pml4 - startup32)(%ebx), %ecx
	leal	(pdp - startup32)(%ebx), %edx
	movl	%edx, %eax
	addl	$0x3, %eax
	movl	%eax, 0(%ecx)
	pdp_entry32	pd0, 0
	pdp_entry32	pd1, 8
	pdp_entry32	pd2, 16
	pdp_entry32	pd3, 24

	# Point CR3 at the pml4.

	movl	%ecx, %cr3

	# Turn on PAE (CR4 bit 5).

	movl	%cr4, %eax
	orl	$0x20, %eax
	movl	%eax, %cr4

	# Turn on long mode (bit 8 of MSR 0xc0000080).

	movl	$0xc0000080, %ecx
	rdmsr
	orl	$0x00000100, %eax
	wrmsr

	# Turn on paging and protection (CR0 bits 31 and 0).

	movl	%cr0, %eax
	orl	$0x80000001, %eax
	movl	%eax, %cr0

	# Fill in the base address field of the 64-bit GDT descriptor.

	leal	(gdt - startup32)(%ebx), %eax
	movl	%eax, 2 + (gdt_descr - startup32)(%ebx)

	# Load the GDT, then build a far pointer just below %esp (a 32-bit
	# offset followed by a 16-bit selector) and jump through it into long
	# mode.

	lgdt	(gdt_descr - startup32)(%ebx)
	leal	(startup - startup32)(%ebx), %eax
	movl	%eax, -6(%esp)
	movw	$KERNEL_CS, -2(%esp)
	ljmp	*-6(%esp)
|
|
|
|
.code64
|
|
|
|
# efi_boot - the EFI PE32+ boot entry point.
#
# Takes the EFI image handle in %rcx and the EFI system table pointer in
# %rdx, moves them into %rdi and %rsi, clears %rdx (the boot params
# pointer, 0 = not yet allocated) and continues at efi_handover. The code
# is pinned at offset 0x1e0 and has to end before the next entry point at
# offset 0x200.

	.org	0x1e0
	.globl	efi_boot
efi_boot:
	movq	%rcx, %rdi	# the EFI image handle
	movq	%rdx, %rsi	# the EFI system table pointer
	xorl	%edx, %edx	# the boot params pointer (0 = not yet allocated)
	jmp	efi_handover
|
|
|
|
# startup64 - the Linux 64-bit boot protocol entry point.
#
# Takes the boot params pointer in %rsi, stores it in boot_params_addr and
# continues at startup. The code is pinned at offset 0x200.

	.org	0x200
	.globl	startup64
startup64:
	cli
	cld

	# Keep the boot params pointer for later.

	movq	%rsi, boot_params_addr(%rip)

	jmp	startup
|
|
|
|
# efi_handover - the Linux 64-bit EFI handover entry point.
#
# Aligns the stack to 16 bytes, calls efi_setup and stores its return
# value (%rax) as the boot params pointer. There is no jump at the end:
# execution falls through into startup, which follows directly. The code
# is pinned at offset 0x210.
# NOTE(review): efi_setup presumably receives the values efi_boot placed
# in %rdi, %rsi and %rdx - confirm against its C prototype.

	.org	0x210
	.globl	efi_handover
efi_handover:
	andq	$-16, %rsp
	call	efi_setup

	# Keep the boot params pointer for later.

	movq	%rax, boot_params_addr(%rip)
|
|
|
|
# startup - the 64-bit entry point for AP boot and for restart after
# relocation. startup32 and startup64 also jump here, and efi_handover
# falls through into it.

	.globl	startup
startup:
	# Use the startup stack until we pick the correct one. That stack is
	# shared, so bit 0 of startup_stack_mutex is taken as a spin lock
	# first. PAUSE is executed between failed attempts, as recommended
	# for spin-wait loops.

	jmp	1f
0:	pause
1:	lock btsl $0, startup_stack_mutex(%rip)
	jc	0b
	leaq	startup_stack_top(%rip), %rsp

	# Pick the correct stack. The stacks are allocated immediately
	# after the end of the loaded program, BSP first, then APs:
	#   %rsp = _end + BSP_STACK_SIZE + smp_my_pcpu_num() * AP_STACK_SIZE

	xorq	%rax, %rax
	call	smp_my_pcpu_num
	movl	$AP_STACK_SIZE, %edx
	mull	%edx			# %eax = CPU number * AP_STACK_SIZE
	addq	$BSP_STACK_SIZE, %rax
	leaq	_end(%rip), %rsp
	addq	%rax, %rsp

	# Release the mutex that protects the startup stack. It is no
	# longer in use now that %rsp points at the per-CPU stack.

	movl	$0, startup_stack_mutex(%rip)
|
|
|
|
# Link the top levels of the page tables: pml4[0] points at pdp and
# pdp[0..3] point at pd0..pd3. Each entry is the table address plus 0x3.

	leaq	pml4(%rip), %rcx
	leaq	pdp(%rip), %rdx
	leaq	0x3(%rdx), %rax
	movq	%rax, (%rcx)
	leaq	0x3 + pd0(%rip), %rax
	movq	%rax, (%rdx)
	leaq	0x3 + pd1(%rip), %rax
	movq	%rax, 8(%rdx)
	leaq	0x3 + pd2(%rip), %rax
	movq	%rax, 16(%rdx)
	leaq	0x3 + pd3(%rip), %rax
	movq	%rax, 24(%rdx)

	# Point CR3 at the pml4.

	movq	%rcx, %cr3
|
|
|
|
# Fill in the base address field of the GDT descriptor and load the GDT.

	leaq	gdt(%rip), %rax
	movq	%rax, 2 + gdt_descr(%rip)
	lgdt	gdt_descr(%rip)

	# Reload %cs by far jumping to the next instruction through a far
	# pointer built just below %rsp (a 32-bit offset followed by the
	# KERNEL_CS selector).
	# NOTE(review): only the low 32 bits of the address of flush are
	# stored, so this relies on the code being located below 4 GB.

	leaq	flush(%rip), %rax
	movl	%eax, -6(%rsp)
	movw	$KERNEL_CS, -2(%rsp)
	ljmp	*-6(%rsp)

	# Reload the data segment registers with KERNEL_DS.

flush:	movl	$KERNEL_DS, %eax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %fs
	movw	%ax, %gs
	movw	%ax, %ss
|
|
|
|
# Initialise the IDT. One 16-byte interrupt gate is written for each of
# the first NUM_INT_VEC vectors. The handlers vec0, vec1, ... are equally
# spaced, so the handler address advances by (vec1 - vec0) per gate.
#   %rdi = gate being written
#   %rsi = handler address
#   %ecx = number of gates still to write
# The gates are addressed through the 64-bit %rdi so that the pointer is
# never truncated to 32 bits.

	leaq	idt(%rip), %rdi
	leaq	vec0(%rip), %rsi
	movl	$NUM_INT_VEC, %ecx
0:	movq	%rsi, %rdx
	movl	$(KERNEL_CS << 16), %eax	# selector (cs) in bits 31:16
	movw	%dx, %ax			# handler address bits 15:0
	movw	$0x8E00, %dx			# interrupt gate - dpl=0, present
	movl	%eax, (%rdi)			# selector : address 15:0
	movl	%edx, 4(%rdi)			# address 31:16 : gate type
	shrq	$32, %rdx
	movl	%edx, 8(%rdi)			# address 63:32
	movl	$0, 12(%rdi)			# last 4 bytes of the gate are zero
	addq	$(vec1-vec0), %rsi
	addq	$16, %rdi
	decl	%ecx
	jnz	0b

	# Fill in the base address field of the IDT descriptor.

	leaq	idt(%rip), %rax
	movq	%rax, 2 + idt_descr(%rip)

	# Load the IDT.

	lidt	idt_descr(%rip)
|
|
|
|
# Zero the BSS (first boot only). first_boot is cleared afterwards so
# that later passes through startup skip this step.
# NOTE(review): the loop stores 8 bytes at a time and only stops when the
# remaining count reaches exactly zero, so it assumes (_end - _bss) is a
# non-zero multiple of 8 - confirm against the linker script.

	cmpl	$1, first_boot(%rip)
	jne	.Lbss_done
	xorl	%eax, %eax
	leaq	_bss(%rip), %rdi
	leaq	_end(%rip), %rcx
	subq	%rdi, %rcx		# %rcx = bytes left to clear
.Lbss_clear:
	movq	%rax, (%rdi)
	addq	$8, %rdi
	subq	$8, %rcx
	jnz	.Lbss_clear
	movl	$0, first_boot(%rip)
.Lbss_done:
|
|
# Initialise the FPU.

	finit

#if 0
	# Enable SSE.

	movq	%cr0, %rax
	andw	$0xfffb, %ax		# clear coprocessor emulation bit
	orw	$0x0002, %ax		# set coprocessor monitoring bit
	mov	%rax, %cr0
	movq	%cr4, %rax
	orw	$0x0600, %ax		# set OSFXSR and OSXMMEXCPT
	movq	%rax, %cr4
#endif

	# Call the dynamic linker to fix up the addresses in the GOT.

	call	reloc

	# Run the application.

	call	main

	# In case main returns, simulate an exception: push RFLAGS, CS and a
	# return address (the call pushes the address of the label after
	# it), then a zero for the error code slot and the made-up vector
	# number 257, and enter the common handler.

	pushfq
	movl	%cs, %eax		# zero-extends the selector into %rax
	pushq	%rax
	call	0f
0:	pushq	$0			# error code
	pushq	$257			# vector
	jmp	int_handler
|
|
|
|
# Individual interrupt vector handlers, vec0 to vec19. Each one makes sure
# an error code is on the stack (pushing 0 when the CPU has not supplied
# one), pushes its vector number and jumps to int_handler. The stubs have
# to be equally spaced for the IDT initialisation loop, which steps by
# (vec1 - vec0), so two nops take the place of the push in the stubs for
# vectors that come with an error code.

	.macro	int_vec num, has_error_code=0
vec\num:
	.if	\has_error_code
	nop;nop				# error code already provided
	.else
	pushq	$0			# error code
	.endif
	pushq	$\num			# vector
	jmp	int_handler
	.endm

	int_vec	0
	int_vec	1
	int_vec	2
	int_vec	3
	int_vec	4
	int_vec	5
	int_vec	6
	int_vec	7
	int_vec	8, 1
	int_vec	9
	int_vec	10, 1
	int_vec	11, 1
	int_vec	12, 1
	int_vec	13, 1
	int_vec	14, 1
	int_vec	15
	int_vec	16
	int_vec	17, 1
	int_vec	18
	int_vec	19
|
|
|
|
# int_handler - the common interrupt handler code. Saves the register
# state on the stack and passes a pointer to it to the application
# interrupt handler.
#
# Stack contents seen by interrupt(), from the address in %rdi
# upwards, in 8-byte slots:
#   ss, es, ds, stack address, rbp, rsi, rdi, rdx, rcx, rbx, rax,
#   vector, error code, followed by what was on the stack before the
#   vector stub ran (RIP, CS, RFLAGS, ...).

int_handler:
	pushq	%rax
	pushq	%rbx
	pushq	%rcx
	pushq	%rdx
	pushq	%rdi
	pushq	%rsi
	pushq	%rbp

	# Record the stack address 96 bytes above the current %rsp, i.e.
	# just past the 7 registers saved above, the vector, the error code
	# and the RIP, CS and RFLAGS slots.
	# NOTE(review): in 64-bit mode the CPU also pushes RSP and SS for
	# a real interrupt, so this is the address of that saved RSP slot
	# rather than the interrupted stack pointer - confirm what
	# interrupt() expects.

	leaq	96(%rsp), %rax
	pushq	%rax

	# Save the segment selectors, each zero-extended to 64 bits.

	movl	%ds, %eax
	pushq	%rax
	movl	%es, %eax
	pushq	%rax
	movl	%ss, %eax
	pushq	%rax

	movq	%rsp, %rdi		# pointer to trap regs struct on the stack
	call	interrupt

	# Drop the four slots pushed last (ss, es, ds, stack address).

	addq	$32, %rsp

	popq	%rbp
	popq	%rsi
	popq	%rdi
	popq	%rdx
	popq	%rcx
	popq	%rbx
	popq	%rax

	# Drop the vector and the error code, then return.

	addq	$16, %rsp
	iretq
|
|
|
|
# The interrupt descriptor table and its descriptor. The descriptor holds
# a 2-byte size (table size in bytes minus 1) followed by an 8-byte table
# address. The padding word after the 4-byte alignment places the address
# field on a 4-byte boundary. The table has room for NUM_INT_VEC gates of
# 16 bytes each.

	.balign	4
	.short	0			# for alignment
idt_descr:
	.short	idt_end - idt - 1	# size
	.quad	0			# addr: filled in at run time

idt:
	.skip	NUM_INT_VEC * 16, 0	# filled in at run time
idt_end:
|
|
|
|
# The global descriptor table and its descriptor. The descriptor holds a
# 2-byte size (table size in bytes minus 1) followed by an 8-byte table
# address. The table has four 8-byte entries. The selector of an entry is
# its byte offset from gdt.

	.short	0			# for alignment
gdt_descr:
	.short	gdt_end - gdt - 1	# size
	.quad	0			# addr: filled in at run time

	.balign	4
	.globl	gdt
gdt:
	.quad	0			# 0x00 NULL descriptor
	.quad	0			# 0x08 not used
	.quad	0x00209a0000000000	# 0x10 64-bit code at 0x000000
	.quad	0x0000920000000000	# 0x18 64-bit data at 0x000000

	.globl	gdt_end
gdt_end:
|
|
|
|
# Macros used to build the statically initialised page directories. They
# are defined after switching to the data section.

	.data

# ptes64 start, count - emit (8 * count) consecutive 8-byte page directory
# entries. The first entry is (start + 0x83) and each following entry is
# higher by 0x200000 (2 MB). With the default count of 64 that is 512
# entries, which fill one 4096-byte page directory and span 1 GB. The
# macro recurses once per group of 8 entries.

	.macro	ptes64 start, count=64
	.quad	\start + 0x0000000 + 0x83
	.quad	\start + 0x0200000 + 0x83
	.quad	\start + 0x0400000 + 0x83
	.quad	\start + 0x0600000 + 0x83
	.quad	\start + 0x0800000 + 0x83
	.quad	\start + 0x0a00000 + 0x83
	.quad	\start + 0x0c00000 + 0x83
	.quad	\start + 0x0e00000 + 0x83
	.ifne	\count-1
	ptes64	"(\start+0x01000000)",\count-1
	.endif
	.endm

# maxdepth depth - a macro that only recurses (depth - 1) times and emits
# nothing. It is invoked once below with the default depth of 1.
# NOTE(review): its purpose is not evident from this file.

	.macro	maxdepth depth=1
	.ifne	\depth-1
	maxdepth \depth-1
	.endif
	.endm

	maxdepth
|
|
|
|
# The level 4 page map table. Only entry 0 is used, and it is filled in
# at run time.

	.balign	4096
	.globl	pml4
pml4:
	.quad	0			# filled in at run time

# Page Directory Pointer Table:
# Four entries, pointing to the Page Directory Tables.

	.balign	4096
	.globl	pdp
pdp:
	.fill	4, 8, 0			# filled in at run time

# Page Directory Tables:
# There are 4 tables. The first two map the first 2 GB of memory. The third
# is used with PAE to map the rest of memory in 1 GB segments. The fourth is
# reserved for mapping the video frame buffer. We use 2 MB pages so only the
# Page Directory Table is used (no page tables). As assembled here, pd0 to
# pd3 hold identity mappings for the four consecutive 1 GB ranges starting
# at 0, 0x40000000, 0x80000000 and 0xC0000000.

	.balign	4096
	.globl	pd0
pd0:
	ptes64	0x0000000000000000

	.balign	4096
	.globl	pd1
pd1:
	ptes64	0x0000000040000000

	.balign	4096
	.globl	pd2
pd2:
	ptes64	0x0000000080000000

	.balign	4096
	.globl	pd3
pd3:
	ptes64	0x00000000C0000000

	.previous
|
|
|
|
# ap_trampoline is the entry point for CPUs other than the bootstrap
# CPU (BSP). It gets copied to a page in low memory, to enable the APs
# to boot when the main program has been loaded in high memory. It runs
# in 16-bit mode with %ds set equal to %cs, so its own data is addressed
# as (symbol - ap_trampoline). ap_startup_addr has to hold the address of
# startup before an AP is started.

	.code16
	.align	4

	.globl	ap_trampoline
ap_trampoline:
	movw	%cs, %ax
	movw	%ax, %ds

	# Patch the jump address: the 32-bit offset of the far jump at
	# ap_jump starts 2 bytes into that instruction.

	movl	(ap_startup_addr - ap_trampoline), %ebx
	movl	%ebx, (ap_jump - ap_trampoline + 2)

	# Patch and load the GDT descriptor. Its base address field is set
	# to the address of the main GDT (startup address + (gdt - startup)).
	# The 32-bit form of lgdt is needed here: with a 16-bit operand size
	# only the low 24 bits of the base address are loaded, which would
	# break as soon as the GDT sits at or above 16 MB.

	movl	%ebx, %eax
	addl	$(gdt - startup), %eax
	movl	%eax, (ap_gdt_descr - ap_trampoline + 2)
	lgdtl	ap_gdt_descr - ap_trampoline

	# Set the page directory base address.

	movl	%ebx, %eax
	addl	$(pml4 - startup), %eax
	movl	%eax, %cr3

	# Enable PAE.

	movl	%cr4, %eax
	orl	$0x20, %eax
	movl	%eax, %cr4

	# Enable long mode.

	movl	$0xc0000080, %ecx
	rdmsr
	orl	$0x00000100, %eax
	wrmsr

	# Enable paging and protection.

	movl	%cr0, %eax
	orl	$0x80000001, %eax
	movl	%eax, %cr0

	# Jump to the 64-bit entry point. The zero offset below is replaced
	# at run time by the patch code above.

ap_jump:
	data32 ljmp $KERNEL_CS, $0

	.align	4
	.word	0			# for alignment
ap_gdt_descr:
	.word	gdt_end - gdt - 1	# gdt limit
	.long	0			# gdt base - filled in at run time

	.globl	ap_startup_addr
ap_startup_addr:
	.long	0			# filled in at run time

	.globl	ap_trampoline_end
ap_trampoline_end:
|
|
|
|
.previous
|
|
|
|
# Variables.
#
#   boot_params_addr    - the boot params pointer saved by the entry points
#   startup_stack_mutex - bit 0 is set while a CPU is using the startup stack
#   first_boot          - 1 until the BSS has been cleared, 0 afterwards

	.data
	.balign	4

	.globl	boot_params_addr
boot_params_addr:
	.quad	0

startup_stack_mutex:
	.int	0

first_boot:
	.int	1

	.previous
|
|
|
|
# Startup stack: 64 bytes in the BSS, aligned to 16 bytes. The stack
# pointer is set to startup_stack_top, the address just past the
# reserved space.

	.bss
	.balign	16

startup_stack_base:
	.skip	64
startup_stack_top:

	.previous
|