memtest86plus/boot/startup64.S
2020-07-03 22:42:45 +01:00

618 lines
11 KiB
ArmAsm

// SPDX-License-Identifier: GPL-2.0
//
// startup64.S contains the 64-bit startup code for both the BSP and APs.
// It initialises stacks, memory management, and exception handling, clears
// the BSS, completes relocation, and finally calls the main application.
// It supports both the 32-bit and 64-bit Linux boot protocols and EFI boot
// for the first boot of the BSP.
//
// Copyright (C) 2020 Martin Whitaker.
//
// Derived from memtest86+ head.S:
//
// linux/boot/head.S
// Copyright (C) 1991, 1992 Linus Torvalds
// 1-Jan-96 Modified by Chris Brady for use as a boot/loader for MemTest-86.
// Set up the memory management for flat non-paged linear addressing.
// 17 May 2004 : Added X86_PWRCAP for AMD64 (Memtest86+ - Samuel D.)
#define __ASSEMBLY__
#include "boot.h"
#define NUM_INT_VEC 20
# Everything from here up to the .code64 directive is assembled as 32-bit
# code.
	.text
	.code32

# The Linux 32-bit boot entry point.
#
# On entry:
#   %esi = the boot params pointer
#
# The load address is read from offset 0x214 of the boot params. Because the
# image may not be running at its link address, every data reference is
# formed as (symbol - startup32) relative to that load address.
#
# The routine fills in the top-level page tables, enables PAE, long mode and
# paging, loads the GDT, and far-jumps to the common 64-bit entry point
# (startup). It does not return.

	.globl	startup32
startup32:
	cld
	cli

	# Use a temporary stack until we pick the correct one. We can
	# safely use the high address, even if we are loaded low. The
	# offset is taken relative to startup32, like every other image
	# offset in this routine, so that %esp is HIGH_LOAD_ADDR plus the
	# image offset of startup_stack_top.

	movl	$(HIGH_LOAD_ADDR + startup_stack_top - startup32), %esp

	# Get the load address.

	movl	0x214(%esi), %ebx

	# Save the boot params pointer. Only the low 32 bits of the 64-bit
	# variable are written here.

	movl	%esi, (boot_params_addr - startup32)(%ebx)

	# Initialise the pml4 and pdp tables. Each entry is the address of
	# the next-level table plus 0x3 (the present and writable bits).
	# Only the low 32 bits of each 8-byte entry are written.

	leal	(pml4 - startup32)(%ebx), %ecx
	leal	(pdp - startup32)(%ebx), %edx
	movl	%edx, %eax
	addl	$0x3, %eax
	movl	%eax, 0(%ecx)			# pml4[0] -> pdp
	leal	(pd0 - startup32)(%ebx), %eax
	addl	$0x3, %eax
	movl	%eax, 0(%edx)			# pdp[0] -> pd0
	leal	(pd1 - startup32)(%ebx), %eax
	addl	$0x3, %eax
	movl	%eax, 8(%edx)			# pdp[1] -> pd1
	leal	(pd2 - startup32)(%ebx), %eax
	addl	$0x3, %eax
	movl	%eax, 16(%edx)			# pdp[2] -> pd2
	leal	(pd3 - startup32)(%ebx), %eax
	addl	$0x3, %eax
	movl	%eax, 24(%edx)			# pdp[3] -> pd3

	# Set the page directory base address.

	movl	%ecx, %cr3

	# Enable PAE.

	movl	%cr4, %eax
	orl	$0x20, %eax
	movl	%eax, %cr4

	# Enable long mode.

	movl	$0xc0000080, %ecx
	rdmsr
	orl	$0x00000100, %eax
	wrmsr

	# Enable paging and protection.

	movl	%cr0, %eax
	orl	$0x80000001, %eax
	movl	%eax, %cr0

	# Initialise the 64-bit GDT descriptor. The address of the GDT is
	# stored in the base field, which starts 2 bytes into the descriptor.

	leal	(gdt - startup32)(%ebx), %eax
	movl	%eax, 2 + (gdt_descr - startup32)(%ebx)

	# Load the GDT and enter long mode. The far pointer used by the
	# indirect jump (32-bit offset followed by 16-bit selector) is
	# built in the 6 bytes just below the stack pointer.

	lgdt	(gdt_descr - startup32)(%ebx)
	leal	(startup - startup32)(%ebx), %eax
	movw	$KERNEL_CS, -2(%esp)
	movl	%eax, -6(%esp)
	ljmp	*-6(%esp)
# From here on the code is assembled as 64-bit code.
	.code64

# efi_boot - the EFI PE32+ boot entry point, placed at offset 0x100.
#
# Copies the two incoming values from %rcx and %rdx into %rdi and %rsi,
# sets %rdx to zero to mark the boot params as not yet allocated, and
# continues at efi_handover.

	.org	0x100
	.globl	efi_boot
efi_boot:
	movq	%rcx, %rdi		# the EFI image handle
	movq	%rdx, %rsi		# the EFI system table pointer
	xorl	%edx, %edx		# the boot params pointer (0 = not yet allocated)
	jmp	efi_handover
# startup64 - the Linux 64-bit boot entry point, placed at offset 0x200.
#
# On entry %rsi holds the boot params pointer. It is stored in
# boot_params_addr before control passes to the common entry point.

	.org	0x200
	.globl	startup64
startup64:
	cli
	cld

	movq	%rsi, boot_params_addr(%rip)	# save the boot params pointer
	jmp	startup
# The Linux 64-bit EFI handover point.
#
# Reached by jump from efi_boot (and presumably directly from an external
# boot loader, since the label is global and sits at a fixed offset).
# The values in %rdi, %rsi and %rdx are left untouched for efi_setup, and
# the value efi_setup returns in %rax is stored as the boot params pointer.
# Execution then falls through into startup.
.org 0x210
.globl efi_handover
efi_handover:
# Round the stack pointer down to a 16-byte boundary before the call.
andq $~0xf, %rsp
call efi_setup
# Save the boot params pointer.
movq %rax, boot_params_addr(%rip)
# The 64-bit entry point for AP boot and for restart after relocation.
# It is also reached from startup32 (far jump), from startup64 (jump), and
# by falling through from efi_handover.
#
# All data references below are %rip-relative, so they do not depend on the
# link address of the image.
.globl startup
startup:
# Use a temporary stack until we pick the correct one.
leaq startup_stack_top(%rip), %rsp
# Pick the correct stack. The value returned by smp_my_pcpu_num is
# multiplied by AP_STACK_SIZE and added to bsp_stack_top. A return value of
# 0 therefore selects the BSP stack, and a value of N selects the top of the
# Nth stack in the AP stack area that starts at bsp_stack_top.
# The multiply is 32-bit: the low half of the product, zero-extended into
# %rax, is used, and the high half in %edx is discarded.
xorq %rax, %rax
call smp_my_pcpu_num
movl $AP_STACK_SIZE, %edx
mul %edx
leaq bsp_stack_top(%rip), %rsp
addq %rax, %rsp
# Initialise the pml4 and pdp tables. Each entry is the address of the
# next-level table plus 0x3 (the present and writable bits). The full
# 8 bytes of each entry are written.
leaq pml4(%rip), %rcx
leaq pdp(%rip), %rdx
movq %rdx, %rax
addq $0x3, %rax
movq %rax, 0(%rcx)
leaq pd0(%rip), %rax
addq $0x3, %rax
movq %rax, 0(%rdx)
leaq pd1(%rip), %rax
addq $0x3, %rax
movq %rax, 8(%rdx)
leaq pd2(%rip), %rax
addq $0x3, %rax
movq %rax, 16(%rdx)
leaq pd3(%rip), %rax
addq $0x3, %rax
movq %rax, 24(%rdx)
# Set the page directory base address.
movq %rcx, %cr3
# Initialise the GDT descriptor. The base address field starts 2 bytes into
# the descriptor, after the 16-bit limit.
leaq gdt(%rip), %rax
movq %rax, 2 + gdt_descr(%rip)
# Load the GDT and the segment registers. %cs is reloaded by an indirect far
# jump through a far pointer built in the 6 bytes just below the stack
# pointer: a 32-bit offset at -6(%rsp) and the selector at -2(%rsp). Only
# the low 32 bits of the address of flush are stored.
# NOTE(review): this presumably relies on the image running below 4 GB -
# confirm.
lgdt gdt_descr(%rip)
leaq flush(%rip), %rax
movw $KERNEL_CS, -2(%rsp)
movl %eax, -6(%rsp)
ljmp *-6(%rsp)
# Load every data segment register with KERNEL_DS.
flush: movw $KERNEL_DS, %ax
movw %ax, %ds
movw %ax, %es
movw %ax, %fs
movw %ax, %gs
movw %ax, %ss
# Initialise the IDT. Each of the NUM_INT_VEC 16-byte gate descriptors is
# filled in with the address of the matching vecN stub:
#   bytes 0-3   = (KERNEL_CS << 16) | handler bits 15:0
#   bytes 4-7   = (handler bits 31:16 << 16) | 0x8E00
#   bytes 8-11  = handler bits 63:32
#   bytes 12-15 = 0
# The stubs are equally spaced, so the handler address advances by
# (vec1 - vec0) per descriptor. The table is addressed through the full
# 64-bit %rdi so that the pointer produced by leaq is never truncated.
#
# Clobbers %rax, %rdx, %rsi, %rdi and %cx.

	leaq	idt(%rip), %rdi			# %rdi = current descriptor
	leaq	vec0(%rip), %rsi		# %rsi = current handler stub
	movw	$NUM_INT_VEC, %cx		# %cx  = descriptors remaining
0:	movq	%rsi, %rdx
	movl	$(KERNEL_CS << 16), %eax	# selector = 0x0010 = cs
	movw	%dx, %ax			# handler bits 15:0
	movw	$0x8E00, %dx			# interrupt gate - dpl=0, present
	movl	%eax, (%rdi)
	movl	%edx, 4(%rdi)
	shrq	$32, %rdx			# handler bits 63:32
	movl	%edx, 8(%rdi)
	movl	$0, 12(%rdi)
	addq	$(vec1-vec0), %rsi
	addq	$16, %rdi
	dec	%cx
	jnz	0b

	# Initialise the IDT descriptor. The base address field starts
	# 2 bytes into the descriptor, after the 16-bit limit.

	leaq	idt(%rip), %rax
	movq	%rax, 2 + idt_descr(%rip)

	# Load the IDT.

	lidt	idt_descr(%rip)
# Zero the BSS (if first boot).
#
# The region from _bss up to _end is cleared 8 bytes at a time, and
# first_boot is then set to 0 so that later passes through startup skip
# this step.
# NOTE(review): the loop only terminates when the byte count reaches exactly
# zero, so this assumes (_end - _bss) is a non-zero multiple of 8 - confirm
# against the linker script.

	cmpl	$1, first_boot(%rip)
	jnz	.Lbss_already_clear

	xorq	%rax, %rax			# value to store
	leaq	_bss(%rip), %rdi		# %rdi = next address to clear
	leaq	_end(%rip), %rcx
	subq	%rdi, %rcx			# %rcx = bytes left to clear
.Lclear_bss_qword:
	movq	%rax, (%rdi)
	addq	$8, %rdi
	subq	$8, %rcx
	jnz	.Lclear_bss_qword

	movl	$0, first_boot(%rip)
.Lbss_already_clear:
# Initialise the FPU.
finit
# The SSE enable sequence below is currently excluded from the build by the
# preprocessor conditional around it.
#if 0
# Enable SSE.
movq %cr0, %rax
andw $0xfffb, %ax # clear coprocessor emulation bit
orw $0x0002, %ax # set coprocessor monitoring bit
mov %rax, %cr0
movq %cr4, %rax
orw $0x0600, %ax # set OSFXSR and OSXMMEXCPT
movq %rax, %cr4
#endif
# Call the dynamic linker to fix up the addresses in the GOT.
call reloc
# Run the application.
call main
# In case we return, simulate an exception. The code pushes the flags, the
# code segment selector (zero-extended to 64 bits), and a return address
# (the address of the 0: label, pushed by the call), followed by an error
# code of 0 and a vector number of 257, and then enters the common handler
# in the same way as the vecN stubs do.
pushfq
xorq %rax, %rax
movw %cs, %ax
pushq %rax
call 0f
0: pushq $0 # error code
pushq $257 # vector
jmp int_handler
# Individual interrupt vector handlers. These need to be spaced equally, to
# allow the IDT initialisation loop above to work, so we use noops to pad out
# where required.
#
# vec_stub emits the handler stub for one vector and defines its vecN label.
# Every stub pushes the vector number and jumps to int_handler. When the
# error code is already provided (has_error_code=1) the stub is padded with
# two noops, otherwise it first pushes a zero error code, so that all stubs
# have the same length.

	.macro	vec_stub num, has_error_code=0
vec\num:
	.if	\has_error_code
	nop;nop				# error code already provided
	.else
	pushq	$0			# error code
	.endif
	pushq	$\num			# vector
	jmp	int_handler
	.endm

	vec_stub 0
	vec_stub 1
	vec_stub 2
	vec_stub 3
	vec_stub 4
	vec_stub 5
	vec_stub 6
	vec_stub 7
	vec_stub 8, 1
	vec_stub 9
	vec_stub 10, 1
	vec_stub 11, 1
	vec_stub 12, 1
	vec_stub 13, 1
	vec_stub 14, 1
	vec_stub 15
	vec_stub 16
	vec_stub 17, 1
	vec_stub 18
	vec_stub 19
# The common interrupt handler code. Pass the register state to the
# application interrupt handler.
#
# On entry the top of the stack holds the vector number, with the error code
# above it, followed by the frame that was on the stack before the vecN stub
# (or the return-from-main code) pushed those two values.
#
# The values pushed below, together with the ones already on the stack, form
# the trap regs struct. %rdi is pointed at the last value pushed (the saved
# %ss), and is passed to interrupt as its first argument.
int_handler:
pushq %rax
pushq %rbx
pushq %rcx
pushq %rdx
pushq %rdi
pushq %rsi
pushq %rbp
# original stack pointer
# 96 = the 7 registers just saved plus the vector and the error code
# (9 * 8 = 72 bytes) plus a further 24 bytes.
# NOTE(review): 24 bytes matches the three-quadword frame built by the
# return-from-main path (flags, cs, return address). A hardware exception
# in 64-bit mode pushes five quadwords (ss, rsp, rflags, cs, rip), so confirm
# that the value reported for hardware exceptions is the intended one.
leaq 96(%rsp), %rax
pushq %rax
# Save the segment registers, each zero-extended to 64 bits.
xorq %rax, %rax
movw %ds, %ax
pushq %rax
movw %es, %ax
pushq %rax
movw %ss, %ax
pushq %rax
movq %rsp, %rdi # pointer to trap regs struct on the stack
call interrupt
# Discard the saved %ss, %es, %ds and stack pointer values (4 * 8 bytes).
addq $32, %rsp
# Restore the general purpose registers in the reverse of the push order.
popq %rbp
popq %rsi
popq %rdi
popq %rdx
popq %rcx
popq %rbx
popq %rax
# Discard the vector number and the error code.
addq $16, %rsp
iretq
# The interrupt descriptor table.
#
# idt_descr is the operand for lidt: a 16-bit limit followed by a 64-bit
# base address. The .word of padding after the .align places idt_descr at an
# address that is 2 modulo 4, so that the base address field, and the table
# that follows the descriptor, are 4-byte aligned.
.align 4
.word 0 # for alignment
idt_descr:
.word idt_end - idt - 1 # size
.quad 0 # addr: filled in at run time
# NUM_INT_VEC descriptors of 16 bytes each, emitted as pairs of zero quads.
idt:
.fill NUM_INT_VEC*2, 8, 0 # filled in at run time
idt_end:
# The global descriptor table.
#
# gdt_descr is the operand for lgdt, laid out in the same way as idt_descr.
# The selector values noted against the entries below are their byte offsets
# from gdt.
.word 0 # for alignment
gdt_descr:
.word gdt_end - gdt - 1 # size
.quad 0 # addr: filled in at run time
.align 4
.globl gdt
gdt:
.quad 0x0000000000000000 # NULL descriptor
.quad 0x0000000000000000 # not used
.quad 0x00209a0000000000 # 0x10 64-bit code at 0x000000
.quad 0x0000920000000000 # 0x18 64-bit data at 0x000000
.globl gdt_end
gdt_end:
.data
# ptes64 start, count=64
#
# Emits page directory entries for 2 MB pages. Each expansion emits eight
# entries, for the eight consecutive 2 MB pages starting at address start,
# and then expands itself again with start advanced by 0x01000000 (16 MB)
# and count reduced by one, until count reaches 1. With the default count
# of 64 this gives 512 entries covering 1 GB.
#
# Each entry is the page address plus 0x83 (the present, writable and
# page size bits).
.macro ptes64 start, count=64
.quad \start + 0x0000000 + 0x83
.quad \start + 0x0200000 + 0x83
.quad \start + 0x0400000 + 0x83
.quad \start + 0x0600000 + 0x83
.quad \start + 0x0800000 + 0x83
.quad \start + 0x0A00000 + 0x83
.quad \start + 0x0C00000 + 0x83
.quad \start + 0x0E00000 + 0x83
.if \count-1
ptes64 "(\start+0x01000000)",\count-1
.endif
.endm
# maxdepth depth=1
#
# Expands itself recursively until depth reaches 1 and emits nothing. With
# the default depth of 1, as used in the invocation below, there is no
# recursion at all.
# NOTE(review): presumably a leftover check of the macro nesting depth
# supported by the assembler - confirm before removing.
.macro maxdepth depth=1
.if \depth-1
maxdepth \depth-1
.endif
.endm
maxdepth
# The level 4 page map table.
# Only the first entry is defined here. The startup code points it at pdp.
.align 4096
.globl pml4
pml4:
.quad 0 # filled in at run time
# Page Directory Pointer Table:
# 4 Entries, pointing to the Page Directory Tables.
# The startup code points entries 0 to 3 at pd0 to pd3 respectively.
.align 4096
.globl pdp
pdp:
.quad 0 # filled in at run time
.quad 0 # filled in at run time
.quad 0 # filled in at run time
.quad 0 # filled in at run time
# Page Directory Tables:
# There are 4 tables. The first two map the first 2 GB of memory. The third
# is used with PAE to map the rest of memory in 1 GB segments. The fourth is
# reserved for mapping the video frame buffer. We use 2 MB pages so only the
# Page Directory Table is used (no page tables).
#
# As assembled, each table holds 512 entries that identity map 1 GB, so the
# four tables together cover addresses 0 to 4 GB.
# NOTE(review): the uses described above for the third and fourth tables
# presumably come from entries being rewritten at run time by code outside
# this file - confirm.
.align 4096
.globl pd0
pd0:
ptes64 0x0000000000000000
.align 4096
.globl pd1
pd1:
ptes64 0x0000000040000000
.align 4096
.globl pd2
pd2:
ptes64 0x0000000080000000
.align 4096
.globl pd3
pd3:
ptes64 0x00000000C0000000
.previous
# ap_trampoline is the entry point for CPUs other than the bootstrap
# CPU (BSP). It gets copied to a page in low memory, to enable the APs
# to boot when the main program has been loaded in high memory.
#
# The code is assembled as 16-bit code and sets %ds equal to %cs, so all of
# its own data is addressed as (symbol - ap_trampoline). Before it is run,
# ap_startup_addr in the copy must hold the 32-bit address of startup. The
# addresses of gdt and pml4 are derived from that value.
#
# The routine switches the CPU directly into long mode and jumps to startup.
# It does not return.

	.code16
	.align	4
	.globl	ap_trampoline
ap_trampoline:
	movw	%cs, %ax
	movw	%ax, %ds

	# Patch the jump address. The 32-bit offset operand of the far jump
	# at ap_jump starts 2 bytes into the instruction.

	movl	(ap_startup_addr - ap_trampoline), %ebx
	movl	%ebx, (ap_jump - ap_trampoline + 2)

	# Patch and load the GDT descriptor. It should point to the main
	# GDT. The 32-bit operand size form (lgdtl) is required here: with
	# a 16-bit operand size the CPU loads only the low 24 bits of the
	# base address, which is wrong whenever the main program has been
	# loaded above 16 MB.

	movl	%ebx, %eax
	addl	$(gdt - startup), %eax
	movl	%eax, (ap_gdt_descr - ap_trampoline + 2)
	lgdtl	ap_gdt_descr - ap_trampoline

	# Set the page directory base address.

	movl	%ebx, %eax
	addl	$(pml4 - startup), %eax
	movl	%eax, %cr3

	# Enable PAE.

	movl	%cr4, %eax
	orl	$0x20, %eax
	movl	%eax, %cr4

	# Enable long mode.

	movl	$0xc0000080, %ecx
	rdmsr
	orl	$0x00000100, %eax
	wrmsr

	# Enable paging and protection.

	movl	%cr0, %eax
	orl	$0x80000001, %eax
	movl	%eax, %cr0

	# Jump to the 64-bit entry point. The zero offset is replaced by
	# the address of startup by the patch code above.
ap_jump:
	data32 ljmp $KERNEL_CS, $0

	.align	4
	.word	0				# for alignment
ap_gdt_descr:
	.word	gdt_end - gdt - 1		# gdt limit
	.long	0				# gdt base - filled in at run time

	.globl	ap_startup_addr
ap_startup_addr:
	.long	0				# filled in at run time

	.globl	ap_trampoline_end
ap_trampoline_end:
	.previous
# Variables.
.data
.globl boot_params_addr
# The boot params pointer stored by the boot entry points. It is zero until
# one of them has stored a value. The 32-bit entry point writes only the low
# 32 bits.
boot_params_addr:
.quad 0
# Holds 1 until startup has zeroed the BSS, and 0 from then on.
first_boot:
.long 1
.previous
# Stacks.
#
# Layout, from low to high addresses: the BSP stack (BSP_STACK_SIZE bytes),
# the AP stacks (MAX_APS stacks of AP_STACK_SIZE bytes each), and the 64-byte
# startup stack. bsp_stack_top and ap_stacks_base are the same address, as
# are ap_stacks_top and startup_stack_base.
.bss
.align 16
bsp_stack_base:
. = . + BSP_STACK_SIZE
bsp_stack_top:
ap_stacks_base:
. = . + (AP_STACK_SIZE * MAX_APS)
ap_stacks_top:
# The temporary stack used by the entry points until the per-CPU stack has
# been selected.
startup_stack_base:
. = . + 64
startup_stack_top:
.previous