mirror of
https://github.com/memtest86plus/memtest86plus.git
synced 2025-01-06 12:33:01 -06:00
Add initial NUMA awareness support (#378)
* Add a file containing useful macro definitions, currently a single top-level macro for obtaining the size of an array; use it to replace a sizeof(x) / sizeof(x[0]) construct in system/smbus.c . This requires switching the GCC build mode from C11 to C11 with GCC extensions. * Initial NUMA awareness (#12) support: parse the ACPI SRAT to build up new internal structures related to proximity domains and affinity; use these structures in setup_vm_map() and calculate_chunk() to skip the work on the processors which don't belong to the proximity domain currently being tested. Tested on a number of 1S single-domain, 2S multi-domain and 4S multi-domain platforms. SKIP_RANGE(iterations) trick by Martin Whitaker.
This commit is contained in:
parent
ded371e9da
commit
53ca89f8ae
@ -98,6 +98,7 @@ bool enable_trace = false;
|
||||
bool enable_sm = true;
|
||||
bool enable_bench = true;
|
||||
bool enable_mch_read = true;
|
||||
bool enable_numa = false;
|
||||
|
||||
bool enable_ecc_polling = false;
|
||||
|
||||
@ -245,6 +246,10 @@ static void parse_option(const char *option, const char *params)
|
||||
enable_sm = false;
|
||||
} else if (strncmp(option, "nosmp", 6) == 0) {
|
||||
smp_enabled = false;
|
||||
} else if (strncmp(option, "numa", 5) == 0) {
|
||||
enable_numa = true;
|
||||
} else if (strncmp(option, "nonuma", 7) == 0) {
|
||||
enable_numa = false;
|
||||
} else if (strncmp(option, "powersave", 10) == 0) {
|
||||
if (strncmp(params, "off", 4) == 0) {
|
||||
power_save = POWER_SAVE_OFF;
|
||||
|
@ -60,6 +60,7 @@ extern bool enable_tty;
|
||||
extern bool enable_bench;
|
||||
extern bool enable_mch_read;
|
||||
extern bool enable_ecc_polling;
|
||||
extern bool enable_numa;
|
||||
|
||||
extern bool pause_at_start;
|
||||
|
||||
|
@ -343,6 +343,14 @@ void display_start_test(void)
|
||||
display_test_description(test_list[test_num].description);
|
||||
test_bar_length = 0;
|
||||
test_ticks = 0;
|
||||
|
||||
#if 0
|
||||
uint64_t current_time = get_tsc();
|
||||
int secs = (current_time - run_start_time) / (1000 * (uint64_t)clks_per_msec);
|
||||
int mins = secs / 60; secs %= 60;
|
||||
int hours = mins / 60; mins %= 60;
|
||||
do_trace(0, "T %i: %i:%02i:%02i", test_num, hours, mins, secs);
|
||||
#endif
|
||||
}
|
||||
|
||||
void display_error_count(void)
|
||||
|
67
app/main.c
67
app/main.c
@ -114,6 +114,7 @@ spinlock_t *error_mutex = NULL;
|
||||
|
||||
vm_map_t vm_map[MAX_MEM_SEGMENTS];
|
||||
int vm_map_size = 0;
|
||||
uint32_t proximity_domains[MAX_CPUS];
|
||||
|
||||
int pass_num = 0;
|
||||
int test_num = 0;
|
||||
@ -242,6 +243,11 @@ static void global_init(void)
|
||||
|
||||
smp_init(smp_enabled);
|
||||
|
||||
// Force disable the NUMA code paths when no proximity domain was found.
|
||||
if (num_proximity_domains == 0) {
|
||||
enable_numa = false;
|
||||
}
|
||||
|
||||
// At this point we have started reserving physical pages in the memory
|
||||
// map for data structures that need to be permanently pinned in place.
|
||||
// This may overwrite any data structures passed to us by the BIOS and/or
|
||||
@ -267,7 +273,12 @@ static void global_init(void)
|
||||
num_enabled_cpus = 0;
|
||||
for (int i = 0; i < num_available_cpus; i++) {
|
||||
if (cpu_state[i] == CPU_STATE_ENABLED) {
|
||||
chunk_index[i] = num_enabled_cpus;
|
||||
if (enable_numa) {
|
||||
uint32_t proximity_domain_idx = smp_get_proximity_domain_idx(i);
|
||||
chunk_index[i] = smp_alloc_cpu_in_proximity_domain(proximity_domain_idx);
|
||||
} else {
|
||||
chunk_index[i] = num_enabled_cpus;
|
||||
}
|
||||
num_enabled_cpus++;
|
||||
}
|
||||
}
|
||||
@ -299,7 +310,10 @@ static void global_init(void)
|
||||
if (acpi_config.rsdp_addr != 0) {
|
||||
trace(0, "ACPI RSDP (v%u.%u) found in %s at %0*x", acpi_config.ver_maj, acpi_config.ver_min, rsdp_source, 2*sizeof(uintptr_t), acpi_config.rsdp_addr);
|
||||
trace(0, "ACPI FADT found at %0*x", 2*sizeof(uintptr_t), acpi_config.fadt_addr);
|
||||
trace(0, "ACPI SRAT found at %0*x", 2*sizeof(uintptr_t), acpi_config.srat_addr);
|
||||
//trace(0, "ACPI SLIT found at %0*x", 2*sizeof(uintptr_t), acpi_config.slit_addr);
|
||||
}
|
||||
|
||||
if (!load_addr_ok) {
|
||||
trace(0, "Cannot relocate program. Press any key to reboot...");
|
||||
while (get_key() == 0) { }
|
||||
@ -360,6 +374,7 @@ static void setup_vm_map(uintptr_t win_start, uintptr_t win_end)
|
||||
// Now initialise the virtual memory map with the intersection
|
||||
// of the window and the physical memory segments.
|
||||
for (int i = 0; i < pm_map_size; i++) {
|
||||
// These are page numbers.
|
||||
uintptr_t seg_start = pm_map[i].start;
|
||||
uintptr_t seg_end = pm_map[i].end;
|
||||
if (seg_start <= win_start) {
|
||||
@ -369,13 +384,53 @@ static void setup_vm_map(uintptr_t win_start, uintptr_t win_end)
|
||||
seg_end = win_end;
|
||||
}
|
||||
if (seg_start < seg_end && seg_start < win_end && seg_end > win_start) {
|
||||
num_mapped_pages += seg_end - seg_start;
|
||||
vm_map[vm_map_size].pm_base_addr = seg_start;
|
||||
vm_map[vm_map_size].start = first_word_mapping(seg_start);
|
||||
vm_map[vm_map_size].end = last_word_mapping(seg_end - 1, sizeof(testword_t));
|
||||
vm_map_size++;
|
||||
// We need to test part of that physical memory segment.
|
||||
if (enable_numa) {
|
||||
// Now also pay attention to proximity domains, which are based on physical addresses.
|
||||
uint64_t orig_start = (uint64_t)seg_start << PAGE_SHIFT;
|
||||
uint64_t orig_end = (uint64_t)seg_end << PAGE_SHIFT;
|
||||
uint32_t proximity_domain_idx;
|
||||
uint64_t new_start;
|
||||
uint64_t new_end;
|
||||
|
||||
while (1) {
|
||||
if (smp_narrow_to_proximity_domain(orig_start, orig_end, &proximity_domain_idx, &new_start, &new_end)) {
|
||||
// Create a new entry in the virtual memory map.
|
||||
num_mapped_pages += (new_end - new_start) >> PAGE_SHIFT;
|
||||
vm_map[vm_map_size].pm_base_addr = new_start >> PAGE_SHIFT;
|
||||
vm_map[vm_map_size].start = first_word_mapping(new_start >> PAGE_SHIFT);
|
||||
vm_map[vm_map_size].end = last_word_mapping((new_end >> PAGE_SHIFT) - 1, sizeof(testword_t));
|
||||
vm_map[vm_map_size].proximity_domain_idx = proximity_domain_idx;
|
||||
vm_map_size++;
|
||||
if (new_start != orig_start || new_end != orig_end) {
|
||||
// Proceed to the next part of the range.
|
||||
orig_start = new_end; // No shift here, we already have a physical address.
|
||||
orig_end = (uint64_t)seg_end << PAGE_SHIFT;
|
||||
} else {
|
||||
// We're done with this range.
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Could not match with proximity domain, fall back to default behaviour. This shouldn't happen !
|
||||
vm_map[vm_map_size].proximity_domain_idx = 0;
|
||||
goto non_numa_vm_map_entry;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
non_numa_vm_map_entry:
|
||||
num_mapped_pages += seg_end - seg_start;
|
||||
vm_map[vm_map_size].pm_base_addr = seg_start;
|
||||
vm_map[vm_map_size].start = first_word_mapping(seg_start);
|
||||
vm_map[vm_map_size].end = last_word_mapping(seg_end - 1, sizeof(testword_t));
|
||||
vm_map_size++;
|
||||
}
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
for (int i = 0; i < vm_map_size; i++) {
|
||||
do_trace(0, "vm %0*x - %0*x", 2*sizeof(uintptr_t), vm_map[i].start, 2*sizeof(uintptr_t), vm_map[i].end);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void test_all_windows(int my_cpu)
|
||||
|
@ -22,9 +22,14 @@
|
||||
/**
|
||||
* A mapping from a CPU core number to the index number of the memory chunk
|
||||
* it operates on when performing a memory test in parallel across all the
|
||||
* enabled cores.
|
||||
* enabled cores (in the current proximity domain, when NUMA awareness is
|
||||
* enabled).
|
||||
*/
|
||||
extern uint8_t chunk_index[MAX_CPUS];
|
||||
/**
|
||||
* An array holding the count of used CPUs in each proximity domain.
|
||||
*/
|
||||
extern uint8_t used_cpus_in_proximity_domain[MAX_PROXIMITY_DOMAINS];
|
||||
|
||||
/*
|
||||
* The number of CPU cores being used for the current test. This is always
|
||||
@ -87,6 +92,7 @@ typedef struct {
|
||||
uintptr_t pm_base_addr;
|
||||
testword_t *start;
|
||||
testword_t *end;
|
||||
uint32_t proximity_domain_idx;
|
||||
} vm_map_t;
|
||||
|
||||
/**
|
||||
|
30
boot/macros.h
Normal file
30
boot/macros.h
Normal file
@ -0,0 +1,30 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#ifndef MACROS_H
|
||||
#define MACROS_H
|
||||
/**
|
||||
* \file
|
||||
*
|
||||
* Provides miscellaneous useful definitions.
|
||||
*
|
||||
*//*
|
||||
* Copyright (C) 2024 Lionel Debroux.
|
||||
*/
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#ifdef __GNUC__
// Enhanced definitions under GCC and compatible compilers, e.g. Clang.
//
// These are adapted from GPLv2 Linux 6.7. They make ARRAY_SIZE() fail at
// compile time when its argument is a pointer rather than a real array.
//
// __typeof__ is used instead of typeof: the plain spelling is only a keyword
// in the GNU dialects (-std=gnu11), whereas __typeof__ is accepted by GCC and
// Clang under every -std= setting, so this header does not depend on the
// dialect selected by the build.

// Evaluates to (int)0 when e is false; breaks the build (negative bit-field
// width) when e is true.
#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))

// Non-zero when the types of the two expressions are compatible.
#define __same_type(a, b) __builtin_types_compatible_p(__typeof__(a), __typeof__(b))

// An array has a different type from a pointer to its first element; a
// pointer does not, which trips BUILD_BUG_ON_ZERO().
#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))

// Number of elements in the array arr. The added term is always zero; it is
// only there for the compile-time check.
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))

#else
// Fallback definition, without the "argument must be an array" check.
#define ARRAY_SIZE(var_) (sizeof(var_) / sizeof((var_)[0]))

#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
@ -10,7 +10,7 @@ else
|
||||
GIT_AVAILABLE = true
|
||||
endif
|
||||
|
||||
CFLAGS = -std=c11 -Wall -Wextra -Wshadow -m32 -march=i586 -fpic -fno-builtin \
|
||||
CFLAGS = -std=gnu11 -Wall -Wextra -Wshadow -m32 -march=i586 -fpic -fno-builtin \
|
||||
-ffreestanding -fomit-frame-pointer -fno-stack-protector
|
||||
|
||||
ifeq ($(DEBUG), 1)
|
||||
|
@ -10,7 +10,7 @@ else
|
||||
GIT_AVAILABLE = true
|
||||
endif
|
||||
|
||||
CFLAGS = -std=c11 -Wall -Wextra -Wshadow -m64 -march=x86-64 -mno-mmx -mno-sse -mno-sse2 \
|
||||
CFLAGS = -std=gnu11 -Wall -Wextra -Wshadow -m64 -march=x86-64 -mno-mmx -mno-sse -mno-sse2 \
|
||||
-fpic -fno-builtin -ffreestanding -fomit-frame-pointer -fno-stack-protector
|
||||
|
||||
ifeq ($(DEBUG), 1)
|
||||
|
@ -64,18 +64,6 @@ typedef struct {
|
||||
uint8_t reserved[3];
|
||||
} rsdp_t;
|
||||
|
||||
typedef struct {
|
||||
char signature[4]; // "RSDT" or "XSDT"
|
||||
uint32_t length;
|
||||
uint8_t revision;
|
||||
uint8_t checksum;
|
||||
char oem_id[6];
|
||||
char oem_table_id[8];
|
||||
char oem_revision[4];
|
||||
char creator_id[4];
|
||||
char creator_revision[4];
|
||||
} rsdt_header_t;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Private Variables
|
||||
//------------------------------------------------------------------------------
|
||||
@ -89,7 +77,7 @@ static const efi_guid_t EFI_ACPI_2_RDSP_GUID = { 0x8868e871, 0xe4f1, 0x11d3, {0x
|
||||
|
||||
const char *rsdp_source = "";
|
||||
|
||||
acpi_t acpi_config = {0, 0, 0, 0, 0, 0, 0, false};
|
||||
acpi_t acpi_config = {0, 0, 0, 0, 0, /*0,*/ 0, 0, 0, false};
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Private Functions
|
||||
@ -269,7 +257,7 @@ static uintptr_t find_acpi_table(uint32_t table_signature)
|
||||
|
||||
static bool parse_fadt(uintptr_t fadt_addr)
|
||||
{
|
||||
// FADT is a very big & complex table and we only need a few data.
|
||||
// FADT is a very big & complex table and we only need a few pieces of data.
|
||||
// We use byte offset instead of a complete struct.
|
||||
|
||||
// FADT Header is identical to RSDP Header
|
||||
@ -287,7 +275,7 @@ static bool parse_fadt(uintptr_t fadt_addr)
|
||||
acpi_config.ver_min = *(uint8_t *)(fadt_addr+FADT_MINOR_REV_OFFSET) & 0xF;
|
||||
}
|
||||
|
||||
// Get Old PM Base Address (32bit IO)
|
||||
// Get Old PM Base Address (32-bit IO)
|
||||
acpi_config.pm_addr = *(uint32_t *)(fadt_addr+FADT_PM_TMR_BLK_OFFSET);
|
||||
acpi_config.pm_is_io = true;
|
||||
|
||||
@ -341,4 +329,8 @@ void acpi_init(void)
|
||||
}
|
||||
|
||||
acpi_config.hpet_addr = find_acpi_table(HPETSignature);
|
||||
|
||||
acpi_config.srat_addr = find_acpi_table(SRATSignature);
|
||||
|
||||
//acpi_config.slit_addr = find_acpi_table(SLITSignature);
|
||||
}
|
||||
|
@ -23,16 +23,33 @@
|
||||
*/
|
||||
|
||||
typedef struct __attribute__ ((packed)) {
|
||||
uint8_t ver_maj;
|
||||
uint8_t ver_min;
|
||||
uintptr_t rsdp_addr;
|
||||
uintptr_t madt_addr;
|
||||
uintptr_t fadt_addr;
|
||||
uintptr_t hpet_addr;
|
||||
uintptr_t srat_addr;
|
||||
//uintptr_t slit_addr;
|
||||
uintptr_t pm_addr;
|
||||
uint8_t ver_maj;
|
||||
uint8_t ver_min;
|
||||
bool pm_is_io;
|
||||
} acpi_t;
|
||||
|
||||
/**
|
||||
* A struct for the headers of most ACPI tables.
|
||||
*/
|
||||
typedef struct {
    char        signature[4];           // four-character table signature, e.g. "RSDT", "XSDT" or "APIC"
    uint32_t    length;                 // used by the parsers as the byte count to map and checksum
    uint8_t     revision;
    uint8_t     checksum;
    char        oem_id[6];
    char        oem_table_id[8];
    char        oem_revision[4];
    char        creator_id[4];
    char        creator_revision[4];
} rsdt_header_t;
|
||||
|
||||
/**
|
||||
* The search step that located the ACPI RSDP (for debug).
|
||||
*/
|
||||
|
@ -224,7 +224,7 @@ static void init_pm_map(const e820_entry_t e820_map[], int e820_entries)
|
||||
|
||||
static void sort_pm_map(void)
|
||||
{
|
||||
// Do an insertion sort on the pm_map. On an already sorted list this should be a O(1) algorithm.
|
||||
// Do an insertion sort on the pm_map. On an already sorted list this should be a O(n) algorithm.
|
||||
for (int i = 0; i < pm_map_size; i++) {
|
||||
// Find where to insert the current element.
|
||||
int j = i - 1;
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "pci.h"
|
||||
#include "unistd.h"
|
||||
#include "string.h"
|
||||
#include "macros.h"
|
||||
|
||||
#include "cpuinfo.h"
|
||||
#include "memctrl.h"
|
||||
@ -1158,7 +1159,7 @@ static bool find_smb_controller(uint16_t vid, uint16_t did)
|
||||
{
|
||||
case PCI_VID_INTEL:
|
||||
{
|
||||
if (find_in_did_array(did, intel_ich5_dids, sizeof(intel_ich5_dids) / sizeof(intel_ich5_dids[0]))) {
|
||||
if (find_in_did_array(did, intel_ich5_dids, ARRAY_SIZE(intel_ich5_dids))) {
|
||||
return ich5_get_smb();
|
||||
}
|
||||
if (did == 0x7113) { // 82371AB/EB/MB PIIX4
|
||||
|
422
system/smp.c
422
system/smp.c
@ -16,6 +16,7 @@
|
||||
|
||||
#include "acpi.h"
|
||||
#include "boot.h"
|
||||
#include "macros.h"
|
||||
#include "bootparams.h"
|
||||
#include "efi.h"
|
||||
|
||||
@ -37,8 +38,6 @@
|
||||
// Constants
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#define MAX_APIC_IDS 256
|
||||
|
||||
#define APIC_REGS_SIZE SIZE_C(4,KB)
|
||||
|
||||
// APIC registers
|
||||
@ -80,26 +79,37 @@
|
||||
|
||||
// MP config table entry types
|
||||
|
||||
#define MP_PROCESSOR 0
|
||||
#define MP_BUS 1
|
||||
#define MP_IOAPIC 2
|
||||
#define MP_INTSRC 3
|
||||
#define MP_LINTSRC 4
|
||||
#define MP_PROCESSOR 0
|
||||
#define MP_BUS 1
|
||||
#define MP_IOAPIC 2
|
||||
#define MP_INTSRC 3
|
||||
#define MP_LINTSRC 4
|
||||
|
||||
// MP processor cpu_flag values
|
||||
|
||||
#define CPU_ENABLED 1
|
||||
#define CPU_BOOTPROCESSOR 2
|
||||
#define CPU_ENABLED 1
|
||||
#define CPU_BOOTPROCESSOR 2
|
||||
|
||||
// MADT entry types
|
||||
|
||||
#define MADT_PROCESSOR 0
|
||||
#define MADT_LAPIC_ADDR 5
|
||||
#define MADT_PROCESSOR 0
|
||||
#define MADT_LAPIC_ADDR 5
|
||||
|
||||
// MADT processor flag values
|
||||
|
||||
#define MADT_PF_ENABLED 0x1
|
||||
#define MADT_PF_ONLINE_CAPABLE 0x2
|
||||
#define MADT_PF_ENABLED 0x1
|
||||
#define MADT_PF_ONLINE_CAPABLE 0x2
|
||||
|
||||
// SRAT entry types
|
||||
|
||||
#define SRAT_PROCESSOR_APIC_AFFINITY 0
|
||||
#define SRAT_MEMORY_AFFINITY 1
|
||||
#define SRAT_PROCESSOR_X2APIC_AFFINITY 2
|
||||
|
||||
// SRAT flag values
|
||||
#define SRAT_PAAF_ENABLED 1
|
||||
#define SRAT_MAF_ENABLED 1
|
||||
#define SRAT_PXAAF_ENABLED 1
|
||||
|
||||
// Private memory heap used for AP trampoline and synchronisation objects
|
||||
|
||||
@ -113,6 +123,12 @@
|
||||
|
||||
typedef volatile uint32_t apic_register_t[4];
|
||||
|
||||
// One physical address range taken from an enabled SRAT memory affinity
// entry, tagged with the index of its proximity domain.
typedef struct __attribute__((packed)) {
    uint32_t    proximity_domain_idx;   // index into the proximity_domains table, not the raw ACPI domain number
    uint64_t    start;                  // first byte of the range (physical address)
    uint64_t    end;                    // start + length, i.e. one past the last byte
} memory_affinity_t;
|
||||
|
||||
typedef struct {
|
||||
uint32_t signature; // "_MP_"
|
||||
uint32_t phys_addr;
|
||||
@ -180,16 +196,9 @@ typedef struct {
|
||||
uint8_t dst_apic_lint;
|
||||
} mp_local_interrupt_entry_t;
|
||||
|
||||
|
||||
typedef struct {
|
||||
char signature[4]; // "APIC"
|
||||
uint32_t length;
|
||||
uint8_t revision;
|
||||
uint8_t checksum;
|
||||
char oem_id[6];
|
||||
char oem_table_id[8];
|
||||
char oem_revision[4];
|
||||
char creator_id[4];
|
||||
char creator_revision[4];
|
||||
rsdt_header_t h;
|
||||
uint32_t lapic_addr;
|
||||
uint32_t flags;
|
||||
} madt_table_header_t;
|
||||
@ -214,25 +223,87 @@ typedef struct {
|
||||
uint64_t lapic_addr;
|
||||
} madt_lapic_addr_entry_t;
|
||||
|
||||
|
||||
typedef struct {
|
||||
rsdt_header_t h;
|
||||
uint32_t revision;
|
||||
uint64_t reserved;
|
||||
} srat_table_header_t;
|
||||
|
||||
// The two bytes every SRAT entry begins with; used to identify an entry and
// to step to the next one.
typedef struct {
    uint8_t     type;                   // one of the SRAT_* entry type constants
    uint8_t     length;                 // size of the whole entry in bytes, these two bytes included
} srat_entry_header_t;
|
||||
|
||||
// SRAT subtable type 00: Processor Local APIC/SAPIC Affinity.
|
||||
typedef struct __attribute__((packed)) {
    uint8_t     type;                   // SRAT_PROCESSOR_APIC_AFFINITY
    uint8_t     length;                 // the parser requires this to equal sizeof(this struct)
    uint8_t     proximity_domain_low;   // bits 7:0 of the proximity domain
    uint8_t     apic_id;
    uint32_t    flags;                  // SRAT_PAAF_ENABLED marks the entry as in use
    struct {
        uint32_t    local_sapic_eid       : 8;
        uint32_t    proximity_domain_high : 24;     // bits 31:8 of the proximity domain
    };
    uint32_t    clock_domain;
} srat_processor_lapic_affinity_entry_t;
|
||||
|
||||
// SRAT subtable type 01: Memory Affinity.
|
||||
typedef struct __attribute__((packed)) {
    uint8_t     type;                   // SRAT_MEMORY_AFFINITY
    uint8_t     length;                 // the parser requires this to equal sizeof(this struct)
    uint32_t    proximity_domain;       // raw ACPI proximity domain number
    uint16_t    reserved1;
    uint64_t    base_address;           // start of the physical address range
    uint64_t    address_length;         // size of the range in bytes
    uint32_t    reserved2;
    uint32_t    flags;                  // SRAT_MAF_ENABLED marks the entry as in use
    uint64_t    reserved3;
} srat_memory_affinity_entry_t;
|
||||
|
||||
// SRAT subtable type 02: Processor Local x2APIC Affinity
|
||||
typedef struct __attribute__((packed)) {
    uint8_t     type;                   // SRAT_PROCESSOR_X2APIC_AFFINITY
    uint8_t     length;                 // the parser requires this to equal sizeof(this struct)
    uint16_t    reserved1;
    uint32_t    proximity_domain;       // raw ACPI proximity domain number
    uint32_t    apic_id;                // full 32-bit x2APIC ID
    uint32_t    flags;                  // SRAT_PXAAF_ENABLED marks the entry as in use
    uint32_t    clock_domain;
    uint32_t    reserved2;
} srat_processor_lx2apic_affinity_entry_t;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Private Variables
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static apic_register_t *apic = NULL;
|
||||
static apic_register_t *apic = NULL;
|
||||
|
||||
static uint8_t apic_id_to_cpu_num[MAX_APIC_IDS];
|
||||
static uint8_t apic_id_to_cpu_num[MAX_APIC_IDS];
|
||||
|
||||
static uint8_t cpu_num_to_apic_id[MAX_CPUS];
|
||||
static uint8_t apic_id_to_proximity_domain_idx[MAX_APIC_IDS];
|
||||
|
||||
static uintptr_t smp_heap_page = 0;
|
||||
static uint8_t cpu_num_to_apic_id[MAX_CPUS];
|
||||
|
||||
static uintptr_t alloc_addr = 0;
|
||||
static memory_affinity_t memory_affinity_ranges[MAX_APIC_IDS];
|
||||
|
||||
static uint32_t proximity_domains[MAX_PROXIMITY_DOMAINS];
|
||||
|
||||
static uint8_t cpus_in_proximity_domain[MAX_PROXIMITY_DOMAINS];
|
||||
uint8_t used_cpus_in_proximity_domain[MAX_PROXIMITY_DOMAINS];
|
||||
|
||||
static uintptr_t smp_heap_page = 0;
|
||||
|
||||
static uintptr_t alloc_addr = 0;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Variables
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
int num_available_cpus = 1; // There is always at least one CPU, the BSP
|
||||
int num_memory_affinity_ranges = 0;
|
||||
int num_proximity_domains = 0;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Private Functions
|
||||
@ -384,10 +455,10 @@ static bool find_cpus_in_madt(void)
|
||||
madt_table_header_t *mpc = (madt_table_header_t *)map_region(acpi_config.madt_addr, sizeof(madt_table_header_t), true);
|
||||
if (mpc == NULL) return false;
|
||||
|
||||
mpc = (madt_table_header_t *)map_region(acpi_config.madt_addr, mpc->length, true);
|
||||
mpc = (madt_table_header_t *)map_region(acpi_config.madt_addr, mpc->h.length, true);
|
||||
if (mpc == NULL) return false;
|
||||
|
||||
if (acpi_checksum(mpc, mpc->length) != 0) {
|
||||
if (acpi_checksum(mpc, mpc->h.length) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -395,11 +466,14 @@ static bool find_cpus_in_madt(void)
|
||||
|
||||
int found_cpus = 0;
|
||||
|
||||
uint8_t *tab_entry_ptr = (uint8_t *)mpc + sizeof(madt_table_header_t);
|
||||
uint8_t *mpc_table_end = (uint8_t *)mpc + mpc->length;
|
||||
uint8_t *tab_entry_ptr = (uint8_t *)mpc + sizeof(*mpc);
|
||||
uint8_t *mpc_table_end = (uint8_t *)mpc + mpc->h.length;
|
||||
while (tab_entry_ptr < mpc_table_end) {
|
||||
madt_entry_header_t *entry_header = (madt_entry_header_t *)tab_entry_ptr;
|
||||
if (entry_header->type == MADT_PROCESSOR) {
|
||||
if (entry_header->length != sizeof(madt_processor_entry_t)) {
|
||||
return false;
|
||||
}
|
||||
madt_processor_entry_t *entry = (madt_processor_entry_t *)tab_entry_ptr;
|
||||
if (entry->flags & (MADT_PF_ENABLED|MADT_PF_ONLINE_CAPABLE)) {
|
||||
if (num_available_cpus < MAX_CPUS) {
|
||||
@ -412,7 +486,10 @@ static bool find_cpus_in_madt(void)
|
||||
found_cpus++;
|
||||
}
|
||||
}
|
||||
if (entry_header->type == MADT_LAPIC_ADDR) {
|
||||
else if (entry_header->type == MADT_LAPIC_ADDR) {
|
||||
if (entry_header->length != sizeof(madt_lapic_addr_entry_t)) {
|
||||
return false;
|
||||
}
|
||||
madt_lapic_addr_entry_t *entry = (madt_lapic_addr_entry_t *)tab_entry_ptr;
|
||||
apic_addr = (uintptr_t)entry->lapic_addr;
|
||||
}
|
||||
@ -427,6 +504,184 @@ static bool find_cpus_in_madt(void)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool find_numa_nodes_in_srat(void)
|
||||
{
|
||||
uint8_t * tab_entry_ptr;
|
||||
// The caller will do fixups.
|
||||
if (acpi_config.srat_addr == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
srat_table_header_t * srat = (srat_table_header_t *)map_region(acpi_config.srat_addr, sizeof(rsdt_header_t), true);
|
||||
if (srat == NULL) return false;
|
||||
|
||||
srat = (srat_table_header_t *)map_region(acpi_config.srat_addr, srat->h.length, true);
|
||||
if (srat == NULL) return false;
|
||||
|
||||
if (acpi_checksum(srat, srat->h.length) != 0) {
|
||||
return false;
|
||||
}
|
||||
// A table which contains fewer bytes than header + 1 processor local APIC entry + 1 memory affinity entry would be very weird.
|
||||
if (srat->h.length < sizeof(*srat) + sizeof(srat_processor_lapic_affinity_entry_t) + sizeof(srat_memory_affinity_entry_t)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
tab_entry_ptr = (uint8_t *)srat + sizeof(*srat);
|
||||
uint8_t * srat_table_end = (uint8_t *)srat + srat->h.length;
|
||||
// Pass 1: parse memory affinity entries and allocate proximity domains for each of them, while validating input a little bit.
|
||||
while (tab_entry_ptr < srat_table_end) {
|
||||
srat_entry_header_t *entry_header = (srat_entry_header_t *)tab_entry_ptr;
|
||||
if (entry_header->type == SRAT_PROCESSOR_APIC_AFFINITY) {
|
||||
if (entry_header->length != sizeof(srat_processor_lapic_affinity_entry_t)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (entry_header->type == SRAT_MEMORY_AFFINITY) {
|
||||
if (entry_header->length != sizeof(srat_memory_affinity_entry_t)) {
|
||||
return false;
|
||||
}
|
||||
srat_memory_affinity_entry_t *entry = (srat_memory_affinity_entry_t *)tab_entry_ptr;
|
||||
if (entry->flags & SRAT_MAF_ENABLED) {
|
||||
uint32_t proximity_domain = entry->proximity_domain;
|
||||
uint64_t start = entry->base_address;
|
||||
uint64_t end = entry->base_address + entry->address_length;
|
||||
int found = -1;
|
||||
|
||||
if (start > end) {
|
||||
// We've found a wraparound, that's not good.
|
||||
return false;
|
||||
}
|
||||
|
||||
// Allocate entry in proximity_domains, if necessary. Linear search for now.
|
||||
for (int i = 0; i < num_proximity_domains; i++) {
|
||||
if (proximity_domains[i] == proximity_domain) {
|
||||
found = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found == -1) {
|
||||
// Not found, allocate entry.
|
||||
if (num_proximity_domains < (int)(ARRAY_SIZE(proximity_domains))) {
|
||||
proximity_domains[num_proximity_domains] = proximity_domain;
|
||||
found = num_proximity_domains;
|
||||
num_proximity_domains++;
|
||||
} else {
|
||||
// TODO Display message ?
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Now that we have the index of the entry in proximity_domains in found, use it.
|
||||
if (num_memory_affinity_ranges < (int)(ARRAY_SIZE(memory_affinity_ranges))) {
|
||||
memory_affinity_ranges[num_memory_affinity_ranges].proximity_domain_idx = (uint32_t)found;
|
||||
memory_affinity_ranges[num_memory_affinity_ranges].start = start;
|
||||
memory_affinity_ranges[num_memory_affinity_ranges].end = end;
|
||||
num_memory_affinity_ranges++;
|
||||
} else {
|
||||
// TODO Display message ?
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (entry_header->type == SRAT_PROCESSOR_X2APIC_AFFINITY) {
|
||||
if (entry_header->length != sizeof(srat_processor_lx2apic_affinity_entry_t)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
tab_entry_ptr += entry_header->length;
|
||||
}
|
||||
|
||||
tab_entry_ptr = (uint8_t *)srat + sizeof(*srat);
|
||||
// Pass 2: parse processor APIC / x2APIC affinity entries.
|
||||
while (tab_entry_ptr < srat_table_end) {
|
||||
srat_entry_header_t *entry_header = (srat_entry_header_t *)tab_entry_ptr;
|
||||
uint32_t proximity_domain;
|
||||
uint32_t apic_id;
|
||||
if (entry_header->type == SRAT_PROCESSOR_APIC_AFFINITY) {
|
||||
srat_processor_lapic_affinity_entry_t *entry = (srat_processor_lapic_affinity_entry_t *)tab_entry_ptr;
|
||||
if (entry->flags & SRAT_PAAF_ENABLED) {
|
||||
int found1;
|
||||
proximity_domain = ((uint32_t)entry->proximity_domain_high) << 8 | entry->proximity_domain_low;
|
||||
apic_id = (uint32_t)entry->apic_id;
|
||||
|
||||
find_proximity_domain:
|
||||
found1 = -1;
|
||||
// Find entry in proximity_domains, if necessary. Linear search for now.
|
||||
for (int i = 0; i < num_proximity_domains; i++) {
|
||||
if (proximity_domains[i] == proximity_domain) {
|
||||
found1 = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found1 == -1) {
|
||||
// We've found an affinity entry whose proximity domain we don't know about.
|
||||
return false;
|
||||
}
|
||||
|
||||
// Do we know about that APIC ID ?
|
||||
int found2 = -1;
|
||||
for (int i = 0; i < num_available_cpus; i++) {
|
||||
if ((uint32_t)cpu_num_to_apic_id[i] == apic_id) {
|
||||
found2 = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (found2 == -1) {
|
||||
// We've found an affinity entry whose APIC ID we don't know about.
|
||||
return false;
|
||||
}
|
||||
|
||||
apic_id_to_proximity_domain_idx[apic_id] = (uint32_t)found1;
|
||||
}
|
||||
}
|
||||
else if (entry_header->type == SRAT_PROCESSOR_X2APIC_AFFINITY) {
|
||||
srat_processor_lx2apic_affinity_entry_t *entry = (srat_processor_lx2apic_affinity_entry_t *)tab_entry_ptr;
|
||||
if (entry->flags & SRAT_PXAAF_ENABLED) {
|
||||
proximity_domain = entry->proximity_domain;
|
||||
apic_id = entry->apic_id;
|
||||
goto find_proximity_domain;
|
||||
}
|
||||
}
|
||||
tab_entry_ptr += entry_header->length;
|
||||
}
|
||||
|
||||
// TODO sort on proximity address, like in pm_map.
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static bool parse_slit(uintptr_t slit_addr)
|
||||
{
|
||||
// SLIT is a simple table.
|
||||
|
||||
// SLIT Header is identical to RSDP Header
|
||||
rsdt_header_t *slit = (rsdt_header_t *)slit_addr;
|
||||
|
||||
// Validate SLIT
|
||||
if (slit == NULL || acpi_checksum(slit, slit->length) != 0) {
|
||||
return false;
|
||||
}
|
||||
// A SLIT shall always contain at least one byte beyond the header and the number of localities.
|
||||
if (slit->length <= sizeof(*slit) + sizeof(uint64_t)) {
|
||||
return false;
|
||||
}
|
||||
// 8 bytes for the number of localities, followed by (number of localities) ^ 2 bytes.
|
||||
uint64_t localities = *(uint64_t *)((uint8_t *)slit + sizeof(*slit));
|
||||
if (localities > MAX_APIC_IDS) {
|
||||
return false;
|
||||
}
|
||||
if (slit->length != sizeof(*slit) + sizeof(uint64_t) + (localities * localities)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void send_ipi(int apic_id, int trigger, int level, int mode, uint8_t vector)
|
||||
{
|
||||
apic_write(APIC_REG_ICRHI, apic_id << 24);
|
||||
@ -521,15 +776,34 @@ static bool start_cpu(int cpu_num)
|
||||
|
||||
void smp_init(bool smp_enable)
|
||||
{
|
||||
for (int i = 0; i < MAX_APIC_IDS; i++) {
|
||||
for (int i = 0; i < (int)(ARRAY_SIZE(apic_id_to_cpu_num)); i++) {
|
||||
apic_id_to_cpu_num[i] = 0;
|
||||
}
|
||||
for (int i = 0; i < (int)(ARRAY_SIZE(apic_id_to_proximity_domain_idx)); i++) {
|
||||
apic_id_to_proximity_domain_idx[i] = 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < MAX_CPUS; i++) {
|
||||
for (int i = 0; i < (int)(ARRAY_SIZE(cpu_num_to_apic_id)); i++) {
|
||||
cpu_num_to_apic_id[i] = 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < (int)(ARRAY_SIZE(memory_affinity_ranges)); i++) {
|
||||
memory_affinity_ranges[i].proximity_domain_idx = UINT32_C(0xFFFFFFFF);
|
||||
memory_affinity_ranges[i].start = 0;
|
||||
memory_affinity_ranges[i].end = 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < (int)(ARRAY_SIZE(cpus_in_proximity_domain)); i++) {
|
||||
cpus_in_proximity_domain[i] = 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < (int)(ARRAY_SIZE(used_cpus_in_proximity_domain)); i++) {
|
||||
used_cpus_in_proximity_domain[i] = 0;
|
||||
}
|
||||
|
||||
num_available_cpus = 1;
|
||||
num_memory_affinity_ranges = 0;
|
||||
num_proximity_domains = 0;
|
||||
|
||||
if (cpuid_info.flags.x2apic) {
|
||||
uint32_t msrl, msrh;
|
||||
@ -548,13 +822,23 @@ void smp_init(bool smp_enable)
|
||||
|
||||
if (smp_enable) {
|
||||
(void)(find_cpus_in_madt() || find_cpus_in_floating_mp_struct());
|
||||
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_available_cpus; i++) {
|
||||
apic_id_to_cpu_num[cpu_num_to_apic_id[i]] = i;
|
||||
}
|
||||
|
||||
if (smp_enable) {
|
||||
if (!find_numa_nodes_in_srat()) {
|
||||
// Do nothing.
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_available_cpus; i++) {
|
||||
uint32_t proximity_domain_idx = apic_id_to_proximity_domain_idx[i];
|
||||
cpus_in_proximity_domain[proximity_domain_idx]++;
|
||||
}
|
||||
|
||||
// Allocate a page of low memory for AP trampoline and sync objects.
|
||||
// These need to remain pinned in place during relocation.
|
||||
smp_heap_page = heap_alloc(HEAP_TYPE_LM_1, PAGE_SIZE, PAGE_SIZE) >> PAGE_SHIFT;
|
||||
@ -623,9 +907,75 @@ int smp_my_cpu_num(void)
|
||||
return num_available_cpus > 1 ? apic_id_to_cpu_num[my_apic_id()] : 0;
|
||||
}
|
||||
|
||||
uint32_t smp_get_proximity_domain_idx(int cpu_num)
|
||||
{
|
||||
return num_available_cpus > 1 ? apic_id_to_proximity_domain_idx[cpu_num_to_apic_id[cpu_num]] : 0;
|
||||
}
|
||||
|
||||
int smp_narrow_to_proximity_domain(uint64_t start, uint64_t end, uint32_t * proximity_domain_idx, uint64_t * new_start, uint64_t * new_end)
|
||||
{
|
||||
for (int i = 0; i < num_memory_affinity_ranges; i++) {
|
||||
uint64_t range_start = memory_affinity_ranges[i].start;
|
||||
uint64_t range_end = memory_affinity_ranges[i].end;
|
||||
|
||||
if (start >= range_start) {
|
||||
if (start < range_end) {
|
||||
if (end <= range_end) {
|
||||
// range_start start end range_end.
|
||||
// The given vm_map range is entirely within a single memory affinity range. Nothing to split.
|
||||
*proximity_domain_idx = memory_affinity_ranges[i].proximity_domain_idx;
|
||||
*new_start = start;
|
||||
*new_end = end;
|
||||
return 1;
|
||||
} else {
|
||||
// range_start start range_end end.
|
||||
// The given vm_map range needs to be shortened.
|
||||
*proximity_domain_idx = memory_affinity_ranges[i].proximity_domain_idx;
|
||||
*new_start = start;
|
||||
*new_end = range_end;
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
// range_start range_end start end
|
||||
// Do nothing, skip to next memory affinity range.
|
||||
}
|
||||
} else {
|
||||
if (end < range_start) {
|
||||
// start end range_start range_end.
|
||||
// Do nothing, skip to next memory affinity range.
|
||||
} else {
|
||||
if (end <= range_end) {
|
||||
// start range_start end range_end.
|
||||
*proximity_domain_idx = memory_affinity_ranges[i].proximity_domain_idx;
|
||||
*new_start = start;
|
||||
*new_end = range_start;
|
||||
return 1;
|
||||
} else {
|
||||
// start range_start range_end end.
|
||||
*proximity_domain_idx = memory_affinity_ranges[i].proximity_domain_idx;
|
||||
*new_start = start;
|
||||
*new_end = range_start;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// If we come here, we haven't found a proximity domain which contains the given range. That shouldn't happen !
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if 0
// Currently compiled out. Copies the proximity domain index and the bounds
// of memory affinity entry 'idx' into the caller-supplied variables.
void get_memory_affinity_entry(int idx, uint32_t * proximity_domain_idx, uint64_t * start, uint64_t * end)
{
    *start = memory_affinity_ranges[idx].start;
    *end = memory_affinity_ranges[idx].end;
    *proximity_domain_idx = memory_affinity_ranges[idx].proximity_domain_idx;
}
#endif
|
||||
|
||||
barrier_t *smp_alloc_barrier(int num_threads)
|
||||
{
|
||||
barrier_t *barrier = (barrier_t *)(alloc_addr);
|
||||
barrier_t *barrier = (barrier_t *)(alloc_addr);
|
||||
alloc_addr += sizeof(barrier_t);
|
||||
barrier_init(barrier, num_threads);
|
||||
return barrier;
|
||||
|
43
system/smp.h
43
system/smp.h
@ -23,6 +23,16 @@
|
||||
*/
|
||||
#define MAX_CPUS (1 + MAX_APS)
|
||||
|
||||
/**
|
||||
* The maximum number of APIC IDs.
|
||||
*/
|
||||
#define MAX_APIC_IDS 256
|
||||
|
||||
/**
|
||||
* The maximum number of NUMA proximity domains.
|
||||
*/
|
||||
#define MAX_PROXIMITY_DOMAINS MAX_APIC_IDS
|
||||
|
||||
/**
|
||||
* The current state of a CPU core.
|
||||
*/
|
||||
@ -38,6 +48,12 @@ typedef enum __attribute__ ((packed)) {
|
||||
*/
|
||||
extern int num_available_cpus;
|
||||
|
||||
/**
|
||||
* The number of distinct memory proximity domains. Initially this is 1, but
|
||||
* may increase after calling smp_init().
|
||||
*/
|
||||
extern int num_proximity_domains;
|
||||
|
||||
/**
|
||||
* Initialises the SMP state and detects the number of available CPU cores.
|
||||
*/
|
||||
@ -60,6 +76,33 @@ void smp_send_nmi(int cpu_num);
|
||||
*/
|
||||
int smp_my_cpu_num(void);
|
||||
|
||||
/**
|
||||
* Returns the index of the proximity domain corresponding to the given CPU number.
|
||||
* 0 when at most one CPU is available; otherwise the index recorded for that CPU's APIC ID.
|
||||
*/
|
||||
uint32_t smp_get_proximity_domain_idx(int cpu_num);
|
||||
|
||||
/**
|
||||
* "Allocates" a CPU ID in the given proximity domain, for filling in NUMA-aware chunk index.
|
||||
* Returns the nth CPU ID found so far in the proximity domain.
|
||||
*/
|
||||
static inline uint8_t smp_alloc_cpu_in_proximity_domain(uint32_t proximity_domain_idx)
|
||||
{
|
||||
extern uint8_t used_cpus_in_proximity_domain[MAX_PROXIMITY_DOMAINS];
|
||||
uint8_t chunk_index = used_cpus_in_proximity_domain[proximity_domain_idx];
|
||||
used_cpus_in_proximity_domain[proximity_domain_idx]++;
|
||||
return chunk_index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the first span, limited to a single proximity domain, of the given memory range.
|
||||
*/
|
||||
int smp_narrow_to_proximity_domain(uint64_t start, uint64_t end, uint32_t * proximity_domain_idx, uint64_t * new_start, uint64_t * new_end);
|
||||
|
||||
//int count_cpus_for_proximity_domain_corresponding_to_range(uintptr_t start, uintptr_t end, uint32_t proximity_domain_idx);
|
||||
|
||||
//void get_memory_affinity_entry(int idx, uint32_t * proximity_domain_idx, uint64_t * start, uint64_t * end);
|
||||
|
||||
/**
|
||||
* Allocates and initialises a barrier object in pinned memory.
|
||||
*/
|
||||
|
@ -6,7 +6,6 @@
|
||||
// MemTest86+ V5 Specific code (GPL V2.0)
|
||||
// By Samuel DEMEULEMEESTER, sdemeule@memtest.org
|
||||
// http://www.canardpc.com - http://www.memtest.org
|
||||
// Thanks to Passmark for calculate_chunk() and various comments !
|
||||
// ----------------------------------------------------
|
||||
// test.c - MemTest-86 Version 3.4
|
||||
//
|
||||
|
@ -6,7 +6,6 @@
|
||||
// MemTest86+ V5 Specific code (GPL V2.0)
|
||||
// By Samuel DEMEULEMEESTER, sdemeule@memtest.org
|
||||
// http://www.canardpc.com - http://www.memtest.org
|
||||
// Thanks to Passmark for calculate_chunk() and various comments !
|
||||
// ----------------------------------------------------
|
||||
// test.c - MemTest-86 Version 3.4
|
||||
//
|
||||
|
@ -39,7 +39,7 @@ int test_block_move(int my_cpu, int iterations)
|
||||
for (int i = 0; i < vm_map_size; i++) {
|
||||
testword_t *start, *end;
|
||||
calculate_chunk(&start, &end, my_cpu, i, 16 * sizeof(testword_t));
|
||||
if ((end - start) < 15) continue; // we need at least 16 words for this test
|
||||
if ((end - start) < 15) SKIP_RANGE(1) // we need at least 16 words for this test
|
||||
|
||||
testword_t *p = start;
|
||||
testword_t *pe = start;
|
||||
@ -90,7 +90,7 @@ int test_block_move(int my_cpu, int iterations)
|
||||
for (int i = 0; i < vm_map_size; i++) {
|
||||
testword_t *start, *end;
|
||||
calculate_chunk(&start, &end, my_cpu, i, 16 * sizeof(testword_t));
|
||||
if ((end - start) < 15) continue; // we need at least 16 words for this test
|
||||
if ((end - start) < 15) SKIP_RANGE(iterations) // we need at least 16 words for this test
|
||||
|
||||
testword_t *p = start;
|
||||
testword_t *pe = start;
|
||||
@ -203,7 +203,7 @@ int test_block_move(int my_cpu, int iterations)
|
||||
for (int i = 0; i < vm_map_size; i++) {
|
||||
testword_t *start, *end;
|
||||
calculate_chunk(&start, &end, my_cpu, i, 16 * sizeof(testword_t));
|
||||
if ((end - start) < 15) continue; // we need at least 16 words for this test
|
||||
if ((end - start) < 15) SKIP_RANGE(1) // we need at least 16 words for this test
|
||||
|
||||
testword_t *p = start;
|
||||
testword_t *pe = start;
|
||||
|
@ -39,7 +39,7 @@ int test_modulo_n(int my_cpu, int iterations, testword_t pattern1, testword_t pa
|
||||
for (int i = 0; i < vm_map_size; i++) {
|
||||
testword_t *start, *end;
|
||||
calculate_chunk(&start, &end, my_cpu, i, sizeof(testword_t));
|
||||
if ((end - start) < (n - 1)) continue; // we need at least n words for this test
|
||||
if ((end - start) < (n - 1)) SKIP_RANGE(1) // we need at least n words for this test
|
||||
end -= n; // avoids pointer overflow when incrementing p
|
||||
|
||||
testword_t *p = start + offset; // we assume each chunk has at least 'n' words, so this won't overflow
|
||||
@ -72,7 +72,7 @@ int test_modulo_n(int my_cpu, int iterations, testword_t pattern1, testword_t pa
|
||||
for (int j = 0; j < vm_map_size; j++) {
|
||||
testword_t *start, *end;
|
||||
calculate_chunk(&start, &end, my_cpu, j, sizeof(testword_t));
|
||||
if ((end - start) < (n - 1)) continue; // we need at least n words for this test
|
||||
if ((end - start) < (n - 1)) SKIP_RANGE(1) // we need at least n words for this test
|
||||
|
||||
int k = 0;
|
||||
testword_t *p = start;
|
||||
@ -113,7 +113,7 @@ int test_modulo_n(int my_cpu, int iterations, testword_t pattern1, testword_t pa
|
||||
for (int i = 0; i < vm_map_size; i++) {
|
||||
testword_t *start, *end;
|
||||
calculate_chunk(&start, &end, my_cpu, i, sizeof(testword_t));
|
||||
if ((end - start) < (n - 1)) continue; // we need at least n words for this test
|
||||
if ((end - start) < (n - 1)) SKIP_RANGE(1) // we need at least n words for this test
|
||||
end -= n; // avoids pointer overflow when incrementing p
|
||||
|
||||
testword_t *p = start + offset; // we assume each chunk has at least 'offset' words, so this won't overflow
|
||||
|
@ -41,7 +41,7 @@ int test_mov_inv_fixed(int my_cpu, int iterations, testword_t pattern1, testword
|
||||
for (int i = 0; i < vm_map_size; i++) {
|
||||
testword_t *start, *end;
|
||||
calculate_chunk(&start, &end, my_cpu, i, sizeof(testword_t));
|
||||
if (end < start) continue; // we need at least one word for this test
|
||||
if (end < start) SKIP_RANGE(1) // we need at least one word for this test
|
||||
|
||||
testword_t *p = start;
|
||||
testword_t *pe = start;
|
||||
@ -100,7 +100,7 @@ int test_mov_inv_fixed(int my_cpu, int iterations, testword_t pattern1, testword
|
||||
for (int j = 0; j < vm_map_size; j++) {
|
||||
testword_t *start, *end;
|
||||
calculate_chunk(&start, &end, my_cpu, j, sizeof(testword_t));
|
||||
if (end < start) continue; // we need at least one word for this test
|
||||
if (end < start) SKIP_RANGE(1) // we need at least one word for this test
|
||||
|
||||
testword_t *p = start;
|
||||
testword_t *pe = start;
|
||||
@ -136,7 +136,7 @@ int test_mov_inv_fixed(int my_cpu, int iterations, testword_t pattern1, testword
|
||||
for (int j = vm_map_size - 1; j >= 0; j--) {
|
||||
testword_t *start, *end;
|
||||
calculate_chunk(&start, &end, my_cpu, j, sizeof(testword_t));
|
||||
if (end < start) continue; // we need at least one word for this test
|
||||
if (end < start) SKIP_RANGE(1) // we need at least one word for this test
|
||||
|
||||
testword_t *p = end;
|
||||
testword_t *ps = end;
|
||||
|
@ -51,7 +51,7 @@ int test_mov_inv_random(int my_cpu)
|
||||
for (int i = 0; i < vm_map_size; i++) {
|
||||
testword_t *start, *end;
|
||||
calculate_chunk(&start, &end, my_cpu, i, sizeof(testword_t));
|
||||
if (end < start) continue; // we need at least one word for this test
|
||||
if (end < start) SKIP_RANGE(1) // we need at least one word for this test
|
||||
|
||||
testword_t *p = start;
|
||||
testword_t *pe = start;
|
||||
@ -89,7 +89,7 @@ int test_mov_inv_random(int my_cpu)
|
||||
for (int j = 0; j < vm_map_size; j++) {
|
||||
testword_t *start, *end;
|
||||
calculate_chunk(&start, &end, my_cpu, j, sizeof(testword_t));
|
||||
if (end < start) continue; // we need at least one word for this test
|
||||
if (end < start) SKIP_RANGE(1) // we need at least one word for this test
|
||||
|
||||
testword_t *p = start;
|
||||
testword_t *pe = start;
|
||||
|
@ -42,7 +42,7 @@ int test_mov_inv_walk1(int my_cpu, int iterations, int offset, bool inverse)
|
||||
for (int i = 0; i < vm_map_size; i++) {
|
||||
testword_t *start, *end;
|
||||
calculate_chunk(&start, &end, my_cpu, i, sizeof(testword_t));
|
||||
if (end < start) continue; // we need at least one word for this test
|
||||
if (end < start) SKIP_RANGE(1) // we need at least one word for this test
|
||||
|
||||
testword_t *p = start;
|
||||
testword_t *pe = start;
|
||||
@ -81,7 +81,7 @@ int test_mov_inv_walk1(int my_cpu, int iterations, int offset, bool inverse)
|
||||
for (int j = 0; j < vm_map_size; j++) {
|
||||
testword_t *start, *end;
|
||||
calculate_chunk(&start, &end, my_cpu, j, sizeof(testword_t));
|
||||
if (end < start) continue; // we need at least one word for this test
|
||||
if (end < start) SKIP_RANGE(1) // we need at least one word for this test
|
||||
|
||||
testword_t *p = start;
|
||||
testword_t *pe = start;
|
||||
@ -121,7 +121,7 @@ int test_mov_inv_walk1(int my_cpu, int iterations, int offset, bool inverse)
|
||||
for (int j = vm_map_size - 1; j >= 0; j--) {
|
||||
testword_t *start, *end;
|
||||
calculate_chunk(&start, &end, my_cpu, j, sizeof(testword_t));
|
||||
if (end < start) continue; // we need at least one word for this test
|
||||
if (end < start) SKIP_RANGE(1) // we need at least one word for this test
|
||||
|
||||
testword_t *p = end;
|
||||
testword_t *ps = end;
|
||||
|
@ -6,7 +6,6 @@
|
||||
// MemTest86+ V5 Specific code (GPL V2.0)
|
||||
// By Samuel DEMEULEMEESTER, sdemeule@memtest.org
|
||||
// http://www.canardpc.com - http://www.memtest.org
|
||||
// Thanks to Passmark for calculate_chunk() and various comments !
|
||||
// ----------------------------------------------------
|
||||
// test.c - MemTest-86 Version 3.4
|
||||
//
|
||||
|
@ -40,15 +40,37 @@ void calculate_chunk(testword_t **start, testword_t **end, int my_cpu, int segme
|
||||
*start = vm_map[segment].start;
|
||||
*end = vm_map[segment].end;
|
||||
} else {
|
||||
uintptr_t segment_size = (vm_map[segment].end - vm_map[segment].start + 1) * sizeof(testword_t);
|
||||
uintptr_t chunk_size = round_down(segment_size / num_active_cpus, chunk_align);
|
||||
if (enable_numa) {
|
||||
uint32_t proximity_domain_idx = smp_get_proximity_domain_idx(my_cpu);
|
||||
|
||||
// Calculate chunk boundaries.
|
||||
*start = (testword_t *)((uintptr_t)vm_map[segment].start + chunk_size * chunk_index[my_cpu]);
|
||||
*end = (testword_t *)((uintptr_t)(*start) + chunk_size) - 1;
|
||||
// Is this CPU in the same proximity domain as the current segment ?
|
||||
if (proximity_domain_idx == vm_map[segment].proximity_domain_idx) {
|
||||
uintptr_t segment_size = (vm_map[segment].end - vm_map[segment].start + 1) * sizeof(testword_t);
|
||||
uintptr_t chunk_size = round_down(segment_size / used_cpus_in_proximity_domain[proximity_domain_idx], chunk_align);
|
||||
|
||||
if (*end > vm_map[segment].end) {
|
||||
*end = vm_map[segment].end;
|
||||
// Calculate chunk boundaries.
|
||||
*start = (testword_t *)((uintptr_t)vm_map[segment].start + chunk_size * chunk_index[my_cpu]);
|
||||
*end = (testword_t *)((uintptr_t)(*start) + chunk_size) - 1;
|
||||
|
||||
if (*end > vm_map[segment].end) {
|
||||
*end = vm_map[segment].end;
|
||||
}
|
||||
} else {
|
||||
// Nope.
|
||||
*start = (testword_t *)1;
|
||||
*end = (testword_t *)0;
|
||||
}
|
||||
} else {
|
||||
uintptr_t segment_size = (vm_map[segment].end - vm_map[segment].start + 1) * sizeof(testword_t);
|
||||
uintptr_t chunk_size = round_down(segment_size / num_active_cpus, chunk_align);
|
||||
|
||||
// Calculate chunk boundaries.
|
||||
*start = (testword_t *)((uintptr_t)vm_map[segment].start + chunk_size * chunk_index[my_cpu]);
|
||||
*end = (testword_t *)((uintptr_t)(*start) + chunk_size) - 1;
|
||||
|
||||
if (*end > vm_map[segment].end) {
|
||||
*end = vm_map[segment].end;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -46,6 +46,11 @@
|
||||
*/
|
||||
#define BAILOUT if (bail) return ticks
|
||||
|
||||
/**
 * A macro to skip the current range without disturbing waits on barriers and
 * creating a deadlock. When my_cpu is non-negative it calls do_tick(my_cpu)
 * num_ticks times, checking BAILOUT after every tick, and then moves on to
 * the next iteration of the enclosing loop.
 *
 * num_ticks is parenthesized in the expansion so that any expression may be
 * passed; note that it is re-evaluated on every iteration of the tick loop.
 *
 * This deliberately expands to a plain brace block rather than the usual
 * do { ... } while (0) wrapper: inside such a wrapper, 'continue' would bind
 * to the wrapper itself instead of the caller's loop. It must therefore only
 * be used directly inside a loop body, and may be written without a trailing
 * semicolon.
 */
#define SKIP_RANGE(num_ticks) { \
    if (my_cpu >= 0) { \
        for (int iter = 0; iter < (num_ticks); iter++) { \
            do_tick(my_cpu); \
            BAILOUT; \
        } \
    } \
    continue; \
}
|
||||
|
||||
/**
|
||||
* Returns value rounded down to the nearest multiple of align_size.
|
||||
*/
|
||||
|
@ -77,13 +77,16 @@ int ticks_per_test[NUM_PASS_TYPES][NUM_TEST_PATTERNS];
|
||||
#define BARRIER \
|
||||
if (my_cpu >= 0) { \
|
||||
if (TRACE_BARRIERS) { \
|
||||
trace(my_cpu, "Run barrier wait at %s line %i", __FILE__, __LINE__); \
|
||||
trace(my_cpu, "Run barrier wait begin at %s line %i", __FILE__, __LINE__); \
|
||||
} \
|
||||
if (power_save < POWER_SAVE_HIGH) { \
|
||||
barrier_spin_wait(run_barrier); \
|
||||
} else { \
|
||||
barrier_halt_wait(run_barrier); \
|
||||
} \
|
||||
if (TRACE_BARRIERS) { \
|
||||
trace(my_cpu, "Run barrier wait end at %s line %i", __FILE__, __LINE__); \
|
||||
} \
|
||||
}
|
||||
|
||||
int run_test(int my_cpu, int test, int stage, int iterations)
|
||||
|
Loading…
Reference in New Issue
Block a user