mirror of
https://github.com/memtest86plus/memtest86plus.git
synced 2024-11-23 08:26:23 -06:00
Add an experimental mode that uses nontemporal stores (movnt[iq]) in the own address test, the only test where this helps performance across most of the processors I have access to, both single- and multi-socket. Per #79, it saves several dozen minutes on my 4S Opteron 62xx / 63xx servers equipped with 256 GB of RAM.
This commit is contained in:
parent
5a046291fa
commit
52a589fd29
@ -99,6 +99,7 @@ bool enable_sm = true;
|
||||
bool enable_bench = true;
|
||||
bool enable_mch_read = true;
|
||||
bool enable_numa = false;
|
||||
bool enable_nontemporal = false;
|
||||
|
||||
bool enable_ecc_polling = false;
|
||||
|
||||
@ -244,6 +245,8 @@ static void parse_option(const char *option, const char *params)
|
||||
usb_init_options |= USB_IGNORE_EHCI;
|
||||
} else if (strncmp(option, "nomch", 6) == 0) {
|
||||
enable_mch_read = false;
|
||||
} else if (strncmp(option, "nontemporal", 12) == 0) {
|
||||
enable_nontemporal = true;
|
||||
} else if (strncmp(option, "nopause", 8) == 0) {
|
||||
pause_at_start = false;
|
||||
} else if (strncmp(option, "nosm", 5) == 0) {
|
||||
|
@ -63,6 +63,7 @@ extern bool enable_bench;
|
||||
extern bool enable_mch_read;
|
||||
extern bool enable_ecc_polling;
|
||||
extern bool enable_numa;
|
||||
extern bool enable_nontemporal;
|
||||
|
||||
extern bool pause_at_start;
|
||||
|
||||
|
@ -226,6 +226,20 @@ static inline void cpuid(uint32_t op, uint32_t count, uint32_t *eax, uint32_t *e
|
||||
"2" (*ecx)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the processor supports nontemporal writes
|
||||
*/
|
||||
#define nontemporal_writes_supported() (cpuid_info.flags.sse2)
|
||||
|
||||
#elif defined(__loongarch_lp64)
|
||||
|
||||
/**
|
||||
* Returns whether the processor supports nontemporal writes
|
||||
*/
|
||||
// TODO
|
||||
#define nontemporal_writes_supported() (0)
|
||||
|
||||
#endif
|
||||
|
||||
#endif // CPUID_H
|
||||
|
@ -23,6 +23,7 @@
|
||||
#define __MEMRW_SUFFIX_64BIT "q"
|
||||
#define __MEMRW_READ_INSTRUCTIONS(bitwidth) "mov" __MEMRW_SUFFIX_##bitwidth##BIT " %1, %0"
|
||||
#define __MEMRW_WRITE_INSTRUCTIONS(bitwidth) "mov" __MEMRW_SUFFIX_##bitwidth##BIT " %1, %0"
|
||||
#define __MEMRW_WRITENT_INSTRUCTIONS(bitwidth) "movnti" __MEMRW_SUFFIX_##bitwidth##BIT " %1, %0"
|
||||
#define __MEMRW_FLUSH_INSTRUCTIONS(bitwidth) "mov" __MEMRW_SUFFIX_##bitwidth##BIT " %1, %0; mov" __MEMRW_SUFFIX_##bitwidth##BIT " %0, %1"
|
||||
|
||||
#elif defined(__loongarch_lp64)
|
||||
@ -62,6 +63,18 @@ static inline void write##bitwidth(const volatile uint##bitwidth##_t *ptr, uint#
|
||||
); \
|
||||
}
|
||||
|
||||
#define __MEMRW_WRITENT_FUNC(bitwidth) \
|
||||
static inline void write##bitwidth##nt(const volatile uint##bitwidth##_t *ptr, uint##bitwidth##_t val) \
|
||||
{ \
|
||||
__asm__ __volatile__( \
|
||||
__MEMRW_WRITENT_INSTRUCTIONS(bitwidth) \
|
||||
: \
|
||||
: "m" (*ptr), \
|
||||
"r" (val) \
|
||||
: "memory" \
|
||||
); \
|
||||
}
|
||||
|
||||
#define __MEMRW_FLUSH_FUNC(bitwidth) \
|
||||
static inline void flush##bitwidth(const volatile uint##bitwidth##_t *ptr, uint##bitwidth##_t val) \
|
||||
{ \
|
||||
@ -108,6 +121,15 @@ __MEMRW_WRITE_FUNC(32)
|
||||
*/
|
||||
__MEMRW_WRITE_FUNC(64)
|
||||
|
||||
/**
|
||||
* Writes val to the 32-bit memory location pointed to by ptr, using non-temporal hint.
|
||||
*/
|
||||
__MEMRW_WRITENT_FUNC(32)
|
||||
/**
|
||||
* Writes val to the 64-bit memory location pointed to by ptr, using non-temporal hint.
|
||||
*/
|
||||
__MEMRW_WRITENT_FUNC(64)
|
||||
|
||||
/**
|
||||
* Writes val to the 8-bit memory location pointed to by ptr. Only returns when the write is complete.
|
||||
*/
|
||||
|
@ -20,6 +20,8 @@
|
||||
#include "display.h"
|
||||
#include "error.h"
|
||||
#include "test.h"
|
||||
#include "config.h"
|
||||
#include "cpuid.h"
|
||||
|
||||
#include "test_funcs.h"
|
||||
#include "test_helper.h"
|
||||
@ -36,7 +38,7 @@ static int pattern_fill(int my_cpu, testword_t offset)
|
||||
display_test_pattern_name("own address");
|
||||
}
|
||||
|
||||
// Write each address with it's own address.
|
||||
// Write each address with its own address.
|
||||
for (int i = 0; i < vm_map_size; i++) {
|
||||
testword_t *start = vm_map[i].start;
|
||||
testword_t *end = vm_map[i].end;
|
||||
@ -58,6 +60,41 @@ static int pattern_fill(int my_cpu, testword_t offset)
|
||||
continue;
|
||||
}
|
||||
test_addr[my_cpu] = (uintptr_t)p;
|
||||
if (!offset) {
|
||||
if (enable_nontemporal && nontemporal_writes_supported()) {
|
||||
do {
|
||||
write_word_nt(p, (testword_t)p);
|
||||
} while (p++ < pe); // test before increment in case pointer overflows
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
__asm__ __volatile__ ("mfence");
|
||||
#elif defined(__loongarch_lp64)
|
||||
// TODO LoongArch barrier
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
do {
|
||||
write_word(p, (testword_t)p);
|
||||
} while (p++ < pe); // test before increment in case pointer overflows
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (enable_nontemporal && nontemporal_writes_supported()) {
|
||||
do {
|
||||
write_word_nt(p, (testword_t)p + offset);
|
||||
} while (p++ < pe); // test before increment in case pointer overflows
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
__asm__ __volatile__ ("mfence");
|
||||
#elif defined(__loongarch_lp64)
|
||||
// TODO LoongArch barrier
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
do {
|
||||
write_word(p, (testword_t)p + offset);
|
||||
} while (p++ < pe); // test before increment in case pointer overflows
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
write_word(p, (testword_t)p + offset);
|
||||
} while (p++ < pe); // test before increment in case pointer overflows
|
||||
@ -97,13 +134,24 @@ static int pattern_check(int my_cpu, testword_t offset)
|
||||
continue;
|
||||
}
|
||||
test_addr[my_cpu] = (uintptr_t)p;
|
||||
do {
|
||||
testword_t expect = (testword_t)p + offset;
|
||||
testword_t actual = read_word(p);
|
||||
if (unlikely(actual != expect)) {
|
||||
data_error(p, expect, actual, true);
|
||||
}
|
||||
} while (p++ < pe); // test before increment in case pointer overflows
|
||||
if (!offset) {
|
||||
do {
|
||||
testword_t expect = (testword_t)p;
|
||||
testword_t actual = read_word(p);
|
||||
if (unlikely(actual != expect)) {
|
||||
data_error(p, expect, actual, true);
|
||||
}
|
||||
} while (p++ < pe); // test before increment in case pointer overflows
|
||||
}
|
||||
else {
|
||||
do {
|
||||
testword_t expect = (testword_t)p + offset;
|
||||
testword_t actual = read_word(p);
|
||||
if (unlikely(actual != expect)) {
|
||||
data_error(p, expect, actual, true);
|
||||
}
|
||||
} while (p++ < pe); // test before increment in case pointer overflows
|
||||
}
|
||||
do_tick(my_cpu);
|
||||
BAILOUT;
|
||||
} while (!at_end && ++pe); // advance pe to next start point
|
||||
|
@ -21,11 +21,13 @@
|
||||
*/
|
||||
#include "memrw.h"
|
||||
#if (ARCH_BITS == 64)
|
||||
#define read_word read64
|
||||
#define write_word write64
|
||||
#define read_word read64
|
||||
#define write_word write64
|
||||
#define write_word_nt write64nt
|
||||
#else
|
||||
#define read_word read32
|
||||
#define write_word write32
|
||||
#define read_word read32
|
||||
#define write_word write32
|
||||
#define write_word_nt write32nt
|
||||
#endif
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user