mirror of
https://github.com/memtest86plus/memtest86plus.git
synced 2025-02-20 11:38:25 -06:00
Improve efficiency of random number generation (discussion #8).
Use a more efficient algorithm that can be in-lined, and keep the generator state in a local variable.
This commit is contained in:
parent
5e2ab9289b
commit
e92f488753
@ -34,19 +34,20 @@ int test_mov_inv_random(int my_cpu)
|
||||
{
|
||||
int ticks = 0;
|
||||
|
||||
uint64_t seed;
|
||||
testword_t seed;
|
||||
if (cpuid_info.flags.rdtsc) {
|
||||
seed = get_tsc();
|
||||
} else {
|
||||
seed = UINT64_C(0x12345678) * (1 + pass_num);
|
||||
seed = 1 + pass_num;
|
||||
}
|
||||
seed *= 0x87654321;
|
||||
|
||||
if (my_cpu == master_cpu) {
|
||||
display_test_pattern_value(seed);
|
||||
}
|
||||
|
||||
// Initialize memory with the initial pattern.
|
||||
random_seed(my_cpu, seed);
|
||||
testword_t prsg_state = seed;
|
||||
for (int i = 0; i < vm_map_size; i++) {
|
||||
testword_t *start, *end;
|
||||
calculate_chunk(&start, &end, my_cpu, i, sizeof(testword_t));
|
||||
@ -69,7 +70,8 @@ int test_mov_inv_random(int my_cpu)
|
||||
}
|
||||
test_addr[my_cpu] = (uintptr_t)p;
|
||||
do {
|
||||
write_word(p, random(my_cpu));
|
||||
prsg_state = prsg(prsg_state);
|
||||
write_word(p, prsg_state);
|
||||
} while (p++ < pe); // test before increment in case pointer overflows
|
||||
do_tick(my_cpu);
|
||||
BAILOUT;
|
||||
@ -82,7 +84,7 @@ int test_mov_inv_random(int my_cpu)
|
||||
for (int i = 0; i < 2; i++) {
|
||||
flush_caches(my_cpu);
|
||||
|
||||
random_seed(my_cpu, seed);
|
||||
prsg_state = seed;
|
||||
for (int j = 0; j < vm_map_size; j++) {
|
||||
testword_t *start, *end;
|
||||
calculate_chunk(&start, &end, my_cpu, j, sizeof(testword_t));
|
||||
@ -105,7 +107,8 @@ int test_mov_inv_random(int my_cpu)
|
||||
}
|
||||
test_addr[my_cpu] = (uintptr_t)p;
|
||||
do {
|
||||
testword_t expect = random(my_cpu) ^ invert;
|
||||
prsg_state = prsg(prsg_state);
|
||||
testword_t expect = prsg_state ^ invert;
|
||||
testword_t actual = read_word(p);
|
||||
if (unlikely(actual != expect)) {
|
||||
data_error(p, expect, actual, true);
|
||||
|
@ -25,71 +25,10 @@
|
||||
|
||||
#include "test_helper.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Types
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// We keep a separate LFSR for each CPU. Space them out by at least a cache line,
|
||||
// otherwise performance suffers.
|
||||
|
||||
typedef struct {
|
||||
// Current contents of this CPU's 64-bit shift register.
uint64_t lfsr;
|
||||
// Filler only: 1 + 7 uint64_t words make each entry 64 bytes, which keeps
// neighbouring CPUs' registers apart as described in the note above.
uint64_t pad[7];
|
||||
} prsg_state_t;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Private Variables
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// One generator state entry per CPU; the functions below index it with my_cpu.
static prsg_state_t prsg_state[MAX_CPUS];
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Private Functions
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static inline uint32_t prsg(int my_cpu)
|
||||
{
|
||||
// This implements a 64 bit linear feedback shift register with XNOR
|
||||
// feedback from taps 64, 63, 61, 60. It generates 32 new bits each
|
||||
// time the function is called. Because the feedback taps are all in
|
||||
// the upper 32 bits, we can generate the new bits in parallel.
|
||||
|
||||
uint64_t lfsr = prsg_state[my_cpu].lfsr;
|
||||
uint32_t feedback = ~((lfsr >> 32) ^ (lfsr >> 31) ^ (lfsr >> 29) ^ (lfsr >> 28));
|
||||
prsg_state[my_cpu].lfsr = (lfsr << 32) | feedback;
|
||||
return feedback;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Public Functions
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
void random_seed(int my_cpu, uint64_t seed)
|
||||
{
|
||||
if (my_cpu < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Avoid the PRSG illegal state.
|
||||
if (~seed == 0) {
|
||||
seed = 0;
|
||||
}
|
||||
prsg_state[my_cpu].lfsr = seed;
|
||||
}
|
||||
|
||||
testword_t random(int my_cpu)
{
    // A negative CPU number yields 0 without advancing any generator.
    testword_t word = 0;

    if (my_cpu >= 0) {
        // Each prsg() call supplies 32 bits.
        word = prsg(my_cpu);
#if TESTWORD_WIDTH > 32
        // Wider test words need a second call: the first result becomes the
        // upper half, the second fills the lower half.
        word = (word << 32) | prsg(my_cpu);
#endif
    }
    return word;
}
|
||||
|
||||
void calculate_chunk(testword_t **start, testword_t **end, int my_cpu, int segment, size_t chunk_align)
|
||||
{
|
||||
if (my_cpu < 0) {
|
||||
|
@ -63,16 +63,23 @@ static inline uintptr_t round_up(uintptr_t value, size_t align_size)
|
||||
}
|
||||
|
||||
/**
|
||||
* Seeds the pseudo-random number generator for my_cpu.
|
||||
* Returns the next word in a pseudo-random sequence where state was the
|
||||
* previous word in that sequence.
|
||||
*/
|
||||
void random_seed(int my_cpu, uint64_t seed);
|
||||
|
||||
/**
|
||||
* Returns a pseudo-random number for my_cpu. The sequence of numbers returned
|
||||
* is repeatable for a given starting seed. The sequence repeats after 2^64 - 1
|
||||
* numbers. Within that period, no number is repeated.
|
||||
*/
|
||||
testword_t random(int my_cpu);
|
||||
static inline testword_t prsg(testword_t state)
{
    // Xorshift generator, see https://en.wikipedia.org/wiki/Xorshift
    //
    // The caller owns the generator state: pass in the previous word of the
    // sequence and store the returned word as the new state. The step is three
    // shift-and-XOR operations; only the shift distances differ between the
    // x86-64 build and every other build.
    //
    // Note that a state of zero is a fixed point (every shift of zero is zero),
    // so callers must seed with a non-zero value to get a varying sequence.
#ifdef __x86_64__
    enum { SHIFT_FIRST = 13, SHIFT_SECOND = 7, SHIFT_THIRD = 17 };
#else
    enum { SHIFT_FIRST = 13, SHIFT_SECOND = 17, SHIFT_THIRD = 5 };
#endif

    testword_t next = state;
    next ^= next << SHIFT_FIRST;
    next ^= next >> SHIFT_SECOND;
    next ^= next << SHIFT_THIRD;
    return next;
}
|
||||
|
||||
/**
|
||||
* Calculates the start and end word address for the chunk of segment that is
|
||||
|
@ -106,6 +106,8 @@ int run_test(int my_cpu, int test, int stage, int iterations)
|
||||
}
|
||||
BARRIER;
|
||||
|
||||
testword_t prsg_state;
|
||||
|
||||
int ticks = 0;
|
||||
|
||||
switch (test) {
|
||||
@ -168,12 +170,16 @@ int run_test(int my_cpu, int test, int stage, int iterations)
|
||||
// Moving inversions, fixed random pattern.
|
||||
case 5:
|
||||
if (cpuid_info.flags.rdtsc) {
|
||||
random_seed(my_cpu, get_tsc());
|
||||
prsg_state = get_tsc();
|
||||
} else {
|
||||
random_seed(my_cpu, UINT64_C(0x12345678) * (1 + pass_num));
|
||||
prsg_state = 1 + pass_num;
|
||||
}
|
||||
prsg_state *= 0x12345678;
|
||||
|
||||
for (int i = 0; i < iterations; i++) {
|
||||
testword_t pattern1 = random(my_cpu);
|
||||
prsg_state = prsg(prsg_state);
|
||||
|
||||
testword_t pattern1 = prsg_state;
|
||||
testword_t pattern2 = ~pattern1;
|
||||
|
||||
BARRIER;
|
||||
@ -213,13 +219,17 @@ int run_test(int my_cpu, int test, int stage, int iterations)
|
||||
// Modulo 20 check, fixed random pattern.
|
||||
case 9:
|
||||
if (cpuid_info.flags.rdtsc) {
|
||||
random_seed(my_cpu, get_tsc());
|
||||
prsg_state = get_tsc();
|
||||
} else {
|
||||
random_seed(my_cpu, UINT64_C(0x12345678) * (1 + pass_num));
|
||||
prsg_state = 1 + pass_num;
|
||||
}
|
||||
prsg_state *= 0x87654321;
|
||||
|
||||
for (int i = 0; i < iterations; i++) {
|
||||
for (int offset = 0; offset < MODULO_N; offset++) {
|
||||
testword_t pattern1 = random(my_cpu);
|
||||
prsg_state = prsg(prsg_state);
|
||||
|
||||
testword_t pattern1 = prsg_state;
|
||||
testword_t pattern2 = ~pattern1;
|
||||
|
||||
BARRIER;
|
||||
|
Loading…
Reference in New Issue
Block a user