mirror of
https://github.com/memtest86plus/memtest86plus.git
synced 2025-02-20 11:38:25 -06:00
Improve efficiency of random number generation (discussion #8).
Use a more efficient algorithm that can be in-lined, and keep the generator state in a local variable.
This commit is contained in:
parent
5e2ab9289b
commit
e92f488753
@ -34,19 +34,20 @@ int test_mov_inv_random(int my_cpu)
|
|||||||
{
|
{
|
||||||
int ticks = 0;
|
int ticks = 0;
|
||||||
|
|
||||||
uint64_t seed;
|
testword_t seed;
|
||||||
if (cpuid_info.flags.rdtsc) {
|
if (cpuid_info.flags.rdtsc) {
|
||||||
seed = get_tsc();
|
seed = get_tsc();
|
||||||
} else {
|
} else {
|
||||||
seed = UINT64_C(0x12345678) * (1 + pass_num);
|
seed = 1 + pass_num;
|
||||||
}
|
}
|
||||||
|
seed *= 0x87654321;
|
||||||
|
|
||||||
if (my_cpu == master_cpu) {
|
if (my_cpu == master_cpu) {
|
||||||
display_test_pattern_value(seed);
|
display_test_pattern_value(seed);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize memory with the initial pattern.
|
// Initialize memory with the initial pattern.
|
||||||
random_seed(my_cpu, seed);
|
testword_t prsg_state = seed;
|
||||||
for (int i = 0; i < vm_map_size; i++) {
|
for (int i = 0; i < vm_map_size; i++) {
|
||||||
testword_t *start, *end;
|
testword_t *start, *end;
|
||||||
calculate_chunk(&start, &end, my_cpu, i, sizeof(testword_t));
|
calculate_chunk(&start, &end, my_cpu, i, sizeof(testword_t));
|
||||||
@ -69,7 +70,8 @@ int test_mov_inv_random(int my_cpu)
|
|||||||
}
|
}
|
||||||
test_addr[my_cpu] = (uintptr_t)p;
|
test_addr[my_cpu] = (uintptr_t)p;
|
||||||
do {
|
do {
|
||||||
write_word(p, random(my_cpu));
|
prsg_state = prsg(prsg_state);
|
||||||
|
write_word(p, prsg_state);
|
||||||
} while (p++ < pe); // test before increment in case pointer overflows
|
} while (p++ < pe); // test before increment in case pointer overflows
|
||||||
do_tick(my_cpu);
|
do_tick(my_cpu);
|
||||||
BAILOUT;
|
BAILOUT;
|
||||||
@ -82,7 +84,7 @@ int test_mov_inv_random(int my_cpu)
|
|||||||
for (int i = 0; i < 2; i++) {
|
for (int i = 0; i < 2; i++) {
|
||||||
flush_caches(my_cpu);
|
flush_caches(my_cpu);
|
||||||
|
|
||||||
random_seed(my_cpu, seed);
|
prsg_state = seed;
|
||||||
for (int j = 0; j < vm_map_size; j++) {
|
for (int j = 0; j < vm_map_size; j++) {
|
||||||
testword_t *start, *end;
|
testword_t *start, *end;
|
||||||
calculate_chunk(&start, &end, my_cpu, j, sizeof(testword_t));
|
calculate_chunk(&start, &end, my_cpu, j, sizeof(testword_t));
|
||||||
@ -105,7 +107,8 @@ int test_mov_inv_random(int my_cpu)
|
|||||||
}
|
}
|
||||||
test_addr[my_cpu] = (uintptr_t)p;
|
test_addr[my_cpu] = (uintptr_t)p;
|
||||||
do {
|
do {
|
||||||
testword_t expect = random(my_cpu) ^ invert;
|
prsg_state = prsg(prsg_state);
|
||||||
|
testword_t expect = prsg_state ^ invert;
|
||||||
testword_t actual = read_word(p);
|
testword_t actual = read_word(p);
|
||||||
if (unlikely(actual != expect)) {
|
if (unlikely(actual != expect)) {
|
||||||
data_error(p, expect, actual, true);
|
data_error(p, expect, actual, true);
|
||||||
|
@ -25,71 +25,10 @@
|
|||||||
|
|
||||||
#include "test_helper.h"
|
#include "test_helper.h"
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
// Types
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
// We keep a separate LFSR for each CPU. Space them out by at least a cache line,
|
|
||||||
// otherwise performance suffers.
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
uint64_t lfsr;
|
|
||||||
uint64_t pad[7];
|
|
||||||
} prsg_state_t;
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
// Private Variables
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
static prsg_state_t prsg_state[MAX_CPUS];
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
// Private Functions
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
static inline uint32_t prsg(int my_cpu)
|
|
||||||
{
|
|
||||||
// This implements a 64 bit linear feedback shift register with XNOR
|
|
||||||
// feedback from taps 64, 63, 61, 60. It generates 32 new bits each
|
|
||||||
// time the function is called. Because the feedback taps are all in
|
|
||||||
// the upper 32 bits, we can generate the new bits in parallel.
|
|
||||||
|
|
||||||
uint64_t lfsr = prsg_state[my_cpu].lfsr;
|
|
||||||
uint32_t feedback = ~((lfsr >> 32) ^ (lfsr >> 31) ^ (lfsr >> 29) ^ (lfsr >> 28));
|
|
||||||
prsg_state[my_cpu].lfsr = (lfsr << 32) | feedback;
|
|
||||||
return feedback;
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Public Functions
|
// Public Functions
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
void random_seed(int my_cpu, uint64_t seed)
|
|
||||||
{
|
|
||||||
if (my_cpu < 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Avoid the PRSG illegal state.
|
|
||||||
if (~seed == 0) {
|
|
||||||
seed = 0;
|
|
||||||
}
|
|
||||||
prsg_state[my_cpu].lfsr = seed;
|
|
||||||
}
|
|
||||||
|
|
||||||
testword_t random(int my_cpu)
|
|
||||||
{
|
|
||||||
if (my_cpu < 0) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
testword_t value = prsg(my_cpu);
|
|
||||||
#if TESTWORD_WIDTH > 32
|
|
||||||
value = value << 32 | prsg(my_cpu);
|
|
||||||
#endif
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
void calculate_chunk(testword_t **start, testword_t **end, int my_cpu, int segment, size_t chunk_align)
|
void calculate_chunk(testword_t **start, testword_t **end, int my_cpu, int segment, size_t chunk_align)
|
||||||
{
|
{
|
||||||
if (my_cpu < 0) {
|
if (my_cpu < 0) {
|
||||||
|
@ -63,16 +63,23 @@ static inline uintptr_t round_up(uintptr_t value, size_t align_size)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Seeds the pseudo-random number generator for my_cpu.
|
* Returns the next word in a pseudo-random sequence where state was the
|
||||||
|
* previous word in that sequence.
|
||||||
*/
|
*/
|
||||||
void random_seed(int my_cpu, uint64_t seed);
|
static inline testword_t prsg(testword_t state)
|
||||||
|
{
|
||||||
/**
|
// This uses the algorithms described at https://en.wikipedia.org/wiki/Xorshift
|
||||||
* Returns a pseudo-random number for my_cpu. The sequence of numbers returned
|
#ifdef __x86_64__
|
||||||
* is repeatable for a given starting seed. The sequence repeats after 2^64 - 1
|
state ^= state << 13;
|
||||||
* numbers. Within that period, no number is repeated.
|
state ^= state >> 7;
|
||||||
*/
|
state ^= state << 17;
|
||||||
testword_t random(int my_cpu);
|
#else
|
||||||
|
state ^= state << 13;
|
||||||
|
state ^= state >> 17;
|
||||||
|
state ^= state << 5;
|
||||||
|
#endif
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculates the start and end word address for the chunk of segment that is
|
* Calculates the start and end word address for the chunk of segment that is
|
||||||
|
@ -106,6 +106,8 @@ int run_test(int my_cpu, int test, int stage, int iterations)
|
|||||||
}
|
}
|
||||||
BARRIER;
|
BARRIER;
|
||||||
|
|
||||||
|
testword_t prsg_state;
|
||||||
|
|
||||||
int ticks = 0;
|
int ticks = 0;
|
||||||
|
|
||||||
switch (test) {
|
switch (test) {
|
||||||
@ -168,12 +170,16 @@ int run_test(int my_cpu, int test, int stage, int iterations)
|
|||||||
// Moving inversions, fixed random pattern.
|
// Moving inversions, fixed random pattern.
|
||||||
case 5:
|
case 5:
|
||||||
if (cpuid_info.flags.rdtsc) {
|
if (cpuid_info.flags.rdtsc) {
|
||||||
random_seed(my_cpu, get_tsc());
|
prsg_state = get_tsc();
|
||||||
} else {
|
} else {
|
||||||
random_seed(my_cpu, UINT64_C(0x12345678) * (1 + pass_num));
|
prsg_state = 1 + pass_num;
|
||||||
}
|
}
|
||||||
|
prsg_state *= 0x12345678;
|
||||||
|
|
||||||
for (int i = 0; i < iterations; i++) {
|
for (int i = 0; i < iterations; i++) {
|
||||||
testword_t pattern1 = random(my_cpu);
|
prsg_state = prsg(prsg_state);
|
||||||
|
|
||||||
|
testword_t pattern1 = prsg_state;
|
||||||
testword_t pattern2 = ~pattern1;
|
testword_t pattern2 = ~pattern1;
|
||||||
|
|
||||||
BARRIER;
|
BARRIER;
|
||||||
@ -213,13 +219,17 @@ int run_test(int my_cpu, int test, int stage, int iterations)
|
|||||||
// Modulo 20 check, fixed random pattern.
|
// Modulo 20 check, fixed random pattern.
|
||||||
case 9:
|
case 9:
|
||||||
if (cpuid_info.flags.rdtsc) {
|
if (cpuid_info.flags.rdtsc) {
|
||||||
random_seed(my_cpu, get_tsc());
|
prsg_state = get_tsc();
|
||||||
} else {
|
} else {
|
||||||
random_seed(my_cpu, UINT64_C(0x12345678) * (1 + pass_num));
|
prsg_state = 1 + pass_num;
|
||||||
}
|
}
|
||||||
|
prsg_state *= 0x87654321;
|
||||||
|
|
||||||
for (int i = 0; i < iterations; i++) {
|
for (int i = 0; i < iterations; i++) {
|
||||||
for (int offset = 0; offset < MODULO_N; offset++) {
|
for (int offset = 0; offset < MODULO_N; offset++) {
|
||||||
testword_t pattern1 = random(my_cpu);
|
prsg_state = prsg(prsg_state);
|
||||||
|
|
||||||
|
testword_t pattern1 = prsg_state;
|
||||||
testword_t pattern2 = ~pattern1;
|
testword_t pattern2 = ~pattern1;
|
||||||
|
|
||||||
BARRIER;
|
BARRIER;
|
||||||
|
Loading…
Reference in New Issue
Block a user