Significantly reduce the code size of the bit fade and own address tests by folding near-identical switch case bodies together and by removing code duplication: pattern_fill() and pattern_check() are merged into a single pattern_fill_check() function. Also add a `rep stosl` / `rep stosq` fill path to the bit fade test.

Before:
   text    data     bss     dec     hex filename
   1581       4       0    1585     631 tests/bit_fade.o
   1236       0       0    1236     4d4 tests/own_addr.o

After:
   text    data     bss     dec     hex filename
   1013       4       0    1017     3f9 tests/bit_fade.o
    787       0       0     787     313 tests/own_addr.o
This commit is contained in:
Lionel Debroux 2023-10-11 23:37:53 +02:00
parent 46dc67a795
commit 011fb7e905
4 changed files with 71 additions and 42 deletions

View File

@ -24,11 +24,13 @@
#include "test_funcs.h"
#include "test_helper.h"
#define HAND_OPTIMISED 1 // Use hand-optimised assembler code for performance.
//------------------------------------------------------------------------------
// Private Functions
//------------------------------------------------------------------------------
static int pattern_fill(int my_cpu, testword_t pattern)
static int pattern_fill_check(int my_cpu, testword_t pattern, bool check)
{
int ticks = 0;
@ -57,9 +59,43 @@ static int pattern_fill(int my_cpu, testword_t pattern)
continue;
}
test_addr[my_cpu] = (uintptr_t)p;
do {
write_word(p, pattern);
} while (p++ < pe); // test before increment in case pointer overflows
if (!check) {
#if HAND_OPTIMISED
#ifdef __x86_64__
uint64_t length = pe - p + 1;
__asm__ __volatile__ ("\t"
"rep \n\t"
"stosq \n\t"
:
: "c" (length), "D" (p), "a" (pattern)
:
);
p = pe;
#else
uint32_t length = pe - p + 1;
__asm__ __volatile__ ("\t"
"rep \n\t"
"stosl \n\t"
:
: "c" (length), "D" (p), "a" (pattern)
:
);
p = pe;
#endif
#else
do {
write_word(p, pattern);
} while (p++ < pe); // test before increment in case pointer overflows
#endif
}
else {
do {
testword_t actual = read_word(p);
if (unlikely(actual != pattern)) {
data_error(p, pattern, actual, true);
}
} while (p++ < pe); // test before increment in case pointer overflows
}
do_tick(my_cpu);
BAILOUT;
} while (!at_end && ++pe); // advance pe to next start point
@ -70,7 +106,7 @@ static int pattern_fill(int my_cpu, testword_t pattern)
return ticks;
}
static int pattern_check(int my_cpu, testword_t pattern)
/*static int pattern_check(int my_cpu, testword_t pattern)
{
int ticks = 0;
@ -107,7 +143,7 @@ static int pattern_check(int my_cpu, testword_t pattern)
}
return ticks;
}
}*/
static int fade_delay(int my_cpu, int sleep_secs)
{
@ -143,9 +179,13 @@ int test_bit_fade(int my_cpu, int stage, int sleep_secs)
int ticks = 0;
testword_t pattern = stage < 3 ? all_zero : all_ones;
stage %= 3;
switch (stage) {
case 0:
ticks = pattern_fill(my_cpu, all_zero);
ticks = pattern_fill_check(my_cpu, pattern, false);
break;
case 1:
// Only sleep once.
@ -154,19 +194,7 @@ int test_bit_fade(int my_cpu, int stage, int sleep_secs)
}
break;
case 2:
ticks = pattern_check(my_cpu, all_zero);
break;
case 3:
ticks = pattern_fill(my_cpu, all_ones);
break;
case 4:
// Only sleep once.
if (stage != last_stage) {
ticks = fade_delay(my_cpu, sleep_secs);
}
break;
case 5:
ticks = pattern_check(my_cpu, all_ones);
ticks = pattern_fill_check(my_cpu, pattern, true);
break;
default:
break;

View File

@ -28,7 +28,7 @@
// Private Functions
//------------------------------------------------------------------------------
static int pattern_fill(int my_cpu, testword_t offset)
static int __attribute__((noclone)) pattern_fill_check(int my_cpu, testword_t offset, bool check)
{
int ticks = 0;
@ -58,9 +58,20 @@ static int pattern_fill(int my_cpu, testword_t offset)
continue;
}
test_addr[my_cpu] = (uintptr_t)p;
do {
write_word(p, (testword_t)p + offset);
} while (p++ < pe); // test before increment in case pointer overflows
if (!check) {
do {
write_word(p, (testword_t)p + offset);
} while (p++ < pe); // test before increment in case pointer overflows
}
else {
do {
testword_t expect = (testword_t)p + offset;
testword_t actual = read_word(p);
if (unlikely(actual != expect)) {
data_error(p, expect, actual, true);
}
} while (p++ < pe); // test before increment in case pointer overflows
}
do_tick(my_cpu);
BAILOUT;
} while (!at_end && ++pe); // advance pe to next start point
@ -71,7 +82,7 @@ static int pattern_fill(int my_cpu, testword_t offset)
return ticks;
}
static int pattern_check(int my_cpu, testword_t offset)
/*static int pattern_check(int my_cpu, testword_t offset)
{
int ticks = 0;
@ -110,18 +121,17 @@ static int pattern_check(int my_cpu, testword_t offset)
}
return ticks;
}
}*/
//------------------------------------------------------------------------------
// Public Functions
//------------------------------------------------------------------------------
int test_own_addr1(int my_cpu)
int test_own_addr1(int my_cpu, int stage)
{
int ticks = 0;
ticks += pattern_fill(my_cpu, 0);
ticks += pattern_check(my_cpu, 0);
ticks += pattern_fill_check(my_cpu, 0, !!stage);
return ticks;
}
@ -143,16 +153,7 @@ int test_own_addr2(int my_cpu, int stage)
offset /= VM_WINDOW_SIZE;
#endif
switch (stage) {
case 0:
ticks = pattern_fill(my_cpu, offset);
break;
case 1:
ticks = pattern_check(my_cpu, offset);
break;
default:
break;
}
ticks = pattern_fill_check(my_cpu, offset, !!stage);
return ticks;
}

View File

@ -17,7 +17,7 @@
int test_addr_walk1(int my_cpu);
int test_own_addr1(int my_cpu);
int test_own_addr1(int my_cpu, int stage);
int test_own_addr2(int my_cpu, int stage);

View File

@ -51,7 +51,7 @@
test_pattern_t test_list[NUM_TEST_PATTERNS] = {
// ena, cpu, stgs, itrs, errs, description
{ true, SEQ, 1, 6, 0, "[Address test, walking ones, no cache] "},
{false, SEQ, 1, 6, 0, "[Address test, own address in window] "},
{false, SEQ, 2, 6, 0, "[Address test, own address in window] "},
{ true, SEQ, 2, 6, 0, "[Address test, own address + window] "},
{ true, PAR, 1, 6, 0, "[Moving inversions, 1s & 0s] "},
{ true, PAR, 1, 3, 0, "[Moving inversions, 8 bit pattern] "},
@ -124,7 +124,7 @@ int run_test(int my_cpu, int test, int stage, int iterations)
// Address test, own address in window.
case 1:
ticks += test_own_addr1(my_cpu);
ticks += test_own_addr1(my_cpu, stage);
BAILOUT;
break;