BROKEN: Testing code for nontemporal writes in other tests, which usually slows down tests, sometimes a lot, especially when SMT is enabled!

This commit is contained in:
Lionel Debroux 2022-12-11 19:10:36 +01:00
parent a373e97175
commit 20f05d5658
5 changed files with 233 additions and 87 deletions

View File

@ -19,6 +19,8 @@
#include "display.h" #include "display.h"
#include "error.h" #include "error.h"
#include "test.h" #include "test.h"
#include "config.h"
#include "cpuid.h"
#include "test_funcs.h" #include "test_funcs.h"
#include "test_helper.h" #include "test_helper.h"
@ -59,26 +61,52 @@ int test_block_move(int my_cpu, int iterations)
} }
test_addr[my_cpu] = (uintptr_t)p; test_addr[my_cpu] = (uintptr_t)p;
testword_t pattern1 = 1; testword_t pattern1 = 1;
do { // Nontemporal stores seem to be mostly bad for performance here.
testword_t pattern2 = ~pattern1; if (enable_nontemporal && cpuid_info.flags.sse2) {
write_word(p + 0, pattern1); do {
write_word(p + 1, pattern1); testword_t pattern2 = ~pattern1;
write_word(p + 2, pattern1); write_word_nt(p + 0, pattern1);
write_word(p + 3, pattern1); write_word_nt(p + 1, pattern1);
write_word(p + 4, pattern2); write_word_nt(p + 2, pattern1);
write_word(p + 5, pattern2); write_word_nt(p + 3, pattern1);
write_word(p + 6, pattern1); write_word_nt(p + 4, pattern2);
write_word(p + 7, pattern1); write_word_nt(p + 5, pattern2);
write_word(p + 8, pattern1); write_word_nt(p + 6, pattern1);
write_word(p + 9, pattern1); write_word_nt(p + 7, pattern1);
write_word(p + 10, pattern2); write_word_nt(p + 8, pattern1);
write_word(p + 11, pattern2); write_word_nt(p + 9, pattern1);
write_word(p + 12, pattern1); write_word_nt(p + 10, pattern2);
write_word(p + 13, pattern1); write_word_nt(p + 11, pattern2);
write_word(p + 14, pattern2); write_word_nt(p + 12, pattern1);
write_word(p + 15, pattern2); write_word_nt(p + 13, pattern1);
pattern1 = pattern1 << 1 | pattern1 >> (TESTWORD_WIDTH - 1); // rotate left write_word_nt(p + 14, pattern2);
} while (p <= (pe - 16) && (p += 16)); // test before increment in case pointer overflows write_word_nt(p + 15, pattern2);
pattern1 = pattern1 << 1 | pattern1 >> (TESTWORD_WIDTH - 1); // rotate left
} while (p <= (pe - 16) && (p += 16)); // test before increment in case pointer overflows
__asm__ __volatile__ ("mfence");
}
else {
do {
testword_t pattern2 = ~pattern1;
write_word(p + 0, pattern1);
write_word(p + 1, pattern1);
write_word(p + 2, pattern1);
write_word(p + 3, pattern1);
write_word(p + 4, pattern2);
write_word(p + 5, pattern2);
write_word(p + 6, pattern1);
write_word(p + 7, pattern1);
write_word(p + 8, pattern1);
write_word(p + 9, pattern1);
write_word(p + 10, pattern2);
write_word(p + 11, pattern2);
write_word(p + 12, pattern1);
write_word(p + 13, pattern1);
write_word(p + 14, pattern2);
write_word(p + 15, pattern2);
pattern1 = pattern1 << 1 | pattern1 >> (TESTWORD_WIDTH - 1); // rotate left
} while (p <= (pe - 16) && (p += 16)); // test before increment in case pointer overflows
}
do_tick(my_cpu); do_tick(my_cpu);
BAILOUT; BAILOUT;
} while (!at_end && ++pe); // advance pe to next start point } while (!at_end && ++pe); // advance pe to next start point

View File

@ -19,6 +19,8 @@
#include "display.h" #include "display.h"
#include "error.h" #include "error.h"
#include "test.h" #include "test.h"
#include "config.h"
#include "cpuid.h"
#include "test_funcs.h" #include "test_funcs.h"
#include "test_helper.h" #include "test_helper.h"
@ -59,9 +61,18 @@ int test_modulo_n(int my_cpu, int iterations, testword_t pattern1, testword_t pa
continue; continue;
} }
test_addr[my_cpu] = (uintptr_t)p; test_addr[my_cpu] = (uintptr_t)p;
do { // Nontemporal stores seem to be bad for performance here.
write_word(p, pattern1); if (enable_nontemporal && cpuid_info.flags.sse2) {
} while (p <= (pe - n) && (p += n)); // test before increment in case pointer overflows do {
write_word_nt(p, pattern1);
} while (p <= (pe - n) && (p += n)); // test before increment in case pointer overflows
__asm__ __volatile__ ("mfence");
}
else {
do {
write_word(p, pattern1);
} while (p <= (pe - n) && (p += n)); // test before increment in case pointer overflows
}
do_tick(my_cpu); do_tick(my_cpu);
BAILOUT; BAILOUT;
} while (!at_end && ++pe); // advance pe to next start point } while (!at_end && ++pe); // advance pe to next start point
@ -92,15 +103,30 @@ int test_modulo_n(int my_cpu, int iterations, testword_t pattern1, testword_t pa
continue; continue;
} }
test_addr[my_cpu] = (uintptr_t)p; test_addr[my_cpu] = (uintptr_t)p;
do { // Nontemporal stores seem to be bad for performance here.
if (k != offset) { if (enable_nontemporal && cpuid_info.flags.sse2) {
write_word(p, pattern2); do {
} if (k != offset) {
k++; write_word_nt(p, pattern2);
if (k == n) { }
k = 0; k++;
} if (k == n) {
} while (p++ < pe); // test before increment in case pointer overflows k = 0;
}
} while (p++ < pe); // test before increment in case pointer overflows
__asm__ __volatile__ ("mfence");
}
else {
do {
if (k != offset) {
write_word(p, pattern2);
}
k++;
if (k == n) {
k = 0;
}
} while (p++ < pe); // test before increment in case pointer overflows
}
do_tick(my_cpu); do_tick(my_cpu);
BAILOUT; BAILOUT;
} while (!at_end && ++pe); // advance pe to next start point } while (!at_end && ++pe); // advance pe to next start point

View File

@ -19,6 +19,8 @@
#include "display.h" #include "display.h"
#include "error.h" #include "error.h"
#include "test.h" #include "test.h"
#include "config.h"
#include "cpuid.h"
#include "test_funcs.h" #include "test_funcs.h"
#include "test_helper.h" #include "test_helper.h"
@ -119,13 +121,26 @@ int test_mov_inv_fixed(int my_cpu, int iterations, testword_t pattern1, testword
continue; continue;
} }
test_addr[my_cpu] = (uintptr_t)p; test_addr[my_cpu] = (uintptr_t)p;
do { // Nontemporal stores seem to be bad for performance here.
testword_t actual = read_word(p); if (enable_nontemporal && cpuid_info.flags.sse2) {
if (unlikely(actual != pattern1)) { do {
data_error(p, pattern1, actual, true); testword_t actual = read_word(p);
} if (unlikely(actual != pattern1)) {
write_word(p, pattern2); data_error(p, pattern1, actual, true);
} while (p++ < pe); // test before increment in case pointer overflows }
write_word_nt(p, pattern2);
} while (p++ < pe); // test before increment in case pointer overflows
__asm__ __volatile__ ("mfence");
}
else {
do {
testword_t actual = read_word(p);
if (unlikely(actual != pattern1)) {
data_error(p, pattern1, actual, true);
}
write_word(p, pattern2);
} while (p++ < pe); // test before increment in case pointer overflows
}
do_tick(my_cpu); do_tick(my_cpu);
BAILOUT; BAILOUT;
} while (!at_end && ++pe); // advance pe to next start point } while (!at_end && ++pe); // advance pe to next start point
@ -155,13 +170,26 @@ int test_mov_inv_fixed(int my_cpu, int iterations, testword_t pattern1, testword
continue; continue;
} }
test_addr[my_cpu] = (uintptr_t)p; test_addr[my_cpu] = (uintptr_t)p;
do { // Nontemporal stores seem to be bad for performance here.
testword_t actual = read_word(p); if (enable_nontemporal && cpuid_info.flags.sse2) {
if (unlikely(actual != pattern2)) { do {
data_error(p, pattern2, actual, true); testword_t actual = read_word(p);
} if (unlikely(actual != pattern2)) {
write_word(p, pattern1); data_error(p, pattern2, actual, true);
} while (p-- > ps); // test before decrement in case pointer overflows }
write_word_nt(p, pattern1);
} while (p-- > ps); // test before decrement in case pointer overflows
__asm__ __volatile__ ("mfence");
}
else {
do {
testword_t actual = read_word(p);
if (unlikely(actual != pattern2)) {
data_error(p, pattern2, actual, true);
}
write_word(p, pattern1);
} while (p-- > ps); // test before decrement in case pointer overflows
}
do_tick(my_cpu); do_tick(my_cpu);
BAILOUT; BAILOUT;
} while (!at_start && --ps); // advance ps to next start point } while (!at_start && --ps); // advance ps to next start point

View File

@ -16,12 +16,12 @@
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include "cpuid.h"
#include "tsc.h"
#include "display.h" #include "display.h"
#include "error.h" #include "error.h"
#include "test.h" #include "test.h"
#include "config.h"
#include "cpuid.h"
#include "tsc.h"
#include "test_funcs.h" #include "test_funcs.h"
#include "test_helper.h" #include "test_helper.h"
@ -70,10 +70,19 @@ int test_mov_inv_random(int my_cpu)
continue; continue;
} }
test_addr[my_cpu] = (uintptr_t)p; test_addr[my_cpu] = (uintptr_t)p;
do { if (enable_nontemporal && cpuid_info.flags.sse2) {
prsg_state = prsg(prsg_state); do {
write_word(p, prsg_state); prsg_state = prsg(prsg_state);
} while (p++ < pe); // test before increment in case pointer overflows write_word_nt(p, prsg_state);
} while (p++ < pe); // test before increment in case pointer overflows
__asm__ __volatile__ ("mfence");
}
else {
do {
prsg_state = prsg(prsg_state);
write_word(p, prsg_state);
} while (p++ < pe); // test before increment in case pointer overflows
}
do_tick(my_cpu); do_tick(my_cpu);
BAILOUT; BAILOUT;
} while (!at_end && ++pe); // advance pe to next start point } while (!at_end && ++pe); // advance pe to next start point
@ -108,15 +117,29 @@ int test_mov_inv_random(int my_cpu)
continue; continue;
} }
test_addr[my_cpu] = (uintptr_t)p; test_addr[my_cpu] = (uintptr_t)p;
do { if (enable_nontemporal && cpuid_info.flags.sse2) {
prsg_state = prsg(prsg_state); do {
testword_t expect = prsg_state ^ invert; prsg_state = prsg(prsg_state);
testword_t actual = read_word(p); testword_t expect = prsg_state ^ invert;
if (unlikely(actual != expect)) { testword_t actual = read_word(p);
data_error(p, expect, actual, true); if (unlikely(actual != expect)) {
} data_error(p, expect, actual, true);
write_word(p, ~expect); }
} while (p++ < pe); // test before increment in case pointer overflows write_word_nt(p, ~expect);
} while (p++ < pe); // test before increment in case pointer overflows
__asm__ __volatile__ ("mfence");
}
else {
do {
prsg_state = prsg(prsg_state);
testword_t expect = prsg_state ^ invert;
testword_t actual = read_word(p);
if (unlikely(actual != expect)) {
data_error(p, expect, actual, true);
}
write_word(p, ~expect);
} while (p++ < pe); // test before increment in case pointer overflows
}
do_tick(my_cpu); do_tick(my_cpu);
BAILOUT; BAILOUT;
} while (!at_end && ++pe); // advance pe to next start point } while (!at_end && ++pe); // advance pe to next start point

View File

@ -19,6 +19,8 @@
#include "display.h" #include "display.h"
#include "error.h" #include "error.h"
#include "test.h" #include "test.h"
#include "config.h"
#include "cpuid.h"
#include "test_funcs.h" #include "test_funcs.h"
#include "test_helper.h" #include "test_helper.h"
@ -31,10 +33,10 @@ int test_mov_inv_walk1(int my_cpu, int iterations, int offset, bool inverse)
{ {
int ticks = 0; int ticks = 0;
testword_t pattern = (testword_t)1 << offset; testword_t pattern = inverse ? ~((testword_t)1 << offset) : (testword_t)1 << offset;
if (my_cpu == master_cpu) { if (my_cpu == master_cpu) {
display_test_pattern_value(inverse ? ~pattern : pattern); display_test_pattern_value(pattern);
} }
// Initialize memory with the initial pattern. // Initialize memory with the initial pattern.
@ -60,10 +62,19 @@ int test_mov_inv_walk1(int my_cpu, int iterations, int offset, bool inverse)
continue; continue;
} }
test_addr[my_cpu] = (uintptr_t)p; test_addr[my_cpu] = (uintptr_t)p;
do { if (enable_nontemporal && cpuid_info.flags.sse2) {
write_word(p, inverse ? ~pattern : pattern); do {
pattern = pattern << 1 | pattern >> (TESTWORD_WIDTH - 1); // rotate left write_word_nt(p, pattern);
} while (p++ < pe); // test before increment in case pointer overflows pattern = pattern << 1 | pattern >> (TESTWORD_WIDTH - 1); // rotate left
} while (p++ < pe); // test before increment in case pointer overflows
__asm__ __volatile__ ("mfence");
}
else {
do {
write_word(p, pattern);
pattern = pattern << 1 | pattern >> (TESTWORD_WIDTH - 1); // rotate left
} while (p++ < pe); // test before increment in case pointer overflows
}
do_tick(my_cpu); do_tick(my_cpu);
BAILOUT; BAILOUT;
} while (!at_end && ++pe); // advance pe to next start point } while (!at_end && ++pe); // advance pe to next start point
@ -72,7 +83,7 @@ int test_mov_inv_walk1(int my_cpu, int iterations, int offset, bool inverse)
// Check for initial pattern and then write the complement for each memory location. // Check for initial pattern and then write the complement for each memory location.
// Test from bottom up and then from the top down. // Test from bottom up and then from the top down.
for (int i = 0; i < iterations; i++) { for (int i = 0; i < iterations; i++) {
pattern = (testword_t)1 << offset; pattern = inverse ? ~((testword_t)1 << offset) : (testword_t)1 << offset;
flush_caches(my_cpu); flush_caches(my_cpu);
@ -98,20 +109,36 @@ int test_mov_inv_walk1(int my_cpu, int iterations, int offset, bool inverse)
continue; continue;
} }
test_addr[my_cpu] = (uintptr_t)p; test_addr[my_cpu] = (uintptr_t)p;
do { if (enable_nontemporal && cpuid_info.flags.sse2) {
testword_t expect = inverse ? ~pattern : pattern; do {
testword_t actual = read_word(p); testword_t expect = pattern;
if (unlikely(actual != expect)) { testword_t actual = read_word(p);
data_error(p, expect, actual, true); if (unlikely(actual != expect)) {
} data_error(p, expect, actual, true);
write_word(p, ~expect); }
pattern = pattern << 1 | pattern >> (TESTWORD_WIDTH - 1); // rotate left write_word_nt(p, ~expect);
} while (p++ < pe); // test before increment in case pointer overflows pattern = pattern << 1 | pattern >> (TESTWORD_WIDTH - 1); // rotate left
} while (p++ < pe); // test before increment in case pointer overflows
__asm__ __volatile__ ("mfence");
}
else {
do {
testword_t expect = pattern;
testword_t actual = read_word(p);
if (unlikely(actual != expect)) {
data_error(p, expect, actual, true);
}
write_word(p, ~expect);
pattern = pattern << 1 | pattern >> (TESTWORD_WIDTH - 1); // rotate left
} while (p++ < pe); // test before increment in case pointer overflows
}
do_tick(my_cpu); do_tick(my_cpu);
BAILOUT; BAILOUT;
} while (!at_end && ++pe); // advance pe to next start point } while (!at_end && ++pe); // advance pe to next start point
} }
pattern = ~pattern;
flush_caches(my_cpu); flush_caches(my_cpu);
for (int j = vm_map_size - 1; j >= 0; j--) { for (int j = vm_map_size - 1; j >= 0; j--) {
@ -136,15 +163,29 @@ int test_mov_inv_walk1(int my_cpu, int iterations, int offset, bool inverse)
continue; continue;
} }
test_addr[my_cpu] = (uintptr_t)ps; test_addr[my_cpu] = (uintptr_t)ps;
do { if (enable_nontemporal && cpuid_info.flags.sse2) {
pattern = pattern >> 1 | pattern << (TESTWORD_WIDTH - 1); // rotate right do {
testword_t expect = inverse ? pattern : ~pattern; pattern = pattern >> 1 | pattern << (TESTWORD_WIDTH - 1); // rotate right
testword_t actual = read_word(p); testword_t expect = pattern;
if (unlikely(actual != expect)) { testword_t actual = read_word(p);
data_error(p, expect, actual, true); if (unlikely(actual != expect)) {
} data_error(p, expect, actual, true);
write_word(p, ~expect); }
} while (p-- > ps); // test before decrement in case pointer overflows write_word_nt(p, ~expect);
} while (p-- > ps); // test before decrement in case pointer overflows
__asm__ __volatile__ ("mfence");
}
else {
do {
pattern = pattern >> 1 | pattern << (TESTWORD_WIDTH - 1); // rotate right
testword_t expect = pattern;
testword_t actual = read_word(p);
if (unlikely(actual != expect)) {
data_error(p, expect, actual, true);
}
write_word(p, ~expect);
} while (p-- > ps); // test before decrement in case pointer overflows
}
do_tick(my_cpu); do_tick(my_cpu);
BAILOUT; BAILOUT;
} while (!at_start && --ps); // advance ps to next start point } while (!at_start && --ps); // advance ps to next start point