diff --git a/tests/block_move.c b/tests/block_move.c index 4dbafeb..4bed2b3 100644 --- a/tests/block_move.c +++ b/tests/block_move.c @@ -19,6 +19,8 @@ #include "display.h" #include "error.h" #include "test.h" +#include "config.h" +#include "cpuid.h" #include "test_funcs.h" #include "test_helper.h" @@ -59,26 +61,52 @@ int test_block_move(int my_cpu, int iterations) } test_addr[my_cpu] = (uintptr_t)p; testword_t pattern1 = 1; - do { - testword_t pattern2 = ~pattern1; - write_word(p + 0, pattern1); - write_word(p + 1, pattern1); - write_word(p + 2, pattern1); - write_word(p + 3, pattern1); - write_word(p + 4, pattern2); - write_word(p + 5, pattern2); - write_word(p + 6, pattern1); - write_word(p + 7, pattern1); - write_word(p + 8, pattern1); - write_word(p + 9, pattern1); - write_word(p + 10, pattern2); - write_word(p + 11, pattern2); - write_word(p + 12, pattern1); - write_word(p + 13, pattern1); - write_word(p + 14, pattern2); - write_word(p + 15, pattern2); - pattern1 = pattern1 << 1 | pattern1 >> (TESTWORD_WIDTH - 1); // rotate left - } while (p <= (pe - 16) && (p += 16)); // test before increment in case pointer overflows + // Nontemporal stores seem to be mostly bad for performance here. + if (enable_nontemporal && cpuid_info.flags.sse2) { + do { + testword_t pattern2 = ~pattern1; + write_word_nt(p + 0, pattern1); + write_word_nt(p + 1, pattern1); + write_word_nt(p + 2, pattern1); + write_word_nt(p + 3, pattern1); + write_word_nt(p + 4, pattern2); + write_word_nt(p + 5, pattern2); + write_word_nt(p + 6, pattern1); + write_word_nt(p + 7, pattern1); + write_word_nt(p + 8, pattern1); + write_word_nt(p + 9, pattern1); + write_word_nt(p + 10, pattern2); + write_word_nt(p + 11, pattern2); + write_word_nt(p + 12, pattern1); + write_word_nt(p + 13, pattern1); + write_word_nt(p + 14, pattern2); + write_word_nt(p + 15, pattern2); + pattern1 = pattern1 << 1 | pattern1 >> (TESTWORD_WIDTH - 1); // rotate left + } while (p <= (pe - 16) && (p += 16)); // test before increment in case pointer overflows + __asm__ __volatile__ ("mfence"); + } + else { + do { + testword_t pattern2 = ~pattern1; + write_word(p + 0, pattern1); + write_word(p + 1, pattern1); + write_word(p + 2, pattern1); + write_word(p + 3, pattern1); + write_word(p + 4, pattern2); + write_word(p + 5, pattern2); + write_word(p + 6, pattern1); + write_word(p + 7, pattern1); + write_word(p + 8, pattern1); + write_word(p + 9, pattern1); + write_word(p + 10, pattern2); + write_word(p + 11, pattern2); + write_word(p + 12, pattern1); + write_word(p + 13, pattern1); + write_word(p + 14, pattern2); + write_word(p + 15, pattern2); + pattern1 = pattern1 << 1 | pattern1 >> (TESTWORD_WIDTH - 1); // rotate left + } while (p <= (pe - 16) && (p += 16)); // test before increment in case pointer overflows + } do_tick(my_cpu); BAILOUT; } while (!at_end && ++pe); // advance pe to next start point diff --git a/tests/modulo_n.c b/tests/modulo_n.c index 00a1dda..4a580a7 100644 --- a/tests/modulo_n.c +++ b/tests/modulo_n.c @@ -19,6 +19,8 @@ #include "display.h" #include "error.h" #include "test.h" +#include "config.h" +#include "cpuid.h" #include "test_funcs.h" #include "test_helper.h" @@ -59,9 +61,18 @@ int test_modulo_n(int my_cpu, int iterations, testword_t pattern1, testword_t pa continue; } test_addr[my_cpu] = (uintptr_t)p; - do { - write_word(p, pattern1); - } while (p <= (pe - n) && (p += n)); // test before increment in case pointer overflows + // Nontemporal stores seem to be bad for performance here. + if (enable_nontemporal && cpuid_info.flags.sse2) { + do { + write_word_nt(p, pattern1); + } while (p <= (pe - n) && (p += n)); // test before increment in case pointer overflows + __asm__ __volatile__ ("mfence"); + } + else { + do { + write_word(p, pattern1); + } while (p <= (pe - n) && (p += n)); // test before increment in case pointer overflows + } do_tick(my_cpu); BAILOUT; } while (!at_end && ++pe); // advance pe to next start point @@ -92,15 +103,30 @@ int test_modulo_n(int my_cpu, int iterations, testword_t pattern1, testword_t pa continue; } test_addr[my_cpu] = (uintptr_t)p; - do { - if (k != offset) { - write_word(p, pattern2); - } - k++; - if (k == n) { - k = 0; - } - } while (p++ < pe); // test before increment in case pointer overflows + // Nontemporal stores seem to be bad for performance here. + if (enable_nontemporal && cpuid_info.flags.sse2) { + do { + if (k != offset) { + write_word_nt(p, pattern2); + } + k++; + if (k == n) { + k = 0; + } + } while (p++ < pe); // test before increment in case pointer overflows + __asm__ __volatile__ ("mfence"); + } + else { + do { + if (k != offset) { + write_word(p, pattern2); + } + k++; + if (k == n) { + k = 0; + } + } while (p++ < pe); // test before increment in case pointer overflows + } do_tick(my_cpu); BAILOUT; } while (!at_end && ++pe); // advance pe to next start point diff --git a/tests/mov_inv_fixed.c b/tests/mov_inv_fixed.c index 8320766..50b493e 100644 --- a/tests/mov_inv_fixed.c +++ b/tests/mov_inv_fixed.c @@ -19,6 +19,8 @@ #include "display.h" #include "error.h" #include "test.h" +#include "config.h" +#include "cpuid.h" #include "test_funcs.h" #include "test_helper.h" @@ -119,13 +121,26 @@ int test_mov_inv_fixed(int my_cpu, int iterations, testword_t pattern1, testword continue; } test_addr[my_cpu] = (uintptr_t)p; - do { - testword_t actual = read_word(p); - if (unlikely(actual != pattern1)) { - data_error(p, pattern1, actual, true); - } - write_word(p, pattern2); - } while (p++ < pe); // test before increment in case pointer overflows + // Nontemporal stores seem to be bad for performance here. + if (enable_nontemporal && cpuid_info.flags.sse2) { + do { + testword_t actual = read_word(p); + if (unlikely(actual != pattern1)) { + data_error(p, pattern1, actual, true); + } + write_word_nt(p, pattern2); + } while (p++ < pe); // test before increment in case pointer overflows + __asm__ __volatile__ ("mfence"); + } + else { + do { + testword_t actual = read_word(p); + if (unlikely(actual != pattern1)) { + data_error(p, pattern1, actual, true); + } + write_word(p, pattern2); + } while (p++ < pe); // test before increment in case pointer overflows + } do_tick(my_cpu); BAILOUT; } while (!at_end && ++pe); // advance pe to next start point @@ -155,13 +170,26 @@ int test_mov_inv_fixed(int my_cpu, int iterations, testword_t pattern1, testword continue; } test_addr[my_cpu] = (uintptr_t)p; - do { - testword_t actual = read_word(p); - if (unlikely(actual != pattern2)) { - data_error(p, pattern2, actual, true); - } - write_word(p, pattern1); - } while (p-- > ps); // test before decrement in case pointer overflows + // Nontemporal stores seem to be bad for performance here. + if (enable_nontemporal && cpuid_info.flags.sse2) { + do { + testword_t actual = read_word(p); + if (unlikely(actual != pattern2)) { + data_error(p, pattern2, actual, true); + } + write_word_nt(p, pattern1); + } while (p-- > ps); // test before decrement in case pointer overflows + __asm__ __volatile__ ("mfence"); + } + else { + do { + testword_t actual = read_word(p); + if (unlikely(actual != pattern2)) { + data_error(p, pattern2, actual, true); + } + write_word(p, pattern1); + } while (p-- > ps); // test before decrement in case pointer overflows + } do_tick(my_cpu); BAILOUT; } while (!at_start && --ps); // advance ps to next start point diff --git a/tests/mov_inv_random.c b/tests/mov_inv_random.c index d487c88..7e8833f 100644 --- a/tests/mov_inv_random.c +++ b/tests/mov_inv_random.c @@ -16,12 +16,12 @@ #include #include -#include "cpuid.h" -#include "tsc.h" - #include "display.h" #include "error.h" #include "test.h" +#include "config.h" +#include "cpuid.h" +#include "tsc.h" #include "test_funcs.h" #include "test_helper.h" @@ -70,10 +70,19 @@ int test_mov_inv_random(int my_cpu) continue; } test_addr[my_cpu] = (uintptr_t)p; - do { - prsg_state = prsg(prsg_state); - write_word(p, prsg_state); - } while (p++ < pe); // test before increment in case pointer overflows + if (enable_nontemporal && cpuid_info.flags.sse2) { + do { + prsg_state = prsg(prsg_state); + write_word_nt(p, prsg_state); + } while (p++ < pe); // test before increment in case pointer overflows + __asm__ __volatile__ ("mfence"); + } + else { + do { + prsg_state = prsg(prsg_state); + write_word(p, prsg_state); + } while (p++ < pe); // test before increment in case pointer overflows + } do_tick(my_cpu); BAILOUT; } while (!at_end && ++pe); // advance pe to next start point @@ -108,15 +117,29 @@ int test_mov_inv_random(int my_cpu) continue; } test_addr[my_cpu] = (uintptr_t)p; - do { - prsg_state = prsg(prsg_state); - testword_t expect = prsg_state ^ invert; - testword_t actual = read_word(p); - if (unlikely(actual != expect)) { - data_error(p, expect, actual, true); - } - write_word(p, ~expect); - } while (p++ < pe); // test before increment in case pointer overflows + if (enable_nontemporal && cpuid_info.flags.sse2) { + do { + prsg_state = prsg(prsg_state); + testword_t expect = prsg_state ^ invert; + testword_t actual = read_word(p); + if (unlikely(actual != expect)) { + data_error(p, expect, actual, true); + } + write_word_nt(p, ~expect); + } while (p++ < pe); // test before increment in case pointer overflows + __asm__ __volatile__ ("mfence"); + } + else { + do { + prsg_state = prsg(prsg_state); + testword_t expect = prsg_state ^ invert; + testword_t actual = read_word(p); + if (unlikely(actual != expect)) { + data_error(p, expect, actual, true); + } + write_word(p, ~expect); + } while (p++ < pe); // test before increment in case pointer overflows + } do_tick(my_cpu); BAILOUT; } while (!at_end && ++pe); // advance pe to next start point diff --git a/tests/mov_inv_walk1.c b/tests/mov_inv_walk1.c index 78ba7e5..a49693e 100644 --- a/tests/mov_inv_walk1.c +++ b/tests/mov_inv_walk1.c @@ -19,6 +19,8 @@ #include "display.h" #include "error.h" #include "test.h" +#include "config.h" +#include "cpuid.h" #include "test_funcs.h" #include "test_helper.h" @@ -31,10 +33,10 @@ int test_mov_inv_walk1(int my_cpu, int iterations, int offset, bool inverse) { int ticks = 0; - testword_t pattern = (testword_t)1 << offset; + testword_t pattern = inverse ? ~((testword_t)1 << offset) : (testword_t)1 << offset; if (my_cpu == master_cpu) { - display_test_pattern_value(inverse ? ~pattern : pattern); + display_test_pattern_value(pattern); } // Initialize memory with the initial pattern. @@ -60,10 +62,19 @@ int test_mov_inv_walk1(int my_cpu, int iterations, int offset, bool inverse) continue; } test_addr[my_cpu] = (uintptr_t)p; - do { - write_word(p, inverse ? ~pattern : pattern); - pattern = pattern << 1 | pattern >> (TESTWORD_WIDTH - 1); // rotate left - } while (p++ < pe); // test before increment in case pointer overflows + if (enable_nontemporal && cpuid_info.flags.sse2) { + do { + write_word_nt(p, pattern); + pattern = pattern << 1 | pattern >> (TESTWORD_WIDTH - 1); // rotate left + } while (p++ < pe); // test before increment in case pointer overflows + __asm__ __volatile__ ("mfence"); + } + else { + do { + write_word(p, pattern); + pattern = pattern << 1 | pattern >> (TESTWORD_WIDTH - 1); // rotate left + } while (p++ < pe); // test before increment in case pointer overflows + } do_tick(my_cpu); BAILOUT; } while (!at_end && ++pe); // advance pe to next start point @@ -72,7 +83,7 @@ int test_mov_inv_walk1(int my_cpu, int iterations, int offset, bool inverse) // Check for initial pattern and then write the complement for each memory location. // Test from bottom up and then from the top down. for (int i = 0; i < iterations; i++) { - pattern = (testword_t)1 << offset; + pattern = inverse ? ~((testword_t)1 << offset) : (testword_t)1 << offset; flush_caches(my_cpu); @@ -98,20 +109,36 @@ int test_mov_inv_walk1(int my_cpu, int iterations, int offset, bool inverse) continue; } test_addr[my_cpu] = (uintptr_t)p; - do { - testword_t expect = inverse ? ~pattern : pattern; - testword_t actual = read_word(p); - if (unlikely(actual != expect)) { - data_error(p, expect, actual, true); - } - write_word(p, ~expect); - pattern = pattern << 1 | pattern >> (TESTWORD_WIDTH - 1); // rotate left - } while (p++ < pe); // test before increment in case pointer overflows + if (enable_nontemporal && cpuid_info.flags.sse2) { + do { + testword_t expect = pattern; + testword_t actual = read_word(p); + if (unlikely(actual != expect)) { + data_error(p, expect, actual, true); + } + write_word_nt(p, ~expect); + pattern = pattern << 1 | pattern >> (TESTWORD_WIDTH - 1); // rotate left + } while (p++ < pe); // test before increment in case pointer overflows + __asm__ __volatile__ ("mfence"); + } + else { + do { + testword_t expect = pattern; + testword_t actual = read_word(p); + if (unlikely(actual != expect)) { + data_error(p, expect, actual, true); + } + write_word(p, ~expect); + pattern = pattern << 1 | pattern >> (TESTWORD_WIDTH - 1); // rotate left + } while (p++ < pe); // test before increment in case pointer overflows + } do_tick(my_cpu); BAILOUT; } while (!at_end && ++pe); // advance pe to next start point } + pattern = ~pattern; + flush_caches(my_cpu); for (int j = vm_map_size - 1; j >= 0; j--) { @@ -136,15 +163,29 @@ int test_mov_inv_walk1(int my_cpu, int iterations, int offset, bool inverse) continue; } test_addr[my_cpu] = (uintptr_t)ps; - do { - pattern = pattern >> 1 | pattern << (TESTWORD_WIDTH - 1); // rotate right - testword_t expect = inverse ? pattern : ~pattern; - testword_t actual = read_word(p); - if (unlikely(actual != expect)) { - data_error(p, expect, actual, true); - } - write_word(p, ~expect); - } while (p-- > ps); // test before decrement in case pointer overflows + if (enable_nontemporal && cpuid_info.flags.sse2) { + do { + pattern = pattern >> 1 | pattern << (TESTWORD_WIDTH - 1); // rotate right + testword_t expect = pattern; + testword_t actual = read_word(p); + if (unlikely(actual != expect)) { + data_error(p, expect, actual, true); + } + write_word_nt(p, ~expect); + } while (p-- > ps); // test before decrement in case pointer overflows + __asm__ __volatile__ ("mfence"); + } + else { + do { + pattern = pattern >> 1 | pattern << (TESTWORD_WIDTH - 1); // rotate right + testword_t expect = pattern; + testword_t actual = read_word(p); + if (unlikely(actual != expect)) { + data_error(p, expect, actual, true); + } + write_word(p, ~expect); + } while (p-- > ps); // test before decrement in case pointer overflows + } do_tick(my_cpu); BAILOUT; } while (!at_start && --ps); // advance ps to next start point