diff --git a/system/cache.h b/system/cache.h
index 26fde06..69dd176 100644
--- a/system/cache.h
+++ b/system/cache.h
@@ -2,9 +2,9 @@
 #ifndef CACHE_H
 #define CACHE_H
 /*
- * Provides functions to enable/disable the CPU caches.
+ * Provides functions to enable, disable, and flush the CPU caches.
  *
- * Copyright (C) 2020 Martin Whitaker.
+ * Copyright (C) 2020-2021 Martin Whitaker.
  */
 
 /*
@@ -20,7 +20,7 @@ static inline void cache_off(void)
         "wbinvd \n"
         : /* no outputs */
         : /* no inputs */
-        : "rax"
+        : "rax", "memory"
     );
 #else
     __asm__ __volatile__ ("\t"
@@ -30,7 +30,7 @@ static inline void cache_off(void)
         "wbinvd \n"
         : /* no outputs */
         : /* no inputs */
-        : "eax"
+        : "eax", "memory"
     );
 #endif
 }
@@ -47,7 +47,7 @@ static inline void cache_on(void)
         "movq %%rax, %%cr0 \n"
         : /* no outputs */
         : /* no inputs */
-        : "rax"
+        : "rax", "memory"
     );
 #else
     __asm__ __volatile__ ("\t"
@@ -56,9 +56,22 @@ static inline void cache_on(void)
         "movl %%eax, %%cr0 \n"
         : /* no outputs */
         : /* no inputs */
-        : "eax"
+        : "eax", "memory"
     );
 #endif
 }
 
+/*
+ * Flush the CPU caches.
+ */
+static inline void cache_flush(void)
+{
+    __asm__ __volatile__ ("\t"
+        "wbinvd\n"
+        : /* no outputs */
+        : /* no inputs */
+        : "memory"
+    );
+}
+
 #endif // CACHE_H
diff --git a/tests/bit_fade.c b/tests/bit_fade.c
index d9feb1c..23e3288 100644
--- a/tests/bit_fade.c
+++ b/tests/bit_fade.c
@@ -66,6 +66,8 @@ static int pattern_fill(int my_vcpu, testword_t pattern)
         } while (!at_end && ++pe); // advance pe to next start point
     }
 
+    flush_caches(my_vcpu);
+
     return ticks;
 }
 
diff --git a/tests/block_move.c b/tests/block_move.c
index 603dd17..d3ff2bb 100644
--- a/tests/block_move.c
+++ b/tests/block_move.c
@@ -82,7 +82,7 @@ int test_block_move(int my_vcpu, int iterations)
             BAILOUT;
         } while (!at_end && ++pe); // advance pe to next start point
     }
-    barrier_wait(run_barrier);
+    flush_caches(my_vcpu);
 
     // Now move the data around. First move the data up half of the segment size
     // we are testing. Then move the data to the original location + 32 bytes.
@@ -193,7 +193,8 @@
             }
         } while (!at_end && ++pe); // advance pe to next start point
     }
-    barrier_wait(run_barrier);
+
+    flush_caches(my_vcpu);
 
     // Now check the data. The error checking is rather crude. We just check that the
     // adjacent words are the same.
diff --git a/tests/modulo_n.c b/tests/modulo_n.c
index e58c173..4d4c153 100644
--- a/tests/modulo_n.c
+++ b/tests/modulo_n.c
@@ -105,6 +105,8 @@ int test_modulo_n(int my_vcpu, int iterations, testword_t pattern1, testword_t p
         }
     }
 
+    flush_caches(my_vcpu);
+
     // Now check every nth location.
     for (int i = 0; i < vm_map_size; i++) {
         testword_t *start, *end;
diff --git a/tests/mov_inv_fixed.c b/tests/mov_inv_fixed.c
index a1ced74..84d7ae5 100644
--- a/tests/mov_inv_fixed.c
+++ b/tests/mov_inv_fixed.c
@@ -94,6 +94,8 @@ int test_mov_inv_fixed(int my_vcpu, int iterations, testword_t pattern1, testwor
     // Check for the current pattern and then write the alternate pattern for
     // each memory location. Test from the bottom up and then from the top down.
     for (int i = 0; i < iterations; i++) {
+        flush_caches(my_vcpu);
+
         for (int j = 0; j < vm_map_size; j++) {
             testword_t *start, *end;
             calculate_chunk(&start, &end, my_vcpu, j, sizeof(testword_t));
@@ -127,6 +129,8 @@ int test_mov_inv_fixed(int my_vcpu, int iterations, testword_t pattern1, testwor
             } while (!at_end && ++pe); // advance pe to next start point
         }
 
+        flush_caches(my_vcpu);
+
         for (int j = vm_map_size - 1; j >= 0; j--) {
             testword_t *start, *end;
             calculate_chunk(&start, &end, my_vcpu, j, sizeof(testword_t));
diff --git a/tests/mov_inv_random.c b/tests/mov_inv_random.c
index 4ecbb92..601a5c8 100644
--- a/tests/mov_inv_random.c
+++ b/tests/mov_inv_random.c
@@ -80,6 +80,8 @@ int test_mov_inv_random(int my_vcpu)
     // memory location. Repeat.
     testword_t invert = 0;
     for (int i = 0; i < 2; i++) {
+        flush_caches(my_vcpu);
+
         random_seed(my_vcpu, seed);
         for (int j = 0; j < vm_map_size; j++) {
             testword_t *start, *end;
diff --git a/tests/mov_inv_walk1.c b/tests/mov_inv_walk1.c
index ef68539..66241b2 100644
--- a/tests/mov_inv_walk1.c
+++ b/tests/mov_inv_walk1.c
@@ -73,6 +73,8 @@ int test_mov_inv_walk1(int my_vcpu, int iterations, int offset, bool inverse)
     for (int i = 0; i < iterations; i++) {
         pattern = (testword_t)1 << offset;
 
+        flush_caches(my_vcpu);
+
         for (int j = 0; j < vm_map_size; j++) {
             testword_t *start, *end;
             calculate_chunk(&start, &end, my_vcpu, j, sizeof(testword_t));
@@ -108,6 +110,8 @@ int test_mov_inv_walk1(int my_vcpu, int iterations, int offset, bool inverse)
             } while (!at_end && ++pe); // advance pe to next start point
         }
 
+        flush_caches(my_vcpu);
+
         for (int j = vm_map_size - 1; j >= 0; j--) {
             testword_t *start, *end;
             calculate_chunk(&start, &end, my_vcpu, j, sizeof(testword_t));
diff --git a/tests/own_addr.c b/tests/own_addr.c
index 5f56f15..e03af16 100644
--- a/tests/own_addr.c
+++ b/tests/own_addr.c
@@ -65,6 +65,8 @@ static int pattern_fill(int my_vcpu, testword_t offset)
         } while (!at_end && ++pe); // advance pe to next start point
     }
 
+    flush_caches(my_vcpu);
+
     return ticks;
 }
 
diff --git a/tests/test_helper.c b/tests/test_helper.c
index ad2bebb..63303bd 100644
--- a/tests/test_helper.c
+++ b/tests/test_helper.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2020 Martin Whitaker.
+// Copyright (C) 2020-2021 Martin Whitaker.
 //
 // Partly derived from an extract of memtest86+ test.c:
 //
@@ -15,6 +15,10 @@
 
 #include
 
+#include "cache.h"
+
+#include "barrier.h"
+
 #include "config.h"
 #include "display.h"
 
@@ -108,3 +112,14 @@ void calculate_chunk(testword_t **start, testword_t **end, int my_vcpu, int segm
         }
     }
 }
+
+void flush_caches(int my_vcpu)
+{
+    if (my_vcpu >= 0) {
+        barrier_wait(run_barrier);
+        if (my_vcpu == master_vcpu) {
+            cache_flush();
+        }
+        barrier_wait(run_barrier);
+    }
+}
diff --git a/tests/test_helper.h b/tests/test_helper.h
index 195e7ca..6f12958 100644
--- a/tests/test_helper.h
+++ b/tests/test_helper.h
@@ -78,4 +78,10 @@ testword_t random(int my_vcpu);
  */
 void calculate_chunk(testword_t **start, testword_t **end, int my_vcpu, int segment, size_t chunk_align);
 
+/*
+ * Flushes the CPU caches. If SMP is enabled, synchronises the threads before
+ * and after issuing the cache flush instruction.
+ */
+void flush_caches(int my_vcpu);
+
 #endif // TEST_HELPER_H