From 41b10016d489125a15b689dfd851034b5de6c61f Mon Sep 17 00:00:00 2001
From: Jonathan Shook <jshook@gmail.com>
Date: Wed, 24 Feb 2021 18:23:32 -0600
Subject: [PATCH] add starting point for incremental test

---
 .../activities/baselines/hotcold.yaml         | 30 ----------
 .../activities/baselines/incremental.yaml     | 59 +++++++++++--------
 2 files changed, 33 insertions(+), 56 deletions(-)
 delete mode 100644 driver-cql-shaded/src/main/resources/activities/baselines/hotcold.yaml

diff --git a/driver-cql-shaded/src/main/resources/activities/baselines/hotcold.yaml b/driver-cql-shaded/src/main/resources/activities/baselines/hotcold.yaml
deleted file mode 100644
index 66e4a92cf..000000000
--- a/driver-cql-shaded/src/main/resources/activities/baselines/hotcold.yaml
+++ /dev/null
@@ -1,30 +0,0 @@
-description: |
-  A set of named scenarios for testing cache performance.
-  This is a set of named scenarios packaged as a workload file that can
-  be used to test datasets of different sizes. This workload file
-  contains no specific workloads of its own. It is expected to be used
-  with existing workload definitions. By default, it will use
-  the cql-tabular2 workload.
-  Going forward, some conventions will be established about what parameters
-  mean for dataset sizing. For now, the names used in cql-tabular2 will be
-  the standard. For now, this means partsize and partcount.
-  partsize is used to modulo the selected partition for a row write
-  partcount is used to modulo a pseudo-random row selector to fall within the
-  known dataset size for extant data. These must be calculated together
-  to ensure that reads address valid data by default. Some partially empty
-  read ratio can be configured by adjusting these parameters with respect to
-  known dataset sizes.
-  The scenario names are suggestive of the dataset size with basic exponents.
-  For example 1e5 means 100000.
-  Defaults:
-    rows (dataset size basis)
-    partsize: rows/100
-    partcount: rows/100
-
-
-scenarios:
-  hotcold1e5:
-    schema_1e5: TEMPLATE(workload,cql-tabular2) schema
-    rampup_1e5: TEMPLATE(workload,cql-tabular2) rampup rampup-cycles=TEMPLATE(rows,1e5) partsize=TEMPLATE(partsize,1e3)
-    main_1e5: TEMPLATE(workload,cql-tabular2) main main-cycles=TEMPLATE(rows,1e5) partcount=TEMPLATE(partcount,1e3)
-
diff --git a/driver-cql-shaded/src/main/resources/activities/baselines/incremental.yaml b/driver-cql-shaded/src/main/resources/activities/baselines/incremental.yaml
index 1b7b4c9ed..3d25004b1 100644
--- a/driver-cql-shaded/src/main/resources/activities/baselines/incremental.yaml
+++ b/driver-cql-shaded/src/main/resources/activities/baselines/incremental.yaml
@@ -3,30 +3,49 @@ description:
   Rows will be added incrementally in both rampup and main phases. However, during
   the main phase, reads will also occur at the same rate, with the read patterns
   selecting from the size of data written up to that point.
+  In order to ensure that the reads and writes operate against the same set of
+  identifiers, it is crucial that the ratios are not adjusted unless the binding
+  recipes are adjusted to match. With a write:read ratio of 1:1 and a prefix function
+  Div(2L) at the front of the main phase bindings, the writes and reads will address
+  the same rows rather than playing leap-frog on the cycle values.
+  The main phase can be run without the rampup phase for this workload, as long
+  as your test is defined as an incremental write and read test. If you need
+  background data pre-loaded to ensure realistic read times against pre-indexed
+  data, then you may use the rampup phase before the main phase. However, be aware
+  that these are simply different test definitions, and are both valid in different ways.
+  Due to how this workload is meant to be used, you must specify main-cycles= when
+  invoking the main phase, because the scenarios default it to 0 cycles.
+  The cycles value for the main test includes operations for both writes and reads,
+  thus the logical number of rows in the dataset will be effectively half of that.
+  This workload is intended to be run with enough cycles to saturate the primary cache.
+  Two key details should be obvious in the read latency metrics -- 1) the relationship
+  between dataset size, request rate, and response times and 2) inflection points
+  between any hot and cold access modes for LRU or other caching mechanisms as
+  the primary cache layer is saturated.
 
 scenarios:
   default:
     schema: run tags=phase:schema threads==1
-    rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,100000) threads=auto
-    main: run tags=phase:main cycles===TEMPLATE(main-cycles,100000) threads=auto
+    #    rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,100000) threads=auto
+    main: run tags=phase:main cycles===TEMPLATE(main-cycles,0) threads=auto
   default-schema: run tags=phase:schema threads==1
-  default-rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,100000) threads=auto
-  default-main: run tags=phase:main cycles===TEMPLATE(main-cycles,100000) threads=auto
+  #  default-rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,100000) threads=auto
+  default-main: run tags=phase:main cycles===TEMPLATE(main-cycles,0) threads=auto
   astra:
     schema: run tags=phase:astra-schema threads==1
-    rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,100000) threads=auto
-    main: run tags=phase:main cycles===TEMPLATE(main-cycles,100000) threads=auto
+    #    rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,100000) threads=auto
+    main: run tags=phase:main cycles===TEMPLATE(main-cycles,0) threads=auto
 
 params:
   instrument: true
 
 bindings:
   seq_key: ToString()
-  seq_value: Hash(); ToString();
-  read_key: HashRangeScaled(TEMPLATE(scalefactor,1.0d)); ToString();
-  read_value: HashRangeScaled(TEMPLATE(scalefactor,1.0d)); Hash(); ToString();
-  write_key: Hash(); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); ToString();
-  write_value: Hash(); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); Hash(); ToString();
+  rampup_value: Hash(); ToString();
+  read_key: Div(2L); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); ToString();
+  read_value: Div(2L); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); Hash(); ToString();
+  write_key: Div(2L); Hash(); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); ToString();
+  write_value: Div(2L); Hash(); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); Hash(); ToString();
 
 
 blocks:
@@ -70,21 +89,9 @@ blocks:
       - rampup-insert: |
           insert into TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental)
           (key, value)
-          values ({seq_key},{seq_value});
+          values ({seq_key},{rampup_value});
         tags:
           name: rampup-insert
-  - name: verify
-    tags:
-      phase: verify
-      type: read
-    params:
-      cl: TEMPLATE(read_cl,LOCAL_QUORUM)
-    statements:
-      - verify-select: |
-          select * from TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental) where key={seq_key};
-        verify-fields: key->seq_key, value->seq_value
-        tags:
-          name: verify
   - name: main-read
     tags:
       phase: main
@@ -94,7 +101,7 @@ blocks:
       cl: TEMPLATE(read_cl,LOCAL_QUORUM)
     statements:
       - main-select: |
-          select * from TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental) where key={rw_key};
+          select * from TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental) where key={read_key};
         tags:
           name: main-select
   - name: main-write
@@ -107,7 +114,7 @@ blocks:
     statements:
       - main-insert: |
           insert into TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental)
-          (key, value) values ({rw_key}, {rw_value});
+          (key, value) values ({write_key}, {write_value});
         tags:
           name: main-insert