add starting point for incremental test

Jonathan Shook 2021-02-24 18:23:32 -06:00
parent 1607eaf3c1
commit 41b10016d4
2 changed files with 33 additions and 56 deletions

View File

@@ -1,30 +0,0 @@
description: |
A set of named scenarios for testing cache performance.
These named scenarios are packaged as a workload file that can be used
to test datasets of different sizes. This workload file contains no
workloads of its own; it is expected to be used with existing workload
definitions. By default, it will use the cql-tabular2 workload.
Going forward, some conventions will be established about what the
dataset-sizing parameters mean. For now, the names used in cql-tabular2
are the standard, which means partsize and partcount:
partsize is used to modulo the selected partition for a row write.
partcount is used to modulo a pseudo-random row selector so that it falls
within the known dataset size for extant data.
These must be calculated together to ensure that reads address valid data
by default. A partially empty read ratio can be configured by adjusting
these parameters with respect to known dataset sizes.
The scenario names are suggestive of the dataset size, using basic
exponent notation. For example, 1e5 means 100000.
Defaults:
rows (dataset size basis)
partsize: rows/100
partcount: rows/100
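A minimal sketch of the sizing convention described above, assuming a plain modulo scheme; the names and the hash stand-in are illustrative, and the real binding recipes live in cql-tabular2:

// Sketch only: how partsize and partcount derive from rows, and how each
// acts as a modulus per the description above.
public class SizingSketch {
    public static void main(String[] args) {
        long rows = 100_000L;        // dataset size basis ("1e5")
        long partsize = rows / 100;  // default: 1_000
        long partcount = rows / 100; // default: 1_000
        long cycle = 123_456L;       // an arbitrary cycle value
        // a row write has its selected partition taken modulo partsize
        long writePartition = cycle % partsize;
        // a read takes a pseudo-random row selector modulo partcount,
        // which keeps it within the known dataset for extant data
        long readSelector = Math.floorMod(Long.hashCode(cycle), partcount);
        System.out.printf("write partition=%d, read selector=%d%n",
            writePartition, readSelector);
    }
}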
scenarios:
hotcold1e5:
schema_1e5: TEMPLATE(workload,cql-tabular2) schema
rampup_1e5: TEMPLATE(workload,cql-tabular2) rampup rampup-cycles=TEMPLATE(rows,1e5) partsize=TEMPLATE(partsize,1e3)
main_1e5: TEMPLATE(workload,cql-tabular2) main main-cycles=TEMPLATE(rows,1e5) partcount=TEMPLATE(partcount,1e3)

View File

@@ -3,30 +3,49 @@ description:
Rows will be added incrementally in both rampup and main phases. However, during
the main phase, reads will also occur at the same rate, with the read patterns
selecting from the size of data written up to that point.
In order to ensure that the reads and writes operate against the same set of
identifiers, it is crucial that the ratios are not adjusted unless the binding
recipes are adjusted to match. With a write:read ratio of 1:1 and a prefix
function Div(2L) at the front of the main-phase bindings, the writes and reads
will address the same rows rather than playing leap-frog on the cycle values.
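A minimal sketch of the Div(2L) pairing described above, in illustrative Java rather than the actual virtdata implementation:

// With a 1:1 write:read ratio, consecutive cycles alternate between the
// write and read ops. Dividing the cycle value by two first collapses each
// adjacent pair of cycles to one value, so the paired ops feed the same
// input into the rest of the binding recipe; the scaled hash-range step
// downstream then selects within the data written up to that point.
public class DivPairingSketch {
    public static void main(String[] args) {
        for (long cycle = 0; cycle < 6; cycle++) {
            System.out.printf("cycle %d -> Div(2L) -> %d%n", cycle, cycle / 2);
        }
        // cycles 0,1 -> 0; cycles 2,3 -> 1; cycles 4,5 -> 2
        // Without this prefix, the write at cycle n and the read at cycle
        // n+1 would target different cycle values, leap-frogging forever.
    }
}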
The main phase can be run without the rampup phase for this workload, as long
as your test is defined as an incremental write and read test. If you need
background data pre-loaded to ensure realistic read times against pre-indexed
data, you may run the rampup phase before the main phase. Be aware, however,
that these are simply different test definitions, each valid in its own way.
Due to how this workload is meant to be used, you must specify main-cycles=
when invoking the main phase.
The cycles value for the main test includes operations for both writes and
reads, so the logical number of rows in the dataset will be effectively half
of that value.
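A quick arithmetic check of the halving described above, using a hypothetical cycle count:

// main-cycles counts both writes and reads; with the 1:1 split, logical
// rows come out to about half the cycles. The value here is illustrative.
public class CycleArithmetic {
    public static void main(String[] args) {
        long mainCycles = 1_000_000L;      // e.g. main-cycles=1000000
        long writes = mainCycles / 2;      // one write per write:read pair
        long reads = mainCycles - writes;  // one read per pair
        System.out.printf("%d writes + %d reads -> ~%d logical rows%n",
            writes, reads, writes);
    }
}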
This workload is intended to be run with a sufficiently high number of cycles.
Two key details should be apparent in the read latency metrics: 1) the
relationship between dataset size, request rate, and response times, and
2) inflection points between any hot and cold access modes for LRU or other
caching mechanisms as the primary cache layer becomes saturated.
scenarios:
default:
schema: run tags=phase:schema threads==1
-rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,100000) threads=auto
-main: run tags=phase:main cycles===TEMPLATE(main-cycles,100000) threads=auto
+# rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,100000) threads=auto
+main: run tags=phase:main cycles===TEMPLATE(main-cycles,0) threads=auto
default-schema: run tags=phase:schema threads==1
-default-rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,100000) threads=auto
-default-main: run tags=phase:main cycles===TEMPLATE(main-cycles,100000) threads=auto
+# default-rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,100000) threads=auto
+default-main: run tags=phase:main cycles===TEMPLATE(main-cycles,0) threads=auto
astra:
schema: run tags=phase:astra-schema threads==1
-rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,100000) threads=auto
-main: run tags=phase:main cycles===TEMPLATE(main-cycles,100000) threads=auto
+# rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,0) threads=auto
+main: run tags=phase:main cycles===TEMPLATE(main-cycles,0) threads=auto
params:
instrument: true
bindings:
seq_key: ToString()
seq_value: Hash(); ToString();
-read_key: HashRangeScaled(TEMPLATE(scalefactor,1.0d)); ToString();
-read_value: HashRangeScaled(TEMPLATE(scalefactor,1.0d)); Hash(); ToString();
-write_key: Hash(); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); ToString();
-write_value: Hash(); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); Hash(); ToString();
+rampup_value: Hash(); ToString();
+read_key: Div(2L); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); ToString();
+read_value: Div(2L); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); Hash(); ToString();
+write_key: Div(2L); Hash(); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); ToString();
+write_value: Div(2L); Hash(); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); Hash(); ToString();
blocks:
@@ -70,21 +89,9 @@ blocks:
- rampup-insert: |
insert into TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental)
(key, value)
-values ({seq_key},{seq_value});
+values ({rampup_key},{rampup_value});
tags:
name: rampup-insert
-- name: verify
-tags:
-phase: verify
-type: read
-params:
-cl: TEMPLATE(read_cl,LOCAL_QUORUM)
-statements:
-- verify-select: |
-select * from TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental) where key={seq_key};
-verify-fields: key->seq_key, value->seq_value
-tags:
-name: verify
- name: main-read
tags:
phase: main
@@ -94,7 +101,7 @@ blocks:
cl: TEMPLATE(read_cl,LOCAL_QUORUM)
statements:
- main-select: |
-select * from TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental) where key={rw_key};
+select * from TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental) where key={read_key};
tags:
name: main-select
- name: main-write
@@ -107,7 +114,7 @@ blocks:
statements:
- main-insert: |
insert into TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental)
-(key, value) values ({rw_key}, {rw_value});
+(key, value) values ({write_key}, {write_value});
tags:
name: main-insert