From c6a0d53d30cb4a19a4bbdb644dfc91fbbbc4ea4e Mon Sep 17 00:00:00 2001
From: Jonathan Shook <jshook@gmail.com>
Date: Wed, 14 Apr 2021 11:29:48 -0500
Subject: [PATCH] stabilized version conventions for workloads with baselines2

---
 .../activities/baselines/cql-iot.yaml         |   3 +-
 .../activities/baselinesv2/cql-keyvalue2.yaml | 109 ++++++++++++++
 .../baselinesv2/cql-timeseries2.yaml          | 138 ++++++++++++++++++
 3 files changed, 249 insertions(+), 1 deletion(-)
 create mode 100644 driver-cql-shaded/src/main/resources/activities/baselinesv2/cql-keyvalue2.yaml
 create mode 100644 driver-cql-shaded/src/main/resources/activities/baselinesv2/cql-timeseries2.yaml

diff --git a/driver-cql-shaded/src/main/resources/activities/baselines/cql-iot.yaml b/driver-cql-shaded/src/main/resources/activities/baselines/cql-iot.yaml
index dab647836..9c22e329d 100644
--- a/driver-cql-shaded/src/main/resources/activities/baselines/cql-iot.yaml
+++ b/driver-cql-shaded/src/main/resources/activities/baselines/cql-iot.yaml
@@ -1,6 +1,7 @@
 # nb -v run driver=cql yaml=cql-iot tags=phase:schema host=dsehost
 description: |
-  This workload emulates a time-series data model and access patterns.
+  This workload emulates a time-series data model and access patterns. This is the same as cql-timeseries,
+  which is the preferred name as it is more canonical. This workload is retained for historical reasons.
 
 scenarios:
   default:
diff --git a/driver-cql-shaded/src/main/resources/activities/baselinesv2/cql-keyvalue2.yaml b/driver-cql-shaded/src/main/resources/activities/baselinesv2/cql-keyvalue2.yaml
new file mode 100644
index 000000000..9c5f243cb
--- /dev/null
+++ b/driver-cql-shaded/src/main/resources/activities/baselinesv2/cql-keyvalue2.yaml
@@ -0,0 +1,109 @@
+description: |
+  A workload with only text keys and text values.
+  The CQL Key-Value workload demonstrates the simplest possible schema with payload data. This is useful for measuring
+  system capacity most directly in terms of raw operations. As a reference point, it provides some insight around types of
+  workloads that are constrained around messaging, threading, and tasking, rather than bulk throughput.
+  During preload, all keys are set with a value. During the main phase of the workload, random keys (partitions) from
+  the known population are selected for upsert and replaced with new row values which never repeat. Reads in the main
+  phase select random keys from the same population.
+
+scenarios:
+  default:  # steps select blocks by phase tag: schema (keyspace + table), rampup, main
+    schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF
+    rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
+    main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
+  astra:  # as default, except the schema step selects schema-astra (table only, no create-keyspace)
+    schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF
+    rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
+    main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
+
+bindings:  # seq_* are bound in the rampup and verify statements; rw_* in the main-phase statements
+  seq_key: Mod(<<keycount:1000000000>>); ToString() -> String
+  seq_value: Hash(); Mod(<<valuecount:1000000000>>); ToString() -> String
+  rw_key: <<keydist:Uniform(0,1000000000)->int>>; ToString() -> String
+  rw_value: Hash(); <<valdist:Uniform(0,1000000000)->int>>; ToString() -> String
+
+blocks:
+  - name: schema  # creates the keyspace, then the key/value table
+    tags:
+      phase: schema
+    params:
+      prepared: false
+    statements:
+      - create-keyspace: |
+          create keyspace if not exists <<keyspace:baselines>>
+          WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<<rf:1>>'}
+          AND durable_writes = true;
+        tags:
+          name: create-keyspace
+      - create-table: |
+          create table if not exists <<keyspace:baselines>>.<<table:keyvalue>> (
+          key text,
+           value text,
+           PRIMARY KEY (key)
+          );
+        tags:
+          name: create-table
+  - name: schema-astra  # table only; this block has no create-keyspace statement
+    tags:
+      phase: schema-astra
+    params:
+      prepared: false
+    statements:
+      - create-table: |
+          create table if not exists <<keyspace:baselines>>.<<table:keyvalue>> (
+          key text,
+           value text,
+           PRIMARY KEY (key)
+          );
+        tags:
+          name: create-table-astra
+  - name: rampup  # inserts rows built from the seq_key / seq_value bindings
+    tags:
+      phase: rampup
+    params:
+      cl: <<write_cl:LOCAL_QUORUM>>
+    statements:
+      - rampup-insert: |
+          insert into <<keyspace:baselines>>.<<table:keyvalue>>
+          (key, value)
+          values ({seq_key},{seq_value});
+        tags:
+          name: rampup-insert
+  - name: verify  # selects by seq_key; none of the scenarios above selects phase:verify
+    tags:
+      phase: verify
+      type: read
+    params:
+      cl: <<read_cl:LOCAL_QUORUM>>
+    statements:
+      - verify-select: |
+          select * from <<keyspace:baselines>>.<<table:keyvalue>> where key={seq_key};
+        verify-fields: key->seq_key, value->seq_value
+        tags:
+          name: verify
+  - name: main-read  # main-phase reads keyed by rw_key; ratio is overridable via read_ratio (default 5)
+    tags:
+      phase: main
+      type: read
+    params:
+      ratio: <<read_ratio:5>>
+      cl: <<read_cl:LOCAL_QUORUM>>
+    statements:
+      - main-select: |
+          select * from <<keyspace:baselines>>.<<table:keyvalue>> where key={rw_key};
+        tags:
+          name: main-select
+  - name: main-write  # main-phase upserts keyed by rw_key; ratio is overridable via write_ratio (default 5)
+    tags:
+      phase: main
+      type: write
+    params:
+      ratio: <<write_ratio:5>>
+      cl: <<write_cl:LOCAL_QUORUM>>
+    statements:
+      - main-insert: |
+          insert into <<keyspace:baselines>>.<<table:keyvalue>>
+          (key, value) values ({rw_key}, {rw_value});
+        tags:
+          name: main-insert
diff --git a/driver-cql-shaded/src/main/resources/activities/baselinesv2/cql-timeseries2.yaml b/driver-cql-shaded/src/main/resources/activities/baselinesv2/cql-timeseries2.yaml
new file mode 100644
index 000000000..4e0aa6a9b
--- /dev/null
+++ b/driver-cql-shaded/src/main/resources/activities/baselinesv2/cql-timeseries2.yaml
@@ -0,0 +1,138 @@
+# nb -v run driver=cql yaml=cql-timeseries2 tags=phase:schema host=dsehost
+description: |
+  This workload emulates a time-series data model and access patterns.
+
+scenarios:
+  default:  # steps select blocks by phase tag: schema (keyspace, table, truncate), rampup, main
+    schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF
+    rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
+    main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
+  astra:  # as default, except the schema step selects schema-astra (table only, no compression/compaction options)
+    schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF
+    rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
+    main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
+params:  # workload-level instrument setting; the rampup, verify and main statements also set their own
+  instrument: TEMPLATE(instrument,false)
+bindings:  # machine_id, time, cell_timestamp and station_id are all derived from the sources setting
+  machine_id: Mod(<<sources:10000>>); ToHashedUUID() -> java.util.UUID
+  sensor_name: HashedLineToString('data/variable_words.txt')
+  time: Mul(<<timespeed:100>>L); Div(<<sources:10000>>L); ToDate()
+  cell_timestamp: Mul(<<timespeed:100>>L); Div(<<sources:10000>>L); Mul(1000L)
+  sensor_value: Normal(0.0,5.0); Add(100.0) -> double
+  station_id: Div(<<sources:10000>>);Mod(<<stations:100>>); ToHashedUUID() -> java.util.UUID
+  data: HashedFileExtractToString('data/lorem_ipsum_full.txt',800,1200)
+blocks:
+  - tags:  # schema phase statements: create keyspace, create table, truncate table
+      phase: schema
+    params:
+      prepared: false
+    statements:
+      - create-keyspace: |
+          create keyspace if not exists <<keyspace:baselines>>
+          WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<<rf:1>>'}
+          AND durable_writes = true;
+        tags:
+          name: create-keyspace
+      - create-table: |
+          create table if not exists <<keyspace:baselines>>.<<table:iot>> (
+          machine_id UUID,     // source machine
+          sensor_name text,    // sensor name
+          time timestamp,      // timestamp of collection
+          sensor_value double, //
+          station_id UUID,     // source location
+          data text,
+          PRIMARY KEY ((machine_id, sensor_name), time)
+          ) WITH CLUSTERING ORDER BY (time DESC)
+           AND compression = { 'sstable_compression' : '<<compression:LZ4Compressor>>' }
+           AND compaction = {
+           'class': 'TimeWindowCompactionStrategy',
+           'compaction_window_size': <<expiry_minutes:60>>,
+           'compaction_window_unit': 'MINUTES'
+          };
+        tags:
+          name: create-table
+      - truncate-table: |
+          truncate table <<keyspace:baselines>>.<<table:iot>>;
+        tags:
+          name: truncate-table
+  - tags:  # astra schema phase: table only, without compression or compaction options
+      phase: schema-astra
+    params:
+      prepared: false
+    statements:
+      - create-table-astra: |
+          create table if not exists <<keyspace:baselines>>.<<table:iot>> (
+          machine_id UUID,     // source machine
+          sensor_name text,    // sensor name
+          time timestamp,      // timestamp of collection
+          sensor_value double, //
+          station_id UUID,     // source location
+          data text,
+          PRIMARY KEY ((machine_id, sensor_name), time)
+          ) WITH CLUSTERING ORDER BY (time DESC);
+        tags:
+          name: create-table-astra
+  - tags:  # rampup inserts; the write timestamp is set explicitly from the cell_timestamp binding
+      phase: rampup
+    params:
+      cl: <<write_cl:LOCAL_QUORUM>>
+    statements:
+      - insert-rampup: |
+          insert into  <<keyspace:baselines>>.<<table:iot>>
+          (machine_id, sensor_name, time, sensor_value, station_id, data)
+          values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data})
+          using timestamp {cell_timestamp}
+        idempotent: true
+        tags:
+          name: insert-rampup
+        params:  # statement-level instrument setting for writes
+          instrument: TEMPLATE(instrument-writes,TEMPLATE(instrument,false))
+  - tags:  # lookup by the full primary key; none of the scenarios above selects phase:verify
+      phase: verify
+      type: read
+    params:
+      ratio: 1
+      cl: <<read_cl:LOCAL_QUORUM>>
+    statements:
+      - select-verify: |
+          select * from <<keyspace:baselines>>.<<table:iot>>
+          where machine_id={machine_id} and sensor_name={sensor_name} and time={time};
+        verify-fields: "*, -cell_timestamp"  # presumably all fields except cell_timestamp; TODO confirm syntax
+        tags:
+          name: select-verify
+        params:  # statement-level instrument setting for reads
+          instrument: TEMPLATE(instrument-reads,TEMPLATE(instrument,false))
+  - tags:  # main-phase reads of one (machine_id, sensor_name) partition, capped by the limit setting; default ratio 1
+      phase: main
+      type: read
+    params:
+      ratio: <<read_ratio:1>>
+      cl: <<read_cl:LOCAL_QUORUM>>
+    statements:
+      - select-read: |
+          select * from <<keyspace:baselines>>.<<table:iot>>
+          where machine_id={machine_id} and sensor_name={sensor_name}
+          limit <<limit:10>>
+        tags:
+          name: select-read
+        params:  # statement-level instrument setting for reads
+          instrument: TEMPLATE(instrument-reads,TEMPLATE(instrument,false))
+
+  - tags:  # main-phase writes; same columns and explicit write timestamp as the rampup insert; default ratio 9
+      phase: main
+      type: write
+    params:
+      ratio: <<write_ratio:9>>
+      cl: <<write_cl:LOCAL_QUORUM>>
+    statements:
+      - insert-main: |
+          insert into <<keyspace:baselines>>.<<table:iot>>
+          (machine_id, sensor_name, time, sensor_value, station_id, data)
+          values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data})
+          using timestamp {cell_timestamp}
+        idempotent: true
+        tags:
+          name: insert-main
+        params:  # statement-level instrument setting for writes
+          instrument: TEMPLATE(instrument-writes,TEMPLATE(instrument,false))
+