From c6a0d53d30cb4a19a4bbdb644dfc91fbbbc4ea4e Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Wed, 14 Apr 2021 11:29:48 -0500 Subject: [PATCH] stabilized version conventions for workloads with baselines2 --- .../activities/baselines/cql-iot.yaml | 3 +- .../activities/baselinesv2/cql-keyvalue2.yaml | 109 ++++++++++++++ .../baselinesv2/cql-timeseries2.yaml | 138 ++++++++++++++++++ 3 files changed, 249 insertions(+), 1 deletion(-) create mode 100644 driver-cql-shaded/src/main/resources/activities/baselinesv2/cql-keyvalue2.yaml create mode 100644 driver-cql-shaded/src/main/resources/activities/baselinesv2/cql-timeseries2.yaml diff --git a/driver-cql-shaded/src/main/resources/activities/baselines/cql-iot.yaml b/driver-cql-shaded/src/main/resources/activities/baselines/cql-iot.yaml index dab647836..9c22e329d 100644 --- a/driver-cql-shaded/src/main/resources/activities/baselines/cql-iot.yaml +++ b/driver-cql-shaded/src/main/resources/activities/baselines/cql-iot.yaml @@ -1,6 +1,7 @@ # nb -v run driver=cql yaml=cql-iot tags=phase:schema host=dsehost description: | - This workload emulates a time-series data model and access patterns. + This workload emulates a time-series data model and access patterns. This is the same as cql-timeseries, + which is the preferred name as it is more canonical. This workload is retained for historic reasons. scenarios: default: diff --git a/driver-cql-shaded/src/main/resources/activities/baselinesv2/cql-keyvalue2.yaml b/driver-cql-shaded/src/main/resources/activities/baselinesv2/cql-keyvalue2.yaml new file mode 100644 index 000000000..9c5f243cb --- /dev/null +++ b/driver-cql-shaded/src/main/resources/activities/baselinesv2/cql-keyvalue2.yaml @@ -0,0 +1,109 @@ +description: | + A workload with only text keys and text values. + The CQL Key-Value workload demonstrates the simplest possible schema with payload data. This is useful for measuring + system capacity most directly in terms of raw operations. 
As a reference point, it provides some insight around types of + workloads that are constrained around messaging, threading, and tasking, rather than bulk throughput. + During preload, all keys are set with a value. During the main phase of the workload, random keys from the known + population are replaced with new values which never repeat. During the main phase, random partitions are selected for + upsert, with row values never repeating. + +scenarios: + default: + schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto + astra: + schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto + +bindings: + seq_key: Mod(<>); ToString() -> String + seq_value: Hash(); Mod(<>); ToString() -> String + rw_key: <int>>; ToString() -> String + rw_value: Hash(); <int>>; ToString() -> String + +blocks: + - name: schema + tags: + phase: schema + params: + prepared: false + statements: + - create-keyspace: | + create keyspace if not exists <> + WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<>'} + AND durable_writes = true; + tags: + name: create-keyspace + - create-table: | + create table if not exists <>.<> ( + key text, + value text, + PRIMARY KEY (key) + ); + tags: + name: create-table + - name: schema-astra + tags: + phase: schema-astra + params: + prepared: false + statements: + - create-table: | + create table if not exists <>.<> ( + key text, + value text, + PRIMARY KEY (key) + ); + tags: + name: create-table-astra + - name: rampup + tags: + phase: rampup + params: + cl: <> + statements: + - rampup-insert: | + insert into <>.<> + (key, value) + 
values ({seq_key},{seq_value}); + tags: + name: rampup-insert + - name: verify + tags: + phase: verify + type: read + params: + cl: <> + statements: + - verify-select: | + select * from <>.<> where key={seq_key}; + verify-fields: key->seq_key, value->seq_value + tags: + name: verify + - name: main-read + tags: + phase: main + type: read + params: + ratio: 5 + cl: <> + statements: + - main-select: | + select * from <>.<> where key={rw_key}; + tags: + name: main-select + - name: main-write + tags: + phase: main + type: write + params: + ratio: 5 + cl: <> + statements: + - main-insert: | + insert into <>.<> + (key, value) values ({rw_key}, {rw_value}); + tags: + name: main-insert diff --git a/driver-cql-shaded/src/main/resources/activities/baselinesv2/cql-timeseries2.yaml b/driver-cql-shaded/src/main/resources/activities/baselinesv2/cql-timeseries2.yaml new file mode 100644 index 000000000..4e0aa6a9b --- /dev/null +++ b/driver-cql-shaded/src/main/resources/activities/baselinesv2/cql-timeseries2.yaml @@ -0,0 +1,138 @@ +# nb -v run driver=cql yaml=cql-timeseries2 tags=phase:schema host=dsehost +description: | + This workload emulates a time-series data model and access patterns. 
+ +scenarios: + default: + schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto + astra: + schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto +params: + instrument: TEMPLATE(instrument,false) +bindings: + machine_id: Mod(<>); ToHashedUUID() -> java.util.UUID + sensor_name: HashedLineToString('data/variable_words.txt') + time: Mul(<>L); Div(<>L); ToDate() + cell_timestamp: Mul(<>L); Div(<>L); Mul(1000L) + sensor_value: Normal(0.0,5.0); Add(100.0) -> double + station_id: Div(<>);Mod(<>); ToHashedUUID() -> java.util.UUID + data: HashedFileExtractToString('data/lorem_ipsum_full.txt',800,1200) +blocks: + - tags: + phase: schema + params: + prepared: false + statements: + - create-keyspace: | + create keyspace if not exists <> + WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<>'} + AND durable_writes = true; + tags: + name: create-keyspace + - create-table : | + create table if not exists <>.<> ( + machine_id UUID, // source machine + sensor_name text, // sensor name + time timestamp, // timestamp of collection + sensor_value double, // + station_id UUID, // source location + data text, + PRIMARY KEY ((machine_id, sensor_name), time) + ) WITH CLUSTERING ORDER BY (time DESC) + AND compression = { 'sstable_compression' : '<>' } + AND compaction = { + 'class': 'TimeWindowCompactionStrategy', + 'compaction_window_size': <>, + 'compaction_window_unit': 'MINUTES' + }; + tags: + name: create-table + - truncate-table: | + truncate table <>.<>; + tags: + name: truncate-table + - tags: + phase: schema-astra + params: + prepared: false + 
statements: + - create-table-astra : | + create table if not exists <>.<> ( + machine_id UUID, // source machine + sensor_name text, // sensor name + time timestamp, // timestamp of collection + sensor_value double, // + station_id UUID, // source location + data text, + PRIMARY KEY ((machine_id, sensor_name), time) + ) WITH CLUSTERING ORDER BY (time DESC); + tags: + name: create-table-astra + - tags: + phase: rampup + params: + cl: <> + statements: + - insert-rampup: | + insert into <>.<> + (machine_id, sensor_name, time, sensor_value, station_id, data) + values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data}) + using timestamp {cell_timestamp} + idempotent: true + tags: + name: insert-rampup + params: + instrument: TEMPLATE(instrument-writes,TEMPLATE(instrument,false)) + - tags: + phase: verify + type: read + params: + ratio: 1 + cl: <> + statements: + - select-verify: | + select * from <>.<> + where machine_id={machine_id} and sensor_name={sensor_name} and time={time}; + verify-fields: "*, -cell_timestamp" + tags: + name: select-verify + params: + instrument: TEMPLATE(instrument-reads,TEMPLATE(instrument,false)) + - tags: + phase: main + type: read + params: + ratio: <> + cl: <> + statements: + - select-read: | + select * from <>.<> + where machine_id={machine_id} and sensor_name={sensor_name} + limit <> + tags: + name: select-read + params: + instrument: TEMPLATE(instrument-reads,TEMPLATE(instrument,false)) + + - tags: + phase: main + type: write + params: + ratio: <> + cl: <> + statements: + - insert-main: | + insert into <>.<> + (machine_id, sensor_name, time, sensor_value, station_id, data) + values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data}) + using timestamp {cell_timestamp} + idempotent: true + tags: + name: insert-main + params: + instrument: TEMPLATE(instrument-writes,TEMPLATE(instrument,false)) +