stabilized version conventions for workloads with baselines2

This commit is contained in:
Jonathan Shook 2021-04-14 11:29:48 -05:00
parent 4c84c804f3
commit c6a0d53d30
3 changed files with 249 additions and 1 deletions

View File

@ -1,6 +1,7 @@
# nb -v run driver=cql yaml=cql-iot tags=phase:schema host=dsehost
description: |
This workload emulates a time-series data model and access patterns.
This workload emulates a time-series data model and access patterns. This is the same a cql-timeseries,
which is the preferred name as it is more canonical. This workload is retained for historic reasons.
scenarios:
default:

View File

@ -0,0 +1,109 @@
description: |
A workload with only text keys and text values.
The CQL Key-Value workload demonstrates the simplest possible schema with payload data. This is useful for measuring
system capacity most directly in terms of raw operations. As a reference point, it provides some insight around types of
workloads that are constrained around messaging, threading, and tasking, rather than bulk throughput.
During preload, all keys are set with a value. During the main phase of the workload, random keys from the known
population are replaced with new values which never repeat. During the main phase, random partitions are selected for
upsert, with row values never repeating.
# Named scenarios. Each step is a `run` command that selects statement blocks
# by their phase tag. The astra scenario differs from default only in its
# schema step, which selects phase:schema-astra instead of phase:schema.
scenarios:
default:
# The schema step is pinned to a single thread (threads==1).
schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF
# Cycle counts come from the rampup-cycles / main-cycles template parameters,
# each written with 10000000 as its fallback value.
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
astra:
schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
# Data bindings, referenced from statements as {name}.
# seq_key / seq_value are used by the rampup and verify statements;
# rw_key / rw_value are used by the main-phase statements.
# Template parameters appearing here: keycount, valuecount, keydist, valdist.
bindings:
seq_key: Mod(<<keycount:1000000000>>); ToString() -> String
seq_value: Hash(); Mod(<<valuecount:1000000000>>); ToString() -> String
# NOTE(review): the rw_* ranges are literal (0,1000000000) and do not follow
# keycount / valuecount; override keydist / valdist together with them if the
# populations are meant to match -- confirm intent.
rw_key: <<keydist:Uniform(0,1000000000)->int>>; ToString() -> String
rw_value: Hash(); <<valdist:Uniform(0,1000000000)->int>>; ToString() -> String
blocks:
# Schema block (phase: schema): creates the keyspace and the key/value table
# when they do not already exist.
- name: schema
tags:
phase: schema
params:
# DDL statements are sent without being prepared.
prepared: false
statements:
# Keyspace name falls back to "baselines" and replication factor (rf) to 1.
- create-keyspace: |
create keyspace if not exists <<keyspace:baselines>>
WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<<rf:1>>'}
AND durable_writes = true;
tags:
name: create-keyspace
# Table name falls back to "keyvalue"; a single text partition key and a text value.
- create-table: |
create table if not exists <<keyspace:baselines>>.<<table:keyvalue>> (
key text,
value text,
PRIMARY KEY (key)
);
tags:
name: create-table
# Schema block selected by the astra scenario (phase: schema-astra).
# It contains only the create-table statement; there is no create-keyspace
# statement here (presumably the keyspace is provisioned outside the
# workload -- confirm).
- name: schema-astra
tags:
phase: schema-astra
params:
# DDL statements are sent without being prepared.
prepared: false
statements:
- create-table: |
create table if not exists <<keyspace:baselines>>.<<table:keyvalue>> (
key text,
value text,
PRIMARY KEY (key)
);
tags:
name: create-table-astra
# Rampup block (phase: rampup): inserts rows built from the seq_key and
# seq_value bindings.
- name: rampup
tags:
phase: rampup
params:
# Consistency level comes from the write_cl template parameter, falling back
# to LOCAL_QUORUM.
cl: <<write_cl:LOCAL_QUORUM>>
statements:
- rampup-insert: |
insert into <<keyspace:baselines>>.<<table:keyvalue>>
(key, value)
values ({seq_key},{seq_value});
tags:
name: rampup-insert
# Verify block (phase: verify, type: read): selects rows by the seq_key binding.
# None of the named scenarios in this file select phase:verify, so it has to be
# run explicitly.
- name: verify
tags:
phase: verify
type: read
params:
# Consistency level comes from the read_cl template parameter, falling back
# to LOCAL_QUORUM.
cl: <<read_cl:LOCAL_QUORUM>>
statements:
# verify-fields pairs the key column with seq_key and the value column with
# seq_value (presumably the values each column is checked against -- confirm
# against the driver documentation).
- verify-select: |
select * from <<keyspace:baselines>>.<<table:keyvalue>> where key={seq_key};
verify-fields: key->seq_key, value->seq_value
tags:
name: verify
# Main-phase read block (phase: main, type: read): selects a row by the rw_key
# binding.
- name: main-read
tags:
phase: main
type: read
params:
# Relative weight of this block within the main phase. Falls back to 5 (the
# previously hard-coded value) and can be overridden with the read_ratio
# template parameter.
ratio: <<read_ratio:5>>
# Consistency level comes from the read_cl template parameter, falling back
# to LOCAL_QUORUM.
cl: <<read_cl:LOCAL_QUORUM>>
statements:
- main-select: |
select * from <<keyspace:baselines>>.<<table:keyvalue>> where key={rw_key};
tags:
name: main-select
# Main-phase write block (phase: main, type: write): upserts a row built from
# the rw_key and rw_value bindings.
- name: main-write
tags:
phase: main
type: write
params:
# Relative weight of this block within the main phase. Falls back to 5 (the
# previously hard-coded value) and can be overridden with the write_ratio
# template parameter.
ratio: <<write_ratio:5>>
# Consistency level comes from the write_cl template parameter, falling back
# to LOCAL_QUORUM.
cl: <<write_cl:LOCAL_QUORUM>>
statements:
- main-insert: |
insert into <<keyspace:baselines>>.<<table:keyvalue>>
(key, value) values ({rw_key}, {rw_value});
tags:
name: main-insert

View File

@ -0,0 +1,138 @@
# nb -v run driver=cql yaml=cql-iot tags=phase:schema host=dsehost
description: |
This workload emulates a time-series data model and access patterns.
# Named scenarios. Each step is a `run` command that selects statement blocks
# by their phase tag. The astra scenario differs from default only in its
# schema step, which selects phase:schema-astra instead of phase:schema.
scenarios:
default:
# The schema step is pinned to a single thread (threads==1).
schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF
# Cycle counts come from the rampup-cycles / main-cycles template parameters,
# each written with 10000000 as its fallback value.
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
astra:
schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
# Workload-level params: instrument falls back to false and can be overridden
# with the instrument template parameter.
params:
instrument: TEMPLATE(instrument,false)
# Data bindings, referenced from statements as {name}.
# Template parameters appearing here: sources (10000), timespeed (100),
# stations (100).
bindings:
machine_id: Mod(<<sources:10000>>); ToHashedUUID() -> java.util.UUID
sensor_name: HashedLineToString('data/variable_words.txt')
# time and cell_timestamp share the same Mul/Div prefix; cell_timestamp is
# additionally multiplied by 1000 (presumably milliseconds -> microseconds for
# USING TIMESTAMP -- confirm).
time: Mul(<<timespeed:100>>L); Div(<<sources:10000>>L); ToDate()
cell_timestamp: Mul(<<timespeed:100>>L); Div(<<sources:10000>>L); Mul(1000L)
sensor_value: Normal(0.0,5.0); Add(100.0) -> double
station_id: Div(<<sources:10000>>);Mod(<<stations:100>>); ToHashedUUID() -> java.util.UUID
data: HashedFileExtractToString('data/lorem_ipsum_full.txt',800,1200)
blocks:
# Schema block (phase: schema): creates the keyspace and the time-series table
# when they do not already exist, then truncates the table.
- tags:
phase: schema
params:
# DDL statements are sent without being prepared.
prepared: false
statements:
# Keyspace name falls back to "baselines" and replication factor (rf) to 1.
- create-keyspace: |
create keyspace if not exists <<keyspace:baselines>>
WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<<rf:1>>'}
AND durable_writes = true;
tags:
name: create-keyspace
# Table name falls back to "iot". Partitioned by (machine_id, sensor_name) and
# clustered by time descending. The compressor and the compaction window size
# are template parameters (compression, expiry_minutes).
- create-table : |
create table if not exists <<keyspace:baselines>>.<<table:iot>> (
machine_id UUID, // source machine
sensor_name text, // sensor name
time timestamp, // timestamp of collection
sensor_value double, //
station_id UUID, // source location
data text,
PRIMARY KEY ((machine_id, sensor_name), time)
) WITH CLUSTERING ORDER BY (time DESC)
AND compression = { 'sstable_compression' : '<<compression:LZ4Compressor>>' }
AND compaction = {
'class': 'TimeWindowCompactionStrategy',
'compaction_window_size': <<expiry_minutes:60>>,
'compaction_window_unit': 'MINUTES'
};
tags:
name: create-table
# NOTE: this statement is part of the schema phase, so running that phase
# against an existing table removes its data.
- truncate-table: |
truncate table <<keyspace:baselines>>.<<table:iot>>;
tags:
name: truncate-table
# Schema block selected by the astra scenario (phase: schema-astra).
# It contains only a create-table statement: no create-keyspace, no
# compression/compaction options, and no truncate.
- tags:
phase: schema-astra
params:
# DDL statements are sent without being prepared.
prepared: false
statements:
- create-table-astra : |
create table if not exists <<keyspace:baselines>>.<<table:iot>> (
machine_id UUID, // source machine
sensor_name text, // sensor name
time timestamp, // timestamp of collection
sensor_value double, //
station_id UUID, // source location
data text,
PRIMARY KEY ((machine_id, sensor_name), time)
) WITH CLUSTERING ORDER BY (time DESC);
tags:
name: create-table-astra
# Rampup block (phase: rampup): inserts rows with an explicit write timestamp
# taken from the cell_timestamp binding.
- tags:
phase: rampup
params:
# Consistency level comes from the write_cl template parameter, falling back
# to LOCAL_QUORUM.
cl: <<write_cl:LOCAL_QUORUM>>
statements:
# The statement is marked idempotent. Its instrument param resolves from
# instrument-writes, then instrument, then false.
- insert-rampup: |
insert into <<keyspace:baselines>>.<<table:iot>>
(machine_id, sensor_name, time, sensor_value, station_id, data)
values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data})
using timestamp {cell_timestamp}
idempotent: true
tags:
name: insert-rampup
params:
instrument: TEMPLATE(instrument-writes,TEMPLATE(instrument,false))
# Verify block (phase: verify, type: read): selects a single row by its full
# primary key (machine_id, sensor_name, time).
# None of the named scenarios in this file select phase:verify, so it has to be
# run explicitly.
- tags:
phase: verify
type: read
params:
ratio: 1
# Consistency level comes from the read_cl template parameter, falling back
# to LOCAL_QUORUM.
cl: <<read_cl:LOCAL_QUORUM>>
statements:
# verify-fields is "*, -cell_timestamp" (presumably: check every binding
# except cell_timestamp, which is not a table column -- confirm against the
# driver documentation). The instrument param resolves from instrument-reads,
# then instrument, then false.
- select-verify: |
select * from <<keyspace:baselines>>.<<table:iot>>
where machine_id={machine_id} and sensor_name={sensor_name} and time={time};
verify-fields: "*, -cell_timestamp"
tags:
name: select-verify
params:
instrument: TEMPLATE(instrument-reads,TEMPLATE(instrument,false))
# Main-phase read block (phase: main, type: read): selects rows from one
# (machine_id, sensor_name) partition, capped by the limit template parameter
# (falls back to 10).
- tags:
phase: main
type: read
params:
# Relative weight of this block within the main phase; read_ratio falls back to 1.
ratio: <<read_ratio:1>>
# Consistency level comes from the read_cl template parameter, falling back
# to LOCAL_QUORUM.
cl: <<read_cl:LOCAL_QUORUM>>
statements:
# The instrument param resolves from instrument-reads, then instrument, then false.
- select-read: |
select * from <<keyspace:baselines>>.<<table:iot>>
where machine_id={machine_id} and sensor_name={sensor_name}
limit <<limit:10>>
tags:
name: select-read
params:
instrument: TEMPLATE(instrument-reads,TEMPLATE(instrument,false))
# Main-phase write block (phase: main, type: write): inserts rows with an
# explicit write timestamp taken from the cell_timestamp binding. The insert
# statement text matches the one in the rampup block.
- tags:
phase: main
type: write
params:
# Relative weight of this block within the main phase; write_ratio falls back to 9.
ratio: <<write_ratio:9>>
# Consistency level comes from the write_cl template parameter, falling back
# to LOCAL_QUORUM.
cl: <<write_cl:LOCAL_QUORUM>>
statements:
# The statement is marked idempotent. Its instrument param resolves from
# instrument-writes, then instrument, then false.
- insert-main: |
insert into <<keyspace:baselines>>.<<table:iot>>
(machine_id, sensor_name, time, sensor_value, station_id, data)
values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data})
using timestamp {cell_timestamp}
idempotent: true
tags:
name: insert-main
params:
instrument: TEMPLATE(instrument-writes,TEMPLATE(instrument,false))