diff --git a/adapter-cqld4/src/main/resources/activities/baselines/README.md b/adapter-cqld4/src/main/resources/activities/baselines/README.md new file mode 100644 index 000000000..4ac9747ec --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/baselines/README.md @@ -0,0 +1,12 @@ +# Baselines Version 1 + +In order to avoid changing those tests and possibly impacting +results without warning, the baseline sets are being kept +in separate directories. + +## Status + +This directory is for baselines version 1. The files in this directory +should not be modified arbitrarily. They need to remain stable so that +comparisons to previous results based on these workloads are still valid. + diff --git a/adapter-cqld4/src/main/resources/activities/baselines/cql-iot-dse.yaml b/adapter-cqld4/src/main/resources/activities/baselines/cql-iot-dse.yaml new file mode 100644 index 000000000..cfb151f99 --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/baselines/cql-iot-dse.yaml @@ -0,0 +1,107 @@ +# nb -v run driver=cql yaml=cql-iot tags=phase:schema host=dsehost +description: An IOT workload with more optimal settings for DSE + +scenarios: + default: + schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto + +bindings: + machine_id: Mod(<>); ToHashedUUID() -> java.util.UUID + sensor_name: HashedLineToString('data/variable_words.txt') + time: Mul(<>L); Div(<>L); ToDate() + cell_timestamp: Mul(<>L); Div(<>L); Mul(1000L) + sensor_value: Normal(0.0,5.0); Add(100.0) -> double + station_id: Div(<>);Mod(<>); ToHashedUUID() -> java.util.UUID + data: HashedFileExtractToString('data/lorem_ipsum_full.txt',800,1200) +blocks: + - tags: + phase: schema + params: + prepared: false + statements: + - create-keyspace: | + create keyspace if not exists <> + WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<>'} + AND durable_writes = true; + tags: + name: create-keyspace + - create-table : | + create table if not exists <>.<> ( + machine_id UUID, // source machine + sensor_name text, // sensor name + time timestamp, // timestamp of collection + sensor_value double, // + station_id UUID, // source location + data text, + PRIMARY KEY ((machine_id, sensor_name), time) + ) WITH CLUSTERING ORDER BY (time DESC) + AND compression = { 'sstable_compression' : '<>' } + AND nodesync={'enabled': 'true'} + AND compaction = { + 'class': 'TimeWindowCompactionStrategy', + 'compaction_window_size': <>, + 'compaction_window_unit': 'MINUTES', + 'split_during_flush': true + }; + tags: + name: create-table + - truncate-table: | + truncate table <>.<>; + tags: + name: truncate-table + - tags: + phase: rampup + params: + cl: <> + statements: + - insert-rampup: | + insert into <>.<> + (machine_id, sensor_name, time, sensor_value, station_id, data) + values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data}) + using timestamp {cell_timestamp} + idempotent: true + tags: + name: insert-rampup + - tags: + phase: verify + type: read + params: + ratio: 1 + cl: <> + statements: + - select-verify: | + select * from <>.<> + where machine_id={machine_id} and sensor_name={sensor_name} and time={time}; + verify-fields: "*, -cell_timestamp" + tags: + name: select-verify + - tags: + phase: main + type: read + params: + ratio: <> + cl: <> + statements: + - select-read: | + select * from <>.<> + 
where machine_id={machine_id} and sensor_name={sensor_name} + limit <> + tags: + name: select-read + - tags: + phase: main + type: write + params: + ratio: <> + cl: <> + statements: + - insert-main: | + insert into <>.<> + (machine_id, sensor_name, time, sensor_value, station_id, data) + values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data}) + using timestamp {cell_timestamp} + idempotent: true + tags: + name: insert-main diff --git a/adapter-cqld4/src/main/resources/activities/baselines/cql-iot.md b/adapter-cqld4/src/main/resources/activities/baselines/cql-iot.md new file mode 100644 index 000000000..a3d6817b4 --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/baselines/cql-iot.md @@ -0,0 +1,93 @@ +--- +title: CQL IoT +weight: 2 +--- + +# CQL IoT + +## Description + +The CQL IoT workload demonstrates a time-series telemetry system as typically found in IoT applications. The bulk of the +traffic is telemetry ingest. This is useful for establishing steady-state capacity with an actively managed data +lifecycle. This is a steady-state workload, where inserts are 90% of the operations and queries are the remaining 10%. + +## Named Scenarios + +### default + +The default scenario for cql-iot.yaml runs the conventional test phases: schema, rampup, main + +## Testing Considerations + +For in-depth testing, this workload will take some time to build up data density where TTLs begin purging expired data. +At this point, the test should be considered steady-state. + +## Data Set + +### baselines.iot dataset (rampup,main) + +- machine_id - 1000 unique values +- sensor_name - 100 symbolic names, from a seed file +- time - monotonically increasing timestamp +- station_id - 100 unique values +- sensor_value - normal distribution, median 100, stddev 5.0 + +## Operations + +### insert (rampup, main) + + insert into baselines.iot + (machine_id, sensor_name, time, sensor_value, station_id) + values (?,?,?,?,?) + +### query (main) + + select * from baselines.iot + where machine_id=? and sensor_name=? + limit 10 + +## Workload Parameters + +This workload has no adjustable parameters when used in the baseline tests. + +When used for additional testing, the following parameters should be supported: + +- machines - the number of unique sources (default: 1000) +- stations - the number of unique stations (default: 100) +- limit - the limit for rows in reads (default: 10) +- expiry_minutes - the TTL for data in minutes. +- compression - enabled or disabled, to disable, set compression='' +- write_cl - the consistency level for writes (default: LOCAL_QUORUM) +- read_cl - the consistency level for reads (defaultL LOCAL_QUORUM) + +## Key Performance Metrics + +Client side metrics are a more accurate measure of the system behavior from a user's perspective. For microbench and +baseline tests, these are the only required metrics. When gathering metrics from multiple server nodes, they should be +kept in aggregate form, for min, max, and average for each time interval in monitoring. For example, the avg p99 latency +for reads should be kept, as well as the min p99 latency for reads. If possible metrics, should be kept in plot form, +with discrete histogram values per interval. 
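
As a purely illustrative sketch (the latency values below are hypothetical, not measured results), aggregating the per-interval p99 read latency reported by three server nodes would be kept as:

    interval 00:01   node1 p99=11ms   node2 p99=15ms   node3 p99=12ms
    aggregate        min p99=11ms     max p99=15ms     avg p99=12.7ms
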
+ +### Client-Side + +- read ops/s +- write ops/s +- read latency histograms +- write latency histograms +- exception counts + +### Server-Side + +- bytes compacted over time +- pending compactions +- active data on disk +- total data on disk + +## Notes on Interpretation + +- In order for this test to show useful performance contrasts, it has to be ramped to steady-state. +- Ingest of 1G rows yields an on-disk data density of 20.8 GB using default compression settings. + + + + diff --git a/adapter-cqld4/src/main/resources/activities/baselines/cql-iot.yaml b/adapter-cqld4/src/main/resources/activities/baselines/cql-iot.yaml new file mode 100644 index 000000000..9e2ffe2d5 --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/baselines/cql-iot.yaml @@ -0,0 +1,140 @@ +# nb -v run driver=cql yaml=cql-iot tags=phase:schema host=dsehost +description: | + Time-series data model and access patterns. (use cql-timeseries instead) + This is the same a cql-timeseries, which is the preferred name as it is + more canonical. This workload is retained for historic reasons. + +scenarios: + default: + schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto + astra: + schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto +params: + instrument: TEMPLATE(instrument,false) +bindings: + machine_id: Mod(<>); ToHashedUUID() -> java.util.UUID + sensor_name: HashedLineToString('data/variable_words.txt') + time: Mul(<>L); Div(<>L); ToDate() + cell_timestamp: Mul(<>L); Div(<>L); Mul(1000L) + sensor_value: Normal(0.0,5.0); Add(100.0) -> double + station_id: Div(<>);Mod(<>); ToHashedUUID() -> java.util.UUID + data: HashedFileExtractToString('data/lorem_ipsum_full.txt',800,1200) +blocks: + - tags: + phase: schema + params: + prepared: false + statements: + - create-keyspace: | + create keyspace if not exists <> + WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<>'} + AND durable_writes = true; + tags: + name: create-keyspace + - create-table : | + create table if not exists <>.<> ( + machine_id UUID, // source machine + sensor_name text, // sensor name + time timestamp, // timestamp of collection + sensor_value double, // + station_id UUID, // source location + data text, + PRIMARY KEY ((machine_id, sensor_name), time) + ) WITH CLUSTERING ORDER BY (time DESC) + AND compression = { 'sstable_compression' : '<>' } + AND compaction = { + 'class': 'TimeWindowCompactionStrategy', + 'compaction_window_size': <>, + 'compaction_window_unit': 'MINUTES' + }; + tags: + name: create-table + - truncate-table: | + truncate table <>.<>; + tags: + name: truncate-table + - tags: + phase: schema-astra + params: + prepared: false + statements: + - create-table-astra : | + create table if not exists <>.<> ( + machine_id UUID, // source machine + sensor_name text, // sensor name + time timestamp, // timestamp of collection + sensor_value double, // + station_id UUID, // source location + data text, + PRIMARY KEY ((machine_id, sensor_name), time) + ) WITH CLUSTERING ORDER BY (time DESC); + tags: + name: create-table-astra + - tags: + phase: rampup + params: + cl: <> + statements: + - insert-rampup: | + 
insert into <>.<> + (machine_id, sensor_name, time, sensor_value, station_id, data) + values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data}) + using timestamp {cell_timestamp} + idempotent: true + tags: + name: insert-rampup + params: + instrument: TEMPLATE(instrument-writes,TEMPLATE(instrument,false)) + - tags: + phase: verify + type: read + params: + ratio: 1 + cl: <> + statements: + - select-verify: | + select * from <>.<> + where machine_id={machine_id} and sensor_name={sensor_name} and time={time}; + verify-fields: "*, -cell_timestamp" + tags: + name: select-verify + params: + instrument: TEMPLATE(instrument-reads,TEMPLATE(instrument,false)) + - tags: + phase: main + type: read + params: + ratio: <> + cl: <> + statements: + - select-read: | + select * from <>.<> + where machine_id={machine_id} and sensor_name={sensor_name} + limit <> + tags: + name: select-read + params: + instrument: TEMPLATE(instrument-reads,TEMPLATE(instrument,false)) + + - tags: + phase: main + type: write + params: + ratio: <> + cl: <> + statements: + - insert-main: | + insert into <>.<> + (machine_id, sensor_name, time, sensor_value, station_id, data) + values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data}) + using timestamp {cell_timestamp} + idempotent: true + tags: + name: insert-main + params: + instrument: TEMPLATE(instrument-writes,TEMPLATE(instrument,false)) + diff --git a/adapter-cqld4/src/main/resources/activities/baselines/cql-keyvalue.md b/adapter-cqld4/src/main/resources/activities/baselines/cql-keyvalue.md new file mode 100644 index 000000000..4afda612a --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/baselines/cql-keyvalue.md @@ -0,0 +1,77 @@ +--- +title: CQL Key-Value +weight: 1 +--- + +## Description + +The CQL Key-Value workload demonstrates the simplest possible schema with payload data. This is useful for measuring +system capacity most directly in terms of raw operations. As a reference point, provides some insight around types of +workloads that are constrained around messaging, threading, and tasking, rather than bulk throughput. + +During preload, all keys are set with a value. During the main phase of the workload, random keys from the known +population are replaced with new values which never repeat. During the main phase, random partitions are selected for +upsert, with row values never repeating. + +## Operations + +### insert (rampup, main) + + insert into baselines.keyvalue (key, value) values (?,?); + +### read (main) + + select * from baselines.keyvalue where key=?key; + +## Data Set + +### baselines.keyvalue insert (rampup) + +- key - text, number as string, selected sequentially up to keycount +- value - text, number as string, selected sequentially up to valuecount + +### baselines.keyvalue insert (main) + +- key - text, number as string, selected uniformly within keycount +- value - text, number as string, selected uniformly within valuecount + +### baselines.keyvalue read (main) + +- key - text, number as string, selected uniformly within keycount + +## Workload Parameters + +This workload has no adjustable parameters when used in the baseline tests. + +When used for additional testing, the following parameters should be supported: + +- keycount - the number of unique keys +- valuecount - the number of unique values + +## Key Performance Metrics + +Client side metrics are a more accurate measure of the system behavior from a user's perspective. 
For microbench and +baseline tests, these are the only required metrics. When gathering metrics from multiple server nodes, they should be +kept in aggregate form, for min, max, and average for each time interval in monitoring. For example, the avg p99 latency +for reads should be kept, as well as the min p99 latency for reads. If possible metrics, should be kept in plot form, +with discrete histogram values per interval. + +### Client-Side + +- read ops/s +- write ops/s +- read latency histograms +- write latency histograms +- exception counts + +### Server-Side + +- pending compactions +- bytes compacted +- active data on disk +- total data on disk + +# Notes on Interpretation + +Once the average ratio of overwrites starts to balance with the rate of compaction, a steady state should be achieved. +At this point, pending compactions and bytes compacted should be mostly flat over time. diff --git a/adapter-cqld4/src/main/resources/activities/baselines/cql-keyvalue.yaml b/adapter-cqld4/src/main/resources/activities/baselines/cql-keyvalue.yaml new file mode 100644 index 000000000..d93a68708 --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/baselines/cql-keyvalue.yaml @@ -0,0 +1,102 @@ +description: A workload with only text keys and text values + +scenarios: + default: + schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto + astra: + schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto + +bindings: + seq_key: Mod(<>); ToString() -> String + seq_value: Hash(); Mod(<>); ToString() -> String + rw_key: <int>>; ToString() -> String + rw_value: Hash(); <int>>; ToString() -> String + +blocks: + - name: schema + tags: + phase: schema + params: + prepared: false + statements: + - create-keyspace: | + create keyspace if not exists <> + WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<>'} + AND durable_writes = true; + tags: + name: create-keyspace + - create-table: | + create table if not exists <>.<> ( + key text, + value text, + PRIMARY KEY (key) + ); + tags: + name: create-table + - name: schema-astra + tags: + phase: schema-astra + params: + prepared: false + statements: + - create-table: | + create table if not exists <>.<> ( + key text, + value text, + PRIMARY KEY (key) + ); + tags: + name: create-table-astra + - name: rampup + tags: + phase: rampup + params: + cl: <> + statements: + - rampup-insert: | + insert into <>.<> + (key, value) + values ({seq_key},{seq_value}); + tags: + name: rampup-insert + - name: verify + tags: + phase: verify + type: read + params: + cl: <> + statements: + - verify-select: | + select * from <>.<> where key={seq_key}; + verify-fields: key->seq_key, value->seq_value + tags: + name: verify + - name: main-read + tags: + phase: main + type: read + params: + ratio: 5 + cl: <> + statements: + - main-select: | + select * from <>.<> where key={rw_key}; + tags: + name: main-select + - name: main-write + tags: + phase: main + type: write + params: + ratio: 5 + cl: <> + statements: + - main-insert: | + insert into <>.<> + (key, value) values ({rw_key}, {rw_value}); + tags: + name: main-insert diff --git 
a/adapter-cqld4/src/main/resources/activities/baselines/cql-tabular.yaml b/adapter-cqld4/src/main/resources/activities/baselines/cql-tabular.yaml new file mode 100644 index 000000000..0476cbc8f --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/baselines/cql-tabular.yaml @@ -0,0 +1,112 @@ +description: A tabular workload with partitions, clusters, and data fields + +scenarios: + default: + schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto + astra: + schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto + +bindings: + # for ramp-up and verify + part_layout: Div(<>); ToString() -> String + clust_layout: Mod(<>); ToString() -> String + data: HashedFileExtractToString('data/lorem_ipsum_full.txt',50,150) + # for read + limit: Uniform(1,10) -> int + part_read: Uniform(0,<>)->int; ToString() -> String + clust_read: Add(1); Uniform(0,<>)->int; ToString() -> String + # for write + part_write: Hash(); Uniform(0,<>)->int; ToString() -> String + clust_write: Hash(); Add(1); Uniform(0,<>)->int; ToString() -> String + data_write: Hash(); HashedFileExtractToString('data/lorem_ipsum_full.txt',50,150) -> String + +blocks: + - name: schema + tags: + phase: schema + params: + prepared: false + statements: + - create-keyspace: | + create keyspace if not exists <> + WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<>'} + AND durable_writes = true; + tags: + name: create-keyspace + - create-table: | + create table if not exists <>.<> ( + part text, + clust text, + data text, + PRIMARY KEY (part,clust) + ); + tags: + name: create-table + - name: schema-astra + tags: + phase: schema-astra + params: + prepared: false + statements: + - create-table: | + create table if not exists <>.<> ( + part text, + clust text, + data text, + PRIMARY KEY (part,clust) + ); + tags: + name: create-table-astra + - name: rampup + tags: + phase: rampup + params: + cl: <> + statements: + - rampup-insert: | + insert into <>.<> + (part,clust,data) + values ({part_layout},{clust_layout},{data}) + tags: + name: rampup-insert + - name: verify + tags: + phase: verify + type: read + params: + cl: <> + statements: + - verify-select: | + select * from <>.<> where part={part_layout} and clust={clust_layout} + tags: + name: verify-select + - name: main-read + tags: + phase: main + type: read + params: + ratio: 5 + cl: <> + statements: + - main-select: | + select * from <>.<> where part={part_read} limit {limit}; + tags: + name: main-select + - name: main-write + tags: + phase: main + type: write + params: + ratio: 5 + cl: <> + statements: + - main-write: | + insert into <>.<> + (part, clust, data) + values ({part_write},{clust_write},{data_write}) + tags: + name: main-write diff --git a/adapter-cqld4/src/main/resources/activities/baselines/cql-widerows.md b/adapter-cqld4/src/main/resources/activities/baselines/cql-widerows.md new file mode 100644 index 000000000..bb40a9f25 --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/baselines/cql-widerows.md @@ -0,0 +1,86 @@ +--- +title: CQL Wide Rows +weight: 3 +--- + +## Description + +The CQL Wide Rows workload provides a way to tax a system 
with wide rows of a given size. This is useful to help +understand underlying performance differences between version and configuration options when using data models that have +wide rows. + +For in-depth testing, this workload needs significant density of partitions in combination with fully populated wide +rows. For exploratory or parameter contrasting tests, ensure that the rampup phase is configured correctly to establish +this initial state. + +## Data Set + +### baselines.widerows dataset (rampup) + +- part - text, number in string form, sequentially from 1..1E9 +- clust - text, number in string form, sequentially from 1..1E9 +- data - text, extract from lorem ipsum between 50 and 150 characters + +### baselines.widerows dataset (main) + +- part - text, number in string form, sequentially from 1..1E9 +- clust - text, number in string form, sequentially from 1.. +- data - text, extract from lorem ipsum between 50 and 150 characters + +- machine_id - 1000 unique values +- sensor_name - 100 symbolic names, from a seed file +- time - monotonically increasing timestamp +- station_id - 100 unique values +- sensor_value - normal distribution, median 100, stddev 5.0 + +## Operations + +### insert (rampup, main) + + insert into baselines.iot + (machine_id, sensor_name, time, sensor_value, station_id) + values (?,?,?,?,?) + +### query (main) + + select * from baselines.iot + where machine_id=? and sensor_name=? + limit 10 + +## Workload Parameters + +This workload has no adjustable parameters when used in the baseline tests. + +When used for additional testing, the following parameters should be supported: + +- partcount - the number of unique partitions +- partsize - the number of logical rows within a CQL partition + +## Key Performance Metrics + +Client side metrics are a more accurate measure of the system behavior from a user's perspective. For microbench and +baseline tests, these are the only required metrics. When gathering metrics from multiple server nodes, they should be +kept in aggregate form, for min, max, and average for each time interval in monitoring. For example, the avg p99 latency +for reads should be kept, as well as the min p99 latency for reads. If possible metrics, should be kept in plot form, +with discrete histogram values per interval. + +### Client-Side + +- read ops/s +- write ops/s +- read latency histograms +- write latency histograms +- exception counts + +### Server-Side + +- bytes compacted over time +- pending compactions +- active data on disk +- total data on disk + +## Notes on Interpretation + + + + diff --git a/adapter-cqld4/src/main/resources/activities/baselines/incremental.yaml b/adapter-cqld4/src/main/resources/activities/baselines/incremental.yaml new file mode 100644 index 000000000..b8eb07264 --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/baselines/incremental.yaml @@ -0,0 +1,121 @@ +description: | + This is a workload which creates an incrementally growing dataset over cycles. + + Rows will be added incrementally in both rampup and main phases. However, during + the main phase, reads will also occur at the same rate, with the read patterns + selecting from the size of data written up to that point. + In order to ensure that the reads and writes operate against the same set of + identifiers, it is crucial that the ratios are not adjusted unless the binding + recipes are adjusted to match. 
With write:read ratio of 1:1 and a prefix function + Div(2L) at the front of the main phase bindings, the writes and reads will address + the same rows rather than playing leap-frog on the cycle values. + The main phase can be run without the rampup phase for this workload, as long + as your test is defined as an incremental write and read test. If you need + background data pre-loaded to ensure realistic read times against pre-indexed + data, then you may use the rampup phase before the main phase. However, be aware + that these are simply different test definitions, and are both valid in different ways. + Due to how this workload is meant to be used, you must specify main-cycles= when + invoking the main phase. + The cycles value for the main test includes operations for both writes and reads, + thus the logical number of rows in the dataset will be effectively half of that. + This workload is intended to be run with a sufficiently high number of cycles. + Two key details should be obvious in the read latency metrics -- 1) the relationship + between dataset size, request rate, and response times and 2) inflection points + between any hot and cold access modes for LRU or other caching mechanisms as + the primary cache layer is saturated. + +scenarios: + default: + schema: run tags=phase:schema threads==1 + # rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,100000) threads=auto + main: run tags=phase:main cycles===TEMPLATE(main-cycles,0) threads=auto + default-schema: run tags=phase:schema threads==1 + # default-rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,100000) threads=auto + default-main: run tags=phase:main cycles===TEMPLATE(main-cycles,0) threads=auto + astra: + schema: run tags=phase:astra-schema threads==1 + # rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,0) threads=auto + main: run tags=phase:main cycles===TEMPLATE(main-cycles,0) threads=auto + +params: + instrument: true + +bindings: + seq_key: ToString() + rampup_value: Hash(); ToString(); + read_key: Div(2L); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); ToString(); + read_value: Div(2L); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); Hash(); ToString(); + write_key: Div(2L); Hash(); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); ToString(); + write_value: Div(2L); Hash(); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); Hash(); ToString(); + + +blocks: + - name: schema + tags: + phase: schema + statements: + - create-keyspace: | + create keyspace if not exists TEMPLATE(keyspace,baselines) + WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 'TEMPLATE(rf,1)'} + AND durable_writes = true; + tags: + name: create-keyspace + - create-table: | + create table if not exists TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental) ( + key text, + value text, + PRIMARY KEY (key) + ); + - name: schema-astra + tags: + phase: schema-astra + params: + prepared: false + statements: + - create-table: | + create table if not exists TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental) ( + key text, + value text, + PRIMARY KEY (key) + ); + tags: + name: create-table-astra + + - name: rampup + tags: + phase: rampup + params: + cl: TEMPLATE(write_cl,LOCAL_QUORUM) + statements: + - rampup-insert: | + insert into TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental) + (key, value) + values ({rampup_key},{rampup_value}); + tags: + name: rampup-insert + - name: main-read + tags: + phase: main + type: read + params: + ratio: 1 + cl: TEMPLATE(read_cl,LOCAL_QUORUM) + statements: + - 
main-select: | + select * from TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental) where key={read_key}; + tags: + name: main-select + - name: main-write + tags: + phase: main + type: write + params: + ratio: 1 + cl: TEMPLATE(write_cl,LOCAL_QUORUM) + statements: + - main-insert: | + insert into TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental) + (key, value) values ({write_key}, {write_value}); + tags: + name: main-insert + diff --git a/adapter-cqld4/src/main/resources/activities/baselinesv2/README.md b/adapter-cqld4/src/main/resources/activities/baselinesv2/README.md new file mode 100644 index 000000000..b0279cc09 --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/baselinesv2/README.md @@ -0,0 +1,18 @@ +# Baselines Version 2 + +In order to avoid changing those tests and possibly impacting +results without warning, the baseline sets are being kept +in separate directories. + +## Status + +This directory is for baselines version 2. These files are the current +in-development set of baselines, and may change in minor ways, or have +additional workloads added, for example. If you are performing baselines +over a period of time and need the workloads to be perfectly stable, +it is best to copy these to your test assets under a distinct name and +call them from there. + +To further disambiguate the workloads, each one has a version '2' +appended to the filename. + diff --git a/adapter-cqld4/src/main/resources/activities/baselinesv2/cql-keyvalue2.yaml b/adapter-cqld4/src/main/resources/activities/baselinesv2/cql-keyvalue2.yaml new file mode 100644 index 000000000..9c5f243cb --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/baselinesv2/cql-keyvalue2.yaml @@ -0,0 +1,109 @@ +description: | + A workload with only text keys and text values. + The CQL Key-Value workload demonstrates the simplest possible schema with payload data. This is useful for measuring + system capacity most directly in terms of raw operations. As a reference point, it provides some insight around types of + workloads that are constrained around messaging, threading, and tasking, rather than bulk throughput. + During preload, all keys are set with a value. During the main phase of the workload, random keys from the known + population are replaced with new values which never repeat. During the main phase, random partitions are selected for + upsert, with row values never repeating. 
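# As an illustrative invocation only (the host name and cycle counts are placeholders, not
# defaults of this file), the phases defined under scenarios below can be driven individually
# in the same style as the other baseline workloads:
#
#   nb run driver=cql yaml=cql-keyvalue2 tags=phase:schema host=dsehost
#   nb run driver=cql yaml=cql-keyvalue2 tags=phase:rampup host=dsehost cycles=10000000
#   nb run driver=cql yaml=cql-keyvalue2 tags=phase:main host=dsehost cycles=10000000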
+ +scenarios: + default: + schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto + astra: + schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto + +bindings: + seq_key: Mod(<>); ToString() -> String + seq_value: Hash(); Mod(<>); ToString() -> String + rw_key: <int>>; ToString() -> String + rw_value: Hash(); <int>>; ToString() -> String + +blocks: + - name: schema + tags: + phase: schema + params: + prepared: false + statements: + - create-keyspace: | + create keyspace if not exists <> + WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<>'} + AND durable_writes = true; + tags: + name: create-keyspace + - create-table: | + create table if not exists <>.<> ( + key text, + value text, + PRIMARY KEY (key) + ); + tags: + name: create-table + - name: schema-astra + tags: + phase: schema-astra + params: + prepared: false + statements: + - create-table: | + create table if not exists <>.<> ( + key text, + value text, + PRIMARY KEY (key) + ); + tags: + name: create-table-astra + - name: rampup + tags: + phase: rampup + params: + cl: <> + statements: + - rampup-insert: | + insert into <>.<> + (key, value) + values ({seq_key},{seq_value}); + tags: + name: rampup-insert + - name: verify + tags: + phase: verify + type: read + params: + cl: <> + statements: + - verify-select: | + select * from <>.<> where key={seq_key}; + verify-fields: key->seq_key, value->seq_value + tags: + name: verify + - name: main-read + tags: + phase: main + type: read + params: + ratio: 5 + cl: <> + statements: + - main-select: | + select * from <>.<> where key={rw_key}; + tags: + name: main-select + - name: main-write + tags: + phase: main + type: write + params: + ratio: 5 + cl: <> + statements: + - main-insert: | + insert into <>.<> + (key, value) values ({rw_key}, {rw_value}); + tags: + name: main-insert diff --git a/adapter-cqld4/src/main/resources/activities/baselinesv2/cql-tabular2.yaml b/adapter-cqld4/src/main/resources/activities/baselinesv2/cql-tabular2.yaml new file mode 100644 index 000000000..a818b3bd6 --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/baselinesv2/cql-tabular2.yaml @@ -0,0 +1,176 @@ +description: | + A tabular workload with partitions, clusters, and data fields + This workload contains partitioning and cluster along with a set + of 8 fields of varying length. The field values vary in size according + to the fibonacci sequence times a base size factor of 10, with + an additional 10% variance for each field. + The read patterns have a variety of field subsets specified. + + During rampup, all rows will be written partition by partition, + filling in all rows of that partition before moving on to the next. + Example: With a partition size of 1000 and 1B rows, there will be + 1000000 partitions. + + During main phase, the read patterns are varied with different + field sets. As well, the number of rows which will be returned + is varied betweeen 1 and 10. + + By default, reads occur at the same ratio as writes, with main + phase writes writing full rows. 
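# Field sizing sketch (restating the description above; approximate, not normative):
# with a base size factor of 10, the nominal field lengths grow roughly as
#   data0 ~ 10, data1 ~ 20, data2 ~ 30, data3 ~ 50, data4 ~ 80, data5 ~ 125, data6 ~ 210, data7 ~ 340
# characters, and each HashedFileExtractToString('data/lorem_ipsum_full.txt',min,max) binding
# below samples an extract whose length varies around that nominal size
# (e.g. 9-11 characters for data0, 18-22 for data1, and so on).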
+ + +scenarios: + default: + schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10B) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,100M) threads=auto + schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10B) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,100M) threads=auto + astra: + schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto + +params: + instrument: true +bindings: + + # for ramp-up and verify phases + # + part_layout: Div(<>); ToString() -> String + clust_layout: Mod(<>); ToString() -> String + # todo: update these definitions to use the simpler 10,0.1, 20, 0.2, ... + data0: Add(10); HashedFileExtractToString('data/lorem_ipsum_full.txt',9,11) + data1: Add(20); HashedFileExtractToString('data/lorem_ipsum_full.txt',18,22) + data2: Add(30); HashedFileExtractToString('data/lorem_ipsum_full.txt',27,33) + data3: Add(40); HashedFileExtractToString('data/lorem_ipsum_full.txt',45,55) + data4: Add(50); HashedFileExtractToString('data/lorem_ipsum_full.txt',72,88) + data5: Add(60); HashedFileExtractToString('data/lorem_ipsum_full.txt',107,143) + data6: Add(70); HashedFileExtractToString('data/lorem_ipsum_full.txt',189,231) + data7: Add(80); HashedFileExtractToString('data/lorem_ipsum_full.txt',306,374) + + # for main phase + # for write + part_write: Hash(); Uniform(0,<>)->int; ToString() -> String + clust_write: Hash(); Add(1); Uniform(0,<>)->int; ToString() -> String + data_write: Hash(); HashedFileExtractToString('data/lorem_ipsum_full.txt',50,150) -> String + + # for read + limit: Uniform(1,10) -> int + part_read: Uniform(0,<>)->int; ToString() -> String + clust_read: Add(1); Uniform(0,<>)->int; ToString() -> String + +blocks: + - name: schema + tags: + phase: schema + params: + prepared: false + statements: + - create-keyspace: | + create keyspace if not exists <> + WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<>'} + AND durable_writes = true; + tags: + name: create-keyspace + - create-table: | + create table if not exists <>.<> ( + part text, + clust text, + data0 text, data1 text, data2 text, data3 text, + data4 text, data5 text, data6 text, data7 text, + PRIMARY KEY (part,clust) + ); + tags: + name: create-table + - name: schema-astra + tags: + phase: schema-astra + params: + prepared: false + statements: + - create-table: | + create table if not exists <>.<> ( + part text, + clust text, + data0 text, data1 text, data2 text, data3 text, + data4 text, data5 text, data6 text, data7 text, + PRIMARY KEY (part,clust) + ); + tags: + name: create-table-astra + - name: rampup + tags: + phase: rampup + params: + cl: <> + statements: + - rampup-insert: | + insert into <>.<> + (part,clust,data0,data1,data2,data3,data4,data5,data6,data7) + values ({part_layout},{clust_layout},{data0},{data1},{data2},{data3},{data4},{data5},{data6},{data7}) + tags: + name: rampup-insert + - name: verify + tags: + phase: verify + type: read + params: + cl: <> + statements: + - verify-select: | + select * from <>.<> where part={part_layout} and clust={clust_layout} + tags: + name: verify-select + - name: main-read + tags: + 
phase: main + type: read + params: + ratio: 1 + cl: <> + statements: + - main-select-all: | + select * from <>.<> where part={part_read} limit {limit}; + tags: + name: main-select-01 + - main-select-01: | + select data0,data1 from <>.<> where part={part_read} limit {limit}; + tags: + name: main-select-0246 + - main-select-0246: | + select data0,data2,data4,data6 from <>.<> where part={part_read} limit {limit}; + tags: + name: main-select-1357 + - main-select-1357: | + select data1,data3,data5,data7 from <>.<> where part={part_read} limit {limit}; + tags: + name: main-select-0123 + - main-select-0123: | + select data0,data1,data2,data3 from <>.<> where part={part_read} limit {limit}; + tags: + name: main-select-4567 + - main-select-4567: | + select data4,data5,data6,data7 from <>.<> where part={part_read} limit {limit}; + tags: + name: main-select-01234567 + - main-select: | + select data0,data1,data2,data3,data4,data5,data6,data7 from <>.<> where part={part_read} limit {limit}; + tags: + name: main-select + - name: main-write + tags: + phase: main + type: write + params: + ratio: 8 + cl: <> + statements: + - main-write: | + insert into <>.<> + (part, clust, data0,data1,data2,data3,data4,data5,data6,data7) + values ({part_write},{clust_write},{data0},{data1},{data2},{data3},{data4},{data5},{data6},{data7}) + tags: + name: main-write + diff --git a/adapter-cqld4/src/main/resources/activities/baselinesv2/cql-timeseries2.yaml b/adapter-cqld4/src/main/resources/activities/baselinesv2/cql-timeseries2.yaml new file mode 100644 index 000000000..4e0aa6a9b --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/baselinesv2/cql-timeseries2.yaml @@ -0,0 +1,138 @@ +# nb -v run driver=cql yaml=cql-iot tags=phase:schema host=dsehost +description: | + This workload emulates a time-series data model and access patterns. 
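# Data model sketch (summarizing the schema and statements defined below, not adding to them):
#
#   PRIMARY KEY ((machine_id, sensor_name), time)   -- one partition per machine/sensor pair
#   CLUSTERING ORDER BY (time DESC)                  -- newest readings first
#
# so the main-phase read returns the most recent rows for a single sensor up to the configured
# limit, while writes append new readings with a client-supplied cell timestamp (using timestamp).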
+ +scenarios: + default: + schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto + astra: + schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF + rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto + main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto +params: + instrument: TEMPLATE(instrument,false) +bindings: + machine_id: Mod(<>); ToHashedUUID() -> java.util.UUID + sensor_name: HashedLineToString('data/variable_words.txt') + time: Mul(<>L); Div(<>L); ToDate() + cell_timestamp: Mul(<>L); Div(<>L); Mul(1000L) + sensor_value: Normal(0.0,5.0); Add(100.0) -> double + station_id: Div(<>);Mod(<>); ToHashedUUID() -> java.util.UUID + data: HashedFileExtractToString('data/lorem_ipsum_full.txt',800,1200) +blocks: + - tags: + phase: schema + params: + prepared: false + statements: + - create-keyspace: | + create keyspace if not exists <> + WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<>'} + AND durable_writes = true; + tags: + name: create-keyspace + - create-table : | + create table if not exists <>.<> ( + machine_id UUID, // source machine + sensor_name text, // sensor name + time timestamp, // timestamp of collection + sensor_value double, // + station_id UUID, // source location + data text, + PRIMARY KEY ((machine_id, sensor_name), time) + ) WITH CLUSTERING ORDER BY (time DESC) + AND compression = { 'sstable_compression' : '<>' } + AND compaction = { + 'class': 'TimeWindowCompactionStrategy', + 'compaction_window_size': <>, + 'compaction_window_unit': 'MINUTES' + }; + tags: + name: create-table + - truncate-table: | + truncate table <>.<>; + tags: + name: truncate-table + - tags: + phase: schema-astra + params: + prepared: false + statements: + - create-table-astra : | + create table if not exists <>.<> ( + machine_id UUID, // source machine + sensor_name text, // sensor name + time timestamp, // timestamp of collection + sensor_value double, // + station_id UUID, // source location + data text, + PRIMARY KEY ((machine_id, sensor_name), time) + ) WITH CLUSTERING ORDER BY (time DESC); + tags: + name: create-table-astra + - tags: + phase: rampup + params: + cl: <> + statements: + - insert-rampup: | + insert into <>.<> + (machine_id, sensor_name, time, sensor_value, station_id, data) + values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data}) + using timestamp {cell_timestamp} + idempotent: true + tags: + name: insert-rampup + params: + instrument: TEMPLATE(instrument-writes,TEMPLATE(instrument,false)) + - tags: + phase: verify + type: read + params: + ratio: 1 + cl: <> + statements: + - select-verify: | + select * from <>.<> + where machine_id={machine_id} and sensor_name={sensor_name} and time={time}; + verify-fields: "*, -cell_timestamp" + tags: + name: select-verify + params: + instrument: TEMPLATE(instrument-reads,TEMPLATE(instrument,false)) + - tags: + phase: main + type: read + params: + ratio: <> + cl: <> + statements: + - select-read: | + select * from <>.<> + where machine_id={machine_id} and sensor_name={sensor_name} + limit <> + tags: + name: select-read + params: + instrument: TEMPLATE(instrument-reads,TEMPLATE(instrument,false)) + + - tags: + phase: main + type: write + params: + ratio: <> + cl: <> + statements: + - insert-main: 
| + insert into <>.<> + (machine_id, sensor_name, time, sensor_value, station_id, data) + values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data}) + using timestamp {cell_timestamp} + idempotent: true + tags: + name: insert-main + params: + instrument: TEMPLATE(instrument-writes,TEMPLATE(instrument,false)) + diff --git a/adapter-cqld4/src/main/resources/activities/examples/bindings/date.yaml b/adapter-cqld4/src/main/resources/activities/examples/bindings/date.yaml new file mode 100644 index 000000000..f5fa655c0 --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/examples/bindings/date.yaml @@ -0,0 +1,39 @@ +# You can run this file with this command line to see the values printed to stdout: +# ./ebdse run driver=stdout yaml=bindings/date.yaml cycles=10 + +# This file demonstrates different types of timestamp recipes +# that you can use with virtdata. (The bindings used in ebdse) + +# If you want to control the output, uncomment and edit the statement template below +# and modify the named anchors to suit your output requirements. + +#statements: +# example1: "{fullname}\n" + +bindings: +# All uncommented lines under this are indented, so they become named bindings below +# the entry above + +# Normally, the value that you get with a cycle starts at 0. + + cycleNum: Identity(); + +# here we convert the cycle number to a Date by casting. + + id: Identity(); ToDate(); + +# Date during 2017 (number of milliseconds in a year: 31,536,000,000) + date: StartingEpochMillis('2017-01-01 23:59:59'); AddHashRange(0L,31536000000L); StringDateWrapper("YYYY-MM-dd") + +# Example output: + +# date : 2017-09-17 +# date : 2017-08-01 +# date : 2017-04-22 +# date : 2017-04-09 +# date : 2017-05-28 +# date : 2017-08-06 +# date : 2017-07-05 +# date : 2017-02-07 +# date : 2017-05-25 +# date : 2017-12-02 diff --git a/adapter-cqld4/src/main/resources/activities/examples/bindings/expr.yaml b/adapter-cqld4/src/main/resources/activities/examples/bindings/expr.yaml new file mode 100644 index 000000000..a4b34004a --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/examples/bindings/expr.yaml @@ -0,0 +1,28 @@ + +# You can run this file with this command line to see the values printed to stdout: +# ./ebdse run driver=stdout yaml=bindings/expr.yaml cycles=10 + +# This file demonstrates different types of timestamp recipes +# that you can use with virtdata. (The bindings used in ebdse) + +# If you want to control the output, uncomment and edit the statement template below +# and modify the named anchors to suit your output requirements. 
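# As a minimal sketch of the Save()/Expr() pattern demonstrated below (the binding names 'a',
# 'b', and 'a_plus_b' are illustrative and not part of this file), each stage saves an
# intermediate value under a name, and Expr() then combines the saved values in one expression:
#
#   a: HashRange(0,9); Save('a') -> int
#   b: HashRange(0,9); Save('b') -> int
#   a_plus_b: Expr('a + b') -> long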
+ +#statements: +# example1: "{fullname}\n" + +bindings: +# flight times based on hour / minute / second computation + hour: HashRange(0,2); ToInt() + minute: Shuffle(0,2); ToInt() + second: HashRange(0,60); ToInt() + flightDate: HashRange(0,2); Mul(3600000); Save('hour'); Shuffle(0,2); Mul(60000); Save('minute'); HashRange(0,60); Mul(1000); Save('second'); Expr('hour + minute + second'); StartingEpochMillis('2018-10-02 04:00:00'); ToDate(); ToString() + flightDateFixed: Save('cycle'); HashRange(0,2); Mul(3600000); Load('cycle'); Save('hour'); Shuffle(0,2); Mul(60000); Save('minute'); Load('cycle'); HashRange(0,60); Mul(1000); Save('second'); Expr('hour + minute + second'); StartingEpochMillis('2018-10-02 04:00:00'); ToDate(); ToString() + flightDateLong: Save('cycle'); HashRange(0,2); Mul(3600000); Load('cycle'); Save('hour'); Shuffle(0,2); Mul(60000); Save('minute'); Load('cycle'); HashRange(0,60); Mul(1000); Save('second'); Expr('hour + minute + second'); ToString() + +# status that depends on score + riskScore: Normal(0.0,5.0); Clamp(1, 100); Save('riskScore') -> int + status: | + Expr('riskScore > 90 ? 0 : 1') -> long; ToBoolean(); ToString() + status_2: | + ToInt(); Expr('riskScore >90 ? 0 : 1') -> int; WeightedStrings('accepted:1;rejected:1') diff --git a/adapter-cqld4/src/main/resources/activities/examples/bindings/text.yaml b/adapter-cqld4/src/main/resources/activities/examples/bindings/text.yaml new file mode 100644 index 000000000..16711d961 --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/examples/bindings/text.yaml @@ -0,0 +1,172 @@ +# You can run this file with this command line to see the values printed to stdout: +# ./ebdse run driver=stdout yaml=bindings/text.yaml cycles=10 + +# This file demonstrates different types of timestamp recipes +# that you can use with virtdata. (The bindings used in ebdse) + +# If you want to control the output, uncomment and edit the statement template below +# and modify the named anchors to suit your output requirements. + +#statements: +# example1: "{fullname}\n" + +bindings: + +# All uncommented lines under this are indented, so they become named bindings below +# the entry above + +# Normally, the value that you get with a cycle starts at 0. + + cycleNum: Identity(); + +# here we convert the cycle number to a text by casting. 
+ id: Identity(); ToString() + +## Names +# See http://docs.virtdata.io/functions/funcref_premade/ +# Full name + fullname: FullNames() + +# Example output: + +# fullname : Norman Wolf +# fullname : Lisa Harris +# fullname : John Williams +# fullname : Freda Gaytan +# fullname : Violet Ferguson +# fullname : Larry Roberts +# fullname : Andrew Daniels +# fullname : Jean Keys +# fullname : Mark Cole +# fullname : Roberta Bounds + + +# Name with last name first + fullname_lastname_first: Template('{}, {}', LastNames(), FirstNames()) + +# Example output: + +# fullname_lastname_first : Miracle, Lisa +# fullname_lastname_first : Wolf, John +# fullname_lastname_first : Harris, Freda +# fullname_lastname_first : Williams, Violet +# fullname_lastname_first : Gaytan, Larry +# fullname_lastname_first : Ferguson, Andrew +# fullname_lastname_first : Roberts, Jean +# fullname_lastname_first : Daniels, Mark +# fullname_lastname_first : Keys, Roberta +# fullname_lastname_first : Cole, Timothy + +# Phone + phone: compose HashRange(10000000000L,99999999999L); Combinations('0-9;0-9;0-9;-;0-9;0-9;0-9;-;0-9;0-9;0-9;0-9') + +# Example output: + +# $ ebdse run driver=stdout yaml=example-bindings format=readout cycles=10 +# phone : 241-478-6787 +# phone : 784-482-7668 +# phone : 804-068-5502 +# phone : 044-195-5579 +# phone : 237-202-5601 +# phone : 916-390-8911 +# phone : 550-943-7851 +# phone : 762-031-1362 +# phone : 234-050-2563 +# phone : 312-672-0039 + +## Career + career: HashedLineToString('data/careers.txt') + +# Example output: + +# career : Paper Goods Machine Setters, Operators, and Tenders +# career : Training and Development Specialists +# career : Embossing Machine Set-Up Operators +# career : Airframe-and-Power-Plant Mechanics +# career : Sales Representatives, Agricultural +# career : Automotive Body and Related Repairers +# career : Community Health Workers +# career : Billing, Posting, and Calculating Machine Operators +# career : Data Processing Equipment Repairers +# career : Sawing Machine Setters and Set-Up Operators + +## Job Description + jobdescription: Add(0); HashedLineToString('data/jobdescription.txt') + +# Example output: + +# jobdescription: Add(0); HashedLineToString('data/jobdescription.txt') + +## Weighted enumerated values +# Sorting hat (even distribution) + house: WeightedStrings('Gryffindor:0.2;Hufflepuff:0.2;Ravenclaw:0.2;Slytherin:0.2') +# Example output: + +# house : Hufflepuff +# house : Ravenclaw +# house : Slytherin +# house : Slytherin +# house : Gryffindor +# house : Hufflepuff +# house : Ravenclaw +# house : Ravenclaw +# house : Hufflepuff +# house : Hufflepuff + +## Weighted prefixes + prefix: WeightedStrings('Mr:0.45;Mrs:0.25;Ms:0.1;Miss:0.1;Dr:0.05') + +# Example output: + +# prefix : Mr +# prefix : Mrs +# prefix : Miss +# prefix : Miss +# prefix : Mr +# prefix : Mrs +# prefix : Mrs +# prefix : Mrs +# prefix : Mr +# prefix : Mr +# prefix : Mr +# prefix : Mr +# prefix : Mrs +# prefix : Mrs +# prefix : Mr +# prefix : Mr +# prefix : Mrs +# prefix : Miss +# prefix : Ms +# prefix : Dr + +## Current Employer + current_employer: HashedLineToString('data/companies.txt') + +# Example output: + +# current_employer : Monsanto Company +# current_employer : International Flavors & Fragrances +# current_employer : Carpenter Technology Corporation +# current_employer : Union Pacific Corporation +# current_employer : Rush Enterprises +# current_employer : Peabody Energy Corporation +# current_employer : Rockwell Automation +# current_employer : Auto-Owners Insurance Group +# 
current_employer : ArcBest Corporation +# current_employer : WGL Holdings + +## Sensor + sensor_name: HashedLineToString('data/variable_words.txt') + +# Example output: + +# sensor_name : rotational_latency +# sensor_name : half_life +# sensor_name : clarity +# sensor_name : fairness +# sensor_name : diversity +# sensor_name : turbulence +# sensor_name : mode +# sensor_name : current +# sensor_name : rating +# sensor_name : stall_speed diff --git a/adapter-cqld4/src/main/resources/activities/examples/bindings/timestamp.yaml b/adapter-cqld4/src/main/resources/activities/examples/bindings/timestamp.yaml new file mode 100644 index 000000000..fd25c6a89 --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/examples/bindings/timestamp.yaml @@ -0,0 +1,72 @@ +# You can run this file with this command line to see the values printed to stdout: +# ./ebdse run driver=stdout yaml=bindings/timestamp.yaml cycles=10 + +# This file demonstrates different types of timestamp recipes +# that you can use with virtdata. (The bindings used in ebdse) + +# If you want to control the output, uncomment and edit the statement template below +# and modify the named anchors to suit your output requirements. + +#statements: +# example1: "{epochMillis}\n" + +bindings: + +# All uncommented lines under this are indented, so they become named bindings below +# the entry above + +# Normally, the value that you get with a cycle starts at 0. + + cycleNum: Identity(); + +# So far, we've only been dealing in milliseconds. This is important to get working +# before adding the next step, converting to a more specific type. +# You can take any millisecond output and add conversion functions as shown below. + +# this one converts to a java.util.Time + + randomDateWithinFeb2018: AddHashRange(0,2419200000L); StartingEpochMillis('2018-02-01 05:00:00'); ToDate(); + +# ToDate(...) supports a few argument forms that you can experiment with. +# ToDate(int) will space the dates apart by this many milliseconds. +# ToDate(int,int) will space the dates apart by some millis and also repeat the value for some number of cycles. + +# Alternately, you might want to use a org.joda.time.DateTime instead of a java.util.Time: + + randomJodaDateWithinFeb2018: AddHashRange(0,2419200000L); StartingEpochMillis('2018-02-01 05:00:00'); ToJodaDateTime(); + +# ToJodaDateTime(...) also supports the space and repeat forms as shown above for ToDate(...) + +# You can also have the dates in order, but with some limited out-of-order pertubation. +# In this case, we are swizzling the offset by some pseudo-random amount, up to an hour (in millis) + + randomDateWithinFeb2018Jittery: AddHashRange(0,3600000L); StartingEpochMillis('2018-02-01 05:00:00'); ToDate(); + +# If you want to have the result be a string-formatted date representation for testing, try this: +# You can use any formatter from here: http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html + + timeuuid_string: AddHashRange(0,2419200000L); StartingEpochMillis('2018-02-01 05:00:00'); StringDateWrapper("yyyy-MM-dd HH:mm:ss.SSS"); + + + + +# ebdse bundles some specialized mapping functions in addition to those explained above, which +# come with eb. These are shown below. + +# You can create a com.datastax.driver.core.LocalDate for use with the java driver. +# This takes as its input, the number of days since the unix epoch. 
+ + localdate: LongToLocalDateDays() + +# You can also take the millis from any of the examples above which provide epoch millis, +# and convert the output to a millisecond-stable value, analogous to the CQL functions +# that do the same. + + minUUID: AddHashRange(0,3600000); StartingEpochMillis('2018-02-01 05:00:00'); ToTimeUUIDMin(); + + maxUUID: AddHashRange(0,3600000); StartingEpochMillis('2018-02-01 05:00:00'); ToTimeUUIDMax(); + +# If you find useful recipes which are needed by others, please contribute them back to our examples! + + + diff --git a/adapter-cqld4/src/main/resources/activities/examples/bindings/timeuuid.yaml b/adapter-cqld4/src/main/resources/activities/examples/bindings/timeuuid.yaml new file mode 100644 index 000000000..7664e8107 --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/examples/bindings/timeuuid.yaml @@ -0,0 +1,62 @@ +# You can run this file with this command line to see the values printed to stdout: +# ./ebdse run driver=stdout yaml=bindings/timeuuid.yaml cycles=10 + +# This file demonstrates different types of timestamp recipes +# that you can use with virtdata. (The bindings used in ebdse) + +# If you want to control the output, uncomment and edit the statement template below +# and modify the named anchors to suit your output requirements. + +#statements: +# example1: "{fullname}\n" + +bindings: + +# All uncommented lines under this are indented, so they become named bindings below +# the entry above + +# Normally, the value that you get with a cycle starts at 0. + + cycleNum: Identity(); +# here we convert the cycle number to a TIMEUUID by casting. + + id: Identity(); ToEpochTimeUUID() + +## Client ID + client_id: AddHashRange(0L, 2000000000000L); ToEpochTimeUUID() + +# Example output: + +# client_id : 4eb369b0-91de-11bd-8000-000000000000 +# client_id : 0b9edab0-5401-11e7-8000-000000000000 +# client_id : 58f21c30-0eec-11f3-8000-000000000000 +# client_id : 4f547e60-a48a-11ca-8000-000000000000 +# client_id : 42db8510-cad8-11bb-8000-000000000000 +# client_id : 78cc7790-529c-11d6-8000-000000000000 +# client_id : 55382200-9cfd-11d7-8000-000000000000 +# client_id : 1ebdbef0-b6dc-11b7-8000-000000000000 +# client_id : 8bc58ba0-57fe-11da-8000-000000000000 +# client_id : 03d1b690-ba64-11f5-8000-000000000000 + +# If you wanted a java.util.UUID instead of a java.util.Date type, you can use something like below. +# This form avoids setting the non-time fields in the timeuuid value. This makes testing determinstically +# possible, when the basic data type as used in practice, is designed specifically to avoid repeatability. + + timeuuid1: AddHashRange(0,2419200000L); StartingEpochMillis('2018-02-01 05:00:00'); ToEpochTimeUUID(); + +# There is a shortcut for this version supported directly by ToEpochTimeUUID(..) as seen here: + + timeuuid2: AddHashRange(0,2419200000L); ToEpochTimeUUID('2018-02-01 05:00:00'); + +# You can also access the finest level of resolution of the timeuuid type, where each cycle value represents +# the smallest possible change for a timeuuid. Bear in mind that this represents many many sub-millisecond +# level timestamp values which may not be easy to see in normal timestamp formats. In this case, millisecond +# semantics are not appropriate, so make sure you adjust the input values accordingly. + + timeuuid_finest1: ToFinestTimeUUID(); + +# However, since starting at some reference time is a popular option, ToFinestTimeUUID(...) also supports +# the shortcut version just like ToEpochTimeUUID(). 
This is provided because converting between epoch +# millis and timeuuid ticks is not fun. + + timeuuid_finest_relative: ToFinestTimeUUID('2018-02-01 05:00:00'); diff --git a/adapter-cqld4/src/main/resources/activities/examples/bindings/uuid.yaml b/adapter-cqld4/src/main/resources/activities/examples/bindings/uuid.yaml new file mode 100644 index 000000000..62e4a76d4 --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/examples/bindings/uuid.yaml @@ -0,0 +1,39 @@ +# You can run this file with this command line to see the values printed to stdout: +# ./ebdse run driver=stdout yaml=bindings/uuid.yaml cycles=10 + +# This file demonstrates different types of timestamp recipes +# that you can use with virtdata. (The bindings used in ebdse) + +# If you want to control the output, uncomment and edit the statement template below +# and modify the named anchors to suit your output requirements. + +#statements: +# example1: "{fullname}\n" + +bindings: + +# All uncommented lines under this are indented, so they become named bindings below +# the entry above + +# Normally, the value that you get with a cycle starts at 0. + + cycleNum: Identity(); + +# here we convert the cycle number to a UUID by casting. + id: Identity(); ToHashedUUID() + +## Station ID (100 unique UUID values, can override stations on the command-line) + station_id: Mod(<>); ToHashedUUID() + +# Example output: + +# station_id : 28df63b7-cc57-43cb-9752-fae69d1653da +# station_id : 5752fae6-9d16-43da-b20f-557a1dd5c571 +# station_id : 720f557a-1dd5-4571-afb2-0dd47d657943 +# station_id : 6fb20dd4-7d65-4943-9967-459343efafdd +# station_id : 19674593-43ef-4fdd-bdf4-98b19568b584 +# station_id : 3df498b1-9568-4584-96fd-76f6081da01a +# station_id : 56fd76f6-081d-401a-85eb-b1d9e5bba058 +# station_id : 45ebb1d9-e5bb-4058-b75d-d51547d31952 +# station_id : 375dd515-47d3-4952-a49d-236be9a5c070 +# station_id : 249d236b-e9a5-4070-9afa-8fae9060d959 diff --git a/adapter-cqld4/src/main/resources/activities/examples/cql-rowops.yaml b/adapter-cqld4/src/main/resources/activities/examples/cql-rowops.yaml new file mode 100644 index 000000000..5874b937d --- /dev/null +++ b/adapter-cqld4/src/main/resources/activities/examples/cql-rowops.yaml @@ -0,0 +1,54 @@ +scenarios: + default: + schema: run driver=cql tags==phase:schema cycles==UNDEF threads==1 + rampup: run driver=cql tags==phase:rampup cycles=TEMPLATE(rampup-cycles,100K) threads=auto + +bindings: + userid: Template('user-{}',ToString()); SaveString('userid'); + interest: Template('interest-{}',ToString()); + +blocks: + - name: schema + tags: + phase: schema + statements: + - create-keyspace: | + create KEYSPACE if not exists TEMPLATE(keyspace,examples) + WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'} + AND durable_writes = 'true'; + - create-users-table: | + create table if not exists TEMPLATE(keyspace,examples).users ( + userid text PRIMARY KEY + ); + - create-interests-table: | + create table if not exists TEMPLATE(keyspace,examples).interests ( + userid text, + interest text, + primary key (interest, userid) + ); + - name: rampup + tags: + phase: rampup + statements: + - insert-users: | + insert into TEMPLATE(keyspace,examples).users (userid) VALUES ({userid}); + tags: + entity: users + - insert-interests: | + insert into TEMPLATE(keyspace,examples).interests( + interest, userid + ) VALUES ( + {interest}, {userid} + ); + tags: + entity: interests + - name: main + tags: + phase: main + statements: + - read-user: | + select * from TEMPLATE(keyspace,examples).users 
+          where userid={userid};
+      - read-interests: |
+          select * from TEMPLATE(keyspace,examples).interests
+          where interest={interest};
diff --git a/adapter-cqld4/src/main/resources/curate_docs/README.md b/adapter-cqld4/src/main/resources/curate_docs/README.md
new file mode 100644
index 000000000..b42aeaa67
--- /dev/null
+++ b/adapter-cqld4/src/main/resources/curate_docs/README.md
@@ -0,0 +1,3 @@
+These docs are carried over from the prior cql 1.9 and cql 3.* drivers. They do not describe
+current behavior, but are here as a reference point for closing the implementation gap
+in the new cqld4 driver before it is moved from prerelease status to mainline releases.
diff --git a/adapter-cqld4/src/main/resources/curate_docs/advanced-cql.md b/adapter-cqld4/src/main/resources/curate_docs/advanced-cql.md
new file mode 100644
index 000000000..0968086a3
--- /dev/null
+++ b/adapter-cqld4/src/main/resources/curate_docs/advanced-cql.md
@@ -0,0 +1,97 @@
+# cql driver - advanced features
+
+This is an addendum to the standard CQL Activity Type docs. For that,
+see "cql". Use the features in this guide carefully. They do not come
+with as much documentation because they are less commonly used than the
+main CQL features.
+
+### ResultSet and Row operators
+
+Within the CQL Activity type, in synchronous mode (activities without the
+async= parameter), you have the ability to attach operators to a given
+statement so that it gets per-statement handling. These operators
+are ways of interrogating the result of an operation, saving values, or
+managing other side-effects for specific types of testing.
+
+When enabled for a statement, operators are applied in this order:
+
+1. Activity-level ResultSet operators are applied in specified order.
+2. Statement-level ResultSet operators are applied in specified order.
+3. Activity-level Row operators are applied in specified order.
+4. Statement-level Row operators are applied in specified order.
+
+The result set handling does not take any extra steps to make
+a copy of the data. When a row is read from the result set,
+it is consumed from it. Thus, if you want to do anything with
+row data, you must apply a row operator as explained below.
+
+### CQL Statement Parameters
+
+- **rsoperators** - If provided as a CQL statement param, then the
+  list of operator names that follow, separated by a comma, will
+  be used to attach ResultSet operators to the given statement.
+  Such operators act on the whole result set of a statement.
+
+- **rowoperators** - If provided as a CQL statement param, then the
+  list of operator names that follow, separated by a comma, will
+  be used to attach Row operators to the given statement.
+
+## Available ResultSet Operators
+
+- pushvars - Push a copy of the current thread local variables onto
+  the thread-local stack. This does nothing with the ResultSet data,
+  but is meant to be used for stateful management of these in
+  conjunction with the row operators below.
+- popvars - Pop the last thread local variable set from the thread-local
+  stack into vars, replacing the previous content. This does nothing
+  with the ResultSet data.
+- clearvars - Clears the contents of the thread local variables. This
+  does nothing with the ResultSet data.
+- trace - Flags a statement to be traced on the server-side and then
+  logs the details of the trace to the trace log file.
+- log - Logs basic data to the main log. This is useful to verify that
+  operators are loading and triggering as expected.
+- assert_singlerow - Throws an exception (ResultSetVerificationException)
+  if the ResultSet has more or less than one row.
+
+Examples:
+
+```yaml
+statements:
+  - s1: |
+      a statement
+    rsoperators: pushvars, clearvars
+```
+
+## Available Row Operators
+
+- savevars - Copies the values of the row into the thread-local variables.
+- saverows - Copies the rows into a special CQL-only thread local row state.
+
+Examples:
+
+```yaml
+statements:
+  - s2: |
+      a statement
+    rowoperators: saverows
+```
+
+## Injecting additional Queries (Future)
+
+It is possible to inject new operations into an activity. However, such operations are _indirect_ to cycles, since they
+must be based on the results of other operations. As such, they will not be represented in cycle output or other
+advanced features. This is a specific feature for the CQL activity -- implemented internally to the way a CQL cycle is
+processed. A future version of NoSQLBench will provide a more uniform way to achieve this result across activity types.
+For now, remember that this is a CQL-only capability.
+
+- subquery-statement - Adds additional operations to the current cycle, based
+  on the contents of the thread-local row state. The value of this parameter
+  is the name of a statement in the current YAML. New operations are built for
+  the local thread based on the contents of the CQL-only thread local row
+  state. Each row is consumed from this list, and a new operation is added to
+  the current cycle.
+- subquery-concurrency - Allow subqueries to execute with concurrency, up to
+  the level specified.
+  default: 1
diff --git a/adapter-cqld4/src/main/resources/curate_docs/cql-errors.md b/adapter-cqld4/src/main/resources/curate_docs/cql-errors.md
new file mode 100644
index 000000000..cc17a96bb
--- /dev/null
+++ b/adapter-cqld4/src/main/resources/curate_docs/cql-errors.md
@@ -0,0 +1,201 @@
+# cql error handling
+
+The error handling facility utilizes a type-aware error handler
+provided by nosqlbench. However, it is much more modular and configurable
+than most error handlers found in other testing tools. The trade-off here
+is that so many options may bewilder newer users. If that applies to you,
+simply use one of these basic recipes in your activity parameters:
+
+    # error and stop on *any* exception
+    # incidentally, this is the same as the deprecated diagnose=true option
+    errors=stop
+
+    # error and stop for (usually) unrecoverable errors
+    # warn and retry everything else (this is actually the default)
+
+    errors=stop,retryable->retry
+
+    # record histograms for WriteTimeoutException, error and stop
+    # for everything else.
+
+    errors=stop,WriteTimeoutException:histogram
+
+As you can see, the error handling format is pretty basic. Behind this basic
+format is a modular and flexible configuration scheme that should allow for either
+simple or advanced testing setups. The errors value is simply a list of
+error-to-handler-verb mappings, but it also allows a single verb to be specified to
+cover all error types. Going from left to right, each mapping is applied in
+order. You can use any of ':', '->', or '=' for the error-to-verb assignment
+operator.
+
+Anytime you assign a value to the *errors* parameter for a cql activity, you are
+replacing the default 'stop,retryable->retry,unverified->stop' configuration.
+That is, each time this value is assigned, a new error handler is configured and
+installed according to the new value.
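+
+For example, because an assignment replaces the defaults rather than extending
+them, the single-verb form below (one of the documented example values) removes
+the default retry behavior entirely and only counts errors instead:
+
+    # with only the count verb assigned, errors are counted but not retried
+    errors=count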
+ +### errors= parameter format + +The errors parameter contains a comma-separated list of one or more +handler assignments where the error can be in any of these forms: + +- group name ( "unapplied" | "retryable" | "unverified" ) +- a single exception name like 'WriteTimeoutException', or a substring of + that which is long enough to avoid ambiguity (only one match allowed) +- A regex, like '.*WriteTimeout.*' (multiple matches allowed) + +The verb can be any of the named starting points in the error handler +stack, as explained below. + +As a special case, if the handler assignment consists of only a single word, +then it is assumed to be the default handler verb. This gets applied +as a last resort to any errors which do not match another handler by class +type or parent class type. This allows for simple hard wiring of a +handler default for all non-specific errors in the form: + + # force the test to stop with any error, even retryable ones + errors=stop + +### Error Handler Verbs + +When an error occurs, you can control how it is handled for the most part. +This is the error handler stack: + +- **stop** - logs an error, and then rethrows the causing exception, + causing nosqlbench to shutdown the current scenario. +- **warn** - log a warning in the log, with details about the error and + associated statement. +- **retry** - Retry the operation if the number of retries hasn't been + used up *and* the causing exception falls in the set of + *retryable* errors. +- **histogram** - keep a histogram of the exception counts, under the name + errorhistos.classname, using the simple class name. The magnitude of + these histos is how long the operation was pending before the related + error occurred. +- **count** - keep a count in metrics for the exception, under the name + errorcounts.classname, using the simple class name. +- **counter** - same as **count**, added for compatibility with the newer + universal error handler. This one is the preferred name. +- **ignore** - do nothing, do not even retry or count + +Each handling verb above is ordered from the most invasive to least +invasive starting at the top. With the exception of the **stop** +handler, the rest of them will be applied to an error all the way to the +bottom. For now, the error handling stack is exactly as above. You can't +modify it, although it may be made configurable in the future. + +One way to choose the right handler is to say "How serious is this type of +error to the test results if it happens?" In general, it is best to be +more conservative and choose a more aggressive setting unless you are +specifically wanting to measure how often a given error happens, for +example. + +Each exception type will have one and only one error handler at all times. +No matter how you set an error handler for a class, only the most recently +assigned handler stack will be active for it. This might be important to +keep in mind when you make multiple assignments to potentially overlapping +sets of error types. In any case, the default 'stop' handler will always +catch an error that does not otherwise have a more specific handler +assigned to it. + +##### Error Types + +The errors that can be handled are simply all the exception types that can +be thrown by either the DataStax Java Driver for DSE, *or* the nosqlbench +client itself. This includes errors that indicate a potentially +intermittent failure condition. 
It also includes errors that are more +permanent in nature, like WriteFailure, which would continue to occur on +subsequent retries without some form of intervention. The nosqlbench +application will also generate some additional exceptions that capture +common error cases that the Java driver doesn't or shouldn't have a +special case for, but which may be important for nosqlbench testing +purposes. + +In nosqlbench, all error handlers are specific to a particular kind of +exception that you would catch in a typical application that uses DSE, +although you can tell a handler to take care of a whole category of +problems as long as you know the right name to use. + +##### Assigned by Java Exception Type + +Error handlers can be assigned to a common parent type in order to also handle +all known subtypes, hence the default on the top line applies to all of the +driver exceptions that do not have a more specific handler assigned, either +by a closer parent or directly. + +##### Assigning by Error Group Name + +Error types for which you would commonly assign the same handling behavior +are also grouped in predefined names. If a handler is assigned to one of +the group names, then the handler is assigned all of the exceptions in the +group individually. For example, 'errors=retryable=stop' + +### Recognized Exceptions + +The whole hierarchy of exceptions as of DSE Driver 3.2.0 is as follows, +with the default configuration shown. + + DriverException -> stop + FrameTooLongException + CodecNotFoundException + AuthenticationException + TraceRetrievalException + UnsupportedProtocolVersionException + NoHostAvailableException -> retry (group: retryable) + QueryValidationException (abstract) + InvalidQueryException + InvalidConfigurationInQueryException + UnauthorizedException + SyntaxError + AlreadyExistsException + UnpreparedException + InvalidTypeException + QueryExecutionException (abstract) + UnavailableException + BootstrappingException -> retry (group: retryable) + OverloadedException -> retry (group: retryable) + TruncateException + QueryConsistencyException (abstract) + WriteTimeoutException -> retry (group: retryable) + WriteFailureException -> retry (group: retryable) + ReadFailureException + ReadTimeoutException + FunctionExecutionException + DriverInternalError + ProtocolError + ServerError + BusyPoolException + ConnectionException + TransportException + OperationTimedOutException -> retry (group: retryable) + PagingStateException + UnresolvedUserTypeException + UnsupportedFeatureException + BusyConnectionException + EbdseException (abstract) -> stop + CQLResultSetException (abstract) + UnexpectedPagingException + ResultSetVerificationException + RowVerificationException + ChangeUnappliedCycleException (group:unapplied) + RetriesExhaustedCycleException -> count + +##### Additional Exceptions + +The following exceptions are synthesized directly by nosqlbench, but get +handled alongside the normal exceptions as explained above. + +1. ChangeUnappliedException - The change unapplied condition is important to + detect when it is not expected, although some testing may intentionally send + changes that can't be applied. For this reason, it is kept as a separately + controllable error group "unapplied". +2. UnexpectedPaging - The UnexpectedPaging exception is meant to keep users from + being surprised when there is paging activity in the workload, as this can have + other implications for tuning and performance. 
See the details on the + **maxpages** parameter, and the *fetch size* parameter in the java + driver for details. +3. Unverified\* Exceptions - For data set verification; These exceptions + indicate when a cqlverify activity has found rows that differ from what + was expected. +4. RetriesExhaustedException - Indicates that all retries were exhausted before + a given operation could complete successfully. + diff --git a/adapter-cqld4/src/main/resources/curate_docs/cql-exception-list.md b/adapter-cqld4/src/main/resources/curate_docs/cql-exception-list.md new file mode 100644 index 000000000..9e37b0c56 --- /dev/null +++ b/adapter-cqld4/src/main/resources/curate_docs/cql-exception-list.md @@ -0,0 +1,42 @@ +DriverException -> stop + 1 FrameTooLongException + 2 CodecNotFoundException + 3 AuthenticationException + 4 TraceRetrievalException + 5 UnsupportedProtocolVersionException + 6 NoHostAvailableException + 7 QueryValidationException (abstract) + 8 InvalidQueryException + 9 InvalidConfigurationInQueryException + 10 UnauthorizedException + 11 SyntaxError + 12 AlreadyExistsException + 13 UnpreparedException + 14 InvalidTypeException + 15 QueryExecutionException (abstract) -> retry + 16 UnavailableException + 17 BootstrappingException + 18 OverloadedException + 19 TruncateException + 20 QueryConsistencyException (abstract) + 21 WriteTimeoutException + 22 WriteFailureException + 23 ReadFailureException + 24 ReadTimeoutException + 25 FunctionExecutionException + 26 DriverInternalError + 27 ProtocolError + 28 ServerError + 29 BusyPoolException + 30 ConnectionException + 31 TransportException + 32 OperationTimedOutException + 33 PagingStateException + 34 UnresolvedUserTypeException + 35 UnsupportedFeatureException + 36 BusyConnectionException + 41 EbdseCycleException (abstract) -> stop + 37 ChangeUnappliedCycleException + 38 ResultSetVerificationException + 39 RowVerificationException (abstract) + 40 UnexpectedPagingException diff --git a/adapter-cqld4/src/main/resources/curate_docs/cql-loadbalancing.md b/adapter-cqld4/src/main/resources/curate_docs/cql-loadbalancing.md new file mode 100644 index 000000000..f6b86cf29 --- /dev/null +++ b/adapter-cqld4/src/main/resources/curate_docs/cql-loadbalancing.md @@ -0,0 +1,83 @@ +# CQL Load Balancing Options + +WIth the CQL driver, you may configure the load balancing with the same options you might use in +client code. However, they are expressed here in a command-line friendly form. + +## Combining Policies + +To apply these load balancer policies, set the activity parameter `lbp` with a comma-separated list +of policies from the examples below. + +They are build as a nested set of polices, with the semantics of "and then". For example, the +TokenAwarePolicy followed by the LatencyAwarePolicy looks like `TAP(...),LAP(...)` which means +`TokenAwarePolicy(...)` and then `LatencyAwarePolicy(...)`. This is equivalent to Java code which +first constructs a LatencyAwarePolicy and then wraps it with a TokenAwarePolicy. This follows the +notion that the outer-most policy has primary control over options presented to child policies, and +thus you can think of the routing process as "TokenAwarePolicy decides ... " *and then* with what it +shares with the wrapped child policy, "LatencyAwarePolicy decides...", and so on. + +Even though you can use the simple pollicy descriptions above, they are constructed in the same +programmatic way in Java that you would use to nest them in the specified order. 
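+
+As a minimal sketch (assuming the policy classes of the bundled DataStax Java
+driver 3.x; the contact point and the innermost RoundRobinPolicy are
+illustrative only), `lbp=TAP(),LAP()` corresponds roughly to:
+
+    import com.datastax.driver.core.Cluster;
+    import com.datastax.driver.core.policies.LatencyAwarePolicy;
+    import com.datastax.driver.core.policies.RoundRobinPolicy;
+    import com.datastax.driver.core.policies.TokenAwarePolicy;
+
+    // The child policy is constructed first and then wrapped, so the leftmost
+    // entry in the lbp option becomes the outermost policy in code.
+    Cluster cluster = Cluster.builder()
+        .addContactPoint("127.0.0.1")
+        .withLoadBalancingPolicy(
+            new TokenAwarePolicy(              // TAP(...) wraps ...
+                LatencyAwarePolicy.builder(    // ... LAP(...), which wraps ...
+                    new RoundRobinPolicy())    // ... an illustrative child policy
+                    .build()))
+        .build();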
+ +For example, a token aware policy wrapping a white list policy might look like this on your command +line: + + lbp=TAP(),WLP(127.0.0.1) + +## Supported Load Balancer Policies + +Each supported policy is described in detail below, with the options supported. + +### RRP: Round Robin Policy + +Format: `RRP()` + +**note** You can't wrap another policy with RRP. + +### WLP: White List Policy + +Format: `WLP(addr,...)` + +### TAP: Token Aware Policy + +Format: `TAP()` + +### LAP: Latency Aware Policy + +This policy has many optional parameters, so if you use it you must set them by name. + +Format: `LAP(options...)`, where each option is one of the following: + +- `exclusion_threshold` (or `et`) - The exclusion threshold, or how much worse a node has to be to + be excluded for awhile. Javadoc: The default exclusion threshold (if this method is not called) is + `2`. In other words, the resulting policy excludes nodes that are more than twice slower than the + fastest node. +- `minimum_measurements` (or `mm`) - The minimum number of measurements to take before penalizing a + host. Javadoc: The default for this option (if this method is not called) is `50`. Note that it is + probably not a good idea to put this option too low if only to avoid the influence of JVM warm-up + on newly restarted nodes. +- `retry_period` (or `rp`) - The retry period, in seconds. Javadoc: The retry period defines how + long a node may be penalized by the policy before it is given a 2nd chance. This is 10 seconds by + default. +- `retry_period_ms` (or `rp_ms`) - The retry period, in milliseconds. This is the same as above, but + allows you to have more precise control if needed. +- `scale` (or `s`) - The scale parameter adjusts how abruptly the most recent measurements are + scaled down in the moving average over time. 100ms is the default. Higher values reduce the + significance of more recent measurements, lower values increase it. The default is 100ms. +- `scale_ms` - The scale parameter, in milliseconds. This is the same as above, but allows you to + have more prcise control if needed. +- `update_rate` (or `ur`) - How often a node's latency average is computed. The default is 1/10 + second. +- `update_rate_ms` (or `ur_ms`) - The update rate, in milliseconds. + +Examples: +- `lbp="LAP(mm=10,rp_ms=10000)"` +- `lbp="LatencyAwarePolicy(minimum_measurements=10,retry_period_ms=10000)"` + +### DCARRP: DC-Aware Round Robin Policy + +Format: `DCARRP(localdc=somedcname)` + +This load balancing policy does not expose other non-deprecated options in the bundled version of +the driver, and the datacenter name is required. + diff --git a/adapter-cqld4/src/main/resources/curate_docs/cql.md b/adapter-cqld4/src/main/resources/curate_docs/cql.md new file mode 100644 index 000000000..60b9a3563 --- /dev/null +++ b/adapter-cqld4/src/main/resources/curate_docs/cql.md @@ -0,0 +1,419 @@ +# cql driver + +This is a driver which allows for the execution of CQL statements. This driver supports both sync and async modes, with +detailed metrics provided for both. + +### Example activity definitions + +Run a cql activity named 'cql1', with definitions from activities/cqldefs.yaml + + ... driver=cql alias=cql1 workload=cqldefs + +Run a cql activity defined by cqldefs.yaml, but with shortcut naming + + ... driver=cql workload=cqldefs + +Only run statement groups which match a tag regex + + ... driver=cql workload=cqldefs tags=group:'ddl.*' + +Run the matching 'dml' statements, with 100 cycles, from [1000..1100) + + ... 
driver=cql workload=cqldefs tags=group:'dml.*' cycles=1000..1100 + +This last example shows that the cycle range is [inclusive..exclusive), +to allow for stacking test intervals. This is standard across all +activity types. + +### CQL ActivityType Parameters + +- **cqldriver** - default: dse - The type of driver to use, either dse, or oss. If you need DSE-specific features, use + the dse driver. If you are connecting to an OSS Apache Cassandra cluster, you must use the oss driver. The oss driver + option is only available in nosqlbench. + +- **host** - The host or hosts to use for connection points to + the cluster. If you specify multiple values here, use commas + with no spaces. + Examples: + - `host=192.168.1.25` + - `host=192.168.1.25,testhost42` + +- **workload** - The workload definition which holds the schema and statement defs. + see workload yaml location for additional details + (no default, required) + +- **port** - The port to connect with + +- **cl** - An override to consistency levels for the activity. If + this option is used, then all consistency levels will be replaced + by this one for the current activity, and a log line explaining + the difference with respect to the yaml will be emitted. + This is not a dynamic parameter. It will only be applied at + activity start. + +- **cbopts** - default: none - this is how you customize the cluster + settings for the client, including policies, compression, etc. This + is a string of *Java*-like method calls just as you would use them + in the Cluster.Builder fluent API. They are evaluated inline + with the default Cluster.Builder options not covered below. + Example: cbopts=".withCompression(ProtocolOptions.Compression.NONE)" + +- **whitelist** default: none - Applies a whitelist policy to the load balancing + policy in the driver. If used, a WhitelistPolicy(RoundRobinPolicy()) + will be created and added to the cluster builder on startup. + Examples: + - `whitelist=127.0.0.1` + - `whitelist=127.0.0.1:9042,127.0.0.2:1234` + +- **retrypolicy** default: none - Applies a retry policy in the driver + The only option supported for this version is `retrypolicy=logging`, + which uses the default retry policy, but with logging added. + +- **reconnectpolicy** default: none - Applies a reconnection policy in the + driver Supports + either `reconnectpolicy=exponential(minDelayInMs,maxDelayInMs)` + or `reconnectpolicy=constant(delayInMs)`. The driver reconnects using + this policy when the entire cluster becomes unavailable. + +- **protocol_version** default: unset, defaults to driver default behavior + - Set the CQL protocol version. Valid values are V1, V2, V3, V4, V5, + DSE_V1, DSE_V2. Protocol is usually auto-negotiated, however, the + initial connection may use a lower protocol to ensure connectivity to + older server versions. If you know you are running on a newer server + version, you can set this to match. + +- **pooling** default: none - Applies the connection pooling options to + the policy. Examples: + - `pooling=4:10` + keep between 4 and 10 connections to LOCAL hosts + - `pooling=4:10,2:5` + keep 4-10 connections to LOCAL hosts and 2-5 to REMOTE + - `pooling=4:10:2000` + keep between 4-10 connections to LOCAL hosts with + up to 2000 requests per connection + - `pooling=5:10:2000,2:4:1000` keep between 5-10 connections to + LOCAL hosts with up to 2000 requests per connection, and 2-4 + connection to REMOTE hosts with up to 1000 requests per connection + + Additionally, you may provide the following options on pooling. 
Any + of these that are provided must appear in this order: + `,heartbeat_interval_s:n,idle_timeout_s:n,pool_timeout_ms:n`, so a + full example with all options set would appear as: + `pooling=5:10:2000,2:4:1000,heartbeat_interval_s:30,idle_timeout_s:120,pool_timeout_ms:5` + +- **socketoptions** default: none - Applies any of the valid socket + options to the client when the session is built. Each of the options + uses the long form of the name, with either a numeric or boolean + value. Individual sub-parameters should be separated by a comma, and + the parameter names and values can be separated by either equals or a + colon. All of these values may be changed: + - read_timeout_ms + - connect_timeout_ms + - keep_alive + - reuse_address + - so_linger + - tcp_no_delay + - receive_buffer_size + - send_buffer_size + + Examples: + - `socketoptions=read_timeout_ms=23423,connect_timeout_ms=4444` + - `socketoptions=tcp_no_delay=true` + +- **tokens** default: unset - Only executes statements that fall within + any of the specified token ranges. Others are counted in metrics + as skipped-tokens, with a histogram value of the cycle number. + Examples: + - tokens=1:10000,100000:1000000 + - tokens=1:123456 + +- **maxtries** - default: 10 - how many times an operation may be + attempted before it is disregarded + +- **maxpages** - default: 1 - how many pages can be read from a query which + is larger than the fetchsize. If more than this number of pages + is required for such a query, then an UnexpectedPaging excpetion + is passed to the error handler as explained below. + +- **fetchsize** - controls the driver parameter of the same name. + Suffixed units can be used here, such as "50K". If this parameter + is not present, then the driver option is not set. + +- **cycles** - standard, however the cql activity type will default + this to however many statements are included in the current + activity, after tag filtering, etc. + +- **username** - the user to authenticate as. This option requires + that one of **password** or **passfile** also be defined. + +- **password** - the password to authenticate with. This will be + ignored if passfile is also present. + +- **passfile** - the file to read the password from. The first + line of this file is used as the password. + +- **ssl** - specifies the type of the SSL implementation. + Disabled by default, possible values are `jdk` and `openssl`. + + [Additional parameters may need to be provided](ssl.md). + +- **jmxreporting** - enable JMX reporting if needed. + Examples: + - `jmxreporting=true` + - `jmxreporting=false` (the default) + +- **alias** - this is a standard nosqlbench parameter, however the cql type will use the workload value also as the + alias value when not specified. + +- **errors** - error handler configuration. + (default errors=stop,retryable->retry,unverified->stop) + Examples: + - errors=stop,WriteTimeoutException=histogram + - errors=count + - errors=warn,retryable=count + See the separate help on 'cqlerrors' for detailed + configuration options. + +- **defaultidempotence** - sets default idempotence on the + driver options, but only if it has a value. + (default unset, valid values: true or false) + +- **speculative** - sets the speculative retry policy on the cluster. 
+ (default unset) + This can be in one of the following forms: + - pT:E:L - where :L is optional and + T is a floating point threshold between 0.0 and 100.0 and + E is an allowed number of concurrent speculative executions and + L is the maximum latency tracked in the tracker instance + (L defaults to 15000 when left out) + Examples: + - p99.8:5:15000ms - 99.8 percentile, 5 executions, 15000ms max tracked + - p98:2:10000ms - 98.0 percentile, 2 executions allowed, 10s max tracked + - Tms:E - where :E is optional and + T is a constant threshold latency and + E is the allowed number of concurrent speculative retries + (E default to 5 when left out) + Examples: + - 100ms:5 - constant threshold of 100ms and 5 allowed executions + +- **seq** - selects the statement sequencer used with statement ratios. + (default: bucket) + (options: concat | bucket | interval) + The concat sequencer repeats each statement in order until the ratio + is achieved. + The bucket sequencer uses simple round-robin distribution to plan + statement ratios, a simple but unbalanced form of interleaving. + The interval sequencer apportions statements over time and then by + order of appearance for ties. This has the effect of interleaving + statements from an activity more evenly, but is less obvious in how + it works. + All of the sequencers create deterministic schedules which use an internal + lookup table for indexing into a list of possible statements. + +- **trace** - enables a trace on a subset of operations. This is disabled + by default. + Examples: + `trace=modulo:100,filename:trace.log` + The above traces every 100th cycle to a file named trace.log. + `trace=modulo:1000,filename:stdout` + The above traces every 1000th cycle to stdout. + If the trace log is not specified, then 'tracelog' is assumed. + If the filename is specified as stdout, then traces are dumped to stdout. + +- **sessionid** - names the configuration to be used for this activity. Within a given scenario, any activities that use + the same name for clusterid will share a session and cluster. default: 'default' + +- **drivermetrics** - enable reporting of driver metrics. + default: false + +- **driverprefix** - set the metrics name that will prefix all CQL driver metrics. + default: 'driver.*clusterid*.' + The clusterid specified is included so that separate cluster and session + contexts can be reported independently for advanced tests. + +- **usercodecs** - enable the loading of user codec libraries for more + details see: com.datastax.codecs.framework.UDTCodecInjector in the + nosqlbench code base. This is for dynamic codec loading with + user-provided codecs mapped via the internal UDT APIs. default: false + +- **secureconnectbundle** - used to connect to CaaS, accepts a path to the + secure connect bundle that is downloaded from the CaaS UI. Examples: + - `secureconnectbundle=/tmp/secure-connect-my_db.zip` + - `secureconnectbundle="/home/automaton/secure-connect-my_db.zip"` + + Check + out [Astra Documentation](https://docs.astra.datastax.com/docs/test-loading-data-with-nosqlbench) + for samples + +- **insights** - Set to false to disable the driver from sending insights + monitoring information + - `insights=false` + +- **tickduration** - sets the tickDuration (milliseconds) of + HashedWheelTimer of the java driver. This timer is used to schedule + speculative requests. Examples: + - `tickduration=10` + - `tickduration=100` (driver default value) + +- **compression** - sets the transport compression to use for this + activity. 
Valid values are 'LZ4' and 'SNAPPY'. Both types are bundled + with EBDSE. + +- **showcql** - logs cql statements as INFO (to see INFO messages in stdout use -v or greater) Note: this is expensive + and should only be done to troubleshoot workloads. Do not use `showcql` for your tests. + +- **lbp** - configures the load balancing policies for the Java driver. With this parameter, you can + configure nested load balancing policies in short-hand form. + + The policies available are documented in detail under the help topic `cql-loadbalancing`. See that + guide if you need more than the examples below. + + Examples: + - `lbp=LAP(retry_period=3,scale=10)` - Latency aware policy with retry period of 3 seconds. + (Seconds is the default time unit, unless _ms parameter is used) and scale 10. + - `lbp=LAP(rp=3,s=10)` - Same as above, using the equivalent but terser form. + - `lbp=LAP(rp_ms=3000,s_ms=10000)` - Same as above, with milliseconds instead of + seconds. + - `loadbalancing=LAP(s=10),TAP()` - Latency aware policy, followed by + token aware policy. + +### CQL YAML Parameters + +A uniform YAML configuration format was introduced with engineblock 2.0. +As part of this format, statement parameters were added for the CQL Activity Type. +These parameters will be consolidated with the above parameters in time, but for +now **they are limited to a YAML params block**: + + params: + + ratio: 1 + # Sets the statement ratio within the operation sequencer + # scheme. Integers only. + # When preparing the operation order (AKA sequencing), + # frequency of the associated statements. + + cl: ONE + # Sets the consistency level, using any of the standard + # identifiers from com.datastax.driver.core.ConsistencyLevel, + # any one of: + # LOCAL_QUORUM, ANY, ONE, TWO, THREE, QUORUM, ALL, + # EACH_QUORUM, SERIAL, LOCAL_SERIAL, LOCAL_ONE + + prepared: true + # By default, all statements are prepared. If you are + # creating schema, set this to false. + + idempotent: false + # For statements that are known to be idempotent, set this + # to true + + instrument: false + # If a statement has instrument set to true, then + # individual Timer metrics will be tracked for + # that statement for both successes and errors, + # using the given statement name. + + verify: * + compare: all + # Adds two operators to the operation: + # 1) verify that there is a single row result set in the response. + # 2) verify some or all of the field values by name and/or value. + # If this option is used on any statement, then the activity will + # provide verification metrics and exceptions, including details + # of verification in the log once the activity is completed. + # For full details on this field, see the docs on cqlverify. + + /// Cross-verify all fields and field values between the reference data and + /// the actual data. + all(0x1|0x1<<1|0x1<<2); + + + logresultcsv: true + OR + logresultcsv: myfilename.csv + # If a statement has logresultcsv set to true, + # then individual operations will be logged to a CSV file. + # In this case the CSV file will be named as + # --results.csv. + # If the value is present and not "true", then the value will + # be used as the name of the file. + # + # The format of the file is: + # ,(SUCCESS|FAILURE),,,(.cycles - (provided by core input) A timer around the whole cycle +- \.bind - A timer which tracks the performance of the statement + binding logic, including the generation of data immediately prior +- \.execute - A timer which tracks the performance of op submission + only. 
This is the async execution call, broken out as a separate step. +- \.result - A timer which tracks the performance of an op result only. + This is the async get on the future, broken out as a separate step. +- \.tries - A histogram of how many tries were required to get a + completed operation + +## YAML Format + +The YAML file for a DSE Graph activity has one or more logical yaml documents, +each separted by tree dashes: --- the standard yaml document separator. Each +yaml document may contain a tags section for the purpose of including or +excluding statements for a given activity: + +~~~ (optional) +tags: + tagname: value + ... +~~~ +If no tags are provided in a document section, then it will be matched by +all possible tag filters. Conversely, if no tag filter is applied in +the activity definition, all tagged documents will match. + +Statements can be specified at the top level or within named blocks. When +you have simple needs to just put a few statements into the yaml, the top-level +style will suffice: + +~~~ +name: statement-top-level-example +statements: +- statement 1 +- statement 2 +~~~ + +If you need to represent multiple blocks of statements in the same activity, +you might want to group them into blocks: +~~~ +blocks: +- name: statement-block-1 + statements: + - statement 1 + - statement 2 +~~~ + +At any level that you can specify statements, you can also specify data bindings: + +~~~ +statements: +- statement 1 +- statement 2 +bindings: + bindto1: foo + bindto2: bar + +blocks: +- name: statement-block-1 + statements: + - statement 1 + bindings: + bindto1: foo +~~~ + +Data bindings specify how values are generated to plug into each operation. More +details on data bindings are available in the activity usage guide. + +### Parameter Templating + +Double angle brackets may be used to drop parameters into the YAML +arbitrarily. When the YAML file is loaded, and only then, these parameters +are interpolated from activity parameters like those above. This allows you +to create activity templates that can be customized simply by providing +additional parameters to the activity. There are two forms, +\<\\> and \<\\>. The first +form contains a default value. In any case, if one of these parameters is +encountered and a qualifying value is not found, an error will be thrown. + +### YAML Location + +The YAML file referenced in the yaml= parameter will be searched for in the following places, in this order: +1. A URL, if it starts with 'http:' or 'https:' +2. The local filesystem, if it exists there +3. The internal classpath and assets in the ebdse jar. + +The '.yaml' suffix is not required in the yaml= parameter, however it is +required on the actual file. As well, the logical search path "activities/" +will be used if necessary to locate the file, both on the filesystem and in +the classpath. + +This is a basic example below that can be copied as a starting template. + +## YAML Example + --- + CONTENT TBD + diff --git a/adapter-cqld4/src/main/resources/curate_docs/ssl.md b/adapter-cqld4/src/main/resources/curate_docs/ssl.md new file mode 100644 index 000000000..b6b4ece26 --- /dev/null +++ b/adapter-cqld4/src/main/resources/curate_docs/ssl.md @@ -0,0 +1,56 @@ +# SSL + +Supported options: + +- **ssl** - specifies the type of the SSL implementation. + Disabled by default, possible values are `jdk`, and `openssl`. + +- **tlsversion** - specify the TLS version to use for SSL. 
+ + Examples: + - `tlsversion=TLSv1.2` (the default) + +For `jdk` type, the following options are available: + +- **truststore** - specify the path to the SSL truststore. + + Examples: + - `truststore=file.truststore` + +- **tspass** - specify the password for the SSL truststore. + + Examples: + - `tspass=truststore_pass` + +- **keystore** - specify the path to the SSL keystore. + + Examples: + - `keystore=file.keystore` + +- **kspass** - specify the password for the SSL keystore. + + Examples: + - `kspass=keystore_pass` + +- **keyPassword** - specify the password for the key. + + Examples: + - `keyPassword=password` + + +For `openssl` type, the following options are available: + +- **caCertFilePath** - path to the X509 CA certificate file. + + Examples: + - `caCertFilePath=cacert.crt` + +- **certFilePath** - path to the X509 certificate file. + + Examples: + - `certFilePath=ca.pem` + +- **keyFilePath** - path to the OpenSSL key file. + + Examples: + - `keyFilePath=file.key` diff --git a/adapter-cqld4/src/main/resources/curate_docs/topics.md b/adapter-cqld4/src/main/resources/curate_docs/topics.md new file mode 100644 index 000000000..06a3b52fa --- /dev/null +++ b/adapter-cqld4/src/main/resources/curate_docs/topics.md @@ -0,0 +1,4 @@ +# cql help topics +- cql +- cql-errors +- cql-exception-list diff --git a/driver-dsegraph-shaded/src/main/resources/activities/baselines/graph-wheels.yaml b/driver-dsegraph-shaded/src/main/resources/activities/baselines/graph-wheels.yaml index ac0903dbc..7d0fb6066 100644 --- a/driver-dsegraph-shaded/src/main/resources/activities/baselines/graph-wheels.yaml +++ b/driver-dsegraph-shaded/src/main/resources/activities/baselines/graph-wheels.yaml @@ -1,12 +1,17 @@ -description: creates local graphs which resemble a wagon-wheel topology +description: creates local graphs which resemble a wagon-wheel topology, using + DSE Graph, version 6.8 or newer scenarios: + creategraph: run driver=cqld4 graphname=graph_wheels tags=phase:create-graph cycles===UNDEF + schema: run driver=cqld4 graphname=graph_wheels tags=phase:graph-schema cycles===UNDEF + disable-verify: run driver=cqld4 graphname=graph_wheels tags=phase:disable-verify cycles===UNDEF + rampup: run driver==cqld4 graphname=graph_wheels tags=phase:rampup cycles=1000 default: - creategraph: run driver=dsegraph graphname=graph_wheels tags=phase:graph-schema - schema: run driver=dsegraph graphname=graph_wheels tags=phase:graph-schema - main: run driver==dsegraph graphname=graph_wheels tags=name:main-add cycles=100000 - devmode: run driver=dsegraph graphname=graph_wheels tags=name:dev-mode - prodmode: run driver=dsegraph graphname=graph_wheels tags=name:dev-mode + creategraph: run driver=cqld4 graphname=graph_wheels tags=phase:create-graph cycles===UNDEF + schema: run driver=cqld4 graphname=graph_wheels tags=phase:graph-schema cycles===UNDEF + rampup: run driver==cqld4 graphname=graph_wheels tags=phase:rampup cycles=1 + devmode: run driver=cqld4 graphname=graph_wheels tags=name:dev-mode + prodmode: run driver=cqld4 graphname=graph_wheels tags=name:dev-mode bindings: sessionid: ToEpochTimeUUID()->java.util.UUID; ToString(); @@ -16,54 +21,86 @@ bindings: osversion: WeightedStrings('nougat:3;oreo:1;jellybean:2;4:1;4c:1;5:1;5c:1;trusty:1;xenial:1;yosemite:1;el capitan:2;sierra:3;high sierra:1;7:1;10:2') ipaddress: Combinations('1;7;0-3;.;0-2;0-2;0-5;.;0-2;0-2;0-5') createdtime: Add(1505256898) + diag_ten_pct: WeightedLongs('1:1;0:9') + diag_one_pct: WeightedLongs('1:1;0:99') blocks: - - name: create-graph + create-graph: 
tags: phase: create-graph statements: - - creategraph: >- - system.graph('<>').ifNotExists().create() - - name: create-schema + creategraph: + type: gremlin + script: >- + system.graph('<>').ifNotExists().create() + create-schema: tags: phase: graph-schema statements: - - graph-schema: >- - schema.propertyKey('sessionid').Uuid().ifNotExists().create(); - schema.propertyKey('deviceid').Uuid().ifNotExists().create(); - schema.propertyKey('ipaddress').Text().ifNotExists().create(); - schema.propertyKey('createdtime').Bigint().ifNotExists().create(); - schema.vertexLabel('session').partitionKey('sessionid').properties('ipaddress', 'deviceid', 'createdtime').ifNotExists().create(); - schema.propertyKey('type').Text().ifNotExists().create(); - schema.propertyKey('os').Text().ifNotExists().create(); - schema.propertyKey('osversion').Text().ifNotExists().create(); - schema.vertexLabel('device').partitionKey('deviceid').properties('type', 'os', 'osversion').ifNotExists().create(); - schema.edgeLabel('using').single().connection('session','device').ifNotExists().create(); - tags: - name: graph-schema - - name: dev-mode + graph-schema: + type: gremlin + graphname: <> + script: >- + schema.vertexLabel('session') + .ifNotExists() + .partitionBy('sessionid', Uuid) + .property('ipaddress', Text) + .property('deviceid', Uuid) + .property('createdtime', Bigint) + .create(); + + schema.vertexLabel('device') + .ifNotExists() + .partitionBy('deviceid', Uuid) + .property('type', Text) + .property('os', Text) + .property('osversion', Text) + .create(); + + schema.edgeLabel('using') + .ifNotExists() + .from('session') + .to('device') + .create() + dev-mode: tags: phase: dev-mode statements: - - dev-mode: >- - schema.config().option('graph.schema_mode').set('Development'); - tags: - name: dev-mode - - name: prod-mode + dev-mode: + type: gremlin + graphname: <> + script: >- + schema.config().option('graph.schema_mode').set('Development'); + prod-mode: tags: phase: prod-mode statements: - - prod-mode: >- - schema.config().option('graph.schema_mode').set('Production'); - tags: - name: prod-mode - - name: main + prod-mode: + type: gremlin + graphname: <> + script: >- + schema.config().option('graph.schema_mode').set('Production'); + rampup: tags: - phase: main + phase: rampup statements: - - main-add: >- - device = graph.addVertex(label, 'device','deviceid', {deviceid}, 'type', {type}, 'os', {os}, 'osversion', {osversion}); - session = graph.addVertex(label, 'session', 'sessionid', {sessionid}, 'ipaddress', {ipaddress}, 'deviceid', {deviceid}, 'createdtime', {createdtime}); - session.addEdge('using', device); - tags: - name: main-add + main-add: + type: gremlin + diag: "{diag_one_pct}" + graphname: <> + script: >- + device = g.addV('device') + .property('deviceid', '{deviceid}' as UUID) + .property('type', '{type}') + .property('os', '{os}') + .property('osversion', '{osversion}') + .as('d') + .addV('session') + .property('sessionid', '{sessionid}' as UUID) + .property('ipaddress', '{ipaddress}') + .property('deviceid', '{deviceid}' as UUID) + .property('createdtime', {createdtime}) + .as('s') + .addE('using').from('s').to('d'); + +