https://github.com/nosqlbench/nosqlbench.git
update graph wheels to graph workload

parent b12e2afa66, commit d54e75ce63
@ -0,0 +1,12 @@
# Baselines Version 1

In order to avoid changing those tests and possibly impacting
results without warning, the baseline sets are being kept
in separate directories.

## Status

This directory is for baselines version 1. The files in this directory
should not be modified arbitrarily. They need to remain stable so that
comparisons to previous results based on these workloads are still valid.
@ -0,0 +1,107 @@
|
||||
# nb -v run driver=cql yaml=cql-iot tags=phase:schema host=dsehost
|
||||
description: An IOT workload with more optimal settings for DSE
|
||||
|
||||
scenarios:
|
||||
default:
|
||||
schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF
|
||||
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
|
||||
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
|
||||
|
||||
bindings:
|
||||
machine_id: Mod(<<sources:10000>>); ToHashedUUID() -> java.util.UUID
|
||||
sensor_name: HashedLineToString('data/variable_words.txt')
|
||||
time: Mul(<<timespeed:100>>L); Div(<<sources:10000>>L); ToDate()
|
||||
cell_timestamp: Mul(<<timespeed:100>>L); Div(<<sources:10000>>L); Mul(1000L)
|
||||
sensor_value: Normal(0.0,5.0); Add(100.0) -> double
|
||||
station_id: Div(<<sources:10000>>);Mod(<<stations:100>>); ToHashedUUID() -> java.util.UUID
|
||||
data: HashedFileExtractToString('data/lorem_ipsum_full.txt',800,1200)
|
||||
blocks:
|
||||
- tags:
|
||||
phase: schema
|
||||
params:
|
||||
prepared: false
|
||||
statements:
|
||||
- create-keyspace: |
|
||||
create keyspace if not exists <<keyspace:baselines>>
|
||||
WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<<rf:1>>'}
|
||||
AND durable_writes = true;
|
||||
tags:
|
||||
name: create-keyspace
|
||||
- create-table : |
|
||||
create table if not exists <<keyspace:baselines>>.<<table:iot>> (
|
||||
machine_id UUID, // source machine
|
||||
sensor_name text, // sensor name
|
||||
time timestamp, // timestamp of collection
|
||||
sensor_value double, //
|
||||
station_id UUID, // source location
|
||||
data text,
|
||||
PRIMARY KEY ((machine_id, sensor_name), time)
|
||||
) WITH CLUSTERING ORDER BY (time DESC)
|
||||
AND compression = { 'sstable_compression' : '<<compression:LZ4Compressor>>' }
|
||||
AND nodesync={'enabled': 'true'}
|
||||
AND compaction = {
|
||||
'class': 'TimeWindowCompactionStrategy',
|
||||
'compaction_window_size': <<expiry_minutes:60>>,
|
||||
'compaction_window_unit': 'MINUTES',
|
||||
'split_during_flush': true
|
||||
};
|
||||
tags:
|
||||
name: create-table
|
||||
- truncate-table: |
|
||||
truncate table <<keyspace:baselines>>.<<table:iot>>;
|
||||
tags:
|
||||
name: truncate-table
|
||||
- tags:
|
||||
phase: rampup
|
||||
params:
|
||||
cl: <<write_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- insert-rampup: |
|
||||
insert into <<keyspace:baselines>>.<<table:iot>>
|
||||
(machine_id, sensor_name, time, sensor_value, station_id, data)
|
||||
values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data})
|
||||
using timestamp {cell_timestamp}
|
||||
idempotent: true
|
||||
tags:
|
||||
name: insert-rampup
|
||||
- tags:
|
||||
phase: verify
|
||||
type: read
|
||||
params:
|
||||
ratio: 1
|
||||
cl: <<read_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- select-verify: |
|
||||
select * from <<keyspace:baselines>>.<<table:iot>>
|
||||
where machine_id={machine_id} and sensor_name={sensor_name} and time={time};
|
||||
verify-fields: "*, -cell_timestamp"
|
||||
tags:
|
||||
name: select-verify
|
||||
- tags:
|
||||
phase: main
|
||||
type: read
|
||||
params:
|
||||
ratio: <<read_ratio:1>>
|
||||
cl: <<read_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- select-read: |
|
||||
select * from <<keyspace:baselines>>.<<table:iot>>
|
||||
where machine_id={machine_id} and sensor_name={sensor_name}
|
||||
limit <<limit:10>>
|
||||
tags:
|
||||
name: select-read
|
||||
- tags:
|
||||
phase: main
|
||||
type: write
|
||||
params:
|
||||
ratio: <<write_ratio:9>>
|
||||
cl: <<write_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- insert-main: |
|
||||
insert into <<keyspace:baselines>>.<<table:iot>>
|
||||
(machine_id, sensor_name, time, sensor_value, station_id, data)
|
||||
values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data})
|
||||
using timestamp {cell_timestamp}
|
||||
idempotent: true
|
||||
tags:
|
||||
name: insert-main
|
@ -0,0 +1,93 @@
---
title: CQL IoT
weight: 2
---

# CQL IoT

## Description

The CQL IoT workload demonstrates a time-series telemetry system as typically found in IoT applications. The bulk of the
traffic is telemetry ingest. This is useful for establishing steady-state capacity with an actively managed data
lifecycle. This is a steady-state workload, where inserts are 90% of the operations and queries are the remaining 10%.

## Named Scenarios

### default

The default scenario for cql-iot.yaml runs the conventional test phases: schema, rampup, main
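
For reference, the conventional phases can also be run one at a time. The commands below are a sketch
based on the commented invocation at the top of the workload yaml; the host and cycle counts are
illustrative values only:

    # create the schema, preload background data, then run the mixed main phase
    nb run driver=cql yaml=cql-iot tags=phase:schema threads=1 host=dsehost
    nb run driver=cql yaml=cql-iot tags=phase:rampup cycles=10000000 threads=auto host=dsehost
    nb run driver=cql yaml=cql-iot tags=phase:main cycles=10000000 threads=auto host=dsehost
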
## Testing Considerations

For in-depth testing, this workload will take some time to build up data density where TTLs begin purging expired data.
At this point, the test should be considered steady-state.

## Data Set

### baselines.iot dataset (rampup,main)

- machine_id - 1000 unique values
- sensor_name - 100 symbolic names, from a seed file
- time - monotonically increasing timestamp
- station_id - 100 unique values
- sensor_value - normal distribution, median 100, stddev 5.0

## Operations

### insert (rampup, main)

    insert into baselines.iot
    (machine_id, sensor_name, time, sensor_value, station_id)
    values (?,?,?,?,?)

### query (main)

    select * from baselines.iot
    where machine_id=? and sensor_name=?
    limit 10

## Workload Parameters

This workload has no adjustable parameters when used in the baseline tests.

When used for additional testing, the following parameters should be supported (see the example below):

- machines - the number of unique sources (default: 1000)
- stations - the number of unique stations (default: 100)
- limit - the limit for rows in reads (default: 10)
- expiry_minutes - the TTL for data in minutes
- compression - enabled or disabled; to disable, set compression=''
- write_cl - the consistency level for writes (default: LOCAL_QUORUM)
- read_cl - the consistency level for reads (default: LOCAL_QUORUM)
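
A hypothetical override of a few of these parameters on the command line, assuming they are passed as
name=value arguments alongside the other options (the values shown are examples only):

    nb run driver=cql yaml=cql-iot tags=phase:main cycles=10000000 threads=auto \
      expiry_minutes=180 compression='' read_cl=LOCAL_ONE host=dsehost
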
## Key Performance Metrics

Client-side metrics are a more accurate measure of the system behavior from a user's perspective. For microbench and
baseline tests, these are the only required metrics. When gathering metrics from multiple server nodes, they should be
kept in aggregate form, as min, max, and average for each time interval in monitoring. For example, the avg p99 latency
for reads should be kept, as well as the min p99 latency for reads. If possible, metrics should be kept in plot form,
with discrete histogram values per interval.

### Client-Side

- read ops/s
- write ops/s
- read latency histograms
- write latency histograms
- exception counts

### Server-Side

- bytes compacted over time
- pending compactions
- active data on disk
- total data on disk

## Notes on Interpretation

- In order for this test to show useful performance contrasts, it has to be ramped to steady-state.
- Ingest of 1G rows yields an on-disk data density of 20.8 GB using default compression settings.
@ -0,0 +1,140 @@
|
||||
# nb -v run driver=cql yaml=cql-iot tags=phase:schema host=dsehost
|
||||
description: |
|
||||
Time-series data model and access patterns. (use cql-timeseries instead)
|
||||
This is the same as cql-timeseries, which is the preferred name as it is
|
||||
more canonical. This workload is retained for historic reasons.
|
||||
|
||||
scenarios:
|
||||
default:
|
||||
schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF
|
||||
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
|
||||
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
|
||||
astra:
|
||||
schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF
|
||||
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
|
||||
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
|
||||
params:
|
||||
instrument: TEMPLATE(instrument,false)
|
||||
bindings:
|
||||
machine_id: Mod(<<sources:10000>>); ToHashedUUID() -> java.util.UUID
|
||||
sensor_name: HashedLineToString('data/variable_words.txt')
|
||||
time: Mul(<<timespeed:100>>L); Div(<<sources:10000>>L); ToDate()
|
||||
cell_timestamp: Mul(<<timespeed:100>>L); Div(<<sources:10000>>L); Mul(1000L)
|
||||
sensor_value: Normal(0.0,5.0); Add(100.0) -> double
|
||||
station_id: Div(<<sources:10000>>);Mod(<<stations:100>>); ToHashedUUID() -> java.util.UUID
|
||||
data: HashedFileExtractToString('data/lorem_ipsum_full.txt',800,1200)
|
||||
blocks:
|
||||
- tags:
|
||||
phase: schema
|
||||
params:
|
||||
prepared: false
|
||||
statements:
|
||||
- create-keyspace: |
|
||||
create keyspace if not exists <<keyspace:baselines>>
|
||||
WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<<rf:1>>'}
|
||||
AND durable_writes = true;
|
||||
tags:
|
||||
name: create-keyspace
|
||||
- create-table : |
|
||||
create table if not exists <<keyspace:baselines>>.<<table:iot>> (
|
||||
machine_id UUID, // source machine
|
||||
sensor_name text, // sensor name
|
||||
time timestamp, // timestamp of collection
|
||||
sensor_value double, //
|
||||
station_id UUID, // source location
|
||||
data text,
|
||||
PRIMARY KEY ((machine_id, sensor_name), time)
|
||||
) WITH CLUSTERING ORDER BY (time DESC)
|
||||
AND compression = { 'sstable_compression' : '<<compression:LZ4Compressor>>' }
|
||||
AND compaction = {
|
||||
'class': 'TimeWindowCompactionStrategy',
|
||||
'compaction_window_size': <<expiry_minutes:60>>,
|
||||
'compaction_window_unit': 'MINUTES'
|
||||
};
|
||||
tags:
|
||||
name: create-table
|
||||
- truncate-table: |
|
||||
truncate table <<keyspace:baselines>>.<<table:iot>>;
|
||||
tags:
|
||||
name: truncate-table
|
||||
- tags:
|
||||
phase: schema-astra
|
||||
params:
|
||||
prepared: false
|
||||
statements:
|
||||
- create-table-astra : |
|
||||
create table if not exists <<keyspace:baselines>>.<<table:iot>> (
|
||||
machine_id UUID, // source machine
|
||||
sensor_name text, // sensor name
|
||||
time timestamp, // timestamp of collection
|
||||
sensor_value double, //
|
||||
station_id UUID, // source location
|
||||
data text,
|
||||
PRIMARY KEY ((machine_id, sensor_name), time)
|
||||
) WITH CLUSTERING ORDER BY (time DESC);
|
||||
tags:
|
||||
name: create-table-astra
|
||||
- tags:
|
||||
phase: rampup
|
||||
params:
|
||||
cl: <<write_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- insert-rampup: |
|
||||
insert into <<keyspace:baselines>>.<<table:iot>>
|
||||
(machine_id, sensor_name, time, sensor_value, station_id, data)
|
||||
values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data})
|
||||
using timestamp {cell_timestamp}
|
||||
idempotent: true
|
||||
tags:
|
||||
name: insert-rampup
|
||||
params:
|
||||
instrument: TEMPLATE(instrument-writes,TEMPLATE(instrument,false))
|
||||
- tags:
|
||||
phase: verify
|
||||
type: read
|
||||
params:
|
||||
ratio: 1
|
||||
cl: <<read_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- select-verify: |
|
||||
select * from <<keyspace:baselines>>.<<table:iot>>
|
||||
where machine_id={machine_id} and sensor_name={sensor_name} and time={time};
|
||||
verify-fields: "*, -cell_timestamp"
|
||||
tags:
|
||||
name: select-verify
|
||||
params:
|
||||
instrument: TEMPLATE(instrument-reads,TEMPLATE(instrument,false))
|
||||
- tags:
|
||||
phase: main
|
||||
type: read
|
||||
params:
|
||||
ratio: <<read_ratio:1>>
|
||||
cl: <<read_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- select-read: |
|
||||
select * from <<keyspace:baselines>>.<<table:iot>>
|
||||
where machine_id={machine_id} and sensor_name={sensor_name}
|
||||
limit <<limit:10>>
|
||||
tags:
|
||||
name: select-read
|
||||
params:
|
||||
instrument: TEMPLATE(instrument-reads,TEMPLATE(instrument,false))
|
||||
|
||||
- tags:
|
||||
phase: main
|
||||
type: write
|
||||
params:
|
||||
ratio: <<write_ratio:9>>
|
||||
cl: <<write_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- insert-main: |
|
||||
insert into <<keyspace:baselines>>.<<table:iot>>
|
||||
(machine_id, sensor_name, time, sensor_value, station_id, data)
|
||||
values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data})
|
||||
using timestamp {cell_timestamp}
|
||||
idempotent: true
|
||||
tags:
|
||||
name: insert-main
|
||||
params:
|
||||
instrument: TEMPLATE(instrument-writes,TEMPLATE(instrument,false))
|
||||
|
@ -0,0 +1,77 @@
---
title: CQL Key-Value
weight: 1
---

## Description

The CQL Key-Value workload demonstrates the simplest possible schema with payload data. This is useful for measuring
system capacity most directly in terms of raw operations. As a reference point, it provides some insight into the types of
workloads that are constrained by messaging, threading, and tasking, rather than bulk throughput.

During preload, all keys are set with a value. During the main phase of the workload, random keys from the known
population are replaced with new values which never repeat; that is, random partitions are selected for upsert,
with row values never repeating.

## Operations

### insert (rampup, main)

    insert into baselines.keyvalue (key, value) values (?,?);

### read (main)

    select * from baselines.keyvalue where key=?key;

## Data Set

### baselines.keyvalue insert (rampup)

- key - text, number as string, selected sequentially up to keycount
- value - text, number as string, selected sequentially up to valuecount

### baselines.keyvalue insert (main)

- key - text, number as string, selected uniformly within keycount
- value - text, number as string, selected uniformly within valuecount

### baselines.keyvalue read (main)

- key - text, number as string, selected uniformly within keycount

## Workload Parameters

This workload has no adjustable parameters when used in the baseline tests.

When used for additional testing, the following parameters should be supported (see the example below):

- keycount - the number of unique keys
- valuecount - the number of unique values
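
A hypothetical override of the key and value populations on the command line, assuming the template
parameters are passed as name=value arguments (the values shown are examples only):

    nb run driver=cql yaml=cql-keyvalue tags=phase:main cycles=10000000 threads=auto \
      keycount=1000000 valuecount=1000000 host=dsehost
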
## Key Performance Metrics

Client-side metrics are a more accurate measure of the system behavior from a user's perspective. For microbench and
baseline tests, these are the only required metrics. When gathering metrics from multiple server nodes, they should be
kept in aggregate form, as min, max, and average for each time interval in monitoring. For example, the avg p99 latency
for reads should be kept, as well as the min p99 latency for reads. If possible, metrics should be kept in plot form,
with discrete histogram values per interval.

### Client-Side

- read ops/s
- write ops/s
- read latency histograms
- write latency histograms
- exception counts

### Server-Side

- pending compactions
- bytes compacted
- active data on disk
- total data on disk

## Notes on Interpretation

Once the average ratio of overwrites starts to balance with the rate of compaction, a steady state should be achieved.
At this point, pending compactions and bytes compacted should be mostly flat over time.
@ -0,0 +1,102 @@
|
||||
description: A workload with only text keys and text values
|
||||
|
||||
scenarios:
|
||||
default:
|
||||
schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF
|
||||
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
|
||||
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
|
||||
astra:
|
||||
schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF
|
||||
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
|
||||
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
|
||||
|
||||
bindings:
|
||||
seq_key: Mod(<<keycount:1000000000>>); ToString() -> String
|
||||
seq_value: Hash(); Mod(<<valuecount:1000000000>>); ToString() -> String
|
||||
rw_key: <<keydist:Uniform(0,1000000000)->int>>; ToString() -> String
|
||||
rw_value: Hash(); <<valdist:Uniform(0,1000000000)->int>>; ToString() -> String
|
||||
|
||||
blocks:
|
||||
- name: schema
|
||||
tags:
|
||||
phase: schema
|
||||
params:
|
||||
prepared: false
|
||||
statements:
|
||||
- create-keyspace: |
|
||||
create keyspace if not exists <<keyspace:baselines>>
|
||||
WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<<rf:1>>'}
|
||||
AND durable_writes = true;
|
||||
tags:
|
||||
name: create-keyspace
|
||||
- create-table: |
|
||||
create table if not exists <<keyspace:baselines>>.<<table:keyvalue>> (
|
||||
key text,
|
||||
value text,
|
||||
PRIMARY KEY (key)
|
||||
);
|
||||
tags:
|
||||
name: create-table
|
||||
- name: schema-astra
|
||||
tags:
|
||||
phase: schema-astra
|
||||
params:
|
||||
prepared: false
|
||||
statements:
|
||||
- create-table: |
|
||||
create table if not exists <<keyspace:baselines>>.<<table:keyvalue>> (
|
||||
key text,
|
||||
value text,
|
||||
PRIMARY KEY (key)
|
||||
);
|
||||
tags:
|
||||
name: create-table-astra
|
||||
- name: rampup
|
||||
tags:
|
||||
phase: rampup
|
||||
params:
|
||||
cl: <<write_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- rampup-insert: |
|
||||
insert into <<keyspace:baselines>>.<<table:keyvalue>>
|
||||
(key, value)
|
||||
values ({seq_key},{seq_value});
|
||||
tags:
|
||||
name: rampup-insert
|
||||
- name: verify
|
||||
tags:
|
||||
phase: verify
|
||||
type: read
|
||||
params:
|
||||
cl: <<read_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- verify-select: |
|
||||
select * from <<keyspace:baselines>>.<<table:keyvalue>> where key={seq_key};
|
||||
verify-fields: key->seq_key, value->seq_value
|
||||
tags:
|
||||
name: verify
|
||||
- name: main-read
|
||||
tags:
|
||||
phase: main
|
||||
type: read
|
||||
params:
|
||||
ratio: 5
|
||||
cl: <<read_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- main-select: |
|
||||
select * from <<keyspace:baselines>>.<<table:keyvalue>> where key={rw_key};
|
||||
tags:
|
||||
name: main-select
|
||||
- name: main-write
|
||||
tags:
|
||||
phase: main
|
||||
type: write
|
||||
params:
|
||||
ratio: 5
|
||||
cl: <<write_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- main-insert: |
|
||||
insert into <<keyspace:baselines>>.<<table:keyvalue>>
|
||||
(key, value) values ({rw_key}, {rw_value});
|
||||
tags:
|
||||
name: main-insert
|
@ -0,0 +1,112 @@
|
||||
description: A tabular workload with partitions, clusters, and data fields
|
||||
|
||||
scenarios:
|
||||
default:
|
||||
schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF
|
||||
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
|
||||
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
|
||||
astra:
|
||||
schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF
|
||||
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
|
||||
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
|
||||
|
||||
bindings:
|
||||
# for ramp-up and verify
|
||||
part_layout: Div(<<partsize:1000000>>); ToString() -> String
|
||||
clust_layout: Mod(<<partsize:1000000>>); ToString() -> String
|
||||
data: HashedFileExtractToString('data/lorem_ipsum_full.txt',50,150)
|
||||
# for read
|
||||
limit: Uniform(1,10) -> int
|
||||
part_read: Uniform(0,<<partcount:100>>)->int; ToString() -> String
|
||||
clust_read: Add(1); Uniform(0,<<partsize:1000000>>)->int; ToString() -> String
|
||||
# for write
|
||||
part_write: Hash(); Uniform(0,<<partcount:100>>)->int; ToString() -> String
|
||||
clust_write: Hash(); Add(1); Uniform(0,<<partsize:1000000>>)->int; ToString() -> String
|
||||
data_write: Hash(); HashedFileExtractToString('data/lorem_ipsum_full.txt',50,150) -> String
|
||||
|
||||
blocks:
|
||||
- name: schema
|
||||
tags:
|
||||
phase: schema
|
||||
params:
|
||||
prepared: false
|
||||
statements:
|
||||
- create-keyspace: |
|
||||
create keyspace if not exists <<keyspace:baselines>>
|
||||
WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<<rf:1>>'}
|
||||
AND durable_writes = true;
|
||||
tags:
|
||||
name: create-keyspace
|
||||
- create-table: |
|
||||
create table if not exists <<keyspace:baselines>>.<<table:tabular>> (
|
||||
part text,
|
||||
clust text,
|
||||
data text,
|
||||
PRIMARY KEY (part,clust)
|
||||
);
|
||||
tags:
|
||||
name: create-table
|
||||
- name: schema-astra
|
||||
tags:
|
||||
phase: schema-astra
|
||||
params:
|
||||
prepared: false
|
||||
statements:
|
||||
- create-table: |
|
||||
create table if not exists <<keyspace:baselines>>.<<table:tabular>> (
|
||||
part text,
|
||||
clust text,
|
||||
data text,
|
||||
PRIMARY KEY (part,clust)
|
||||
);
|
||||
tags:
|
||||
name: create-table-astra
|
||||
- name: rampup
|
||||
tags:
|
||||
phase: rampup
|
||||
params:
|
||||
cl: <<write_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- rampup-insert: |
|
||||
insert into <<keyspace:baselines>>.<<table:tabular>>
|
||||
(part,clust,data)
|
||||
values ({part_layout},{clust_layout},{data})
|
||||
tags:
|
||||
name: rampup-insert
|
||||
- name: verify
|
||||
tags:
|
||||
phase: verify
|
||||
type: read
|
||||
params:
|
||||
cl: <<read_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- verify-select: |
|
||||
select * from <<keyspace:baselines>>.<<table:tabular>> where part={part_layout} and clust={clust_layout}
|
||||
tags:
|
||||
name: verify-select
|
||||
- name: main-read
|
||||
tags:
|
||||
phase: main
|
||||
type: read
|
||||
params:
|
||||
ratio: 5
|
||||
cl: <<read_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- main-select: |
|
||||
select * from <<keyspace:baselines>>.<<table:tabular>> where part={part_read} limit {limit};
|
||||
tags:
|
||||
name: main-select
|
||||
- name: main-write
|
||||
tags:
|
||||
phase: main
|
||||
type: write
|
||||
params:
|
||||
ratio: 5
|
||||
cl: <<write_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- main-write: |
|
||||
insert into <<keyspace:baselines>>.<<table:tabular>>
|
||||
(part, clust, data)
|
||||
values ({part_write},{clust_write},{data_write})
|
||||
tags:
|
||||
name: main-write
|
@ -0,0 +1,86 @@
---
title: CQL Wide Rows
weight: 3
---

## Description

The CQL Wide Rows workload provides a way to tax a system with wide rows of a given size. This is useful to help
understand underlying performance differences between versions and configuration options when using data models that have
wide rows.

For in-depth testing, this workload needs significant density of partitions in combination with fully populated wide
rows. For exploratory or parameter contrasting tests, ensure that the rampup phase is configured correctly to establish
this initial state.

## Data Set

### baselines.widerows dataset (rampup)

- part - text, number in string form, sequentially from 1..1E9
- clust - text, number in string form, sequentially from 1..1E9
- data - text, extract from lorem ipsum between 50 and 150 characters

### baselines.widerows dataset (main)

- part - text, number in string form, sequentially from 1..1E9
- clust - text, number in string form, sequentially from 1..<partsize>
- data - text, extract from lorem ipsum between 50 and 150 characters

- machine_id - 1000 unique values
- sensor_name - 100 symbolic names, from a seed file
- time - monotonically increasing timestamp
- station_id - 100 unique values
- sensor_value - normal distribution, median 100, stddev 5.0

## Operations

### insert (rampup, main)

    insert into baselines.iot
    (machine_id, sensor_name, time, sensor_value, station_id)
    values (?,?,?,?,?)

### query (main)

    select * from baselines.iot
    where machine_id=? and sensor_name=?
    limit 10

## Workload Parameters

This workload has no adjustable parameters when used in the baseline tests.

When used for additional testing, the following parameters should be supported (see the sizing note below):

- partcount - the number of unique partitions
- partsize - the number of logical rows within a CQL partition
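
As a sizing note (an assumption based on how the layout bindings divide cycles into partitions, not a
documented requirement): to fully populate the wide rows before the main phase, the rampup cycle count
should be roughly partcount x partsize. A hypothetical invocation, assuming the workload file is named
cql-widerows.yaml and that the values below are examples only:

    # 100 partitions x 1,000,000 rows per partition = 100,000,000 rampup cycles
    nb run driver=cql yaml=cql-widerows tags=phase:rampup cycles=100000000 threads=auto \
      partcount=100 partsize=1000000 host=dsehost
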
## Key Performance Metrics

Client-side metrics are a more accurate measure of the system behavior from a user's perspective. For microbench and
baseline tests, these are the only required metrics. When gathering metrics from multiple server nodes, they should be
kept in aggregate form, as min, max, and average for each time interval in monitoring. For example, the avg p99 latency
for reads should be kept, as well as the min p99 latency for reads. If possible, metrics should be kept in plot form,
with discrete histogram values per interval.

### Client-Side

- read ops/s
- write ops/s
- read latency histograms
- write latency histograms
- exception counts

### Server-Side

- bytes compacted over time
- pending compactions
- active data on disk
- total data on disk

## Notes on Interpretation
@ -0,0 +1,121 @@
|
||||
description: |
|
||||
This is a workload which creates an incrementally growing dataset over cycles.
|
||||
|
||||
Rows will be added incrementally in both rampup and main phases. However, during
|
||||
the main phase, reads will also occur at the same rate, with the read patterns
|
||||
selecting from the size of data written up to that point.
|
||||
In order to ensure that the reads and writes operate against the same set of
|
||||
identifiers, it is crucial that the ratios are not adjusted unless the binding
|
||||
recipes are adjusted to match. With write:read ratio of 1:1 and a prefix function
|
||||
Div(2L) at the front of the main phase bindings, the writes and reads will address
|
||||
the same rows rather than playing leap-frog on the cycle values.
|
||||
The main phase can be run without the rampup phase for this workload, as long
|
||||
as your test is defined as an incremental write and read test. If you need
|
||||
background data pre-loaded to ensure realistic read times against pre-indexed
|
||||
data, then you may use the rampup phase before the main phase. However, be aware
|
||||
that these are simply different test definitions, and are both valid in different ways.
|
||||
Due to how this workload is meant to be used, you must specify main-cycles= when
|
||||
invoking the main phase.
|
||||
The cycles value for the main test includes operations for both writes and reads,
|
||||
thus the logical number of rows in the dataset will be effectively half of that.
|
||||
This workload is intended to be run with a sufficiently high number of cycles.
|
||||
Two key details should be obvious in the read latency metrics -- 1) the relationship
|
||||
between dataset size, request rate, and response times and 2) inflection points
|
||||
between any hot and cold access modes for LRU or other caching mechanisms as
|
||||
the primary cache layer is saturated.
|
||||
|
||||
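# Illustration of the Div(2L) pairing described above (assumes the default 1:1 write:read ratio):
#   cycles 0 and 1 -> Div(2L) -> row 0
#   cycles 2 and 3 -> Div(2L) -> row 1
# so the read and the write drawn from each pair of cycles address the same logical row index,
# rather than leap-frogging across the cycle values.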
scenarios:
|
||||
default:
|
||||
schema: run tags=phase:schema threads==1
|
||||
# rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,100000) threads=auto
|
||||
main: run tags=phase:main cycles===TEMPLATE(main-cycles,0) threads=auto
|
||||
default-schema: run tags=phase:schema threads==1
|
||||
# default-rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,100000) threads=auto
|
||||
default-main: run tags=phase:main cycles===TEMPLATE(main-cycles,0) threads=auto
|
||||
astra:
|
||||
schema: run tags=phase:astra-schema threads==1
|
||||
# rampup: run tags=phase:rampup cycles===TEMPLATE(rampup-cycles,0) threads=auto
|
||||
main: run tags=phase:main cycles===TEMPLATE(main-cycles,0) threads=auto
|
||||
|
||||
params:
|
||||
instrument: true
|
||||
|
||||
bindings:
|
||||
seq_key: ToString()
|
||||
rampup_value: Hash(); ToString();
|
||||
read_key: Div(2L); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); ToString();
|
||||
read_value: Div(2L); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); Hash(); ToString();
|
||||
write_key: Div(2L); Hash(); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); ToString();
|
||||
write_value: Div(2L); Hash(); HashRangeScaled(TEMPLATE(scalefactor,1.0d)); Hash(); ToString();
|
||||
|
||||
|
||||
blocks:
|
||||
- name: schema
|
||||
tags:
|
||||
phase: schema
|
||||
statements:
|
||||
- create-keyspace: |
|
||||
create keyspace if not exists TEMPLATE(keyspace,baselines)
|
||||
WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 'TEMPLATE(rf,1)'}
|
||||
AND durable_writes = true;
|
||||
tags:
|
||||
name: create-keyspace
|
||||
- create-table: |
|
||||
create table if not exists TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental) (
|
||||
key text,
|
||||
value text,
|
||||
PRIMARY KEY (key)
|
||||
);
|
||||
- name: schema-astra
|
||||
tags:
|
||||
phase: schema-astra
|
||||
params:
|
||||
prepared: false
|
||||
statements:
|
||||
- create-table: |
|
||||
create table if not exists TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental) (
|
||||
key text,
|
||||
value text,
|
||||
PRIMARY KEY (key)
|
||||
);
|
||||
tags:
|
||||
name: create-table-astra
|
||||
|
||||
- name: rampup
|
||||
tags:
|
||||
phase: rampup
|
||||
params:
|
||||
cl: TEMPLATE(write_cl,LOCAL_QUORUM)
|
||||
statements:
|
||||
- rampup-insert: |
|
||||
insert into TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental)
|
||||
(key, value)
|
||||
values ({seq_key},{rampup_value});
|
||||
tags:
|
||||
name: rampup-insert
|
||||
- name: main-read
|
||||
tags:
|
||||
phase: main
|
||||
type: read
|
||||
params:
|
||||
ratio: 1
|
||||
cl: TEMPLATE(read_cl,LOCAL_QUORUM)
|
||||
statements:
|
||||
- main-select: |
|
||||
select * from TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental) where key={read_key};
|
||||
tags:
|
||||
name: main-select
|
||||
- name: main-write
|
||||
tags:
|
||||
phase: main
|
||||
type: write
|
||||
params:
|
||||
ratio: 1
|
||||
cl: TEMPLATE(write_cl,LOCAL_QUORUM)
|
||||
statements:
|
||||
- main-insert: |
|
||||
insert into TEMPLATE(keyspace,baselines).TEMPLATE(table,incremental)
|
||||
(key, value) values ({write_key}, {write_value});
|
||||
tags:
|
||||
name: main-insert
|
||||
|
@ -0,0 +1,18 @@
# Baselines Version 2

In order to avoid changing those tests and possibly impacting
results without warning, the baseline sets are being kept
in separate directories.

## Status

This directory is for baselines version 2. These files are the current
in-development set of baselines, and may change in minor ways, or have
additional workloads added, for example. If you are performing baselines
over a period of time and need the workloads to be perfectly stable,
it is best to copy these to your test assets under a distinct name and
call them from there.

To further disambiguate the workloads, each one has a version '2'
appended to the filename.
@ -0,0 +1,109 @@
|
||||
description: |
|
||||
A workload with only text keys and text values.
|
||||
The CQL Key-Value workload demonstrates the simplest possible schema with payload data. This is useful for measuring
|
||||
system capacity most directly in terms of raw operations. As a reference point, it provides some insight into the types of
|
||||
workloads that are constrained by messaging, threading, and tasking, rather than bulk throughput.
|
||||
During preload, all keys are set with a value. During the main phase of the workload, random keys from the known
|
||||
population are replaced with new values which never repeat. During the main phase, random partitions are selected for
|
||||
upsert, with row values never repeating.
|
||||
|
||||
scenarios:
|
||||
default:
|
||||
schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF
|
||||
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
|
||||
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
|
||||
astra:
|
||||
schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF
|
||||
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
|
||||
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
|
||||
|
||||
bindings:
|
||||
seq_key: Mod(<<keycount:1000000000>>); ToString() -> String
|
||||
seq_value: Hash(); Mod(<<valuecount:1000000000>>); ToString() -> String
|
||||
rw_key: <<keydist:Uniform(0,1000000000)->int>>; ToString() -> String
|
||||
rw_value: Hash(); <<valdist:Uniform(0,1000000000)->int>>; ToString() -> String
|
||||
|
||||
blocks:
|
||||
- name: schema
|
||||
tags:
|
||||
phase: schema
|
||||
params:
|
||||
prepared: false
|
||||
statements:
|
||||
- create-keyspace: |
|
||||
create keyspace if not exists <<keyspace:baselines>>
|
||||
WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<<rf:1>>'}
|
||||
AND durable_writes = true;
|
||||
tags:
|
||||
name: create-keyspace
|
||||
- create-table: |
|
||||
create table if not exists <<keyspace:baselines>>.<<table:keyvalue>> (
|
||||
key text,
|
||||
value text,
|
||||
PRIMARY KEY (key)
|
||||
);
|
||||
tags:
|
||||
name: create-table
|
||||
- name: schema-astra
|
||||
tags:
|
||||
phase: schema-astra
|
||||
params:
|
||||
prepared: false
|
||||
statements:
|
||||
- create-table: |
|
||||
create table if not exists <<keyspace:baselines>>.<<table:keyvalue>> (
|
||||
key text,
|
||||
value text,
|
||||
PRIMARY KEY (key)
|
||||
);
|
||||
tags:
|
||||
name: create-table-astra
|
||||
- name: rampup
|
||||
tags:
|
||||
phase: rampup
|
||||
params:
|
||||
cl: <<write_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- rampup-insert: |
|
||||
insert into <<keyspace:baselines>>.<<table:keyvalue>>
|
||||
(key, value)
|
||||
values ({seq_key},{seq_value});
|
||||
tags:
|
||||
name: rampup-insert
|
||||
- name: verify
|
||||
tags:
|
||||
phase: verify
|
||||
type: read
|
||||
params:
|
||||
cl: <<read_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- verify-select: |
|
||||
select * from <<keyspace:baselines>>.<<table:keyvalue>> where key={seq_key};
|
||||
verify-fields: key->seq_key, value->seq_value
|
||||
tags:
|
||||
name: verify
|
||||
- name: main-read
|
||||
tags:
|
||||
phase: main
|
||||
type: read
|
||||
params:
|
||||
ratio: 5
|
||||
cl: <<read_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- main-select: |
|
||||
select * from <<keyspace:baselines>>.<<table:keyvalue>> where key={rw_key};
|
||||
tags:
|
||||
name: main-select
|
||||
- name: main-write
|
||||
tags:
|
||||
phase: main
|
||||
type: write
|
||||
params:
|
||||
ratio: 5
|
||||
cl: <<write_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- main-insert: |
|
||||
insert into <<keyspace:baselines>>.<<table:keyvalue>>
|
||||
(key, value) values ({rw_key}, {rw_value});
|
||||
tags:
|
||||
name: main-insert
|
@ -0,0 +1,176 @@
|
||||
description: |
|
||||
A tabular workload with partitions, clusters, and data fields
|
||||
This workload contains partitioning and clustering along with a set
|
||||
of 8 fields of varying length. The field values vary in size according
|
||||
to the Fibonacci sequence times a base size factor of 10, with
|
||||
an additional 10% variance for each field.
|
||||
The read patterns have a variety of field subsets specified.
|
||||
|
||||
During rampup, all rows will be written partition by partition,
|
||||
filling in all rows of that partition before moving on to the next.
|
||||
Example: With a partition size of 1000 and 1B rows, there will be
|
||||
1000000 partitions.
|
||||
|
||||
During main phase, the read patterns are varied with different
|
||||
field sets. As well, the number of rows which will be returned
|
||||
is varied between 1 and 10.
|
||||
|
||||
By default, reads occur at the same ratio as writes, with main
|
||||
phase writes writing full rows.
|
||||
|
||||
|
||||
scenarios:
|
||||
default:
|
||||
schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF
|
||||
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10B) threads=auto
|
||||
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,100M) threads=auto
|
||||
astra:
|
||||
schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF
|
||||
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
|
||||
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
|
||||
|
||||
params:
|
||||
instrument: true
|
||||
bindings:
|
||||
|
||||
# for ramp-up and verify phases
|
||||
#
|
||||
part_layout: Div(<<partsize:1000>>); ToString() -> String
|
||||
clust_layout: Mod(<<partsize:1000>>); ToString() -> String
|
||||
# todo: update these definitions to use the simpler 10,0.1, 20, 0.2, ...
|
||||
data0: Add(10); HashedFileExtractToString('data/lorem_ipsum_full.txt',9,11)
|
||||
data1: Add(20); HashedFileExtractToString('data/lorem_ipsum_full.txt',18,22)
|
||||
data2: Add(30); HashedFileExtractToString('data/lorem_ipsum_full.txt',27,33)
|
||||
data3: Add(40); HashedFileExtractToString('data/lorem_ipsum_full.txt',45,55)
|
||||
data4: Add(50); HashedFileExtractToString('data/lorem_ipsum_full.txt',72,88)
|
||||
data5: Add(60); HashedFileExtractToString('data/lorem_ipsum_full.txt',107,143)
|
||||
data6: Add(70); HashedFileExtractToString('data/lorem_ipsum_full.txt',189,231)
|
||||
data7: Add(80); HashedFileExtractToString('data/lorem_ipsum_full.txt',306,374)
|
||||
|
||||
# for main phase
|
||||
# for write
|
||||
part_write: Hash(); Uniform(0,<<partcount:100>>)->int; ToString() -> String
|
||||
clust_write: Hash(); Add(1); Uniform(0,<<partsize:1000000>>)->int; ToString() -> String
|
||||
data_write: Hash(); HashedFileExtractToString('data/lorem_ipsum_full.txt',50,150) -> String
|
||||
|
||||
# for read
|
||||
limit: Uniform(1,10) -> int
|
||||
part_read: Uniform(0,<<partcount:100>>)->int; ToString() -> String
|
||||
clust_read: Add(1); Uniform(0,<<partsize:1000000>>)->int; ToString() -> String
|
||||
|
||||
blocks:
|
||||
- name: schema
|
||||
tags:
|
||||
phase: schema
|
||||
params:
|
||||
prepared: false
|
||||
statements:
|
||||
- create-keyspace: |
|
||||
create keyspace if not exists <<keyspace:baselines>>
|
||||
WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<<rf:1>>'}
|
||||
AND durable_writes = true;
|
||||
tags:
|
||||
name: create-keyspace
|
||||
- create-table: |
|
||||
create table if not exists <<keyspace:baselines>>.<<table:tabular>> (
|
||||
part text,
|
||||
clust text,
|
||||
data0 text, data1 text, data2 text, data3 text,
|
||||
data4 text, data5 text, data6 text, data7 text,
|
||||
PRIMARY KEY (part,clust)
|
||||
);
|
||||
tags:
|
||||
name: create-table
|
||||
- name: schema-astra
|
||||
tags:
|
||||
phase: schema-astra
|
||||
params:
|
||||
prepared: false
|
||||
statements:
|
||||
- create-table: |
|
||||
create table if not exists <<keyspace:baselines>>.<<table:tabular>> (
|
||||
part text,
|
||||
clust text,
|
||||
data0 text, data1 text, data2 text, data3 text,
|
||||
data4 text, data5 text, data6 text, data7 text,
|
||||
PRIMARY KEY (part,clust)
|
||||
);
|
||||
tags:
|
||||
name: create-table-astra
|
||||
- name: rampup
|
||||
tags:
|
||||
phase: rampup
|
||||
params:
|
||||
cl: <<write_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- rampup-insert: |
|
||||
insert into <<keyspace:baselines>>.<<table:tabular>>
|
||||
(part,clust,data0,data1,data2,data3,data4,data5,data6,data7)
|
||||
values ({part_layout},{clust_layout},{data0},{data1},{data2},{data3},{data4},{data5},{data6},{data7})
|
||||
tags:
|
||||
name: rampup-insert
|
||||
- name: verify
|
||||
tags:
|
||||
phase: verify
|
||||
type: read
|
||||
params:
|
||||
cl: <<read_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- verify-select: |
|
||||
select * from <<keyspace:baselines>>.<<table:tabular>> where part={part_layout} and clust={clust_layout}
|
||||
tags:
|
||||
name: verify-select
|
||||
- name: main-read
|
||||
tags:
|
||||
phase: main
|
||||
type: read
|
||||
params:
|
||||
ratio: 1
|
||||
cl: <<read_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- main-select-all: |
|
||||
select * from <<keyspace:baselines>>.<<table:tabular>> where part={part_read} limit {limit};
|
||||
tags:
|
||||
name: main-select-all
|
||||
- main-select-01: |
|
||||
select data0,data1 from <<keyspace:baselines>>.<<table:tabular>> where part={part_read} limit {limit};
|
||||
tags:
|
||||
name: main-select-01
|
||||
- main-select-0246: |
|
||||
select data0,data2,data4,data6 from <<keyspace:baselines>>.<<table:tabular>> where part={part_read} limit {limit};
|
||||
tags:
|
||||
name: main-select-0246
|
||||
- main-select-1357: |
|
||||
select data1,data3,data5,data7 from <<keyspace:baselines>>.<<table:tabular>> where part={part_read} limit {limit};
|
||||
tags:
|
||||
name: main-select-1357
|
||||
- main-select-0123: |
|
||||
select data0,data1,data2,data3 from <<keyspace:baselines>>.<<table:tabular>> where part={part_read} limit {limit};
|
||||
tags:
|
||||
name: main-select-0123
|
||||
- main-select-4567: |
|
||||
select data4,data5,data6,data7 from <<keyspace:baselines>>.<<table:tabular>> where part={part_read} limit {limit};
|
||||
tags:
|
||||
name: main-select-4567
|
||||
- main-select: |
|
||||
select data0,data1,data2,data3,data4,data5,data6,data7 from <<keyspace:baselines>>.<<table:tabular>> where part={part_read} limit {limit};
|
||||
tags:
|
||||
name: main-select
|
||||
- name: main-write
|
||||
tags:
|
||||
phase: main
|
||||
type: write
|
||||
params:
|
||||
ratio: 8
|
||||
cl: <<write_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- main-write: |
|
||||
insert into <<keyspace:baselines>>.<<table:tabular>>
|
||||
(part, clust, data0,data1,data2,data3,data4,data5,data6,data7)
|
||||
values ({part_write},{clust_write},{data0},{data1},{data2},{data3},{data4},{data5},{data6},{data7})
|
||||
tags:
|
||||
name: main-write
|
||||
|
@ -0,0 +1,138 @@
|
||||
# nb -v run driver=cql yaml=cql-iot tags=phase:schema host=dsehost
|
||||
description: |
|
||||
This workload emulates a time-series data model and access patterns.
|
||||
|
||||
scenarios:
|
||||
default:
|
||||
schema: run driver=cql tags==phase:schema threads==1 cycles==UNDEF
|
||||
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
|
||||
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
|
||||
astra:
|
||||
schema: run driver=cql tags==phase:schema-astra threads==1 cycles==UNDEF
|
||||
rampup: run driver=cql tags==phase:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
|
||||
main: run driver=cql tags==phase:main cycles===TEMPLATE(main-cycles,10000000) threads=auto
|
||||
params:
|
||||
instrument: TEMPLATE(instrument,false)
|
||||
bindings:
|
||||
machine_id: Mod(<<sources:10000>>); ToHashedUUID() -> java.util.UUID
|
||||
sensor_name: HashedLineToString('data/variable_words.txt')
|
||||
time: Mul(<<timespeed:100>>L); Div(<<sources:10000>>L); ToDate()
|
||||
cell_timestamp: Mul(<<timespeed:100>>L); Div(<<sources:10000>>L); Mul(1000L)
|
||||
sensor_value: Normal(0.0,5.0); Add(100.0) -> double
|
||||
station_id: Div(<<sources:10000>>);Mod(<<stations:100>>); ToHashedUUID() -> java.util.UUID
|
||||
data: HashedFileExtractToString('data/lorem_ipsum_full.txt',800,1200)
|
||||
blocks:
|
||||
- tags:
|
||||
phase: schema
|
||||
params:
|
||||
prepared: false
|
||||
statements:
|
||||
- create-keyspace: |
|
||||
create keyspace if not exists <<keyspace:baselines>>
|
||||
WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '<<rf:1>>'}
|
||||
AND durable_writes = true;
|
||||
tags:
|
||||
name: create-keyspace
|
||||
- create-table : |
|
||||
create table if not exists <<keyspace:baselines>>.<<table:iot>> (
|
||||
machine_id UUID, // source machine
|
||||
sensor_name text, // sensor name
|
||||
time timestamp, // timestamp of collection
|
||||
sensor_value double, //
|
||||
station_id UUID, // source location
|
||||
data text,
|
||||
PRIMARY KEY ((machine_id, sensor_name), time)
|
||||
) WITH CLUSTERING ORDER BY (time DESC)
|
||||
AND compression = { 'sstable_compression' : '<<compression:LZ4Compressor>>' }
|
||||
AND compaction = {
|
||||
'class': 'TimeWindowCompactionStrategy',
|
||||
'compaction_window_size': <<expiry_minutes:60>>,
|
||||
'compaction_window_unit': 'MINUTES'
|
||||
};
|
||||
tags:
|
||||
name: create-table
|
||||
- truncate-table: |
|
||||
truncate table <<keyspace:baselines>>.<<table:iot>>;
|
||||
tags:
|
||||
name: truncate-table
|
||||
- tags:
|
||||
phase: schema-astra
|
||||
params:
|
||||
prepared: false
|
||||
statements:
|
||||
- create-table-astra : |
|
||||
create table if not exists <<keyspace:baselines>>.<<table:iot>> (
|
||||
machine_id UUID, // source machine
|
||||
sensor_name text, // sensor name
|
||||
time timestamp, // timestamp of collection
|
||||
sensor_value double, //
|
||||
station_id UUID, // source location
|
||||
data text,
|
||||
PRIMARY KEY ((machine_id, sensor_name), time)
|
||||
) WITH CLUSTERING ORDER BY (time DESC);
|
||||
tags:
|
||||
name: create-table-astra
|
||||
- tags:
|
||||
phase: rampup
|
||||
params:
|
||||
cl: <<write_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- insert-rampup: |
|
||||
insert into <<keyspace:baselines>>.<<table:iot>>
|
||||
(machine_id, sensor_name, time, sensor_value, station_id, data)
|
||||
values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data})
|
||||
using timestamp {cell_timestamp}
|
||||
idempotent: true
|
||||
tags:
|
||||
name: insert-rampup
|
||||
params:
|
||||
instrument: TEMPLATE(instrument-writes,TEMPLATE(instrument,false))
|
||||
- tags:
|
||||
phase: verify
|
||||
type: read
|
||||
params:
|
||||
ratio: 1
|
||||
cl: <<read_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- select-verify: |
|
||||
select * from <<keyspace:baselines>>.<<table:iot>>
|
||||
where machine_id={machine_id} and sensor_name={sensor_name} and time={time};
|
||||
verify-fields: "*, -cell_timestamp"
|
||||
tags:
|
||||
name: select-verify
|
||||
params:
|
||||
instrument: TEMPLATE(instrument-reads,TEMPLATE(instrument,false))
|
||||
- tags:
|
||||
phase: main
|
||||
type: read
|
||||
params:
|
||||
ratio: <<read_ratio:1>>
|
||||
cl: <<read_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- select-read: |
|
||||
select * from <<keyspace:baselines>>.<<table:iot>>
|
||||
where machine_id={machine_id} and sensor_name={sensor_name}
|
||||
limit <<limit:10>>
|
||||
tags:
|
||||
name: select-read
|
||||
params:
|
||||
instrument: TEMPLATE(instrument-reads,TEMPLATE(instrument,false))
|
||||
|
||||
- tags:
|
||||
phase: main
|
||||
type: write
|
||||
params:
|
||||
ratio: <<write_ratio:9>>
|
||||
cl: <<write_cl:LOCAL_QUORUM>>
|
||||
statements:
|
||||
- insert-main: |
|
||||
insert into <<keyspace:baselines>>.<<table:iot>>
|
||||
(machine_id, sensor_name, time, sensor_value, station_id, data)
|
||||
values ({machine_id}, {sensor_name}, {time}, {sensor_value}, {station_id}, {data})
|
||||
using timestamp {cell_timestamp}
|
||||
idempotent: true
|
||||
tags:
|
||||
name: insert-main
|
||||
params:
|
||||
instrument: TEMPLATE(instrument-writes,TEMPLATE(instrument,false))
|
||||
|
@ -0,0 +1,39 @@
|
||||
# You can run this file with this command line to see the values printed to stdout:
|
||||
# ./ebdse run driver=stdout yaml=bindings/date.yaml cycles=10
|
||||
|
||||
# This file demonstrates different types of timestamp recipes
|
||||
# that you can use with virtdata. (The bindings used in ebdse)
|
||||
|
||||
# If you want to control the output, uncomment and edit the statement template below
|
||||
# and modify the named anchors to suit your output requirements.
|
||||
|
||||
#statements:
|
||||
# example1: "{fullname}\n"
|
||||
|
||||
bindings:
|
||||
# All uncommented lines under this are indented, so they become named bindings below
|
||||
# the entry above
|
||||
|
||||
# Normally, the value that you get with a cycle starts at 0.
|
||||
|
||||
cycleNum: Identity();
|
||||
|
||||
# here we convert the cycle number to a Date by casting.
|
||||
|
||||
id: Identity(); ToDate();
|
||||
|
||||
# Date during 2017 (number of milliseconds in a year: 31,536,000,000)
|
||||
date: StartingEpochMillis('2017-01-01 23:59:59'); AddHashRange(0L,31536000000L); StringDateWrapper("YYYY-MM-dd")
|
||||
|
||||
# Example output:
|
||||
|
||||
# date : 2017-09-17
|
||||
# date : 2017-08-01
|
||||
# date : 2017-04-22
|
||||
# date : 2017-04-09
|
||||
# date : 2017-05-28
|
||||
# date : 2017-08-06
|
||||
# date : 2017-07-05
|
||||
# date : 2017-02-07
|
||||
# date : 2017-05-25
|
||||
# date : 2017-12-02
|
@ -0,0 +1,28 @@
|
||||
|
||||
# You can run this file with this command line to see the values printed to stdout:
|
||||
# ./ebdse run driver=stdout yaml=bindings/expr.yaml cycles=10
|
||||
|
||||
# This file demonstrates different types of expression binding recipes
|
||||
# that you can use with virtdata. (The bindings used in ebdse)
|
||||
|
||||
# If you want to control the output, uncomment and edit the statement template below
|
||||
# and modify the named anchors to suit your output requirements.
|
||||
|
||||
#statements:
|
||||
# example1: "{fullname}\n"
|
||||
|
||||
bindings:
|
||||
# flight times based on hour / minute / second computation
|
||||
hour: HashRange(0,2); ToInt()
|
||||
minute: Shuffle(0,2); ToInt()
|
||||
second: HashRange(0,60); ToInt()
|
||||
flightDate: HashRange(0,2); Mul(3600000); Save('hour'); Shuffle(0,2); Mul(60000); Save('minute'); HashRange(0,60); Mul(1000); Save('second'); Expr('hour + minute + second'); StartingEpochMillis('2018-10-02 04:00:00'); ToDate(); ToString()
|
||||
flightDateFixed: Save('cycle'); HashRange(0,2); Mul(3600000); Load('cycle'); Save('hour'); Shuffle(0,2); Mul(60000); Save('minute'); Load('cycle'); HashRange(0,60); Mul(1000); Save('second'); Expr('hour + minute + second'); StartingEpochMillis('2018-10-02 04:00:00'); ToDate(); ToString()
|
||||
flightDateLong: Save('cycle'); HashRange(0,2); Mul(3600000); Load('cycle'); Save('hour'); Shuffle(0,2); Mul(60000); Save('minute'); Load('cycle'); HashRange(0,60); Mul(1000); Save('second'); Expr('hour + minute + second'); ToString()
|
||||
|
||||
# status that depends on score
|
||||
riskScore: Normal(0.0,5.0); Clamp(1, 100); Save('riskScore') -> int
|
||||
status: |
|
||||
Expr('riskScore > 90 ? 0 : 1') -> long; ToBoolean(); ToString()
|
||||
status_2: |
|
||||
ToInt(); Expr('riskScore >90 ? 0 : 1') -> int; WeightedStrings('accepted:1;rejected:1')
|
@ -0,0 +1,172 @@
|
||||
# You can run this file with this command line to see the values printed to stdout:
|
||||
# ./ebdse run driver=stdout yaml=bindings/text.yaml cycles=10
|
||||
|
||||
# This file demonstrates different types of text binding recipes
|
||||
# that you can use with virtdata. (The bindings used in ebdse)
|
||||
|
||||
# If you want to control the output, uncomment and edit the statement template below
|
||||
# and modify the named anchors to suit your output requirements.
|
||||
|
||||
#statements:
|
||||
# example1: "{fullname}\n"
|
||||
|
||||
bindings:
|
||||
|
||||
# All uncommented lines under this are indented, so they become named bindings below
|
||||
# the entry above
|
||||
|
||||
# Normally, the value that you get with a cycle starts at 0.
|
||||
|
||||
cycleNum: Identity();
|
||||
|
||||
# here we convert the cycle number to a text by casting.
|
||||
id: Identity(); ToString()
|
||||
|
||||
## Names
|
||||
# See http://docs.virtdata.io/functions/funcref_premade/
|
||||
# Full name
|
||||
fullname: FullNames()
|
||||
|
||||
# Example output:
|
||||
|
||||
# fullname : Norman Wolf
|
||||
# fullname : Lisa Harris
|
||||
# fullname : John Williams
|
||||
# fullname : Freda Gaytan
|
||||
# fullname : Violet Ferguson
|
||||
# fullname : Larry Roberts
|
||||
# fullname : Andrew Daniels
|
||||
# fullname : Jean Keys
|
||||
# fullname : Mark Cole
|
||||
# fullname : Roberta Bounds
|
||||
|
||||
|
||||
# Name with last name first
|
||||
fullname_lastname_first: Template('{}, {}', LastNames(), FirstNames())
|
||||
|
||||
# Example output:
|
||||
|
||||
# fullname_lastname_first : Miracle, Lisa
|
||||
# fullname_lastname_first : Wolf, John
|
||||
# fullname_lastname_first : Harris, Freda
|
||||
# fullname_lastname_first : Williams, Violet
|
||||
# fullname_lastname_first : Gaytan, Larry
|
||||
# fullname_lastname_first : Ferguson, Andrew
|
||||
# fullname_lastname_first : Roberts, Jean
|
||||
# fullname_lastname_first : Daniels, Mark
|
||||
# fullname_lastname_first : Keys, Roberta
|
||||
# fullname_lastname_first : Cole, Timothy
|
||||
|
||||
# Phone
|
||||
phone: compose HashRange(10000000000L,99999999999L); Combinations('0-9;0-9;0-9;-;0-9;0-9;0-9;-;0-9;0-9;0-9;0-9')
|
||||
|
||||
# Example output:
|
||||
|
||||
# $ ebdse run driver=stdout yaml=example-bindings format=readout cycles=10
|
||||
# phone : 241-478-6787
|
||||
# phone : 784-482-7668
|
||||
# phone : 804-068-5502
|
||||
# phone : 044-195-5579
|
||||
# phone : 237-202-5601
|
||||
# phone : 916-390-8911
|
||||
# phone : 550-943-7851
|
||||
# phone : 762-031-1362
|
||||
# phone : 234-050-2563
|
||||
# phone : 312-672-0039
|
||||
|
||||
## Career
|
||||
career: HashedLineToString('data/careers.txt')
|
||||
|
||||
# Example output:
|
||||
|
||||
# career : Paper Goods Machine Setters, Operators, and Tenders
|
||||
# career : Training and Development Specialists
|
||||
# career : Embossing Machine Set-Up Operators
|
||||
# career : Airframe-and-Power-Plant Mechanics
|
||||
# career : Sales Representatives, Agricultural
|
||||
# career : Automotive Body and Related Repairers
|
||||
# career : Community Health Workers
|
||||
# career : Billing, Posting, and Calculating Machine Operators
|
||||
# career : Data Processing Equipment Repairers
|
||||
# career : Sawing Machine Setters and Set-Up Operators
|
||||
|
||||
## Job Description
|
||||
jobdescription: Add(0); HashedLineToString('data/jobdescription.txt')
|
||||
|
||||
# Example output:
|
||||
|
||||
# jobdescription: Add(0); HashedLineToString('data/jobdescription.txt')
|
||||
|
||||
## Weighted enumerated values
|
||||
# Sorting hat (even distribution)
|
||||
house: WeightedStrings('Gryffindor:0.2;Hufflepuff:0.2;Ravenclaw:0.2;Slytherin:0.2')
|
||||
# Example output:
|
||||
|
||||
# house : Hufflepuff
|
||||
# house : Ravenclaw
|
||||
# house : Slytherin
|
||||
# house : Slytherin
|
||||
# house : Gryffindor
|
||||
# house : Hufflepuff
|
||||
# house : Ravenclaw
|
||||
# house : Ravenclaw
|
||||
# house : Hufflepuff
|
||||
# house : Hufflepuff
|
||||
|
||||
## Weighted prefixes
|
||||
prefix: WeightedStrings('Mr:0.45;Mrs:0.25;Ms:0.1;Miss:0.1;Dr:0.05')
|
||||
|
||||
# Example output:
|
||||
|
||||
# prefix : Mr
|
||||
# prefix : Mrs
|
||||
# prefix : Miss
|
||||
# prefix : Miss
|
||||
# prefix : Mr
|
||||
# prefix : Mrs
|
||||
# prefix : Mrs
|
||||
# prefix : Mrs
|
||||
# prefix : Mr
|
||||
# prefix : Mr
|
||||
# prefix : Mr
|
||||
# prefix : Mr
|
||||
# prefix : Mrs
|
||||
# prefix : Mrs
|
||||
# prefix : Mr
|
||||
# prefix : Mr
|
||||
# prefix : Mrs
|
||||
# prefix : Miss
|
||||
# prefix : Ms
|
||||
# prefix : Dr
|
||||
|
||||
## Current Employer
|
||||
current_employer: HashedLineToString('data/companies.txt')
|
||||
|
||||
# Example output:
|
||||
|
||||
# current_employer : Monsanto Company
|
||||
# current_employer : International Flavors & Fragrances
|
||||
# current_employer : Carpenter Technology Corporation
|
||||
# current_employer : Union Pacific Corporation
|
||||
# current_employer : Rush Enterprises
|
||||
# current_employer : Peabody Energy Corporation
|
||||
# current_employer : Rockwell Automation
|
||||
# current_employer : Auto-Owners Insurance Group
|
||||
# current_employer : ArcBest Corporation
|
||||
# current_employer : WGL Holdings
|
||||
|
||||
## Sensor
|
||||
sensor_name: HashedLineToString('data/variable_words.txt')
|
||||
|
||||
# Example output:
|
||||
|
||||
# sensor_name : rotational_latency
|
||||
# sensor_name : half_life
|
||||
# sensor_name : clarity
|
||||
# sensor_name : fairness
|
||||
# sensor_name : diversity
|
||||
# sensor_name : turbulence
|
||||
# sensor_name : mode
|
||||
# sensor_name : current
|
||||
# sensor_name : rating
|
||||
# sensor_name : stall_speed
|
@ -0,0 +1,72 @@
|
||||
# You can run this file with this command line to see the values printed to stdout:
|
||||
# ./ebdse run driver=stdout yaml=bindings/timestamp.yaml cycles=10
|
||||
|
||||
# This file demonstrates different types of timestamp recipes
|
||||
# that you can use with virtdata. (The bindings used in ebdse)
|
||||
|
||||
# If you want to control the output, uncomment and edit the statement template below
|
||||
# and modify the named anchors to suit your output requirements.
|
||||
|
||||
#statements:
|
||||
# example1: "{epochMillis}\n"
|
||||
|
||||
bindings:
|
||||
|
||||
# All uncommented lines under this are indented, so they become named bindings below
|
||||
# the entry above
|
||||
|
||||
# Normally, the value that you get with a cycle starts at 0.
|
||||
|
||||
cycleNum: Identity();
|
||||
|
||||
# So far, we've only been dealing in milliseconds. This is important to get working
|
||||
# before adding the next step, converting to a more specific type.
|
||||
# You can take any millisecond output and add conversion functions as shown below.
|
||||
|
||||
# this one converts to a java.util.Date
|
||||
|
||||
randomDateWithinFeb2018: AddHashRange(0,2419200000L); StartingEpochMillis('2018-02-01 05:00:00'); ToDate();
|
||||
|
||||
# ToDate(...) supports a few argument forms that you can experiment with.
|
||||
# ToDate(int) will space the dates apart by this many milliseconds.
|
||||
# ToDate(int,int) will space the dates apart by some millis and also repeat the value for some number of cycles.
|
||||
|
||||
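# For example (binding names here are illustrative), the argument forms above
# can be applied directly to the cycle value:

dateEveryMinute: ToDate(60000);
# same spacing, with each value repeated for 10 cycles
dateEveryMinuteRepeated: ToDate(60000,10);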
# Alternately, you might want to use an org.joda.time.DateTime instead of a java.util.Date:
|
||||
|
||||
randomJodaDateWithinFeb2018: AddHashRange(0,2419200000L); StartingEpochMillis('2018-02-01 05:00:00'); ToJodaDateTime();
|
||||
|
||||
# ToJodaDateTime(...) also supports the space and repeat forms as shown above for ToDate(...)
|
||||
|
||||
# You can also have the dates in order, but with some limited out-of-order perturbation.
|
||||
# In this case, we are swizzling the offset by some pseudo-random amount, up to an hour (in millis)
|
||||
|
||||
randomDateWithinFeb2018Jittery: AddHashRange(0,3600000L); StartingEpochMillis('2018-02-01 05:00:00'); ToDate();
|
||||
|
||||
# If you want to have the result be a string-formatted date representation for testing, try this:
|
||||
# You can use any formatter from here: http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
|
||||
|
||||
timeuuid_string: AddHashRange(0,2419200000L); StartingEpochMillis('2018-02-01 05:00:00'); StringDateWrapper("yyyy-MM-dd HH:mm:ss.SSS");
|
||||
|
||||
|
||||
|
||||
|
||||
# ebdse bundles some specialized mapping functions in addition to those explained above, which
|
||||
# come with eb. These are shown below.
|
||||
|
||||
# You can create a com.datastax.driver.core.LocalDate for use with the java driver.
|
||||
# This takes as its input, the number of days since the unix epoch.
|
||||
|
||||
localdate: LongToLocalDateDays()
|
||||
|
||||
# You can also take the millis from any of the examples above which provide epoch millis,
|
||||
# and convert the output to a millisecond-stable value, analogous to the CQL functions
|
||||
# that do the same.
|
||||
|
||||
minUUID: AddHashRange(0,3600000); StartingEpochMillis('2018-02-01 05:00:00'); ToTimeUUIDMin();
|
||||
|
||||
maxUUID: AddHashRange(0,3600000); StartingEpochMillis('2018-02-01 05:00:00'); ToTimeUUIDMax();
|
||||
|
||||
# If you find useful recipes which are needed by others, please contribute them back to our examples!
|
||||
|
||||
|
||||
|
@ -0,0 +1,62 @@
|
||||
# You can run this file with this command line to see the values printed to stdout:
|
||||
# ./ebdse run driver=stdout yaml=bindings/timeuuid.yaml cycles=10
|
||||
|
||||
# This file demonstrates different types of timeuuid recipes
|
||||
# that you can use with virtdata. (The bindings used in ebdse)
|
||||
|
||||
# If you want to control the output, uncomment and edit the statement template below
|
||||
# and modify the named anchors to suit your output requirements.
|
||||
|
||||
#statements:
|
||||
# example1: "{fullname}\n"
|
||||
|
||||
bindings:
|
||||
|
||||
# All uncommented lines under this are indented, so they become named bindings below
|
||||
# the entry above
|
||||
|
||||
# Normally, the value that you get with a cycle starts at 0.
|
||||
|
||||
cycleNum: Identity();
|
||||
# here we convert the cycle number to a TIMEUUID by casting.
|
||||
|
||||
id: Identity(); ToEpochTimeUUID()
|
||||
|
||||
## Client ID
|
||||
client_id: AddHashRange(0L, 2000000000000L); ToEpochTimeUUID()
|
||||
|
||||
# Example output:
|
||||
|
||||
# client_id : 4eb369b0-91de-11bd-8000-000000000000
|
||||
# client_id : 0b9edab0-5401-11e7-8000-000000000000
|
||||
# client_id : 58f21c30-0eec-11f3-8000-000000000000
|
||||
# client_id : 4f547e60-a48a-11ca-8000-000000000000
|
||||
# client_id : 42db8510-cad8-11bb-8000-000000000000
|
||||
# client_id : 78cc7790-529c-11d6-8000-000000000000
|
||||
# client_id : 55382200-9cfd-11d7-8000-000000000000
|
||||
# client_id : 1ebdbef0-b6dc-11b7-8000-000000000000
|
||||
# client_id : 8bc58ba0-57fe-11da-8000-000000000000
|
||||
# client_id : 03d1b690-ba64-11f5-8000-000000000000
|
||||
|
||||
# If you wanted a java.util.UUID instead of a java.util.Date type, you can use something like below.
|
||||
# This form avoids setting the non-time fields in the timeuuid value. This makes deterministic testing
|
||||
# possible, even though the data type as used in practice is designed specifically to avoid repeatability.
|
||||
|
||||
timeuuid1: AddHashRange(0,2419200000L); StartingEpochMillis('2018-02-01 05:00:00'); ToEpochTimeUUID();
|
||||
|
||||
# There is a shortcut for this version supported directly by ToEpochTimeUUID(..) as seen here:
|
||||
|
||||
timeuuid2: AddHashRange(0,2419200000L); ToEpochTimeUUID('2018-02-01 05:00:00');
|
||||
|
||||
# You can also access the finest level of resolution of the timeuuid type, where each cycle value represents
|
||||
# the smallest possible change for a timeuuid. Bear in mind that this represents many many sub-millisecond
|
||||
# level timestamp values which may not be easy to see in normal timestamp formats. In this case, millisecond
|
||||
# semantics are not appropriate, so make sure you adjust the input values accordingly.
|
||||
|
||||
timeuuid_finest1: ToFinestTimeUUID();
|
||||
|
||||
# However, since starting at some reference time is a popular option, ToFinestTimeUUID(...) also supports
|
||||
# the shortcut version just like ToEpochTimeUUID(). This is provided because converting between epoch
|
||||
# millis and timeuuid ticks is not fun.
|
||||
|
||||
timeuuid_finest_relative: ToFinestTimeUUID('2018-02-01 05:00:00');
|
@ -0,0 +1,39 @@
|
||||
# You can run this file with this command line to see the values printed to stdout:
|
||||
# ./ebdse run driver=stdout yaml=bindings/uuid.yaml cycles=10
|
||||
|
||||
# This file demonstrates different types of uuid recipes
|
||||
# that you can use with virtdata. (The bindings used in ebdse)
|
||||
|
||||
# If you want to control the output, uncomment and edit the statement template below
|
||||
# and modify the named anchors to suit your output requirements.
|
||||
|
||||
#statements:
|
||||
# example1: "{fullname}\n"
|
||||
|
||||
bindings:
|
||||
|
||||
# All uncommented lines under this are indented, so they become named bindings below
|
||||
# the entry above
|
||||
|
||||
# Normally, the value that you get with a cycle starts at 0.
|
||||
|
||||
cycleNum: Identity();
|
||||
|
||||
# here we convert the cycle number to a UUID by casting.
|
||||
id: Identity(); ToHashedUUID()
|
||||
|
||||
## Station ID (100 unique UUID values, can override stations on the command-line)
|
||||
station_id: Mod(<<stations:100>>); ToHashedUUID()
|
||||
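# The default of 100 stations can be overridden with an activity parameter on
# the command line, for example (values are illustrative):
# ./ebdse run driver=stdout yaml=bindings/uuid.yaml cycles=10 stations=1000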
|
||||
# Example output:
|
||||
|
||||
# station_id : 28df63b7-cc57-43cb-9752-fae69d1653da
|
||||
# station_id : 5752fae6-9d16-43da-b20f-557a1dd5c571
|
||||
# station_id : 720f557a-1dd5-4571-afb2-0dd47d657943
|
||||
# station_id : 6fb20dd4-7d65-4943-9967-459343efafdd
|
||||
# station_id : 19674593-43ef-4fdd-bdf4-98b19568b584
|
||||
# station_id : 3df498b1-9568-4584-96fd-76f6081da01a
|
||||
# station_id : 56fd76f6-081d-401a-85eb-b1d9e5bba058
|
||||
# station_id : 45ebb1d9-e5bb-4058-b75d-d51547d31952
|
||||
# station_id : 375dd515-47d3-4952-a49d-236be9a5c070
|
||||
# station_id : 249d236b-e9a5-4070-9afa-8fae9060d959
|
@ -0,0 +1,54 @@
|
||||
scenarios:
|
||||
default:
|
||||
schema: run driver=cql tags==phase:schema cycles==UNDEF threads==1
|
||||
rampup: run driver=cql tags==phase:rampup cycles=TEMPLATE(rampup-cycles,100K) threads=auto
|
||||
|
||||
bindings:
|
||||
userid: Template('user-{}',ToString()); SaveString('userid');
|
||||
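# SaveString('userid') stores each generated value in the thread-local
# variable map under the name 'userid', so it can be referenced again by
# later operations in the same thread.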
interest: Template('interest-{}',ToString());
|
||||
|
||||
blocks:
|
||||
- name: schema
|
||||
tags:
|
||||
phase: schema
|
||||
statements:
|
||||
- create-keyspace: |
|
||||
create KEYSPACE if not exists TEMPLATE(keyspace,examples)
|
||||
WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}
|
||||
AND durable_writes = true;
|
||||
- create-users-table: |
|
||||
create table if not exists TEMPLATE(keyspace,examples).users (
|
||||
userid text PRIMARY KEY
|
||||
);
|
||||
- create-interests-table: |
|
||||
create table if not exists TEMPLATE(keyspace,examples).interests (
|
||||
userid text,
|
||||
interest text,
|
||||
primary key (interest, userid)
|
||||
);
|
||||
- name: rampup
|
||||
tags:
|
||||
phase: rampup
|
||||
statements:
|
||||
- insert-users: |
|
||||
insert into TEMPLATE(keyspace,examples).users (userid) VALUES ({userid});
|
||||
tags:
|
||||
entity: users
|
||||
- insert-interests: |
|
||||
insert into TEMPLATE(keyspace,examples).interests(
|
||||
interest, userid
|
||||
) VALUES (
|
||||
{interest}, {userid}
|
||||
);
|
||||
tags:
|
||||
entity: interests
|
||||
- name: main
|
||||
tags:
|
||||
phase: main
|
||||
statements:
|
||||
- read-user: |
|
||||
select * from TEMPLATE(keyspace,examples).users
|
||||
where userid={userid};
|
||||
- read-interests: |
|
||||
select * from TEMPLATE(keyspace,examples).interests
|
||||
where interest={interest};
|
3
adapter-cqld4/src/main/resources/curate_docs/README.md
Normal file
@ -0,0 +1,3 @@
|
||||
These docs are carried over from the prior cql 1.9 and cql 3.* drivers. They do not describe
|
||||
current behavior, but are here as a reference point for closing the implementation gap
|
||||
in the new cqld4 driver before it is moved from prerelease status to mainline releases.
|
97
adapter-cqld4/src/main/resources/curate_docs/advanced-cql.md
Normal file
@ -0,0 +1,97 @@
|
||||
# cql driver - advanced features
|
||||
|
||||
This is an addendum to the standard CQL Activity Type docs. For that,
|
||||
see "cql". Use the features in this guide carefully. They do not come
|
||||
with as much documentation because they are less used than the main CQL
|
||||
features.
|
||||
|
||||
### ResultSet and Row operators
|
||||
|
||||
Within the CQL Activity type, in synchronous mode (activities without the
|
||||
async= parameter), you have the ability to attach operators to a given
|
||||
statement such that it will get per-statement handling. These operators
|
||||
are ways of interrogating the result of an operation, saving values, or
|
||||
managing other side-effects for specific types of testing.
|
||||
|
||||
When enabled for a statement, operators are applied in this order:
|
||||
|
||||
1. Activity-level ResultSet operators are applied in specified order.
|
||||
2. Statement-level ResultSet operators are applied in specified order.
|
||||
3. Activity-level Row operators are applied in specified order.
|
||||
4. Statement-level Row operators are applied in specified order.
|
||||
|
||||
The result set handling does not go to any extra steps of making
|
||||
a copy of the data. When a row is read from the result set,
|
||||
it is consumed from it. Thus, if you want to do anything with
|
||||
row data, you must apply a row operator as explained below.
|
||||
|
||||
|
||||
### CQL Statement Parameters
|
||||
|
||||
- **rsoperators** - If provided as a CQL statement param, then the
|
||||
list of operator names that follow, separated by a comma, will
|
||||
be used to attach ResultSet operators to the given statement.
|
||||
Such operators act on the whole result set of a statement.
|
||||
|
||||
- **rowoperators** - If provided as a CQL statement param, then the
|
||||
list of operator names that follow, separated by a comma, will
|
||||
be used to attach Row operators to the given statement.
|
||||
|
||||
## Available ResultSet Operators
|
||||
|
||||
- pushvars - Push a copy of the current thread local variables onto
|
||||
the thread-local stack. This does nothing with the ResultSet data,
|
||||
but is meant to be used for stateful management of these in
|
||||
conjunction with the row operators below.
|
||||
- popvars - Pop the last thread local variable set from the thread-local
|
||||
stack into vars, replacing the previous content. This does nothing
|
||||
with the ResultSet data.
|
||||
- clearvars - Clears the contents of the thread local variables. This
|
||||
does nothing with the ResultSet data.
|
||||
- trace - Flags a statement to be traced on the server-side and then
|
||||
logs the details of the trace to the trace log file.
|
||||
- log - Logs basic data to the main log. This is useful to verify that
|
||||
operators are loading and triggering as expected.
|
||||
- assert_singlerow - Throws an exception (ResultSetVerificationException)
|
||||
if the ResultSet has more or less than one row.
|
||||
|
||||
Examples:
|
||||
|
||||
```yaml
|
||||
statements:
|
||||
- s1: |
|
||||
a statement
|
||||
rsoperators: pushvars, clearvars
|
||||
```
|
||||
## Available Row Operators:
|
||||
|
||||
- savevars - Copies the values of the row into the thread-local variables.
|
||||
- saverows - Copies the rows into a special CQL-only thread local row state.
|
||||
|
||||
Examples:
|
||||
|
||||
```yaml
|
||||
statements:
|
||||
- s2: |
|
||||
a statement
|
||||
rowoperators: saverows
|
||||
```
|
||||
|
||||
## Injecting additional Queries (Future)
|
||||
|
||||
It is possible to inject new operations to an activity. However, such operations are _indirect_ to cycles, since they
|
||||
must be based on the results of other operations. As such, they will not be represented in cycle output or other
|
||||
advanced features. This is a specific feature for the CQL activity -- implemented internal to the way a CQL cycle is
|
||||
processed. A future version of NoSQLBench will provide a more uniform way to achieve this result across activity types.
|
||||
For now, remember that this is a CQL-only capability.
|
||||
|
||||
- subquery-statement - Adds additional operations to the current cycle, based
on the contents of the CQL-only thread-local row state. The value of this
parameter is the name of another statement in the current YAML. Each saved
row is consumed from that row state, and a new operation is added to the
current cycle for it (see the sketch at the end of this section).
|
||||
- subquery-concurrency - Allow subqueries to execute with concurrency, up to
|
||||
the level specified.
|
||||
default: 1
|
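A rough sketch of how these parameters could fit together (statement names,
keyspace, and column values are illustrative, and `read-by-interest` stands
for another named statement in the same YAML):

```yaml
statements:
 - list-interests: |
     select interest, userid from examples.interests where userid={userid};
   rowoperators: saverows
   subquery-statement: read-by-interest
   subquery-concurrency: 2
```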
201
adapter-cqld4/src/main/resources/curate_docs/cql-errors.md
Normal file
@ -0,0 +1,201 @@
|
||||
# cql error handling
|
||||
|
||||
The error handling facility utilizes a type-aware error handler
|
||||
provided by nosqlbench. However, it is much more modular and configurable
|
||||
than most error handlers found in other testing tools. The trade-off here
|
||||
is that so many options may bewilder newer users. If you agree, then
|
||||
simply use one of these basic recipes in your activity parameters:
|
||||
|
||||
# error and stop on any exception
|
||||
# incidentally, this is the same as the deprecated diagnose=true option
|
||||
errors=stop
|
||||
|
||||
# error and stop for (usually) unrecoverable errors
|
||||
# warn and retry everything else (this is actually the default)
|
||||
|
||||
errors=stop,retryable->retry
|
||||
|
||||
# record histograms for WriteTimeoutException, error and stop
|
||||
# for everything else.
|
||||
|
||||
errors=stop,WriteTimeoutException:histogram
|
||||
|
||||
As you can see, the error handling format is pretty basic. Behind this basic
|
||||
format is a modular and flexible configuration scheme that should allow for either
|
||||
simple or advanced testing setups. The errors value is simply a list of error to
|
||||
handler verb mappings, but also allows for a simple verb to be specified to
|
||||
cover all error types. Going from left to right, each mapping is applied in
|
||||
order. You can use any of ':', '->', or '=' for the error to verb assignment
|
||||
operator.
|
||||
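For example, these three assignments are equivalent ways to map one exception
type to the histogram verb (the exception name is only illustrative):

    errors=WriteTimeoutException:histogram
    errors=WriteTimeoutException->histogram
    errors=WriteTimeoutException=histogram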
|
||||
Anytime you assign a value to the *errors* parameter for a cql activity, you are
|
||||
replacing the default 'stop,retryable->retry,unverified->stop' configuration.
|
||||
That is, each time this value is assigned, a new error handler is configured and
|
||||
installed according to the new value.
|
||||
|
||||
### errors= parameter format
|
||||
|
||||
The errors parameter contains a comma-separated list of one or more
|
||||
handler assignments where the error can be in any of these forms:
|
||||
|
||||
- group name ( "unapplied" | "retryable" | "unverified" )
|
||||
- a single exception name like 'WriteTimeoutException', or a substring of
|
||||
that which is long enough to avoid ambiguity (only one match allowed)
|
||||
- A regex, like '.*WriteTimeout.*' (multiple matches allowed)
|
||||
|
||||
The verb can be any of the named starting points in the error handler
|
||||
stack, as explained below.
|
||||
|
||||
As a special case, if the handler assignment consists of only a single word,
|
||||
then it is assumed to be the default handler verb. This gets applied
|
||||
as a last resort to any errors which do not match another handler by class
|
||||
type or parent class type. This allows for simple hard wiring of a
|
||||
handler default for all non-specific errors in the form:
|
||||
|
||||
# force the test to stop with any error, even retryable ones
|
||||
errors=stop
|
||||
|
||||
### Error Handler Verbs
|
||||
|
||||
When an error occurs, you can control how it is handled for the most part.
|
||||
This is the error handler stack:
|
||||
|
||||
- **stop** - logs an error, and then rethrows the causing exception,
|
||||
causing nosqlbench to shutdown the current scenario.
|
||||
- **warn** - log a warning in the log, with details about the error and
|
||||
associated statement.
|
||||
- **retry** - Retry the operation if the number of retries hasn't been
|
||||
used up *and* the causing exception falls in the set of
|
||||
*retryable* errors.
|
||||
- **histogram** - keep a histogram of the exception counts, under the name
|
||||
errorhistos.classname, using the simple class name. The magnitude of
|
||||
these histos is how long the operation was pending before the related
|
||||
error occurred.
|
||||
- **count** - keep a count in metrics for the exception, under the name
|
||||
errorcounts.classname, using the simple class name.
|
||||
- **counter** - same as **count**, added for compatibility with the newer
|
||||
universal error handler. This one is the preferred name.
|
||||
- **ignore** - do nothing, do not even retry or count
|
||||
|
||||
Each handling verb above is ordered from the most invasive to least
|
||||
invasive starting at the top. With the exception of the **stop**
|
||||
handler, the rest of them will be applied to an error all the way to the
|
||||
bottom. For now, the error handling stack is exactly as above. You can't
|
||||
modify it, although it may be made configurable in the future.
|
||||
|
||||
One way to choose the right handler is to say "How serious is this type of
|
||||
error to the test results if it happens?" In general, it is best to be
|
||||
more conservative and choose a more aggressive setting unless you are
|
||||
specifically wanting to measure how often a given error happens, for
|
||||
example.
|
||||
|
||||
Each exception type will have one and only one error handler at all times.
|
||||
No matter how you set an error handler for a class, only the most recently
|
||||
assigned handler stack will be active for it. This might be important to
|
||||
keep in mind when you make multiple assignments to potentially overlapping
|
||||
sets of error types. In any case, the default 'stop' handler will always
|
||||
catch an error that does not otherwise have a more specific handler
|
||||
assigned to it.
|
||||
|
||||
##### Error Types
|
||||
|
||||
The errors that can be handled are simply all the exception types that can
|
||||
be thrown by either the DataStax Java Driver for DSE, *or* the nosqlbench
|
||||
client itself. This includes errors that indicate a potentially
|
||||
intermittent failure condition. It also includes errors that are more
|
||||
permanent in nature, like WriteFailure, which would continue to occur on
|
||||
subsequent retries without some form of intervention. The nosqlbench
|
||||
application will also generate some additional exceptions that capture
|
||||
common error cases that the Java driver doesn't or shouldn't have a
|
||||
special case for, but which may be important for nosqlbench testing
|
||||
purposes.
|
||||
|
||||
In nosqlbench, all error handlers are specific to a particular kind of
|
||||
exception that you would catch in a typical application that uses DSE,
|
||||
although you can tell a handler to take care of a whole category of
|
||||
problems as long as you know the right name to use.
|
||||
|
||||
##### Assigned by Java Exception Type
|
||||
|
||||
Error handlers can be assigned to a common parent type in order to also handle
|
||||
all known subtypes, hence the default on the top line applies to all of the
|
||||
driver exceptions that do not have a more specific handler assigned, either
|
||||
by a closer parent or directly.
|
||||
|
||||
##### Assigning by Error Group Name
|
||||
|
||||
Error types for which you would commonly assign the same handling behavior
|
||||
are also grouped in predefined names. If a handler is assigned to one of
|
||||
the group names, then the handler is assigned all of the exceptions in the
|
||||
group individually. For example, 'errors=retryable=stop'
|
||||
|
||||
### Recognized Exceptions
|
||||
|
||||
The whole hierarchy of exceptions as of DSE Driver 3.2.0 is as follows,
|
||||
with the default configuration shown.
|
||||
|
||||
DriverException -> stop
|
||||
FrameTooLongException
|
||||
CodecNotFoundException
|
||||
AuthenticationException
|
||||
TraceRetrievalException
|
||||
UnsupportedProtocolVersionException
|
||||
NoHostAvailableException -> retry (group: retryable)
|
||||
QueryValidationException (abstract)
|
||||
InvalidQueryException
|
||||
InvalidConfigurationInQueryException
|
||||
UnauthorizedException
|
||||
SyntaxError
|
||||
AlreadyExistsException
|
||||
UnpreparedException
|
||||
InvalidTypeException
|
||||
QueryExecutionException (abstract)
|
||||
UnavailableException
|
||||
BootstrappingException -> retry (group: retryable)
|
||||
OverloadedException -> retry (group: retryable)
|
||||
TruncateException
|
||||
QueryConsistencyException (abstract)
|
||||
WriteTimeoutException -> retry (group: retryable)
|
||||
WriteFailureException -> retry (group: retryable)
|
||||
ReadFailureException
|
||||
ReadTimeoutException
|
||||
FunctionExecutionException
|
||||
DriverInternalError
|
||||
ProtocolError
|
||||
ServerError
|
||||
BusyPoolException
|
||||
ConnectionException
|
||||
TransportException
|
||||
OperationTimedOutException -> retry (group: retryable)
|
||||
PagingStateException
|
||||
UnresolvedUserTypeException
|
||||
UnsupportedFeatureException
|
||||
BusyConnectionException
|
||||
EbdseException (abstract) -> stop
|
||||
CQLResultSetException (abstract)
|
||||
UnexpectedPagingException
|
||||
ResultSetVerificationException
|
||||
RowVerificationException
|
||||
ChangeUnappliedCycleException (group:unapplied)
|
||||
RetriesExhaustedCycleException -> count
|
||||
|
||||
##### Additional Exceptions
|
||||
|
||||
The following exceptions are synthesized directly by nosqlbench, but get
|
||||
handled alongside the normal exceptions as explained above.
|
||||
|
||||
1. ChangeUnappliedException - The change unapplied condition is important to
|
||||
detect when it is not expected, although some testing may intentionally send
|
||||
changes that can't be applied. For this reason, it is kept as a separately
|
||||
controllable error group "unapplied".
|
||||
2. UnexpectedPaging - The UnexpectedPaging exception is meant to keep users from
|
||||
being surprised when there is paging activity in the workload, as this can have
|
||||
other implications for tuning and performance. See the details on the
|
||||
**maxpages** parameter, and the *fetch size* parameter in the java
|
||||
driver for details.
|
||||
3. Unverified\* Exceptions - For data set verification; These exceptions
|
||||
indicate when a cqlverify activity has found rows that differ from what
|
||||
was expected.
|
||||
4. RetriesExhaustedException - Indicates that all retries were exhausted before
|
||||
a given operation could complete successfully.
|
||||
|
@ -0,0 +1,42 @@
|
||||
DriverException -> stop
|
||||
1 FrameTooLongException
|
||||
2 CodecNotFoundException
|
||||
3 AuthenticationException
|
||||
4 TraceRetrievalException
|
||||
5 UnsupportedProtocolVersionException
|
||||
6 NoHostAvailableException
|
||||
7 QueryValidationException (abstract)
|
||||
8 InvalidQueryException
|
||||
9 InvalidConfigurationInQueryException
|
||||
10 UnauthorizedException
|
||||
11 SyntaxError
|
||||
12 AlreadyExistsException
|
||||
13 UnpreparedException
|
||||
14 InvalidTypeException
|
||||
15 QueryExecutionException (abstract) -> retry
|
||||
16 UnavailableException
|
||||
17 BootstrappingException
|
||||
18 OverloadedException
|
||||
19 TruncateException
|
||||
20 QueryConsistencyException (abstract)
|
||||
21 WriteTimeoutException
|
||||
22 WriteFailureException
|
||||
23 ReadFailureException
|
||||
24 ReadTimeoutException
|
||||
25 FunctionExecutionException
|
||||
26 DriverInternalError
|
||||
27 ProtocolError
|
||||
28 ServerError
|
||||
29 BusyPoolException
|
||||
30 ConnectionException
|
||||
31 TransportException
|
||||
32 OperationTimedOutException
|
||||
33 PagingStateException
|
||||
34 UnresolvedUserTypeException
|
||||
35 UnsupportedFeatureException
|
||||
36 BusyConnectionException
|
||||
41 EbdseCycleException (abstract) -> stop
|
||||
37 ChangeUnappliedCycleException
|
||||
38 ResultSetVerificationException
|
||||
39 RowVerificationException (abstract)
|
||||
40 UnexpectedPagingException
|
@ -0,0 +1,83 @@
|
||||
# CQL Load Balancing Options
|
||||
|
||||
With the CQL driver, you may configure the load balancing with the same options you might use in
|
||||
client code. However, they are expressed here in a command-line friendly form.
|
||||
|
||||
## Combining Policies
|
||||
|
||||
To apply these load balancer policies, set the activity parameter `lbp` with a comma-separated list
|
||||
of policies from the examples below.
|
||||
|
||||
They are built as a nested set of policies, with the semantics of "and then". For example, the
|
||||
TokenAwarePolicy followed by the LatencyAwarePolicy looks like `TAP(...),LAP(...)` which means
|
||||
`TokenAwarePolicy(...)` and then `LatencyAwarePolicy(...)`. This is equivalent to Java code which
|
||||
first constructs a LatencyAwarePolicy and then wraps it with a TokenAwarePolicy. This follows the
|
||||
notion that the outer-most policy has primary control over options presented to child policies, and
|
||||
thus you can think of the routing process as "TokenAwarePolicy decides ... " *and then* with what it
|
||||
shares with the wrapped child policy, "LatencyAwarePolicy decides...", and so on.
|
||||
|
||||
Even though you can use the simple policy descriptions above, they are constructed in the same
|
||||
programmatic way in Java that you would use to nest them in the specified order.
|
||||
|
||||
For example, a token aware policy wrapping a white list policy might look like this on your command
|
||||
line:
|
||||
|
||||
lbp=TAP(),WLP(127.0.0.1)
|
||||
|
||||
## Supported Load Balancer Policies
|
||||
|
||||
Each supported policy is described in detail below, with the options supported.
|
||||
|
||||
### RRP: Round Robin Policy
|
||||
|
||||
Format: `RRP()`
|
||||
|
||||
**note** You can't wrap another policy with RRP.
|
||||
|
||||
### WLP: White List Policy
|
||||
|
||||
Format: `WLP(addr,...)`
|
||||
|
||||
### TAP: Token Aware Policy
|
||||
|
||||
Format: `TAP()`
|
||||
|
||||
### LAP: Latency Aware Policy
|
||||
|
||||
This policy has many optional parameters, so if you use it you must set them by name.
|
||||
|
||||
Format: `LAP(options...)`, where each option is one of the following:
|
||||
|
||||
- `exclusion_threshold` (or `et`) - The exclusion threshold, or how much worse a node has to be to
|
||||
be excluded for a while. Javadoc: The default exclusion threshold (if this method is not called) is
|
||||
`2`. In other words, the resulting policy excludes nodes that are more than twice slower than the
|
||||
fastest node.
|
||||
- `minimum_measurements` (or `mm`) - The minimum number of measurements to take before penalizing a
|
||||
host. Javadoc: The default for this option (if this method is not called) is `50`. Note that it is
|
||||
probably not a good idea to put this option too low if only to avoid the influence of JVM warm-up
|
||||
on newly restarted nodes.
|
||||
- `retry_period` (or `rp`) - The retry period, in seconds. Javadoc: The retry period defines how
|
||||
long a node may be penalized by the policy before it is given a 2nd chance. This is 10 seconds by
|
||||
default.
|
||||
- `retry_period_ms` (or `rp_ms`) - The retry period, in milliseconds. This is the same as above, but
|
||||
allows you to have more precise control if needed.
|
||||
- `scale` (or `s`) - The scale parameter adjusts how abruptly the most recent measurements are
|
||||
scaled down in the moving average over time. Higher values reduce the
|
||||
significance of more recent measurements, lower values increase it. The default is 100ms.
|
||||
- `scale_ms` - The scale parameter, in milliseconds. This is the same as above, but allows you to
|
||||
have more precise control if needed.
|
||||
- `update_rate` (or `ur`) - How often a node's latency average is computed. The default is 1/10
|
||||
second.
|
||||
- `update_rate_ms` (or `ur_ms`) - The update rate, in milliseconds.
|
||||
|
||||
Examples:
|
||||
- `lbp="LAP(mm=10,rp_ms=10000)"`
|
||||
- `lbp="LatencyAwarePolicy(minimum_measurements=10,retry_period_ms=10000)"`
|
||||
|
||||
### DCARRP: DC-Aware Round Robin Policy
|
||||
|
||||
Format: `DCARRP(localdc=somedcname)`
|
||||
|
||||
This load balancing policy does not expose other non-deprecated options in the bundled version of
|
||||
the driver, and the datacenter name is required.
|
||||
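For example, it can be combined with token awareness (the datacenter name is
illustrative):

    lbp=TAP(),DCARRP(localdc=dc1)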
|
419
adapter-cqld4/src/main/resources/curate_docs/cql.md
Normal file
@ -0,0 +1,419 @@
|
||||
# cql driver
|
||||
|
||||
This is a driver which allows for the execution of CQL statements. This driver supports both sync and async modes, with
|
||||
detailed metrics provided for both.
|
||||
|
||||
### Example activity definitions
|
||||
|
||||
Run a cql activity named 'cql1', with definitions from activities/cqldefs.yaml
|
||||
|
||||
... driver=cql alias=cql1 workload=cqldefs
|
||||
|
||||
Run a cql activity defined by cqldefs.yaml, but with shortcut naming
|
||||
|
||||
... driver=cql workload=cqldefs
|
||||
|
||||
Only run statement groups which match a tag regex
|
||||
|
||||
... driver=cql workload=cqldefs tags=group:'ddl.*'
|
||||
|
||||
Run the matching 'dml' statements, with 100 cycles, from [1000..1100)
|
||||
|
||||
... driver=cql workload=cqldefs tags=group:'dml.*' cycles=1000..1100
|
||||
|
||||
This last example shows that the cycle range is [inclusive..exclusive),
|
||||
to allow for stacking test intervals. This is standard across all
|
||||
activity types.
|
||||
|
||||
### CQL ActivityType Parameters
|
||||
|
||||
- **cqldriver** - default: dse - The type of driver to use, either dse, or oss. If you need DSE-specific features, use
|
||||
the dse driver. If you are connecting to an OSS Apache Cassandra cluster, you must use the oss driver. The oss driver
|
||||
option is only available in nosqlbench.
|
||||
|
||||
- **host** - The host or hosts to use for connection points to
|
||||
the cluster. If you specify multiple values here, use commas
|
||||
with no spaces.
|
||||
Examples:
|
||||
- `host=192.168.1.25`
|
||||
- `host=192.168.1.25,testhost42`
|
||||
|
||||
- **workload** - The workload definition which holds the schema and statement defs.
|
||||
see workload yaml location for additional details
|
||||
(no default, required)
|
||||
|
||||
- **port** - The port to connect with
|
||||
|
||||
- **cl** - An override to consistency levels for the activity. If
|
||||
this option is used, then all consistency levels will be replaced
|
||||
by this one for the current activity, and a log line explaining
|
||||
the difference with respect to the yaml will be emitted.
|
||||
This is not a dynamic parameter. It will only be applied at
|
||||
activity start.
|
||||
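Example (the level shown is illustrative; any standard consistency
level name is accepted):
- `cl=LOCAL_QUORUM`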
|
||||
- **cbopts** - default: none - this is how you customize the cluster
|
||||
settings for the client, including policies, compression, etc. This
|
||||
is a string of *Java*-like method calls just as you would use them
|
||||
in the Cluster.Builder fluent API. They are evaluated inline
|
||||
with the default Cluster.Builder options not covered below.
|
||||
Example: cbopts=".withCompression(ProtocolOptions.Compression.NONE)"
|
||||
|
||||
- **whitelist** default: none - Applies a whitelist policy to the load balancing
|
||||
policy in the driver. If used, a WhitelistPolicy(RoundRobinPolicy())
|
||||
will be created and added to the cluster builder on startup.
|
||||
Examples:
|
||||
- `whitelist=127.0.0.1`
|
||||
- `whitelist=127.0.0.1:9042,127.0.0.2:1234`
|
||||
|
||||
- **retrypolicy** default: none - Applies a retry policy in the driver
|
||||
The only option supported for this version is `retrypolicy=logging`,
|
||||
which uses the default retry policy, but with logging added.
|
||||
|
||||
- **reconnectpolicy** default: none - Applies a reconnection policy in the
|
||||
driver Supports
|
||||
either `reconnectpolicy=exponential(minDelayInMs,maxDelayInMs)`
|
||||
or `reconnectpolicy=constant(delayInMs)`. The driver reconnects using
|
||||
this policy when the entire cluster becomes unavailable.
|
||||
|
||||
- **protocol_version** default: unset, defaults to driver default behavior
|
||||
- Set the CQL protocol version. Valid values are V1, V2, V3, V4, V5,
|
||||
DSE_V1, DSE_V2. Protocol is usually auto-negotiated, however, the
|
||||
initial connection may use a lower protocol to ensure connectivity to
|
||||
older server versions. If you know you are running on a newer server
|
||||
version, you can set this to match.
|
||||
|
||||
- **pooling** default: none - Applies the connection pooling options to
|
||||
the policy. Examples:
|
||||
- `pooling=4:10`
|
||||
keep between 4 and 10 connections to LOCAL hosts
|
||||
- `pooling=4:10,2:5`
|
||||
keep 4-10 connections to LOCAL hosts and 2-5 to REMOTE
|
||||
- `pooling=4:10:2000`
|
||||
keep between 4-10 connections to LOCAL hosts with
|
||||
up to 2000 requests per connection
|
||||
- `pooling=5:10:2000,2:4:1000` keep between 5-10 connections to
|
||||
LOCAL hosts with up to 2000 requests per connection, and 2-4
|
||||
connection to REMOTE hosts with up to 1000 requests per connection
|
||||
|
||||
Additionally, you may provide the following options on pooling. Any
|
||||
of these that are provided must appear in this order:
|
||||
`,heartbeat_interval_s:n,idle_timeout_s:n,pool_timeout_ms:n`, so a
|
||||
full example with all options set would appear as:
|
||||
`pooling=5:10:2000,2:4:1000,heartbeat_interval_s:30,idle_timeout_s:120,pool_timeout_ms:5`
|
||||
|
||||
- **socketoptions** default: none - Applies any of the valid socket
|
||||
options to the client when the session is built. Each of the options
|
||||
uses the long form of the name, with either a numeric or boolean
|
||||
value. Individual sub-parameters should be separated by a comma, and
|
||||
the parameter names and values can be separated by either equals or a
|
||||
colon. All of these values may be changed:
|
||||
- read_timeout_ms
|
||||
- connect_timeout_ms
|
||||
- keep_alive
|
||||
- reuse_address
|
||||
- so_linger
|
||||
- tcp_no_delay
|
||||
- receive_buffer_size
|
||||
- send_buffer_size
|
||||
|
||||
Examples:
|
||||
- `socketoptions=read_timeout_ms=23423,connect_timeout_ms=4444`
|
||||
- `socketoptions=tcp_no_delay=true`
|
||||
|
||||
- **tokens** default: unset - Only executes statements that fall within
|
||||
any of the specified token ranges. Others are counted in metrics
|
||||
as skipped-tokens, with a histogram value of the cycle number.
|
||||
Examples:
|
||||
- tokens=1:10000,100000:1000000
|
||||
- tokens=1:123456
|
||||
|
||||
- **maxtries** - default: 10 - how many times an operation may be
|
||||
attempted before it is disregarded
|
||||
|
||||
- **maxpages** - default: 1 - how many pages can be read from a query which
|
||||
is larger than the fetchsize. If more than this number of pages
|
||||
is required for such a query, then an UnexpectedPaging exception
|
||||
is passed to the error handler as explained below.
|
||||
|
||||
- **fetchsize** - controls the driver parameter of the same name.
|
||||
Suffixed units can be used here, such as "50K". If this parameter
|
||||
is not present, then the driver option is not set.
|
||||
|
||||
- **cycles** - standard, however the cql activity type will default
|
||||
this to however many statements are included in the current
|
||||
activity, after tag filtering, etc.
|
||||
|
||||
- **username** - the user to authenticate as. This option requires
|
||||
that one of **password** or **passfile** also be defined.
|
||||
|
||||
- **password** - the password to authenticate with. This will be
|
||||
ignored if passfile is also present.
|
||||
|
||||
- **passfile** - the file to read the password from. The first
|
||||
line of this file is used as the password.
|
||||
|
||||
- **ssl** - specifies the type of the SSL implementation.
|
||||
Disabled by default, possible values are `jdk` and `openssl`.
|
||||
|
||||
[Additional parameters may need to be provided](ssl.md).
|
||||
|
||||
- **jmxreporting** - enable JMX reporting if needed.
|
||||
Examples:
|
||||
- `jmxreporting=true`
|
||||
- `jmxreporting=false` (the default)
|
||||
|
||||
- **alias** - this is a standard nosqlbench parameter, however the cql type will use the workload value also as the
|
||||
alias value when not specified.
|
||||
|
||||
- **errors** - error handler configuration.
|
||||
(default errors=stop,retryable->retry,unverified->stop)
|
||||
Examples:
|
||||
- errors=stop,WriteTimeoutException=histogram
|
||||
- errors=count
|
||||
- errors=warn,retryable=count
|
||||
See the separate help on 'cqlerrors' for detailed
|
||||
configuration options.
|
||||
|
||||
- **defaultidempotence** - sets default idempotence on the
|
||||
driver options, but only if it has a value.
|
||||
(default unset, valid values: true or false)
|
||||
|
||||
- **speculative** - sets the speculative retry policy on the cluster.
|
||||
(default unset)
|
||||
This can be in one of the following forms:
|
||||
- pT:E:L - where :L is optional and
|
||||
T is a floating point threshold between 0.0 and 100.0 and
|
||||
E is an allowed number of concurrent speculative executions and
|
||||
L is the maximum latency tracked in the tracker instance
|
||||
(L defaults to 15000 when left out)
|
||||
Examples:
|
||||
- p99.8:5:15000ms - 99.8 percentile, 5 executions, 15000ms max tracked
|
||||
- p98:2:10000ms - 98.0 percentile, 2 executions allowed, 10s max tracked
|
||||
- Tms:E - where :E is optional and
|
||||
T is a constant threshold latency and
|
||||
E is the allowed number of concurrent speculative retries
|
||||
(E default to 5 when left out)
|
||||
Examples:
|
||||
- 100ms:5 - constant threshold of 100ms and 5 allowed executions
|
||||
|
||||
- **seq** - selects the statement sequencer used with statement ratios.
|
||||
(default: bucket)
|
||||
(options: concat | bucket | interval)
|
||||
The concat sequencer repeats each statement in order until the ratio
|
||||
is achieved.
|
||||
The bucket sequencer uses simple round-robin distribution to plan
|
||||
statement ratios, a simple but unbalanced form of interleaving.
|
||||
The interval sequencer apportions statements over time and then by
|
||||
order of appearance for ties. This has the effect of interleaving
|
||||
statements from an activity more evenly, but is less obvious in how
|
||||
it works.
|
||||
All of the sequencers create deterministic schedules which use an internal
|
||||
lookup table for indexing into a list of possible statements.
|
||||
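For example, a minimal sketch (workload name, statement names, table, and
bindings are illustrative) which favors writes 9:1 and interleaves them
with the interval sequencer:

    ... driver=cql workload=myworkload seq=interval

    statements:
     - write-sample: |
         insert into examples.t (k,v) values ({k},{v});
       ratio: 9
     - read-sample: |
         select * from examples.t where k={k};
       ratio: 1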
|
||||
- **trace** - enables a trace on a subset of operations. This is disabled
|
||||
by default.
|
||||
Examples:
|
||||
`trace=modulo:100,filename:trace.log`
|
||||
The above traces every 100th cycle to a file named trace.log.
|
||||
`trace=modulo:1000,filename:stdout`
|
||||
The above traces every 1000th cycle to stdout.
|
||||
If the trace log is not specified, then 'tracelog' is assumed.
|
||||
If the filename is specified as stdout, then traces are dumped to stdout.
|
||||
|
||||
- **sessionid** - names the configuration to be used for this activity. Within a given scenario, any activities that use
|
||||
the same value for sessionid will share a session and cluster. default: 'default'
|
||||
|
||||
- **drivermetrics** - enable reporting of driver metrics.
|
||||
default: false
|
||||
|
||||
- **driverprefix** - set the metrics name that will prefix all CQL driver metrics.
|
||||
default: 'driver.*clusterid*.'
|
||||
The clusterid specified is included so that separate cluster and session
|
||||
contexts can be reported independently for advanced tests.
|
||||
|
||||
- **usercodecs** - enable the loading of user codec libraries for more
|
||||
details see: com.datastax.codecs.framework.UDTCodecInjector in the
|
||||
nosqlbench code base. This is for dynamic codec loading with
|
||||
user-provided codecs mapped via the internal UDT APIs. default: false
|
||||
|
||||
- **secureconnectbundle** - used to connect to CaaS, accepts a path to the
|
||||
secure connect bundle that is downloaded from the CaaS UI. Examples:
|
||||
- `secureconnectbundle=/tmp/secure-connect-my_db.zip`
|
||||
- `secureconnectbundle="/home/automaton/secure-connect-my_db.zip"`
|
||||
|
||||
Check
|
||||
out [Astra Documentation](https://docs.astra.datastax.com/docs/test-loading-data-with-nosqlbench)
|
||||
for samples
|
||||
|
||||
- **insights** - Set to false to disable the driver from sending insights
|
||||
monitoring information
|
||||
- `insights=false`
|
||||
|
||||
- **tickduration** - sets the tickDuration (milliseconds) of
|
||||
HashedWheelTimer of the java driver. This timer is used to schedule
|
||||
speculative requests. Examples:
|
||||
- `tickduration=10`
|
||||
- `tickduration=100` (driver default value)
|
||||
|
||||
- **compression** - sets the transport compression to use for this
|
||||
activity. Valid values are 'LZ4' and 'SNAPPY'. Both types are bundled
|
||||
with EBDSE.
|
||||
|
||||
- **showcql** - logs cql statements as INFO (to see INFO messages in stdout use -v or greater) Note: this is expensive
|
||||
and should only be done to troubleshoot workloads. Do not use `showcql` for your tests.
|
||||
|
||||
- **lbp** - configures the load balancing policies for the Java driver. With this parameter, you can
|
||||
configure nested load balancing policies in short-hand form.
|
||||
|
||||
The policies available are documented in detail under the help topic `cql-loadbalancing`. See that
|
||||
guide if you need more than the examples below.
|
||||
|
||||
Examples:
|
||||
- `lbp=LAP(retry_period=3,scale=10)` - Latency aware policy with retry period of 3 seconds.
|
||||
(Seconds is the default time unit, unless _ms parameter is used) and scale 10.
|
||||
- `lbp=LAP(rp=3,s=10)` - Same as above, using the equivalent but terser form.
|
||||
- `lbp=LAP(rp_ms=3000,s_ms=10000)` - Same as above, with milliseconds instead of
|
||||
seconds.
|
||||
- `lbp=LAP(s=10),TAP()` - Latency aware policy, followed by
|
||||
token aware policy.
|
||||
|
||||
### CQL YAML Parameters
|
||||
|
||||
A uniform YAML configuration format was introduced with engineblock 2.0.
|
||||
As part of this format, statement parameters were added for the CQL Activity Type.
|
||||
These parameters will be consolidated with the above parameters in time, but for
|
||||
now **they are limited to a YAML params block**:
|
||||
|
||||
params:
|
||||
|
||||
ratio: 1
|
||||
# Sets the statement ratio within the operation sequencer
|
||||
# scheme. Integers only.
|
||||
# When preparing the operation order (AKA sequencing),
|
||||
# this determines the frequency of the associated statements.
|
||||
|
||||
cl: ONE
|
||||
# Sets the consistency level, using any of the standard
|
||||
# identifiers from com.datastax.driver.core.ConsistencyLevel,
|
||||
# any one of:
|
||||
# LOCAL_QUORUM, ANY, ONE, TWO, THREE, QUORUM, ALL,
|
||||
# EACH_QUORUM, SERIAL, LOCAL_SERIAL, LOCAL_ONE
|
||||
|
||||
prepared: true
|
||||
# By default, all statements are prepared. If you are
|
||||
# creating schema, set this to false.
|
||||
|
||||
idempotent: false
|
||||
# For statements that are known to be idempotent, set this
|
||||
# to true
|
||||
|
||||
instrument: false
|
||||
# If a statement has instrument set to true, then
|
||||
# individual Timer metrics will be tracked for
|
||||
# that statement for both successes and errors,
|
||||
# using the given statement name.
|
||||
|
||||
verify: *
|
||||
compare: all
|
||||
# Adds two operators to the operation:
|
||||
# 1) verify that there is a single row result set in the response.
|
||||
# 2) verify some or all of the field values by name and/or value.
|
||||
# If this option is used on any statement, then the activity will
|
||||
# provide verification metrics and exceptions, including details
|
||||
# of verification in the log once the activity is completed.
|
||||
# For full details on this field, see the docs on cqlverify.
|
||||
|
||||
# The 'all' comparison cross-verifies all fields and field values
# between the reference data and the actual data.
|
||||
|
||||
|
||||
logresultcsv: true
|
||||
OR
|
||||
logresultcsv: myfilename.csv
|
||||
# If a statement has logresultcsv set to true,
|
||||
# then individual operations will be logged to a CSV file.
|
||||
# In this case the CSV file will be named as
|
||||
# <statement-name>--results.csv.
|
||||
# If the value is present and not "true", then the value will
|
||||
# be used as the name of the file.
|
||||
#
|
||||
# The format of the file is:
|
||||
# <cycle>,(SUCCESS|FAILURE),<nanos>,<rows-fetched>,(<error-class>|NONE)
|
||||
# NOTES:
|
||||
# 1) BE CAREFUL with this setting. A single logged line per
|
||||
# result is not useful for high-speed testing as it will
|
||||
# impose IO loads on the client to slow it down.
|
||||
# 2) BE CAREFUL with the name. It is best to just pick good
|
||||
# names for your statement defs so that everything remains
|
||||
# coherent and nothing gets accidentally overwritten.
|
||||
# 3) If logresultcsv is provided at the activity level, it
|
||||
# applies to all statements, and the only valid values
|
||||
# there are true and false.
|
||||
|
||||
start-timers: timername1, timername2, ...
|
||||
#
|
||||
# If a statement has start-timers value set, then the named
|
||||
# timers are started in the local thread before the
|
||||
# statement is executed
|
||||
#
|
||||
# Together, with the stop-timers modifier, you can measure
|
||||
# sequences of statements with specific named boundaries.
|
||||
#
|
||||
# The name of the timer is qualified with the activity alias
|
||||
# just as all other metric names.
|
||||
#
|
||||
# This is generally only useful when the async= parameter is
|
||||
# NOT used, since the scope of the timer is thread-local. When
|
||||
# async is used, many operations may overlap each other in the
|
||||
# same thread, breaking linearization guarantees which make
|
||||
# thread local scoping helpful for tracking linearized operations.
|
||||
#
|
||||
# When a timer is started, a timer context is created and stored
|
||||
# under this name in the thread. You must ensure that an
|
||||
# associated stop-timers setting is applied to another statement
|
||||
# in order to trigger the tally of these metrics.
|
||||
|
||||
stop-timers: timername1, timername2, ...
|
||||
#
|
||||
# If a statement has a stop-timers value set, then after the
|
||||
# statement is finished, whether by error or by successful
|
||||
# completion, the named timers are stopped and the resulting
|
||||
# measurement is added to metrics.
|
||||
#
|
||||
# If you add stop-timers with names that do not have a matching
|
||||
# start-timers name, or vice-versa then an error is thrown.
|
||||
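A minimal sketch of pairing the two modifiers across consecutive
statements (statement names and queries are illustrative):

    statements:
     - op-one: |
         select * from examples.t1 where k={k};
       start-timers: span1
     - op-two: |
         select * from examples.t2 where k={k};
       stop-timers: span1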
|
||||
### Metrics
|
||||
|
||||
- alias.result - A timer which tracks the performance of an op result
|
||||
only. This is the async get on the future, broken out as a separate
|
||||
step.
|
||||
|
||||
- alias.result-success - A timer that records rate and histograms of the
|
||||
time it takes from submitting a query to completely reading the result
|
||||
set that it returns, across all pages. This metric is only counted for
|
||||
non-exceptional results, while the result metric above includes
|
||||
all operations.
|
||||
|
||||
- alias.bind - A timer which tracks the performance of the statement
|
||||
binding logic, including the generation of data immediately prior
|
||||
|
||||
- alias.execute - A timer which tracks the performance of op submission
|
||||
only. This is the async execution call, broken out as a separate step.
|
||||
|
||||
- alias.tries - A histogram of how many tries were required to get a
|
||||
completed operation
|
||||
|
||||
- alias.pages - A timer which tracks the performance of paging, specific
|
||||
to more than 1-page query results. i.e., if all reads return within 1
|
||||
page, this metric will not have any data.
|
||||
|
||||
- alias.strides - A timer around each stride of operations within a thread
|
||||
|
||||
- alias.skipped-tokens - A histogram that records the count and cycle values
|
||||
of skipped tokens.
|
||||
|
||||
## YAML Examples
|
||||
|
||||
Please see the bundled activities with nosqlbench for examples.
|
242
adapter-cqld4/src/main/resources/curate_docs/dsegraph.md
Normal file
@ -0,0 +1,242 @@
|
||||
# dsegraph activity type
|
||||
|
||||
# warning: These docs are a work in progress
|
||||
This is an activity type which allows for the execution of workloads
|
||||
using DSE Graph and the DSE Java Driver.
|
||||
|
||||
This activity type is wired synchronously within each client
|
||||
thread, however the async API is used in order to expose fine-grain
|
||||
metrics about op binding, op submission, and waiting for a result.
|
||||
|
||||
## Example activity definitions
|
||||
|
||||
Run a dsegraph activity named 'a1', with definitions from activities/graphs.yaml
|
||||
~~~
|
||||
... type=dsegraph alias=a1 yaml=graphs
|
||||
~~~
|
||||
|
||||
Run a dsegraph activity defined by graphs.yaml, but with shortcut naming
|
||||
~~~
|
||||
... type=dsegraph yaml=graphs
|
||||
~~~
|
||||
|
||||
Only run statement groups which match a tag regex
|
||||
~~~
|
||||
... type=dsegraph yaml=graphs tags=group:'ddl.*'
|
||||
~~~
|
||||
|
||||
Run the matching 'dml' statements, with 100 cycles, from [1000..1100)
|
||||
~~~
|
||||
... type=dsegraph yaml=graphs tags=group:'dml.*' cycles=1000..1100
|
||||
~~~
|
||||
This last example shows that the cycle range is [inclusive..exclusive),
|
||||
to allow for stacking test intervals. This is standard across all
|
||||
activity types.
|
||||
|
||||
## dsegraph ActivityType Parameters
|
||||
|
||||
- **yaml** - The file which holds the schema and statement defs.
|
||||
(no default, required)
|
||||
~~~
|
||||
DOCS TBD FOR THIS SECTION
|
||||
- **cl** - An override to consistency levels for the activity. If
|
||||
this option is used, then all consistency levels will be replaced
|
||||
by this one for the current activity, and a log line explaining
|
||||
the difference with respect to the yaml will be emitted.
|
||||
This is not a dynamic parameter. It will only be applied at
|
||||
activity start.
|
||||
~~~
|
||||
- **cbopts** - this is how you customize the cluster settings for
|
||||
the client, including policies, compression, etc. This is
|
||||
a string of *Java*-like method calls just as you would use them
|
||||
in the Cluster.Builder fluent API. They are evaluated inline
|
||||
with the default Cluster.Builder options not covered below.
|
||||
Example: cbopts=".withCompression(ProtocolOptions.Compression.NONE)"
|
||||
- **maxtries** - how many times an operation may be attempted
|
||||
~~~
|
||||
DOCS TBD FOR THIS SECTION
|
||||
- **diagnose** - if this is set to true, then any exception for an
|
||||
operation are thrown instead of handled internally. This can
|
||||
be useful for diagnosing exceptions during scenario development.
|
||||
In this version of ebdse, this is a shortcut for setting all the
|
||||
exception handlers to **stop**.
|
||||
~~~
|
||||
- **cycles** - standard, however the cql activity type will default
|
||||
this to however many statements are included in the current
|
||||
activity, after tag filtering, etc.
|
||||
- **username** - the user to authenticate as. This option requires
|
||||
that one of **password** or **passfile** also be defined.
|
||||
- **password** - the password to authenticate with. This will be
|
||||
ignored if passfile is also present.
|
||||
- **passfile** - the file to read the password from. The first
|
||||
line of this file is used as the password.
|
||||
- **alias** - this is a standard engineblock parameter, however
|
||||
the cql type will use the yaml value also as the alias value
|
||||
when not specified.
|
||||
- **graphson** - the version of the graphson protocol to use:
|
||||
default: 2
|
||||
|
||||
## Statement Parameters

- **repeat** - if specified, causes the statement blocks to be
  lexically repeated before being evaluated as statements,
  including enumerated bindings.

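As an illustration only: assuming **repeat** is given as a numeric
statement parameter in the params section of a block, a block such as the
following would be expanded into two copies of its statements before
binding:

~~~
params:
  repeat: 2
statements:
 - statement 1
~~~
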
## Error Handling

#### Error Handlers

When an error occurs, you can control how it is handled.

This is the error handler stack:

- **stop** - causes the exception to be thrown to the runtime, forcing a shutdown.
- **warn** - log a warning in the log, with details about the error and associated statement.
- **count** - keep a count in metrics for the exception, under the name
  exceptions.classname, using the simple class name.
- **retry** - retry the operation if the number of retries hasn't been
  used up.
- **ignore** - do nothing; do not even retry or count.

The handlers are ordered from the most severe at the top to the most
permissive at the bottom. With the exception of the **stop** handler,
every handler from the chosen entry point down to the bottom of the
stack is applied to an error. One way to choose the right handler is to
ask "How serious is this to the test run or the results of the test if
it happens?" In general, it is best to be conservative and choose a
stricter setting unless you specifically want to measure how often a
given error happens, for example.

#### Error Types

The errors that can be detected are sorted into three categories:
~~~
DOCS TBD FOR THIS SECTION

- **unapplied** - This was a LWT that did not get applied. All operations
  are checked, and a ChangeUnapplied exception is thrown.
  (This is a local exception to make error handling consistent.)
  This is a separate category from retryable, because you have to
  have reactive logic to properly submit a valid request when it occurs.
~~~
- **retryable** - NoHostAvailable, Overloaded, WriteTimeout, and
  ReadTimeout exceptions. These are all exceptions that might
  succeed if tried again with the same payload.
- **realerrors** - ReadFailure, WriteFailure, SyntaxError, InvalidQuery.
  These represent errors that are likely a persistent issue, and
  will not likely succeed if tried again.

To set the error handling behavior, simply pair these categories up with
an entry point in the error handler stack. Here is an example, showing
also the defaults that are used if you do not specify otherwise:

    retryable=retry realerror=stop

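For example (a sketch only, using the category and handler names documented
above), a long soak test that should keep running past transient server
errors might relax the mapping like this:

~~~
... type=dsegraph yaml=graphs retryable=retry realerror=count
~~~
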
## Generic Parameters

*provided by the runtime*

- **targetrate** - The target rate in ops/s
- **linkinput** - if the name of another activity is specified, this activity
  will only go as fast as that one.
- **tags** - optional filter for matching tags in yaml sections (detailed help
  link needed)
- **threads** - the number of client threads driving this activity

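Putting a few of these together on one activity definition (the values shown
are only examples):

~~~
... type=dsegraph yaml=graphs threads=10 targetrate=5000
~~~
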
## Metrics
- \<alias\>.cycles - (provided by core input) A timer around the whole cycle
- \<alias\>.bind - A timer which tracks the performance of the statement
  binding logic, including the generation of data immediately prior
- \<alias\>.execute - A timer which tracks the performance of op submission
  only. This is the async execution call, broken out as a separate step.
- \<alias\>.result - A timer which tracks the performance of an op result only.
  This is the async get on the future, broken out as a separate step.
- \<alias\>.tries - A histogram of how many tries were required to get a
  completed operation

## YAML Format

The YAML file for a DSE Graph activity has one or more logical yaml documents,
each separated by three dashes (---), the standard yaml document separator. Each
yaml document may contain an optional tags section for the purpose of including
or excluding statements for a given activity:

~~~
tags:
  tagname: value
  ...
~~~

If no tags are provided in a document section, then it will be matched by
all possible tag filters. Conversely, if no tag filter is applied in
the activity definition, all tagged documents will match.

Statements can be specified at the top level or within named blocks. When
you have simple needs to just put a few statements into the yaml, the top-level
style will suffice:

~~~
name: statement-top-level-example
statements:
 - statement 1
 - statement 2
~~~

If you need to represent multiple blocks of statements in the same activity,
you might want to group them into blocks:
~~~
blocks:
 - name: statement-block-1
   statements:
    - statement 1
    - statement 2
~~~

At any level that you can specify statements, you can also specify data bindings:

~~~
statements:
 - statement 1
 - statement 2
bindings:
 bindto1: foo
 bindto2: bar

blocks:
 - name: statement-block-1
   statements:
    - statement 1
   bindings:
    bindto1: foo
~~~

Data bindings specify how values are generated to plug into each operation. More
details on data bindings are available in the activity usage guide.

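As a brief, illustrative sketch (the statement text and binding recipe below
are placeholders in the style used elsewhere in this repository, not part of
this workload), a named binding is referenced from a statement by its anchor
in curly braces:

~~~
statements:
 - add-device: g.addV('device').property('deviceid', '{deviceid}' as UUID)
bindings:
 deviceid: ToHashedUUID() -> java.util.UUID
~~~
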
### Parameter Templating

Double angle brackets may be used to drop parameters into the YAML
arbitrarily. When the YAML file is loaded, and only then, these parameters
are interpolated from activity parameters like those above. This allows you
to create activity templates that can be customized simply by providing
additional parameters to the activity. There are two forms,
\<\<some_var_name:default_value\>\> and \<\<some_var_name\>\>. The first
form contains a default value. If one of these parameters is encountered
and neither an activity parameter nor a default provides a qualifying
value, an error will be thrown.

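For instance, a template reference with a default can be overridden at
activity definition time (the graphname parameter here mirrors the workload
further below; the override value is only an example):

~~~
# in the yaml
script: system.graph('<<graphname:graph_wheels>>').ifNotExists().create()

# in the activity definition
... type=dsegraph yaml=graphs graphname=wheels_test
~~~
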
### YAML Location

The YAML file referenced in the yaml= parameter will be searched for in the
following places, in this order:

1. A URL, if it starts with 'http:' or 'https:'
2. The local filesystem, if it exists there
3. The internal classpath and assets in the ebdse jar.

The '.yaml' suffix is not required in the yaml= parameter; however, it is
required on the actual file. As well, the logical search path "activities/"
will be used if necessary to locate the file, both on the filesystem and in
the classpath.

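In other words, assuming a file named graphs.yaml exists under activities/,
all of the following refer to the same file:

~~~
yaml=graphs
yaml=graphs.yaml
yaml=activities/graphs.yaml
~~~
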
Below is a basic example that can be copied as a starting template.

## YAML Example
---
CONTENT TBD

adapter-cqld4/src/main/resources/curate_docs/ssl.md
@ -0,0 +1,56 @@

# SSL

Supported options:

- **ssl** - specifies the type of the SSL implementation.
  Disabled by default, possible values are `jdk` and `openssl`.

- **tlsversion** - specify the TLS version to use for SSL.

  Examples:
  - `tlsversion=TLSv1.2` (the default)

For `jdk` type, the following options are available:

- **truststore** - specify the path to the SSL truststore.

  Examples:
  - `truststore=file.truststore`

- **tspass** - specify the password for the SSL truststore.

  Examples:
  - `tspass=truststore_pass`

- **keystore** - specify the path to the SSL keystore.

  Examples:
  - `keystore=file.keystore`

- **kspass** - specify the password for the SSL keystore.

  Examples:
  - `kspass=keystore_pass`

- **keyPassword** - specify the password for the key.

  Examples:
  - `keyPassword=password`

For `openssl` type, the following options are available:

- **caCertFilePath** - path to the X509 CA certificate file.

  Examples:
  - `caCertFilePath=cacert.crt`

- **certFilePath** - path to the X509 certificate file.

  Examples:
  - `certFilePath=ca.pem`

- **keyFilePath** - path to the OpenSSL key file.

  Examples:
  - `keyFilePath=file.key`

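As an illustrative sketch combining the `jdk` options above on one activity
definition (file names and passwords are placeholders only):

~~~
... ssl=jdk truststore=file.truststore tspass=truststore_pass tlsversion=TLSv1.2
~~~
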
adapter-cqld4/src/main/resources/curate_docs/topics.md
@ -0,0 +1,4 @@

# cql help topics
- cql
- cql-errors
- cql-exception-list

@ -1,12 +1,17 @@
description: creates local graphs which resemble a wagon-wheel topology
description: creates local graphs which resemble a wagon-wheel topology, using
  DSE Graph, version 6.8 or newer

scenarios:
  creategraph: run driver=cqld4 graphname=graph_wheels tags=phase:create-graph cycles===UNDEF
  schema: run driver=cqld4 graphname=graph_wheels tags=phase:graph-schema cycles===UNDEF
  disable-verify: run driver=cqld4 graphname=graph_wheels tags=phase:disable-verify cycles===UNDEF
  rampup: run driver==cqld4 graphname=graph_wheels tags=phase:rampup cycles=1000
  default:
    creategraph: run driver=dsegraph graphname=graph_wheels tags=phase:graph-schema
    schema: run driver=dsegraph graphname=graph_wheels tags=phase:graph-schema
    main: run driver==dsegraph graphname=graph_wheels tags=name:main-add cycles=100000
    devmode: run driver=dsegraph graphname=graph_wheels tags=name:dev-mode
    prodmode: run driver=dsegraph graphname=graph_wheels tags=name:dev-mode
    creategraph: run driver=cqld4 graphname=graph_wheels tags=phase:create-graph cycles===UNDEF
    schema: run driver=cqld4 graphname=graph_wheels tags=phase:graph-schema cycles===UNDEF
    rampup: run driver==cqld4 graphname=graph_wheels tags=phase:rampup cycles=1
    devmode: run driver=cqld4 graphname=graph_wheels tags=name:dev-mode
    prodmode: run driver=cqld4 graphname=graph_wheels tags=name:dev-mode

bindings:
  sessionid: ToEpochTimeUUID()->java.util.UUID; ToString();
@ -16,54 +21,86 @@ bindings:
  osversion: WeightedStrings('nougat:3;oreo:1;jellybean:2;4:1;4c:1;5:1;5c:1;trusty:1;xenial:1;yosemite:1;el capitan:2;sierra:3;high sierra:1;7:1;10:2')
  ipaddress: Combinations('1;7;0-3;.;0-2;0-2;0-5;.;0-2;0-2;0-5')
  createdtime: Add(1505256898)
  diag_ten_pct: WeightedLongs('1:1;0:9')
  diag_one_pct: WeightedLongs('1:1;0:99')

blocks:
  - name: create-graph
  create-graph:
    tags:
      phase: create-graph
    statements:
      - creategraph: >-
          system.graph('<<graphname:graph_wheels>>').ifNotExists().create()
  - name: create-schema
      creategraph:
        type: gremlin
        script: >-
          system.graph('<<graphname:graph_wheels>>').ifNotExists().create()
  create-schema:
    tags:
      phase: graph-schema
    statements:
      - graph-schema: >-
          schema.propertyKey('sessionid').Uuid().ifNotExists().create();
          schema.propertyKey('deviceid').Uuid().ifNotExists().create();
          schema.propertyKey('ipaddress').Text().ifNotExists().create();
          schema.propertyKey('createdtime').Bigint().ifNotExists().create();
          schema.vertexLabel('session').partitionKey('sessionid').properties('ipaddress', 'deviceid', 'createdtime').ifNotExists().create();
          schema.propertyKey('type').Text().ifNotExists().create();
          schema.propertyKey('os').Text().ifNotExists().create();
          schema.propertyKey('osversion').Text().ifNotExists().create();
          schema.vertexLabel('device').partitionKey('deviceid').properties('type', 'os', 'osversion').ifNotExists().create();
          schema.edgeLabel('using').single().connection('session','device').ifNotExists().create();
        tags:
          name: graph-schema
  - name: dev-mode
      graph-schema:
        type: gremlin
        graphname: <<graphname:graph_wheels>>
        script: >-
          schema.vertexLabel('session')
          .ifNotExists()
          .partitionBy('sessionid', Uuid)
          .property('ipaddress', Text)
          .property('deviceid', Uuid)
          .property('createdtime', Bigint)
          .create();

          schema.vertexLabel('device')
          .ifNotExists()
          .partitionBy('deviceid', Uuid)
          .property('type', Text)
          .property('os', Text)
          .property('osversion', Text)
          .create();

          schema.edgeLabel('using')
          .ifNotExists()
          .from('session')
          .to('device')
          .create()
  dev-mode:
    tags:
      phase: dev-mode
    statements:
      - dev-mode: >-
          schema.config().option('graph.schema_mode').set('Development');
        tags:
          name: dev-mode
  - name: prod-mode
      dev-mode:
        type: gremlin
        graphname: <<graphname:graph_wheels>>
        script: >-
          schema.config().option('graph.schema_mode').set('Development');
  prod-mode:
    tags:
      phase: prod-mode
    statements:
      - prod-mode: >-
          schema.config().option('graph.schema_mode').set('Production');
        tags:
          name: prod-mode
  - name: main
      prod-mode:
        type: gremlin
        graphname: <<graphname:graph_wheels>>
        script: >-
          schema.config().option('graph.schema_mode').set('Production');
  rampup:
    tags:
      phase: main
      phase: rampup
    statements:
      - main-add: >-
          device = graph.addVertex(label, 'device','deviceid', {deviceid}, 'type', {type}, 'os', {os}, 'osversion', {osversion});
          session = graph.addVertex(label, 'session', 'sessionid', {sessionid}, 'ipaddress', {ipaddress}, 'deviceid', {deviceid}, 'createdtime', {createdtime});
          session.addEdge('using', device);
        tags:
          name: main-add
      main-add:
        type: gremlin
        diag: "{diag_one_pct}"
        graphname: <<graphname:graph_wheels>>
        script: >-
          device = g.addV('device')
          .property('deviceid', '{deviceid}' as UUID)
          .property('type', '{type}')
          .property('os', '{os}')
          .property('osversion', '{osversion}')
          .as('d')
          .addV('session')
          .property('sessionid', '{sessionid}' as UUID)
          .property('ipaddress', '{ipaddress}')
          .property('deviceid', '{deviceid}' as UUID)
          .property('createdtime', {createdtime})
          .as('s')
          .addE('using').from('s').to('d');