Merge pull request #357 from ivansenic/ise-documents-crud

added Documents API CRUD dataset scenario
This commit is contained in:
Jonathan Shook 2021-09-21 11:51:56 -05:00 committed by GitHub
commit e33e999ea6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 408 additions and 0 deletions

View File

@ -0,0 +1,67 @@
---
title: Documents API CRUD Basic
weight: 2
---
## Description
The Documents API CRUD Basic workflow targets Stargate's Documents API using generated JSON documents.
The documents all share the same structure and are approximately half a kilobyte each:
* each document has 13 leaf values, with a maximum depth of 3
* there is at least one `string`, `boolean`, `number` and `null` leaf
* there is one array with `double` values and one with `string` values
* there is one empty array and one empty map
The example JSON looks like:
```json
{
"user_id":"56fd76f6-081d-401a-85eb-b1d9e5bba058",
"created_on":1476743286,
"gender":"F",
"full_name":"Andrew Daniels",
"married":true,
"address":{
"primary":{
"cc":"IO",
"city":"Okmulgee"
},
"secondary":{
}
},
"coordinates":[
64.65964627052323,
-122.35334535072856
],
"children":[
],
"friends":[
"3df498b1-9568-4584-96fd-76f6081da01a"
],
"debt":null
}
```
In contrast to other workflows, this one is not split into ramp-up and main phases.
Instead, there is only the main phase with 4 different load types (write, read, update and delete).
## Named Scenarios
### default
The default scenario for http-docsapi-crud-basic.yaml runs each type of the main phase sequentially: write, read, update and delete.
This means that the cycles for each phase should be set using the `write-cycles`, `read-cycles`, `update-cycles` and `delete-cycles` parameters.
The default value for all 4 cycles parameters is the number of documents to process (see [Workload Parameters](#workload-parameters)).
Note that error handling is set to `errors=timer,warn`, which means that in case of HTTP errors the scenario is not stopped.
## Workload Parameters
- `docscount` - the number of documents to process in each step of a scenario (default: `10_000_000`)
Note that if the number of documents is higher than `read-cycles`, you will experience misses, which result in `HTTP 404` responses and lower latencies.

View File

@ -0,0 +1,175 @@
# nb -v run driver=http yaml=http-docsapi-crud-basic tags=phase:schema stargate_host=my_stargate_host auth_token=$AUTH_TOKEN
description: |
This workload emulates CRUD operations for the Stargate Documents API.
It generates a simple JSON document to be used for writes and updates.
Note that stargate_port should reflect the port where the Docs API is exposed (defaults to 8082).
# Named scenarios. The "default" scenario lists five steps: schema, write, read, update and delete.
scenarios:
default:
# Schema step: selects the ops tagged phase:schema and runs them with a single thread.
schema: run driver=http tags==phase:schema threads==1 cycles==UNDEF
# CRUD steps: each one selects the ops tagged phase:main plus its own type tag.
# The cycle count comes from the step-specific parameter (write-cycles, read-cycles,
# update-cycles, delete-cycles) and falls back to docscount (default 10000000).
# Every CRUD step sets errors=timer,warn.
write: run driver=http tags==phase:main,type:write cycles===TEMPLATE(write-cycles,TEMPLATE(docscount,10000000)) threads=auto errors=timer,warn
read: run driver=http tags==phase:main,type:read cycles===TEMPLATE(read-cycles,TEMPLATE(docscount,10000000)) threads=auto errors=timer,warn
update: run driver=http tags==phase:main,type:update cycles===TEMPLATE(update-cycles,TEMPLATE(docscount,10000000)) threads=auto errors=timer,warn
delete: run driver=http tags==phase:main,type:delete cycles===TEMPLATE(delete-cycles,TEMPLATE(docscount,10000000)) threads=auto errors=timer,warn
# Bindings: named value generators referenced as {name} placeholders by the statements in this file.
bindings:
# To enable an optional weighted set of hosts in place of a load balancer
# Examples
# single host: stargate_host=host1
# multiple hosts: stargate_host=host1,host2,host3
# multiple weighted hosts: stargate_host=host1:3,host2:7
weighted_hosts: WeightedStrings('<<stargate_host:stargate>>')
# http request id
request_id: ToHashedUUID(); ToString();
# Document keys, both sized by docscount (default 10000000).
# seq_key is used by the write and delete statements, random_key by the read and update statements.
seq_key: Mod(<<docscount:10000000>>); ToString() -> String
random_key: Uniform(0,<<docscount:10000000>>); ToString() -> String
# Field values for the generated JSON document.
user_id: ToHashedUUID(); ToString() -> String
created_on: Uniform(1262304000,1577836800) -> long
gender: WeightedStrings('M:10;F:10;O:1')
full_name: FullNames()
married: ModuloToBoolean()
city: CSVSampler('city','n/a','name','data/simplemaps/uszips.csv')
country_code: CountryCodes()
# Geographic coordinates in degrees: latitude is only valid in [-90, 90], longitude in [-180, 180].
lat: Uniform(-90d, 90d)
# NOTE(review): Hash() is applied first, presumably so that lng is not correlated with lat for the same cycle - confirm.
lng: Hash() -> long; Uniform(-180d, 180d)
friend_id: Add(-1); ToHashedUUID(); ToString() -> String
blocks:
# Schema block: selected via the tag phase:schema. It holds three HTTP requests.
- tags:
phase: schema
statements:
# Request 1: POST the keyspace definition (name defaults to docs_crud_basic, replicas to 1).
- create-keyspace: POST <<protocol:http>>://{weighted_hosts}:<<stargate_port:8082>><<path_prefix:>>/v2/schemas/keyspaces
Accept: "application/json"
X-Cassandra-Request-Id: "{request_id}"
X-Cassandra-Token: "<<auth_token:my_auth_token>>"
Content-Type: "application/json"
body: |
{
"name": "<<keyspace:docs_crud_basic>>",
"replicas": <<rf:1>>
}
tags:
name: create-keyspace
# Request 2: DELETE the collection (name defaults to docs_collection).
# ok-status accepts any status code from 200 to 499.
# NOTE(review): presumably this lets the step pass when the collection does not exist yet - confirm.
- delete-docs-collection: DELETE <<protocol:http>>://{weighted_hosts}:<<stargate_port:8082>><<path_prefix:>>/v2/namespaces/<<keyspace:docs_crud_basic>>/collections/<<table:docs_collection>>
Accept: "application/json"
X-Cassandra-Request-Id: "{request_id}"
X-Cassandra-Token: "<<auth_token:my_auth_token>>"
tags:
name: delete-table
ok-status: "[2-4][0-9][0-9]"
# Request 3: POST the collection name to create the collection.
- create-docs-collection: POST <<protocol:http>>://{weighted_hosts}:<<stargate_port:8082>><<path_prefix:>>/v2/namespaces/<<keyspace:docs_crud_basic>>/collections
Accept: "application/json"
X-Cassandra-Request-Id: "{request_id}"
X-Cassandra-Token: "<<auth_token:my_auth_token>>"
Content-Type: "application/json"
body: |
{
"name": "<<table:docs_collection>>"
}
tags:
name: create-table
# Write block: selected via the tags phase:main and type:write.
- name: main-write
tags:
phase: main
type: write
statements:
# PUTs a generated JSON document into the collection under the key {seq_key}.
# Every {placeholder} in the URL, headers and body refers to an entry of the bindings section of this file.
- write-document: PUT <<protocol:http>>://{weighted_hosts}:<<stargate_port:8082>><<path_prefix:>>/v2/namespaces/<<keyspace:docs_crud_basic>>/collections/<<table:docs_collection>>/{seq_key}
Accept: "application/json"
X-Cassandra-Request-Id: "{request_id}"
X-Cassandra-Token: "<<auth_token:my_auth_token>>"
Content-Type: "application/json"
body: |
{
"user_id": "{user_id}",
"created_on": {created_on},
"gender": "{gender}",
"full_name": "{full_name}",
"married": {married},
"address": {
"primary": {
"city": "{city}",
"cc": "{country_code}"
},
"secondary": {}
},
"coordinates": [
{lat},
{lng}
],
"children": [],
"friends": [
"{friend_id}"
],
"debt": null
}
tags:
name: write-document
# Read block: selected via the tags phase:main and type:read.
- name: main-read
tags:
phase: main
type: read
statements:
# GETs the document stored under the key {random_key}.
# ok-status accepts any status code from 200 to 499, so a 404 for a missing key is not counted as an error.
- read-document: GET <<protocol:http>>://{weighted_hosts}:<<stargate_port:8082>><<path_prefix:>>/v2/namespaces/<<keyspace:docs_crud_basic>>/collections/<<table:docs_collection>>/{random_key}
Accept: "application/json"
X-Cassandra-Request-Id: "{request_id}"
X-Cassandra-Token: "<<auth_token:my_auth_token>>"
ok-status: "[2-4][0-9][0-9]"
tags:
name: read-document
# Update block: selected via the tags phase:main and type:update.
- name: main-update
tags:
phase: main
type: update
statements:
# PUTs a freshly generated JSON document under the key {random_key}, replacing whatever is stored there.
# The statement is named update-document so that its name agrees with its "name" tag
# and does not collide with the write-document statement of the main-write block.
- update-document: PUT <<protocol:http>>://{weighted_hosts}:<<stargate_port:8082>><<path_prefix:>>/v2/namespaces/<<keyspace:docs_crud_basic>>/collections/<<table:docs_collection>>/{random_key}
Accept: "application/json"
X-Cassandra-Request-Id: "{request_id}"
X-Cassandra-Token: "<<auth_token:my_auth_token>>"
Content-Type: "application/json"
body: |
{
"user_id": "{user_id}",
"created_on": {created_on},
"gender": "{gender}",
"full_name": "{full_name}",
"married": {married},
"address": {
"primary": {
"city": "{city}",
"cc": "{country_code}"
},
"secondary": {}
},
"coordinates": [
{lat},
{lng}
],
"children": [],
"friends": [
"{friend_id}"
],
"debt": null
}
tags:
name: update-document
# Delete block: selected via the tags phase:main and type:delete.
- name: main-delete
tags:
phase: main
type: delete
statements:
# DELETEs the document stored under the key {seq_key}.
# The statement is named delete-document so that its name agrees with its "name" tag
# and with the HTTP method it issues.
# ok-status accepts any status code from 200 to 499.
- delete-document: DELETE <<protocol:http>>://{weighted_hosts}:<<stargate_port:8082>><<path_prefix:>>/v2/namespaces/<<keyspace:docs_crud_basic>>/collections/<<table:docs_collection>>/{seq_key}
Accept: "application/json"
X-Cassandra-Request-Id: "{request_id}"
X-Cassandra-Token: "<<auth_token:my_auth_token>>"
ok-status: "[2-4][0-9][0-9]"
tags:
name: delete-document

View File

@ -0,0 +1,44 @@
---
title: Documents API CRUD using an external Dataset
weight: 3
---
## Description
The Documents API CRUD Dataset workflow targets Stargate's Documents API using JSON documents from an external dataset.
The [dataset](#dataset) is mandatory and should contain one JSON document per line; each document is used as the input for write and update operations.
This workflow is perfect for testing Stargate performance using your own JSON dataset or any other realistic dataset.
In contrast to other workflows, this one is not split into ramp-up and main phases.
Instead, there is only the main phase with 4 different load types (write, read, update and delete).
## Named Scenarios
### default
The default scenario for http-docsapi-crud-dataset.yaml runs each type of the main phase sequentially: write, read, update and delete.
This means that the cycles for each phase should be set using the `write-cycles`, `read-cycles`, `update-cycles` and `delete-cycles` parameters.
The default value for all 4 cycles parameters is the number of documents to process (see [Workload Parameters](#workload-parameters)).
Note that error handling is set to `errors=timer,warn`, which means that in case of HTTP errors the scenario is not stopped.
## Dataset
### JSON Documents
As explained above, in order to run the workflow a file containing JSON documents is needed.
If you don't have a dataset at hand, please have a look at [awesome-json-datasets](https://github.com/jdorfman/awesome-json-datasets).
You can use exposed public APIs to create a realistic dataset of your choice.
For example, you can easily create a dataset containing [Bitcoin unconfirmed transactions](https://gist.github.com/ivansenic/e280a89aba6420acb4f587d3779af774).
```bash
curl 'https://blockchain.info/unconfirmed-transactions?format=json&limit=5000' | jq -c '.txs | .[]' > blockchain-unconfirmed-transactions.json
```
The above command creates a dataset with the 5,000 latest unconfirmed transactions.
## Workload Parameters
- `docscount` - the number of documents to process in each step of a scenario (default: `10_000_000`)
- `dataset_file` - the file to read the JSON documents from (note that if the number of documents in the file is smaller than the `docscount` parameter, the documents will be reused)

View File

@ -0,0 +1,122 @@
# nb -v run driver=http yaml=http-docsapi-crud-dataset tags=phase:schema stargate_host=my_stargate_host auth_token=$AUTH_TOKEN dataset_file=path/to/data.json
description: |
This workload emulates CRUD operations for the Stargate Documents API.
It requires a data set file, where each line is a single JSON document to be used for writes and updates.
Note that stargate_port should reflect the port where the Docs API is exposed (defaults to 8082).
# Named scenarios. The "default" scenario lists five steps: schema, write, read, update and delete.
scenarios:
default:
# Schema step: selects the ops tagged phase:schema and runs them with a single thread.
schema: run driver=http tags==phase:schema threads==1 cycles==UNDEF
# CRUD steps: each one selects the ops tagged phase:main plus its own type tag.
# The cycle count comes from the step-specific parameter (write-cycles, read-cycles,
# update-cycles, delete-cycles) and falls back to docscount (default 10000000).
# Every CRUD step sets errors=timer,warn.
write: run driver=http tags==phase:main,type:write cycles===TEMPLATE(write-cycles,TEMPLATE(docscount,10000000)) threads=auto errors=timer,warn
read: run driver=http tags==phase:main,type:read cycles===TEMPLATE(read-cycles,TEMPLATE(docscount,10000000)) threads=auto errors=timer,warn
update: run driver=http tags==phase:main,type:update cycles===TEMPLATE(update-cycles,TEMPLATE(docscount,10000000)) threads=auto errors=timer,warn
delete: run driver=http tags==phase:main,type:delete cycles===TEMPLATE(delete-cycles,TEMPLATE(docscount,10000000)) threads=auto errors=timer,warn
# Bindings: named value generators referenced as {name} placeholders by the statements in this file.
bindings:
# To enable an optional weighted set of hosts in place of a load balancer
# Examples
# single host: stargate_host=host1
# multiple hosts: stargate_host=host1,host2,host3
# multiple weighted hosts: stargate_host=host1:3,host2:7
weighted_hosts: WeightedStrings('<<stargate_host:stargate>>')
# http request id
request_id: ToHashedUUID(); ToString();
# Document keys, both sized by docscount (default 10000000).
# seq_key is used by the write and delete statements, random_key by the read and update statements.
seq_key: Mod(<<docscount:10000000>>); ToString() -> String
random_key: Uniform(0,<<docscount:10000000>>); ToString() -> String
blocks:
# Schema block: selected via the tag phase:schema. It holds three HTTP requests.
- tags:
phase: schema
statements:
# Request 1: POST the keyspace definition (name defaults to docs_crud_dataset, replicas to 1).
- create-keyspace: POST <<protocol:http>>://{weighted_hosts}:<<stargate_port:8082>><<path_prefix:>>/v2/schemas/keyspaces
Accept: "application/json"
X-Cassandra-Request-Id: "{request_id}"
X-Cassandra-Token: "<<auth_token:my_auth_token>>"
Content-Type: "application/json"
body: |
{
"name": "<<keyspace:docs_crud_dataset>>",
"replicas": <<rf:1>>
}
tags:
name: create-keyspace
# Request 2: DELETE the collection (name defaults to docs_collection).
# ok-status accepts any status code from 200 to 499.
# NOTE(review): presumably this lets the step pass when the collection does not exist yet - confirm.
- delete-docs-collection: DELETE <<protocol:http>>://{weighted_hosts}:<<stargate_port:8082>><<path_prefix:>>/v2/namespaces/<<keyspace:docs_crud_dataset>>/collections/<<table:docs_collection>>
Accept: "application/json"
X-Cassandra-Request-Id: "{request_id}"
X-Cassandra-Token: "<<auth_token:my_auth_token>>"
tags:
name: delete-table
ok-status: "[2-4][0-9][0-9]"
# Request 3: POST the collection name to create the collection.
- create-docs-collection: POST <<protocol:http>>://{weighted_hosts}:<<stargate_port:8082>><<path_prefix:>>/v2/namespaces/<<keyspace:docs_crud_dataset>>/collections
Accept: "application/json"
X-Cassandra-Request-Id: "{request_id}"
X-Cassandra-Token: "<<auth_token:my_auth_token>>"
Content-Type: "application/json"
body: |
{
"name": "<<table:docs_collection>>"
}
tags:
name: create-table
# Write block: selected via the tags phase:main and type:write.
- name: main-write
tags:
phase: main
type: write
statements:
# PUTs the text bound to {document_json} into the collection under the key {seq_key}.
- write-document: PUT <<protocol:http>>://{weighted_hosts}:<<stargate_port:8082>><<path_prefix:>>/v2/namespaces/<<keyspace:docs_crud_dataset>>/collections/<<table:docs_collection>>/{seq_key}
Accept: "application/json"
X-Cassandra-Request-Id: "{request_id}"
X-Cassandra-Token: "<<auth_token:my_auth_token>>"
Content-Type: "application/json"
body: "{document_json}"
tags:
name: write-document
# document_json is read from the file given by the dataset_file parameter, which has no default value here.
# NOTE(review): ModuloLineToString presumably picks line (cycle mod line count), which would explain
# why documents are reused when the file is shorter than docscount - confirm.
bindings:
document_json: ModuloLineToString('<<dataset_file>>');
# Read block: selected via the tags phase:main and type:read.
- name: main-read
tags:
phase: main
type: read
statements:
# GETs the document stored under the key {random_key}.
# ok-status accepts any status code from 200 to 499, so a 404 for a missing key is not counted as an error.
- read-document: GET <<protocol:http>>://{weighted_hosts}:<<stargate_port:8082>><<path_prefix:>>/v2/namespaces/<<keyspace:docs_crud_dataset>>/collections/<<table:docs_collection>>/{random_key}
Accept: "application/json"
X-Cassandra-Request-Id: "{request_id}"
X-Cassandra-Token: "<<auth_token:my_auth_token>>"
ok-status: "[2-4][0-9][0-9]"
tags:
name: read-document
# Update block: selected via the tags phase:main and type:update.
- name: main-update
tags:
phase: main
type: update
statements:
# PUTs the text bound to {document_json} under the key {random_key}, replacing whatever is stored there.
# The statement is named update-document so that its name agrees with its "name" tag
# and does not collide with the write-document statement of the main-write block.
- update-document: PUT <<protocol:http>>://{weighted_hosts}:<<stargate_port:8082>><<path_prefix:>>/v2/namespaces/<<keyspace:docs_crud_dataset>>/collections/<<table:docs_collection>>/{random_key}
Accept: "application/json"
X-Cassandra-Request-Id: "{request_id}"
X-Cassandra-Token: "<<auth_token:my_auth_token>>"
Content-Type: "application/json"
body: "{document_json}"
tags:
name: update-document
# document_json is read from the file given by the dataset_file parameter, which has no default value here.
bindings:
document_json: ModuloLineToString('<<dataset_file>>');
# Delete block: selected via the tags phase:main and type:delete.
- name: main-delete
tags:
phase: main
type: delete
statements:
# DELETEs the document stored under the key {seq_key}.
# The statement is named delete-document so that its name agrees with its "name" tag
# and with the HTTP method it issues.
# ok-status accepts any status code from 200 to 499.
- delete-document: DELETE <<protocol:http>>://{weighted_hosts}:<<stargate_port:8082>><<path_prefix:>>/v2/namespaces/<<keyspace:docs_crud_dataset>>/collections/<<table:docs_collection>>/{seq_key}
Accept: "application/json"
X-Cassandra-Request-Id: "{request_id}"
X-Cassandra-Token: "<<auth_token:my_auth_token>>"
ok-status: "[2-4][0-9][0-9]"
tags:
name: delete-document