remove duplicate doc files

This commit is contained in:
Jonathan Shook 2023-09-10 17:37:49 -05:00
parent 5d0a890e28
commit 2d1f23a8d8
57 changed files with 0 additions and 4894 deletions

View File

@ -1 +0,0 @@
[Slack Invite](https://docs.google.com/forms/d/e/1FAIpQLSdUOJ8iAPqyxsLfh1nBBsKShI53RAeuzYW4bKExmRMWjj4ufQ/viewform)

View File

@ -1,83 +0,0 @@
<!--
~ Copyright (c) 2023 nosqlbench
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <artifactId>mvn-defaults</artifactId>
        <groupId>io.nosqlbench</groupId>
        <version>${revision}</version>
        <relativePath>../mvn-defaults</relativePath>
    </parent>

    <artifactId>engine-docs</artifactId>
    <packaging>jar</packaging>
    <name>${project.artifactId}</name>
    <description>CLI for nosqlbench.</description>

    <properties>
        <javadoc.name>nosqlbench Docs</javadoc.name>
    </properties>

    <dependencies>
        <!-- <dependency>-->
        <!--     <groupId>io.nosqlbench</groupId>-->
        <!--     <artifactId>engine-vis</artifactId>-->
        <!--     <version>2.11.31-SNAPSHOT</version>-->
        <!-- </dependency>-->
        <dependency>
            <groupId>io.nosqlbench</groupId>
            <artifactId>docsys</artifactId>
            <version>${revision}</version>
        </dependency>
    </dependencies>

    <build>
        <resources>
            <resource>
                <directory>src/main/resources</directory>
                <filtering>true</filtering>
            </resource>
        </resources>
    </build>

    <profiles>
        <profile>
            <id>assemble</id>
            <activation>
                <activeByDefault>true</activeByDefault>
            </activation>
            <build>
                <plugins>
                    <plugin>
                        <groupId>org.apache.maven.plugins</groupId>
                        <artifactId>maven-assembly-plugin</artifactId>
                        <version>3.3.0</version>
                        <configuration>
                            <descriptorRefs>
                                <descriptorRef>jar-with-dependencies</descriptorRef>
                            </descriptorRefs>
                        </configuration>
                    </plugin>
                </plugins>
            </build>
        </profile>
    </profiles>
</project>

View File

@ -1,31 +0,0 @@
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.engine.docs;

import io.nosqlbench.nb.annotations.Service;
import io.nosqlbench.api.markdown.providers.DocsRootDirectory;
import io.nosqlbench.api.markdown.providers.RawMarkdownSource;

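/**
 * Exposes the markdown docs under the "docs-for-eb" root path as a
 * {@link RawMarkdownSource} service.
 */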
@Service(value = RawMarkdownSource.class, selector = "docs-for-eb")
public class NosqlBenchRawMarkdownSource extends DocsRootDirectory {

    @Override
    protected String getRootPathName() {
        return "docs-for-eb";
    }

}

View File

@ -1,35 +0,0 @@
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.engine.docs;

import io.nosqlbench.api.docsapi.Docs;
import io.nosqlbench.api.docsapi.DocsBinder;
import io.nosqlbench.docsys.api.DocsysDynamicManifest;
import io.nosqlbench.nb.annotations.Service;

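/**
 * Registers the "docs-for-eb" docs namespace with docsys, preferring the
 * in-tree source path during development and falling back to the bundled
 * "docs-for-nb/" resource path.
 */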
@Service(value = DocsysDynamicManifest.class, selector = "docs-for-eb")
public class NosqlbenchMarkdownManifest implements DocsysDynamicManifest {

    @Override
    public DocsBinder getDocs() {
        return new Docs().namespace("docs-for-eb")
            .addFirstFoundPath(
                "engine-docs/src/main/resources/docs-for-nb/",
                "docs-for-nb/"
            )
            .setEnabledByDefault(true)
            .asDocsBinder();
    }

}

View File

@ -1,79 +0,0 @@
---
title: 00 YAML Organization
weight: 00
---
# YAML Organization
It is best to keep every workload self-contained within a single YAML
file, including schema, data rampup, and the main phase of testing. The
phases of testing are controlled by tags as described below.
**NOTE:**
The phase names described below have been adopted as a convention within
the built-in workloads. It is strongly advised that new workload YAMLs use
the same tagging scheme so that workloads are more pluggable across YAMLs.
## Schema phase
The schema phase is simply a phase of your test which creates the
necessary schema on your target system. For CQL, this generally consists
of a keyspace and one or more table statements. There is no special
schema layer in nosqlbench. All statements executed are simply statements.
This provides the greatest flexibility in testing since every activity
type is allowed to control its DDL and DML using the same machinery.
The schema phase is normally executed with defaults for most parameters.
This means that statements will execute in the order specified in the
YAML, in serialized form, exactly once. This is a welcome side-effect of
how initial parameters like _cycles_ are set from the statements which
are activated by tagging.
You can mark statements as schema phase statements by adding this set of
tags to the statements, either directly, or by block:
    tags:
      block: schema
## Rampup phase
When you run a performance test, it is very important to be aware of how
much data is present. Higher density tests are more realistic for systems
which accumulate data over time, or which have a large working set of
data. The amount of data on the system you are testing should recreate a
realistic amount of data that you would run in production, ideally. In
general, there is a triangular trade-off between service time, op rate,
and data density.
It is the purpose of the _rampup_ phase to create the backdrop data on a
target system that makes a test meaningful for some level of data density.
Data density is normally discussed as average per node, but it is also
important to consider distribution of data as it varies from the least
dense to the most dense nodes.
Because it is useful to be able to add data to a target cluster in an
incremental way, the bindings which are used with a _rampup_ phase may
actually be different from the ones used for a _main_ phase. In most
cases, you want the rampup phase to create data in a way that
incrementally adds to the population of data in the cluster. This allows
you to add some data to a cluster with `cycles=0..1M` and then decide
whether to continue adding data using the next contiguous range of cycles,
with `cycles=1M..2M` and so on.
You can mark statements as rampup phase statements by adding this set of
tags to the statements, either directly, or by block:
    tags:
      block: rampup
## Main phase
The main phase of a nosqlbench scenario is the one during which you really
care about the metric. This is the actual test that everything else has
prepared your system for.
You can mark statements as main phase statements by adding this set of
tags to the statements, either directly, or by block:

    tags:
      block: main
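Putting the three phases together, a minimal sketch of a self-contained
workload using this tagging scheme might look like the following
(statement bodies elided; fill them in for your driver):

```yaml
blocks:
  - tags:
      block: schema
    statements:
      - create-schema: create keyspace ...   # DDL for your target system
  - tags:
      block: rampup
    statements:
      - rampup-insert: insert ...            # incremental background data
  - tags:
      block: main
    statements:
      - main-read: select ...                # the operations you measure
```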

View File

@ -1,55 +0,0 @@
---
title: 01 Statement Templates
weight: 01
---
# Statement Templates
A valid config file for an activity consists of statement templates, parameters for them, bindings to generate the data
to use with them, and tags for organizing them.
In essence, the config format is *all about configuring statements*. Every other element in the config format is in some
way modifying or otherwise helping create statements to be used in an activity.
Statement templates are the single most important part of a YAML config.
```yaml
# a single statement
statements:
- a single statement body
```
This is a valid activity YAML file in and of itself. It has a single statement template.
It is up to the individual activity types like _cql_, or _stdout_ to interpret the statement template in some way. The
example above is valid as a statement in the stdout activity, but it does not produce a valid CQL statement with the CQL
activity type. The contents of the statement template are free form text. If the statement template is valid CQL, then
the CQL activity type can use it without throwing an error. Each activity type determines what a statement means, and
how it will be used.
You can provide multiple statements, and you can use the YAML pipe to put them on multiple lines, indented a little
further in:
```yaml
statements:
  - |
    This is a statement, and the file format doesn't
    know how statements will be used!
  - |
    submit job {alpha} on queue {beta} with options {gamma};
```
Statements can be named:
```yaml
statements:
  - s1: |
      This is a statement, and the file format doesn't
      know how statements will be used!
  - s2: |
      submit job {alpha} on queue {beta} with options {gamma};
```
Actually, every statement in a YAML has a name. If you don't provide one, then a name is auto-generated for the
statement based on its position in the YAML file.
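For example, a minimal sketch (the auto-generated name in the comment
follows the positional pattern shown in the detailed examples later in
this guide):

```yaml
statements:
  - submit job {alpha} on queue {beta};
  # with no name given, this is read as if it were written:
  # - stmt1: submit job {alpha} on queue {beta};
```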

View File

@ -1,118 +0,0 @@
---
title: 02 Data Bindings
weight: 02
---
# Data Bindings
Procedural data generation is built into the nosqlbench runtime by way of the
[Virtual DataSet](http://virtdata.io/) library. This allows us to create named data generation recipes. These named
recipes for generated data are called bindings. Procedural generation for test data has
[many benefits](http://docs.virtdata.io/why_virtdata/why_virtdata/) over shipping bulk test data around, including speed
and deterministic behavior. With the VirtData approach, most of the hard work is already done for us. We just have to
pull in the recipes we want.
You can add a bindings section like this:
```yaml
bindings:
  alpha: Identity()
  beta: NumberNameToString()
  gamma: Combinations('0-9A-F;0-9;A-Z;_;p;r;o;')
  delta: WeightedStrings('one:1;six:6;three:3;')
```
This is a YAML map which provides names and function specifiers. The specifier named _alpha_ provides a function that
takes an input value and returns the same value. Together, the name and value constitute a binding named alpha. All of
the four bindings together are called a bindings set.
The above bindings block is also a valid activity YAML, at least for the _stdout_ activity type. The _stdout_ activity
can construct a statement template from the provided bindings if needed, so this is valid:
```text
[test]$ cat > stdout-test.yaml
bindings:
  alpha: Identity()
  beta: NumberNameToString()
  gamma: Combinations('0-9A-F;0-9;A-Z;_;p;r;o;')
  delta: WeightedStrings('one:1;six:6;three:3;')
# EOF (control-D in your terminal)
[test]$ ./nb run driver=stdout workload=stdout-test cycles=10
0,zero,00A_pro,six
1,one,00B_pro,six
2,two,00C_pro,three
3,three,00D_pro,three
4,four,00E_pro,six
5,five,00F_pro,six
6,six,00G_pro,six
7,seven,00H_pro,six
8,eight,00I_pro,six
9,nine,00J_pro,six
```
Above, you can see that the stdout activity type is ideal for experimenting with data generation recipes. It uses the
default `format=csv` parameter above, but it also supports formats like json, inlinejson, readout, and assignments.
This is all you need to provide a formulaic recipe for converting an ordinal value to a set of field values. Each time
nosqlbench needs to create a set of values as parameters to a statement, the functions are called with an input, known
as the cycle. The functions produce a set of named values that, when combined with a statement template, can yield an
individual statement for a database operation. In this way, each cycle represents a specific operation. Since the
functions above are pure functions, the cycle number of an operation will always produce the same operation, thus making
all nosqlbench workloads deterministic.
In the example above, you can see the cycle numbers down the left.
If you combine the statement section and the bindings sections above into one activity yaml, you get a slightly
different result, as the bindings apply to the statements that are provided, rather than creating a default statement
for the bindings. See the example below:
```text
[test]$ cat > stdout-test.yaml
statements:
  - |
    This is a statement, and the file format doesn't
    know how statements will be used!
  - |
    submit job {alpha} on queue {beta} with options {gamma};
bindings:
  alpha: Identity()
  beta: NumberNameToString()
  gamma: Combinations('0-9A-F;0-9;A-Z;_;p;r;o;')
  delta: WeightedStrings('one:1;six:6;three:3;')
# EOF (control-D in your terminal)
[test]$ ./nb run driver=stdout workload=stdout-test cycles=10
This is a statement, and the file format doesn't
know how statements will be used!
submit job 1 on queue one with options 00B_pro;
This is a statement, and the file format doesn't
know how statements will be used!
submit job 3 on queue three with options 00D_pro;
This is a statement, and the file format doesn't
know how statements will be used!
submit job 5 on queue five with options 00F_pro;
This is a statement, and the file format doesn't
know how statements will be used!
submit job 7 on queue seven with options 00H_pro;
This is a statement, and the file format doesn't
know how statements will be used!
submit job 9 on queue nine with options 00J_pro;
```
There are a few things to notice here. First, the statements that are executed are automatically alternated between. If
you had 10 different statements listed, they would all get their turn with 10 cycles. Since there were two, each was run
5 times.
Also, the statement that had named anchors acted as a template, whereas the other one was evaluated just as it was. In
fact, they were both treated as templates, but one of them had no anchors.
One more minor but important detail is that the fourth binding *delta* was not referenced directly in the statements.
Since the statements did not pair up an anchor with this binding name, it was not used. No values were generated for it.
This is how activities are expected to work when they are implemented correctly. The bindings themselves
are templates for data generation, used only when necessary. The bindings defined around a statement are more like a
menu for the statement: if the statement uses those bindings with `{named}` anchors, then the recipes will be used to
construct data when that statement is selected for a specific cycle. The cycle number both selects the statement (via
the op sequence) and provides the input value at the left side of the binding functions.

View File

@ -1,28 +0,0 @@
---
title: 03 Statement Params
weight: 03
---
# Statement Parameters
Statements within a YAML can be accessorized with parameters. These are known as _statement params_ and are different
from the parameters that you use at the activity level. They apply specifically to a statement template, and are
interpreted by an activity type when the statement template is used to construct a native statement form.
For example, the statement parameter `ratio` is used when an activity is initialized to construct the op sequence. In
the _cql_ activity type, the statement parameter `prepared` is a boolean that can be used to designate whether a CQL
statement should be prepared or not.
As with the bindings, a params section can be added at the same level, setting additional parameters to be used with
statements. Again, this is an example of modifying or otherwise creating a specific type of statement, but always in a
way specific to the activity type. Params can be thought of as statement properties. As such, params don't really do
much on their own, although they have the same basic map syntax as bindings:
```yaml
params:
  ratio: 1
```
As with statements, it is up to each activity type to interpret params in a useful way.
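As a sketch of how statement params combine in practice (the statement
bodies here are illustrative, and `prepared` is specific to the _cql_
activity type as noted above):

```yaml
statements:
  - read-kv: select value from baselines.keyvalue where key={key};
    ratio: 5          # five reads in the op sequence...
    prepared: true
  - write-kv: insert into baselines.keyvalue (key, value) values ({key},{value});
    ratio: 1          # ...for every one write
```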

View File

@ -1,92 +0,0 @@
---
title: 04 Statement Tags
weight: 04
---
# Statement Tags
Tags are used to mark and filter groups of statements for controlling which ones get used in a given scenario. Tags are
generally free-form, but there is a set of conventions that can make your testing easier.
An example:
```yaml
tags:
  name: foxtrot
  unit: bravo
```
### Tag Filtering
The tag filters provide a flexible set of conventions for filtering tagged statements. Tag filters are usually provided
as an activity parameter when an activity is launched. The rules for tag filtering are (updated in version 3.12):
0. If no conjugate is specified, `all(...)` is assumed. This is in keeping with the previous default. If you do specify
a conjugate wrapper around the tag filter, it must be in the above form. `all(...)`, `any(...)`, and `none(...)` are
allowed.
1. If no tag filter is specified, then the statement matches.
2. A tag name predicate like `tags=name` asserts the presence of a specific tag name, regardless of its value.
3. A tag value predicate like `tags=name:foxtrot` asserts the presence of a specific tag name and a specific value for it.
4. A tag pattern predicate like `tags=name:'fox.*'` asserts the presence of a specific tag name and a value that matches
the provided regular expression.
5. Multiple tag predicates may be specified as in `tags=name:'fox.*',unit:bravo`
6. The conjugate forms have these semantics:
   0. If the `all` conjugate form is used (the default), then if any predicate fails to match a tagged element, the
      whole tag filtering expression fails to match.
   1. If the `any` conjugate form is used, then if all predicates fail to match a tagged element, the whole tag
      filtering expression fails to match.
   2. If the `none` conjugate form is used, then if any predicate _matches_ a tagged element, the whole expression
      fails to match.
A demonstration:
```text
[test]$ cat > stdout-test.yaml
tags:
  name: foxtrot
  unit: bravo
statements:
  - "I'm alive!\n"
# EOF (control-D in your terminal)
# no tag filter matches any
[test]$ ./nb run driver=stdout workload=stdout-test
I'm alive!
# tag name assertion matches
[test]$ ./nb run driver=stdout workload=stdout-test tags=name
I'm alive!
# tag name assertion does not match
[test]$ ./nb run driver=stdout workload=stdout-test tags=name2
02:25:28.158 [scenarios:001] ERROR i.e.activities.stdout.StdoutActivity - Unable to create a stdout statement if you have no active statements or bindings configured.
# tag value assertion does not match
[test]$ ./nb run driver=stdout workload=stdout-test tags=name:bravo
02:25:42.584 [scenarios:001] ERROR i.e.activities.stdout.StdoutActivity - Unable to create a stdout statement if you have no active statements or bindings configured.
# tag value assertion matches
[test]$ ./nb run driver=stdout workload=stdout-test tags=name:foxtrot
I'm alive!
# tag pattern assertion matches
[test]$ ./nb run driver=stdout workload=stdout-test tags=name:'fox.*'
I'm alive!
# tag pattern assertion does not match
[test]$ ./nb run driver=stdout workload=stdout-test tags=name:'tango.*'
02:26:05.149 [scenarios:001] ERROR i.e.activities.stdout.StdoutActivity - Unable to create a stdout statement if you have no active statements or bindings configured.
# compound tag predicate matches every assertion
[test]$ ./nb run driver=stdout workload=stdout-test tags='name=fox.*',unit=bravo
I'm alive!
# compound tag predicate does not fully match
[test]$ ./nb run driver=stdout workload=stdout-test tags='name=fox.*',unit=delta
11:02:53.490 [scenarios:001] ERROR i.e.activities.stdout.StdoutActivity - Unable to create a stdout statement if you have no active statements or bindings configured.
# any(...) form will work as long as one of the tags match
[test]$ ./nb run driver=stdout workload=stdout-test tags='any(name=fox.*,thisone:wontmatch)',unit=bravo
I'm alive!
```

View File

@ -1,48 +0,0 @@
---
title: 05 Statement Blocks
weight: 05
---
# Statement Blocks
All the basic primitives described above (names, statements, bindings, params, tags) can be used to describe and
parameterize a set of statements in a yaml document. In some scenarios, however, you may need to structure your
statements in a more sophisticated way. You might want to do this if you have a set of common statement forms or
parameters that need to apply to many statements, or perhaps if you have several *different* groups of statements that
need to be configured independently.
This is where blocks become useful:
```text
[test]$ cat > stdout-test.yaml
bindings:
  alpha: Identity()
  beta: Combinations('u;n;u;s;e;d;')
blocks:
  - statements:
      - "{alpha},{beta}\n"
    bindings:
      beta: Combinations('b;l;o;c;k;1;-;COMBINATIONS;')
  - statements:
      - "{alpha},{beta}\n"
    bindings:
      beta: Combinations('b;l;o;c;k;2;-;COMBINATIONS;')
# EOF (control-D in your terminal)
[test]$ ./nb run driver=stdout workload=stdout-test cycles=10
0,block1-C
1,block2-O
2,block1-M
3,block2-B
4,block1-I
5,block2-N
6,block1-A
7,block2-T
8,block1-I
9,block2-O
```
This shows an important feature of blocks: all blocks inherit defaults for bindings, params, and tags from the
root document level. Any of these values that are defined at the base document level apply to all blocks contained in
that document, unless specifically overridden within a given block.
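As a further sketch (hypothetical tag and binding names, following the
same inheritance rules):

```yaml
tags:
  layer: common            # inherited by both blocks below
bindings:
  alpha: Identity()        # inherited by both blocks below
blocks:
  - statements:
      - "{alpha}\n"        # uses the inherited alpha binding
  - tags:
      layer: special       # overrides the inherited tag for this block only
    statements:
      - "{alpha}\n"
```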

View File

@ -1,289 +0,0 @@
---
title: 06 More on Statements
weight: 06
---
# More on Statements
The template forms available in nosqlbench are very flexible. That means that there are multiple ways
of expressing templates for statements or operations. Thankfully, in most cases, the forms look like
what they do, and most of the ways you can imagine constructing a statement will simply work, as long
as the required details are provided for which driver you are using.
## Statement Delimiting
Sometimes, you want to specify the text of a statement in different ways. Since statements are strings, the simplest way
for small statements is in double quotes. If you need to express a much longer statement with special characters and
newlines, then you can use YAML's literal block notation (signaled by the '|' character) to do so:
```yaml
statements:
  - |
    This is a statement, and the file format doesn't
    know how statements will be used!
  - |
    submit job {alpha} on queue {beta} with options {gamma};
```
Notice that the block starts on the following line after the pipe symbol. This is a very popular form in practice
because it treats the whole block exactly as it is shown, except for the initial indentations, which are removed.
Statements in this format can be raw statements, statement templates, or anything that is appropriate for the specific
activity type they are being used with. Generally, the statements should be thought of as a statement form that you want
to use in your activity -- something that has placeholders for data bindings. These placeholders are called *named
anchors*. The second line above is an example of a statement template, with anchors that can be replaced by data for
each cycle of an activity.
There is a variety of ways to represent block statements, with folding, without, with the newline removed, with it
retained, with trailing newlines trimmed or not, and so forth. For a more comprehensive guide on the YAML conventions
regarding multi-line blocks, see
[YAML Spec 1.2, Chapter 8, Block Styles](http://www.yaml.org/spec/1.2/spec.html#Block)
## Statement Sequences
To provide a degree of flexibility to the user for statement definitions, multiple statements may be provided together
as a sequence.
```yaml
# a list of statements
statements:
- "This a statement."
- "The file format doesn't know how statements will be used."
- "submit job {job} on queue {queue} with options {options};"
# an ordered map of statements by name
statements:
name1: statement one
name2: "statement two"
```
In the first form, the names are provided automatically by the YAML loader. In the second form, they are specified as
ordered map keys.
## Statement Properties
You can also configure individual statements with named properties, using the **statement properties** form:
```yaml
# a list of statements with properties
statements:
  - name: name1
    stmt: statement one
  - name: name2
    stmt: statement two
```
This is the most flexible configuration format at the statement level. It is also the most verbose. Because this format
names each property of the statement, it allows for other properties to be defined at this level as well. This includes
all of the previously described configuration elements: `name`, `bindings`, `params`, `tags`, and additionally `stmt`. A
detailed example follows:
```yaml
statements:
  - name: foostmt
    stmt: "{alpha},{beta}\n"
    bindings:
      beta: Combinations('COMBINATIONS;')
    params:
      parm1: pvalue1
    tags:
      tag1: tvalue1
    freeparam3: a value, as if it were assigned under the params block.
```
In this case, the values for `bindings`, `params`, and `tags` take precedence, overriding those set by the enclosing
block or document or activity when the names match. Parameters called **free parameters** are allowed here, such as
`freeparam3`. These are simply values that get assigned to the params map once all other processing has completed.
## Named Statement form
It is possible to mix the **`<name>: <statement>`** form as above in the example for mapping statements by name, so long
as some specific rules are followed. An example, which is equivalent to the above:
```yaml
statements:
  - foostmt: "{alpha},{beta}\n"
    parm1: pvalue1
    bindings:
      beta: Combinations('COMBINATIONS;')
    tags:
      tag1: tvalue1
```
The rules:
1. You must avoid using both the name property and the initial
**`<name>: <statement>`** together. Doing so will cause an error to be thrown.
2. Do not use the **`<name>: <statement>`** form in combination with a
**`stmt: <statement>`** property. It is not possible to detect if this occurs. Use caution if you choose to mix these forms.
As explained above, `parm1: pvalue1` is a *free parameter*, and is simply short-hand for setting values in the params
map for the statement.
## Named Statement Maps
By combining all the forms together with a map in the middle, we get this form, which allows for the
enumeration of multiple statements, each with an obvious name, and a set of properties:
```yaml
statements:
  - foostmt:
      stmt: "{alpha},{beta}\n"
      parm1: pvalue1
      bindings:
        beta: Combinations('COMBINATIONS;')
      tags:
        tag1: tvalue1
  - barstmt:
      optype: setvar
      parm3: 42
      parm5: true
      userid: 2342
```
This form is arguably the easiest to read, but retains all the expressive power of the other forms too.
The distinction between this form and the named properties form is that the structure underneath the
first value is a map rather than a single value. Particularly, under the 'foostmt' name above, all of the
content contained within it is formatted as its properties -- indented properties.
Here are the basic rules for using this form:
1. Each statement is indicated by a YAML list entry like '-'.
2. Each entry is a map with a single key. This key is taken as the statement name.
3. The properties of this map work exactly the same as for named properties above, but repeating
the name will throw an error since this is ambiguous.
4. If the template is being used for CQL or another driver type which expects a 'stmt' property,
it must be provided as an explicitly named 'stmt' property as in the foostmt example above.
Notice in the 'barstmt' example above that there is no "stmt" property. Some drivers
have more flexible op templates and may not require this. This is just a property name that was chosen
to represent the "main body" of a statement template in the shorter YAML forms. While the 'stmt'
property is required for drivers like CQL which have a solid concept for "statement body", it isn't
required for all driver types which may build their operations from other properties.
### Per-Statement Format
It is indeed possible to use any of the statement formats within each entry of a statement sequence:
```yaml
statements:
  - first statement body
  - name: statement3
    stmt: third statement body
  - second: second statement body
  - fourth: fourth statement body
    freeparam1: freeparamvalue1
    tags:
      type: preload
  - fifth:
      stmt: fifth statement body
      freeparam2: freeparamvalue2
      tags:
        tag2: tagvalue2
```
The above is valid nosqlbench YAML, although a reader would need to know about the rules explained above in order to
really make sense of it. For most cases, it is best to follow one format convention, but there is flexibility for
overrides and naming when you need it. The main thing to remember is that the statement form is determined on an
element-by-element basis for maximum flexibility.
## Detailed Examples
The above examples are explained in detail below in JSON schematic form, to help users and developers
understand the structural rules:
```yaml
statements:
  # ---------------------------------------------------------------------------------------
  # string form
  # detected when the element is a single string value
  - first statement body
  # read as:
  # {
  #   name: 'stmt1', // a generated name is also added
  #   stmt: 'first statement body'
  # }
  # ---------------------------------------------------------------------------------------
  # properties form
  # detected when the element is a map and the value of the first entry is not a map
  - name: statement3
    stmt: third statement body
  # read as:
  # {
  #   name: 'statement3',
  #   stmt: 'third statement body'
  # }
  # ---------------------------------------------------------------------------------------
  # named statement form:
  # detected when reading properties form and the first property name is not a reserved
  # word, like stmt, name, params, bindings, tags, ...
  - second: second statement body
  # read as:
  # {
  #   name: 'second',
  #   stmt: 'second statement body'
  # }
  # ---------------------------------------------------------------------------------------
  # properties form with free parameters:
  # detected when properties are used which are not reserved words.
  # Unrecognized words are pushed into the parameters map automatically.
  - fourth: fourth statement body
    freeparam1: freeparamvalue1
    tags:
      type: preload
  # read as:
  # {
  #   name: 'fourth',
  #   stmt: 'fourth statement body',
  #   params: {
  #     freeparam1: 'freeparamvalue1'
  #   },
  #   tags: {
  #     type: 'preload'
  #   }
  # }
  # ---------------------------------------------------------------------------------------
  # named statement maps
  # detected when the element is a map and the only entry is a map.
  - fifth:
      stmt: fifth statement body
      freeparam2: freeparamvalue2
      tags:
        tag2: tagvalue2
  # read as:
  # {
  #   name: 'fifth',
  #   stmt: 'fifth statement body',
  #   params: {
  #     freeparam2: 'freeparamvalue2'
  #   },
  #   tags: {
  #     tag2: 'tagvalue2'
  #   }
  # }
  # ---------------------------------------------------------------------------------------
```

View File

@ -1,58 +0,0 @@
---
title: 07 Multi-Docs
weight: 07
---
# Multi-Docs
The YAML spec allows for multiple yaml documents to be concatenated in the
same file with a separator:
```yaml
---
```
This offers an additional convenience when configuring activities. If you
want to parameterize or tag a set of statements with their own
bindings, params, or tags, but alongside another set of uniquely
configured statements, you need only put them in separate logical
documents, separated by a triple-dash.
For example:
```text
[test]$ cat > stdout-test.yaml
bindings:
  docval: WeightedStrings('doc1.1:1;doc1.2:2;')
statements:
  - "doc1.form1 {docval}\n"
  - "doc1.form2 {docval}\n"
---
bindings:
  numname: NumberNameToString()
statements:
  - "doc2.number {numname}\n"
# EOF (control-D in your terminal)
[test]$ ./nb run driver=stdout workload=stdout-test cycles=10
doc1.form1 doc1.1
doc1.form2 doc1.2
doc2.number two
doc1.form1 doc1.2
doc1.form2 doc1.1
doc2.number five
doc1.form1 doc1.2
doc1.form2 doc1.2
doc2.number eight
doc1.form1 doc1.1
```
This shows that you can use the power of blocks and tags together at one
level and also allow statements to be broken apart into a whole other
level of partitioning if desired.
**WARNING:**
The multi-doc support is there as a ripcord when you need it. However, it
is strongly advised that you keep your YAML workloads simple to start and
only use features like the multi-doc when you absolutely need it. In most
cases, blocks are a better choice. See examples in the standard
workloads.

View File

@ -1,37 +0,0 @@
---
title: 08 Template Params
weight: 08
---
# Template Params
All nosqlbench YAML formats support a parameter macro format that applies before YAML processing starts. It is a basic
macro facility that allows named anchors to be placed in the document as a whole:
```text
<<varname:defaultval>>
# or
TEMPLATE(varname,defaultval)
```
In this example, the name of the parameter is `varname`. It is given a default value of `defaultval`. If an activity
parameter named *varname* is provided, as in `varname=barbaz`, then this whole expression will be replaced with
`barbaz`. If none is provided then the default value will be used instead. For example:
```text
[test]$ cat > stdout-test.yaml
statements:
- "<<linetoprint:MISSING>>\n"
# EOF (control-D in your terminal)
[test]$ ./nb run driver=stdout workload=stdout-test cycles=1
MISSING
[test]$ ./nb run driver=stdout workload=stdout-test cycles=1 linetoprint="THIS IS IT"
THIS IS IT
```
If an empty value is desired by default, then simply use an empty string in your template, like `<<varname:>>` or
`TEMPLATE(varname,)`.
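As a sketch, the same statement from the example above, written with the
equivalent `TEMPLATE(...)` form:

```yaml
statements:
  - "TEMPLATE(linetoprint,MISSING)\n"
```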

View File

@ -1,39 +0,0 @@
---
title: 09 Statement Naming
weight: 09
---
# Statement Naming
Docs, Blocks, and Statements can all have names:
```yaml
name: doc1
blocks:
  - name: block1
    statements:
      - stmt1: statement1
      - name: st2
        stmt: statement2
---
name: doc2
...
```
This provides a layered naming scheme for the statements themselves. It is
not usually important to name things except for documentation or metric
naming purposes.
If no names are provided, then names are automatically created for blocks
and statements. Statements assigned at the document level are assigned
to "block0". All other statements are named with the
format `doc#--block#--stmt#`.
For example, the full name of statement1 above would
be `doc1--block1--stmt1`.
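Following the same scheme, the derived names for the example above would
be:

```text
doc1--block1--stmt1   # the statement named 'stmt1' in block1
doc1--block1--st2     # the statement named 'st2' in block1
```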
**NOTE:**
If you anticipate wanting to get metrics for a specific statement in
addition to the other metrics, then you will want to adopt the habit of
naming all your statements something basic and descriptive.

View File

@ -1,292 +0,0 @@
---
title: 10 Named Scenarios
weight: 10
---
# Named Scenarios
There is one final element of a yaml that you need to know about: _named
scenarios_.
**Named Scenarios allow anybody to run your testing workflows with a
single command.**
You can provide named scenarios for a workload like this:
```yaml
# contents of myworkloads.yaml
scenarios:
  default:
    - run driver=diag cycles=10 alias=first-ten
    - run driver=diag cycles=10..20 alias=second-ten
  longrun:
    - run driver=diag cycles=10M
```
This provides a way to specify more detailed workflows that users may want
to run without them having to build up a command line for themselves.
A couple of other forms are supported in the YAML, for terseness:
```yaml
scenarios:
  oneliner: run driver=diag cycles=10
  mapform:
    part1: run driver=diag cycles=10 alias=part1
    part2: run driver=diag cycles=20 alias=part2
```
These forms simply provide finesse for common editing habits, but they are
automatically read internally as a list. In the map form, the names are
discarded, but they may be descriptive enough for use as inline docs for
some users. The order is retained as listed, since the names have no
bearing on the order.
## Scenario selection
When a named scenario is run, it is *always* named, so that it can be
looked up in the list of named scenarios under your `scenarios:` property.
The only exception to this is when an explicit scenario name is not found
on the command line, in which case it is automatically assumed to be
_default_.
Some examples may be more illustrative:
```text
# runs the scenario named 'default' if it exists, or throws an error if it does not.
nb myworkloads
# or
nb myworkloads default
# runs the named scenario 'longrun' if it exists, or throws an error if it does not.
nb myworkloads longrun
# runs the named scenario 'longrun' if it exists, or throws an error if it does not.
# this is simply the canonical form which is more verbose, but more explicit.
nb scenario myworkloads longrun
# run multiple named scenarios from one workload, and then some from another
nb scenario myworkloads longrun default longrun scenario another.yaml name1 name2
# In this form ^ you may have to add the explicit form to avoid conflicts between
# workload names and scenario names. That's why the explicit form is provided, after all.
```
## Workload selection
The examples above contain no reference to a workload (formerly called
_yaml_). They don't need to, as they refer to themselves implicitly. You
may add a `workload=` parameter to the command templates if you like, but
this is never needed for basic use, and it is error prone to keep the
filename matched to the command template. Just leave it out by default.
_However_, if you are doing advanced scripting across multiple systems,
you can actually provide a `workload=` parameter particularly to use
another workload description in your test.
**NOTE:**
This is a powerful feature for workload automation and organization.
However, it can get unwieldy quickly. Caution is advised for deep-linking
too many scenarios in a workspace, as there is no mechanism for keeping
them in sync when small changes are made.
## Named Scenario Discovery
For named scenarios, there is a way for users to find all the named
scenarios that are currently bundled or in view of their current
directory. A couple simple rules must be followed by scenario publishers
in order to keep things simple:
1. Workload files in the current directory `*.yaml` are considered.
2. Workload files under the relative path `activities/` with
name `*.yaml` are considered.
3. The same rules are used when looking in the bundled nosqlbench, so
built-ins come along for the ride.
4. Any workload file that contains a `scenarios:` tag is included, but all
others are ignored.
This doesn't mean that you can't use named scenarios for workloads in
other locations. It simply means that when users use
the `--list-scenarios` option, these are the only ones they will see
listed.
## Parameter Overrides
You can override parameters that are provided by named scenarios. Any
parameter that you specify on the command line after your workload and
optional scenario name will be used to override or augment the commands
that are provided for the named scenario.
This is powerful, but it also means that you can sometimes munge
user-provided activity parameters on the command line with the named
scenario commands in ways that may not make sense. To solve this, the
parameters in the named scenario commands may be locked. You can lock them
silently, or you can provide a verbose locking that will cause an error if
the user even tries to adjust them.
Silent locking is provided with a form like `param==value`. Any silent
locked parameters will reject overrides from the command line, but will
not interrupt the user.
Verbose locking is provided with a form like `param===value`. Any time a
user provides a parameter on the command line for the named parameter, an
error is thrown and they are informed that this is not possible. This
level is provided for cases in which you would not want the user to be
unaware of an unset parameter which is germane and specific to the named
scenario.
All other parameters provided by the user will take the place of the
same-named parameters provided in *each* command template, in the order
they appear in the template. Any other parameters provided by the user
will be added to *each* of the command templates in the order they appear
on the command line.
This is a little counter-intuitive at first, but once you see some
examples it should make sense.
## Parameter Override Examples
Consider a simple workload with three named scenarios:
```yaml
# basics.yaml
scenarios:
  s1: run driver=stdout cycles=10
  s2: run driver=stdout cycles==10
  s3: run driver=stdout cycles===10
bindings:
  c: Identity()
statements:
  - A: "cycle={c}\n"
```
Running this with no options prompts the user to select one of the named
scenarios:
```text
$ nb basics
ERROR: Unable to find named scenario 'default' in workload 'basics', but you can pick from s1,s2,s3
$
```
### Basic Override example
If you run the first scenario `s1` with your own value for `cycles=7`, it
does as you ask:
```text
$ nb basics s1 cycles=7
Logging to logs/scenario_20200324_205121_554.log
cycle=0
cycle=1
cycle=2
cycle=3
cycle=4
cycle=5
cycle=6
$
```
### Silent Locking example
If you run the second scenario `s2` with your own value for `cycles=7`,
then it does what the locked parameter
`cycles==10` requires, without telling you that it is ignoring the
specified value on your command line.
```text
$ nb basics s2 cycles=7
Logging to logs/scenario_20200324_205339_486.log
cycle=0
cycle=1
cycle=2
cycle=3
cycle=4
cycle=5
cycle=6
cycle=7
cycle=8
cycle=9
$
```
Sometimes, this is appropriate, such as when specifying settings
like `threads==` for schema phases.
### Verbose Locking example
If you run the third scenario `s3` with your own value for `cycles=7`,
then you will get an error telling you that this is not possible.
Sometimes you want to make sure that the user knows a parameter should not
be changed, and that if they want to change it, they'll have to make their
own custom version of the scenario in question.
```text
$ nb basics s3 cycles=7
ERROR: Unable to reassign value for locked param 'cycles===7'
$
```
Ultimately, it is up to the scenario designer when to lock parameters for
users. The built-in workloads offer some examples on how to set these
parameters so that the right values are locked in place without bothering
the user, but some values are made very clear in how they should be set.
Please look at these examples for inspiration when you need them.
## Forcing Undefined (default) Parameters
If you want to ensure that any parameter in a named scenario template
remains unset in the generated scenario script, you can assign it a value
of UNDEF. The locking behaviors described above apply to this one as well.
Thus, for schema commands which rely on the default sequence length (which
is based on the number of active statements), you can set cycles==UNDEF to
ensure that when a user passes a cycles parameter the schema phase doesn't
break with too many cycles.
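As a sketch (assuming schema and rampup blocks tagged per the standard
conventions):

```yaml
scenarios:
  default:
    # schema keeps its default cycle count even if the user passes cycles=...
    - run driver=cql tags=block:schema cycles==UNDEF
    - run driver=cql tags=block:rampup cycles=100k
```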
## Automatic Parameters
Some parameters are already known due to the fact that you are using named
scenarios.
### workload
The `workload` parameter is, by default, set to the logical path (fully
qualified workload name) of the yaml file containing the named scenario.
However, if the command template contains this parameter, it may be
overridden by users as any other parameter depending on the assignment
operators as explained above.
### alias
The `alias` parameter is, by default, set to the expanded name of
WORKLOAD_SCENARIO_STEP, which means that each activity within the scenario
has a distinct and symbolic name. This is important for distinguishing
metrics from one another across workloads, named scenarios, and steps
within a named scenario. The above words are interpolated into the alias
as follows:
- WORKLOAD - The simple name part of the fully qualified workload name.
For example, with a workload (yaml path) of foo/bar/baz.yaml, the
WORKLOAD name used here would be `baz`.
- SCENARIO - The name of the scenario as provided on the command line.
- STEP - The name of the step in the named scenario. If you used the list
or string forms to provide a command template, then the steps are
automatically named as a zero-padded number representing the step in the
named scenario, starting from `000`, per named scenario. (The numbers
are not globally assigned)
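For example, assuming the tokens are joined as written in
WORKLOAD_SCENARIO_STEP, a workload at `foo/bar/baz.yaml` run with
scenario `longrun` would give its first step an alias like:

```text
baz_longrun_000
```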
Because it is important to have uniquely named activities for the sake of
sane metrics and logging, any alias provided when using named scenarios
which does not include the three tokens above will cause a warning to be
issued to the user explaining why this is a bad idea.
**NOTE:**
UNDEF is handled before alias expansion above, so it is possible to force
the default activity naming behavior above with `alias===UNDEF`. This is
generally recommended, and will inform users if they try to set the alias
in an unsafe way.

View File

@ -1,90 +0,0 @@
---
title: YAML Diagnostics
weight: 99
---
# YAML Diagnostics
This section describes errors that you might see if you have a YAML loading issue, and what you can do to fix them.
### Undefined Name-Statement Tuple
This exception is thrown when the statement body is not found in a statement definition in any of the supported formats.
For example, the following block will cause an error:
```yaml
statements:
  - name: statement-foo
    params:
      aparam: avalue
```
This is because `name` and `params` are reserved property names -- removed from the list of name-value pairs before free
parameters are read. If the statement is not defined before free parameters are read, then the first free parameter is
taken as the name and statement in `name: statement` form.
To correct this error, supply a statement property in the map, or simply replace the `name: statement-foo` entry with a
`statement-foo: statement body` at the top of the map:
Either of these will work:
```yaml
statements:
  - name: statement-foo
    stmt: statement body
    params:
      aparam: avalue
---
statements:
  - statement-foo: statement body
    params:
      aparam: avalue
```
In both cases, it is clear to the loader where the statement body should come from, and what (if any) explicit naming
should occur.
### Redefined Name-Statement Tuple
This exception is thrown when the statement name is defined in multiple ways. This is an explicit exception to avoid
possible ambiguity about which value the user intended. For example, the following statements definition will cause an
error:
```yaml
statements:
  - name: name1
    name2: statement body
```
This is an error because the statement is not defined before free parameters are read, and the `name: statement` form
includes a second definition for the statement name. In order to correct this, simply remove the separate `name` entry,
or use the `stmt` property to explicitly set the statement body. Either of these will work:
```yaml
statements:
  - name2: statement body
---
statements:
  - name: name1
    stmt: statement body
```
In both cases, there is only one name defined for the statement according to the supported formats.
### YAML Parsing Error
This exception is thrown when the YAML format is not recognizable by the YAML parser. If you are not working from
examples that are known to load cleanly, then please review your document for correctness according to the
[YAML Specification](http://www.yaml.org/spec/1.2/spec.html).
If you are sure that the YAML should load, then please
[submit a bug report](https://github.com/nosqlbench/nosqlbench/issues/new?labels=bug) with details on the type of YAML
file you are trying to load.
### YAML Construction Error
This exception is thrown when the YAML was loaded, but the configuration object was not able to be constructed from the
in-memory YAML document. If this error occurs, it may be a bug in the YAML loader implementation. Please
[submit a bug report](https://github.com/nosqlbench/nosqlbench/issues/new?labels=bug) with details on the type of YAML
file you are trying to load.

View File

@ -1,50 +0,0 @@
---
title: Designing Workloads
weight: 40
---
# Designing Workloads
Workloads in nosqlbench are always controlled by a workload definition.
Even the built-in workloads are simply pre-configured and controlled
from a single YAML file which is bundled internally.
With nosqlbench a standard YAML configuration format is provided that is
used across all activity types. This makes it easy to specify
statements, statement parameters, data bindings, and tags. This section
describes the standard YAML format and how to use it.
It is recommended that you read through the examples in each of the
design sections in order. This guide was designed to give you a detailed
understanding of workload construction with nosqlbench. The examples
will also give you better insight into how nosqlbench works at a
fundamental level.
## Multi-Protocol Support
You will notice that this guide is not overly CQL-specific. That is
because nosqlbench is a multi-protocol tool. All that is needed for you
to use this guide with other protocols is the release of more activity
types. Try to keep that in mind as you think about designing workloads.
## Advice for new builders
### Review existing examples
The built-in workloads that are included with nosqlbench are also easy to copy out as a starting point. You just need to
use two commands:
    # find a workload you want to copy
    nb --list-workloads

    # copy a workload to your local directory
    nb --copy cql-iot
### Follow the conventions
The tagging conventions described under the YAML Conventions section
will make your testing go smoother. All of the baselines that we publish
for nosqlbench will use this form.

View File

@ -1,30 +0,0 @@
---
title: Driver Types
weight: 50
---
# Driver Types
Each nosqlbench scenario is composed of one or more activities of a specific type. The types of activities available
are provided by the version of nosqlbench.
You can see this list at any time by running the command:
nb --list-drivers
Each one comes with its own built-in documentation. It can be accessed with this command:
nb help <driver>
This section contains the per-driver documentation that you get when you run the above command. These driver docs are
auto-populated when NoSQLBench is built, so they are exactly the same as you will see with the above command, only
rendered in HTML.
There may be additional documentation related to a given driver. To see the list of help topics, you
can run this command:
nb help topics
The help for any topic can be read this way:
nb help <topic>

View File

@ -1,196 +0,0 @@
---
title: 01 Commands
weight: 2
---
# Example Commands
Let's run a simple test against a cluster to establish some basic
familiarity with NoSQLBench.
## Create a Schema
We will start by creating a simple schema in the database. From your
command line, go ahead and execute the following command, replacing
the `host=<host-or-ip>` with that of one of your database nodes.
```text
./nb run driver=cql workload=cql-keyvalue tags=block:"schema.*" host=<host-or-ip>
```
This command is creating the following schema in your database:
```cql
CREATE KEYSPACE baselines
 WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}
 AND durable_writes = true;

CREATE TABLE baselines.keyvalue (
    key text PRIMARY KEY,
    value text
)
```
Let's break down each of those command line options.
`run` tells nosqlbench to run an activity.
`driver=...` is used to specify the activity type (driver). In this case
we are using `cql`, which tells nosqlbench to use the DataStax Java Driver
and execute CQL statements against a database.
`workload=...` is used to specify the workload definition file that
defines the activity.
In this example, we use `cql-keyvalue` which is a pre-built workload that
is packaged with nosqlbench.
`tags=block:"schema.*"` tells nosqlbench to run the yaml block that has
the `block:"schema.*"` defined as one of its tags.
In this example, that is the DDL portion of the `cql-keyvalue`
workload. `host=...` tells nosqlbench how to connect to your database;
only one host is necessary.
If you like, you can verify the result of this command by describing your
keyspace in cqlsh or DataStax Studio with
`DESCRIBE KEYSPACE baselines`.
## Load Some Data
Before running a test of typical access patterns where you want to capture
the results, you need to make the test more interesting than loading an
empty table. For this, we use the rampup phase.
Before sending our test writes to the database, we will use the `stdout`
activity type so we can see what nosqlbench is generating for CQL
statements.
Go ahead and execute the following command:
./nb run driver=stdout workload=cql-keyvalue tags=block:rampup cycles=10
You should see 10 of the following statements in your console
```cql
insert into baselines.keyvalue (key, value) values (0,382062539);
insert into baselines.keyvalue (key, value) values (1,774912474);
insert into baselines.keyvalue (key, value) values (2,949364593);
insert into baselines.keyvalue (key, value) values (3,352527683);
insert into baselines.keyvalue (key, value) values (4,351686621);
insert into baselines.keyvalue (key, value) values (5,114304900);
insert into baselines.keyvalue (key, value) values (6,439790106);
insert into baselines.keyvalue (key, value) values (7,564330072);
insert into baselines.keyvalue (key, value) values (8,296173906);
insert into baselines.keyvalue (key, value) values (9,97405552);
```
NoSQLBench deterministically generates data, so the generated values will
be the same from run to run.
Now we are ready to write some data to our database. Go ahead and execute
the following from your command line:
./nb run driver=cql workload=cql-keyvalue tags=block:rampup host=<host-or-ip> cycles=100k --progress console:1s
Note the differences between this and the command that we used to generate
the schema.
`tags=block:rampup` is running the yaml block in `cql-keyvalue` that has
only INSERT statements.
`cycles=100k` will run a total of 100,000 operations, in this case,
100,000 writes. You will want to pick an appropriately large number of
cycles in actual testing to make your main test meaningful.
**NOTE:**
The cycles parameter is not just a quantity. It is a range of values.
The `cycles=n` format is short for
`cycles=0..n`, which makes cycles a zero-based range. For example,
cycles=5 means that the activity will use cycles 0,1,2,3,4, but not 5. The
reason for this is explained in detail in the Activity Parameters section.
These parameters are explained in detail in the section on _Activity
Parameters_.
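In short, as an illustration of the range form:

```text
cycles=5      # short for cycles=0..5, which runs cycles 0,1,2,3,4
cycles=100k   # short for cycles=0..100000
```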
`--progress console:1s` will print the progression of the run to the
console every 1 second.
You should see output that looks like this:
```text
cql-keyvalue: 0.00%/Running (details: min=0 cycle=1 max=100000)
cql-keyvalue: 0.00%/Running (details: min=0 cycle=1 max=100000)
cql-keyvalue: 0.32%/Running (details: min=0 cycle=325 max=100000)
cql-keyvalue: 1.17%/Running (details: min=0 cycle=1171 max=100000)
cql-keyvalue: 2.36%/Running (details: min=0 cycle=2360 max=100000)
cql-keyvalue: 3.65%/Running (details: min=0 cycle=3648 max=100000)
cql-keyvalue: 4.61%/Running (details: min=0 cycle=4613 max=100000)
cql-keyvalue: 5.59%/Running (details: min=0 cycle=5593 max=100000)
cql-keyvalue: 7.14%/Running (details: min=0 cycle=7138 max=100000)
cql-keyvalue: 8.87%/Running (details: min=0 cycle=8868 max=100000)
...
cql-keyvalue: 100.00%/Finished (details: min=0 cycle=100000 max=100000)
```
## Run the main test phase
Now that we have a base dataset of 100k rows in the database, we will now
run a mixed read / write workload, by default this runs a 50% read / 50%
write workload.
./nb run driver=cql workload=cql-keyvalue tags=block:main host=<host-or-ip> cycles=100k cyclerate=5000 threads=50 --progress console:1s
You should see output that looks like this:
```text
Logging to logs/scenario_20190812_154431_028.log
cql-keyvalue: 0.50%/Running (details: min=0 cycle=500 max=100000)
cql-keyvalue: 2.50%/Running (details: min=0 cycle=2500 max=100000)
cql-keyvalue: 6.70%/Running (details: min=0 cycle=6700 max=100000)
cql-keyvalue: 11.16%/Running (details: min=0 cycle=11160 max=100000)
cql-keyvalue: 14.25%/Running (details: min=0 cycle=14250 max=100000)
cql-keyvalue: 18.41%/Running (details: min=0 cycle=18440 max=100000)
cql-keyvalue: 22.76%/Running (details: min=0 cycle=22760 max=100000)
cql-keyvalue: 27.27%/Running (details: min=0 cycle=27300 max=100000)
cql-keyvalue: 31.81%/Running (details: min=0 cycle=31810 max=100000)
cql-keyvalue: 36.34%/Running (details: min=0 cycle=36340 max=100000)
cql-keyvalue: 40.90%/Running (details: min=0 cycle=40900 max=100000)
cql-keyvalue: 45.48%/Running (details: min=0 cycle=45480 max=100000)
cql-keyvalue: 50.05%/Running (details: min=0 cycle=50050 max=100000)
cql-keyvalue: 54.36%/Running (details: min=0 cycle=54360 max=100000)
cql-keyvalue: 58.91%/Running (details: min=0 cycle=58920 max=100000)
cql-keyvalue: 63.40%/Running (details: min=0 cycle=63400 max=100000)
cql-keyvalue: 66.96%/Running (details: min=0 cycle=66970 max=100000)
cql-keyvalue: 71.61%/Running (details: min=0 cycle=71610 max=100000)
cql-keyvalue: 76.11%/Running (details: min=0 cycle=76130 max=100000)
cql-keyvalue: 80.66%/Running (details: min=0 cycle=80660 max=100000)
cql-keyvalue: 85.22%/Running (details: min=0 cycle=85220 max=100000)
cql-keyvalue: 89.80%/Running (details: min=0 cycle=89800 max=100000)
cql-keyvalue: 94.46%/Running (details: min=0 cycle=94460 max=100000)
cql-keyvalue: 98.93%/Running (details: min=0 cycle=98930 max=100000)
cql-keyvalue: 100.00%/Finished (details: min=0 cycle=100000 max=100000)
```
We have a few new command line options here:
`tags=block:main` is using a new block in our activity's yaml that
contains both read and write queries.
`threads=50` is an important one. The default for nosqlbench is to run
with a single thread. This is not adequate for workloads that will be
running many operations, so threads is used as a way to increase
concurrency on the client side.
`cyclerate=5000` is used to control the operations per second that are
initiated by nosqlbench. This command line option is the primary means to
rate limit the workload, and here we are running at 5000 ops/sec.
## Now What?
Note in the above output, we
see `Logging to logs/scenario_20190812_154431_028.log`.
By default, nosqlbench records the metrics from the run in this file. We
will go into detail about these metrics in the next section, Viewing
Results.


@ -1,58 +0,0 @@
---
title: 02 Results
weight: 3
---
# Example Results
We just ran a very simple workload against our database. In that example,
we saw that nosqlbench writes to a log file, and it is in that log file
that the most basic form of metrics is displayed.
## Log File Metrics
For our previous run, we saw that nosqlbench was writing
to `logs/scenario_20190812_154431_028.log`
Even when you don't configure nosqlbench to write its metrics to another
location, it will periodically report all the metrics to the log file. At
the end of a scenario, before nosqlbench shuts down, it will flush the
partial reporting interval again to the logs. This means you can always
look in the logs for metrics information.
**WARNING:**
If you look in the logs for metrics, be aware that the last report will
only contain a partial interval of results. When looking at the last
partial window, only metrics which average over time or which compute the
mean for the whole test will be meaningful.
Below is a sample of the log that gives us our basic metrics. There is a
lot to digest here; for now, we will focus on only a subset of the most
important metrics.
```text
2019-08-12 15:46:00,274 INFO [main] i.e.c.ScenarioResult [ScenarioResult.java:48] -- BEGIN METRICS DETAIL --
2019-08-12 15:46:00,294 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=GAUGE, name=cql-keyvalue.cycles.config.burstrate, value=5500.0
2019-08-12 15:46:00,295 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=GAUGE, name=cql-keyvalue.cycles.config.cyclerate, value=5000.0
2019-08-12 15:46:00,295 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=GAUGE, name=cql-keyvalue.cycles.waittime, value=3898782735
2019-08-12 15:46:00,298 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=HISTOGRAM, name=cql-keyvalue.resultset-size, count=100000, min=0, max=1, mean=8.0E-5, stddev=0.008943914131967056, median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0
2019-08-12 15:46:01,703 INFO [main] i.e.c.ScenarioResult [ScenarioResult.java:56] -- END METRICS DETAIL --
```
The log contains lots of information on metrics, but this is obviously
_not_ the most desirable way to consume metrics from nosqlbench.
We recommend that you use one of these methods, according to your
environment or tooling available:
1. `--docker-metrics` with a local docker-based grafana dashboard (See the
section on Docker Based Metrics)
2. Send your metrics to a dedicated graphite server
with `--report-graphite-to graphitehost`
3. Record your metrics to local CSV files
with `--report-csv-to my_metrics_dir`
4. Record your metrics to HDR logs
with `--log-histograms my_hdr_metrics.log`
See the command line reference for details on how to route your metrics to
a metrics collector or format of your preference.
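As a sketch, several of these can be combined in a single run; the metrics
directory and histogram log names here are hypothetical:
```text
./nb run driver=cql workload=cql-keyvalue tags=block:main host=<host-or-ip> cycles=100k \
  --report-csv-to my_metrics_dir --log-histograms my_hdr_metrics.log
```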


@ -1,87 +0,0 @@
---
title: 03 Metrics
weight: 4
---
# Example Metrics
A set of core metrics are provided for every workload that runs with nosqlbench, regardless of the activity type and
protocol used. This section explains each of these metrics and shows an example of them from the log file.
## metric: result
This is the primary metric that should be used to get a quick idea of the throughput and latency for a given run. It
encapsulates the entire operation life cycle (i.e. bind, execute, get result back).
For this example we see that we averaged 3732 operations/second with 3.6ms 75th percentile latency and 23.9ms 99th
percentile latency. Note that the raw metrics are in microseconds. This duration_unit may change depending on how a user
configures nosqlbench, so always double-check it.
```text
2019-08-12 15:46:01,310 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=TIMER,
name=cql-keyvalue.result, count=100000, min=233.48, max=358596.607, mean=3732.00338612, stddev=10254.850416061185,
median=1874.815, p75=3648.767, p95=10115.071, p98=15855.615, p99=23916.543, p999=111292.415,
mean_rate=4024.0234405430424, m1=3514.053841156124, m5=3307.431472596865, m15=3268.6786509004132,
rate_unit=events/second, duration_unit=microseconds
```
## metric: result-success
This metric shows whether there were any errors during the run. You can confirm that the count is equal to the number of
cycles for the run if you are expecting or requiring zero failed operations.
Here we see that all 100k of our cycles succeeded. Note that the metrics for throughput and latency here are slightly
different from the `result` metric, simply because this is a separate timer that only includes operations which
completed with no exceptions.
```text
2019-08-12 15:46:01,452 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=TIMER,
name=cql-keyvalue.result-success, count=100000, min=435.168, max=358645.759, mean=3752.40990808,
stddev=10251.524945886964, median=1889.791, p75=3668.479, p95=10154.495, p98=15884.287, p99=24280.063,
p999=111443.967, mean_rate=4003.3090048756894, m1=3523.40328629036, m5=3318.8463896065778, m15=3280.480326762243,
rate_unit=events/second, duration_unit=microseconds
```
## metric: resultset-size
For read workloads, this metric shows the size of the result set sent back to nosqlbench from the server. This is useful
to confirm that you are reading rows that already exist in the database.
```text
2019-08-12 15:46:00,298 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=HISTOGRAM,
name=cql-keyvalue.resultset-size, count=100000, min=0, max=1, mean=8.0E-5, stddev=0.008943914131967056,
median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0
```
## metric: tries
NoSQLBench will retry failures 10 times by default; this is configurable via the `maxtries` command line option for the
cql activity type. This metric shows a histogram of the number of tries that each operation required. In this example,
there were no retries, as the `count` is 100k.
```text
2019-08-12 15:46:00,341 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=HISTOGRAM,
name=cql-keyvalue.tries, count=100000, min=1, max=1, mean=1.0, stddev=0.0, median=1.0,
p75=1.0, p95=1.0, p98=1.0, p99=1.0, p999=1.0
```
### More Metrics
nosqlbench provides many ways to report the metrics from a run, including:
- Built-in Docker Dashboard
- Reporting to CSV
- Reporting to Graphite
- Reporting to HDR
To get more information on these options, see the output of
./nb --help
### Congratulations
You have completed your first run with nosqlbench!
In the 'Next Steps' section, you'll find options for how to continue, whether you are looking for basic testing or
something more advanced.


@ -1,42 +0,0 @@
---
title: Next Steps
weight: 5
---
# Next Steps
Now that you've run nosqlbench for the first time and seen what it does, you can choose what level of customization you
want for further testing.
The sections below describe key areas that users typically customize when working with nosqlbench.
Everyone who uses nosqlbench will want to get familiar with the 'NoSQLBench Basics' section below. This is essential
reading for new and experienced testers alike.
## High-Level Users
Several canonical workloads are already baked-in to nosqlbench for immediate use. If you are simply wanting to drive
workloads from nosqlbench without building a custom workload, then you'll want to learn about the available workloads
and their options.
Recommended reading for high-level testing workflow:
1. 'Built-In Workloads'
2. 'NoSQLBench Basics'
## Workload Builders
If you want to use nosqlbench to build a tailored workload that closely emulates what a specific application would do,
then you can build a YAML file that specifies all of the details of an iterative workload. You can specify the access
patterns, data distributions, and more.
The recommended reading for this is:
1. 'NoSQLBench Basics'
2. All of the 'Designing Workloads' section.
3. The online examples (find the links in the Designing Workloads section).
## Scenario Developers
For advanced scenario designs, iterative testing models, or analysis methods, you can use
ECMAScript to control the scenario from start to finish. This is an advanced feature that is not recommended for
first-time users. A guide for scenario developers will be released in increments.


@ -1,88 +0,0 @@
---
title: Quick Start Example
weight: 20
---
# Quick Start Example
## Getting NoSQLBench
NoSQLBench is packaged directly as a Linux binary named `nb` and as an executable Java jar named `nb.jar`.
## Downloading
The Linux binary is recommended, since it comes with its own JVM and eliminates the need to manage Java downloads. Both
can be obtained at the releases section of the main NoSQLBench project:
- [NoSQLBench Releases](https://github.com/nosqlbench/nosqlbench/releases)
**NOTE:**
Once you download the binary, you may need to `chmod +x nb` to make it
executable. In order to run AppImage binaries, like nb, you need to have
fuse support on your system. This is already provided on most
distributions. If, after downloading and executing nb, you get an error,
please consult the
[AppImage troubleshooting page](https://docs.appimage.org/user-guide/run-appimages.html#troubleshooting).
This documentation assumes you are using the Linux binary, initiating
NoSQLBench commands with `./nb`. If you are using the jar, just
replace `./nb` with `java -jar nb.jar` when running commands. If you are
using the jar version, Java 15 is recommended, and will be required soon.
## Run a cluster
This section requires you to have a CQL system to connect to.
If you don't already have one, you can start an instance of DSE with this one-liner:
docker run -e DS_LICENSE=accept --name my-dse -p 9042:9042 -d datastax/dse-server:6.7.7
or consult the instructions at the
[Apache Cassandra docker hub landing page](https://hub.docker.com/_/cassandra).
## Running
To run a simple built-in workload run:
./nb cql-iot
To get a list of built-in scenarios run:
# Get a list of all named scenarios and parameters
./nb --list-scenarios
If you want a simple list of yamls which contain named scenarios, run:
# Get a simple list of yamls containing named scenarios
./nb --list-workloads
**NOTE:**
These commands will include workloads that were shipped with nb and
workloads in your local directory. To learn more about how to design
custom workloads, see
[designing workloads](/index.html#/docs/designing_workloads.html)
To provide your own contact points (comma separated), add the `hosts=`
parameter
./nb cql-iot hosts=host1,host2
Additionally, if you have docker installed on your local system, and your user has permissions to use it, you can use
`--docker-metrics` to stand up a live metrics dashboard at port 3000.
./nb cql-iot --docker-metrics
This example doesn't go into much detail about what it is doing. It is here to show you how quickly you can start
running real workloads without having to learn much about the machinery that makes it happen.
The rest of this section has a more elaborate example that exposes some of
the basic options you may want to adjust for your first serious test.
**NOTE:**
If you want to see system-level metrics from your cluster, it is possible
to get these as well as Apache Cassandra level metrics by using the DSE
Metrics Collector (if using DSE), or by setting up a metrics feed to the
Prometheus instance in your local docker stack. You can find the DSE
Metrics Collector docs
[here](https://docs.datastax.com/en/monitoring/doc/monitoring/metricsCollector/mcExportMetricsDocker.html)
.


@ -1,65 +0,0 @@
---
title: Getting Support
weight: 10
---
# Getting Support
In general, our goals with NoSQLBench are to make the help systems and
examples wrap around the users like a suit of armor, so that they feel
capable of doing most things without having to ask for help. Please keep
this in mind when looking for personal support from our community, and
help us find those places where the docs are lacking. Maybe you can help
us by adding some missing docs!
## Built-In Docs
The documentation for NoSQLBench is quite thorough. On the command line,
you can see a list of built-in docs with the command:
nb help topics
To read any topic, simply use the command:
nb help <topic>
The documentation system you are looking at right now includes the same
docs you can find above, and more. They are automatically included
when NoSQLBench is built.
## NoSQLBench Discord Server
We have a discord server. This is where users and developers can discuss
anything about NoSQLBench and support each other.
Please [join us](https://discord.gg/dBHRakusMN) there if you are a new
user of NoSQLBench!
## General Feedback
These guidelines are mirrored at the
[Submitting Feedback](https://github.com/nosqlbench/nosqlbench/wiki/Submitting-Feedback)
wiki page at the nosqlbench project site, which is also where
any `[Submit Feedback]` links will take you.
## Bug Fixes
If you think you have found a bug, please
[file a bug report](https://github.com/nosqlbench/nosqlbench/issues/new?labels=bug).
nosqlbench is actively used within DataStax, and verified bugs will get
attention as resources permit. Bug reports which are more detailed, or
which include steps to reproduce, will get attention first.
## Feature Requests
If you would like to see something in nosqlbench that is not there
yet, please
[submit a feature request](https://github.com/nosqlbench/nosqlbench/issues/new?labels=feature).
## Documentation Requests
If you would like to see a specific nosqlbench or testing topic added to
the guidebook, please
[request docs content](https://github.com/nosqlbench/nosqlbench/issues/new?labels=docs).


@ -1,63 +0,0 @@
---
title: NoSQLBench Intro
weight: 0
---
# Welcome to NoSQLBench
Welcome to the documentation for NoSQLBench. This is a power tool that emulates real application workloads. This means
that you can fast-track performance, sizing and data model testing without writing your own testing harness.
To get started right away, jump to the
[Quick Start Example](/index.html#/docs/getting_started) from the menu on the left.
To see the ways you can get NoSQLBench, check out the project site
[DOWNLOADS.md](https://github.com/nosqlbench/nosqlbench/blob/master/DOWNLOADS.md).
## What is NoSQLBench?
NoSQLBench is a serious performance testing tool for the NoSQL ecosystem.
**NoSQLBench brings advanced testing capabilities into one tool that are not found in other testing tools.**
- You can run common testing workloads directly from the command line. You can start doing this within 5 minutes of
reading this.
- You can generate virtual data sets of arbitrary size, with deterministic data and statistically shaped values.
- You can design custom workloads that emulate your application, contained in a single file, based on statement
templates - no IDE or coding required.
- You can immediately plot your results in a docker and grafana stack on Linux with a single command line option.
- When needed, you can open the access panels and rewire the runtime behavior of NoSQLBench to do advanced testing,
including a full scripting environment with Javascript.
The core machinery of NoSQLBench has been built with attention to detail. It has been battle tested within DataStax as a
way to help users validate their data models, baseline system performance, and qualify system designs for scale.
In short, NoSQLBench wishes to be a programmable power tool for performance testing. However, it is somewhat generic. It
doesn't know directly about a particular type of system, or protocol. It simply provides a suitable machine harness in
which to put your drivers and testing logic. If you know how to build a client for a particular kind of system,
NoSQLBench will let you load it like a plugin and control it dynamically.
Initially, NoSQLBench comes with support for CQL, but we would like to see this expanded with contributions from others.
## Origins
The code in this project comes from multiple sources. The procedural data generation capability was known before as
'Virtual Data Set'. The core runtime and scripting harness was from the 'EngineBlock' project. The CQL support was
previously used within DataStax. In March of 2020, DataStax and the project maintainers for these projects decided to
put everything into one OSS project in order to make contributions and sharing easier for everyone. Thus, the new
project name and structure was launched as nosqlbench.io. NoSQLBench is an independent project that is primarily
sponsored by DataStax.
We offer NoSQLBench as a new way of thinking about testing systems. It is not limited to testing only one type of
system. It is our wish to build a community of users and practice around this project so that everyone in the NoSQL
ecosystem can benefit from common concepts and understanding and reliable patterns of use.
## Scalable User Experience
NoSQLBench endeavors to be valuable to all users. We do this by making it easy for you, our user, to do just what you
need without worrying about the rest. If you need to do something simple, it should be simple to find the right settings
and just do it. If you need something more sophisticated, then you should be able to find what you need with a
reasonable amount of effort and no surprises.
That is the core design principle behind NoSQLBench. We hope you like it.


@ -1,403 +0,0 @@
---
title: Activity Parameters
weight: 05
---
# Activity Parameters
Activity parameters are passed as named arguments for an activity, either
on the command line or via a scenario script. On the command line, these
take the form of
<paramname>=<paramvalue>
Some activity parameters are universal in that they can be used with any
driver type. These parameters are recognized by nosqlbench whether or not
they are recognized by a particular driver implementation. These are
called _core parameters_. Only core activity parameters are documented
here.
**NOTE:**
To see what activity parameters are valid for a given activity type, see
the documentation for that activity type with
`nb help <activity type>`.
When starting out, you will want to familiarize yourself with these
parameters. The most important ones to learn about first are `driver`,
`cycles`, and `threads`.
## driver
For historic reasons, you can also use `type`. They both mean the same
thing for now, but `driver` is more descriptive. The `type` parameter will
continue to be supported in this major version (3.x), but it will be an
error to use it in 4.x and newer.
- `driver=<activity type>`
- _default_: inferred from `alias` or `yaml` parameters, or unset
- _required_: yes, unless inferred
- _dynamic_: no
Every activity is powered by a named ActivityType. Thus, you must set
the `driver` (or `type`) parameter. If you do not specify this parameter,
it will be inferred from a substring match against the alias and/or yaml
parameters. If there is more than one valid match for a valid type value,
then you must set the type parameter directly.
Telling nosqlbench what type of activity will be run also determines
what other parameters are considered valid and how they will be used. So
in this way, the type parameter is actually the base parameter for any
activity. When used with scenario commands like `run` or `start`, an
activity of the named type will be initialized, and then further activity
parameters on the command line will be used to configure it before it is
started.
## alias
- `alias=<alias>`
- _default_: inferred from yaml, or 'UNSET'
- _required_: no
- _dynamic_: no
You *should* set the _alias_ parameter when you have multiple activities,
when you want to name metrics per-activity, or when you want to control
activities via scripting.
Each activity can be given a symbolic name known as an _alias_. It is good
practice to give all your activities an alias, since this determines the
name used in logging, metrics, and even scripting control.
_default value_ : The name of any provided YAML file is used as the
basis for the default alias. Otherwise, the activity type name is used.
This is a convenience for simple test scenarios only.
## threads
- `threads=<threads>`
- _default_: 1
- _required_: no
- _dynamic_: yes
You *should* set the _threads_ parameter when you need to ramp up a
workload.
Each activity can be created with a number of threads. It is important to
adjust this setting to a level appropriate for the client system running
nosqlbench.
_default value_ : For now, the default is simply *1*. Users must be aware
of this setting and adjust it to a reasonable value for their workloads.
`threads=auto` : When you set `threads=auto`, it will set the number of
threads to 10x the number of cores in your system. There is no distinction
here between full cores and hardware threads. This is generally a
reasonable number of threads to tap into the processing power of a client
system.
`threads=__x` : When you set `threads=5x` or `threads=10x`, you will set
the number of threads to some multiplier of the logical CPUs in the local
system.
**NOTE:**
The threads parameter will work slightly differently for activities using
the async parameter. For example, when `async=500` is provided, then the
number of async operations is split between all configured threads, and
each thread will juggle a number of in-flight operations asynchronously.
Without the async parameter, threads determines the logical concurrency
level of nosqlbench in the classic 'request-per-thread' mode. Neither mode
is strictly correct, and both modes can be used for more accurate testing
depending on the constraints of your environment.
A good rule of thumb for setting threads for maximum effect is to set it
relatively high, such as 10x vCPU when running synchronous workloads
(when not providing the async parameter), and 5x vCPU for all async
workloads. Variation in system dynamics makes it difficult to peg an ideal
number, so experimentation is encouraged while you dial in your settings
initially.
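For example, a sketch of letting nosqlbench size the thread count from the
local CPU count; the workload and host here are placeholders:
```text
./nb run driver=cql workload=cql-keyvalue tags=block:main host=<host-or-ip> cycles=100k threads=auto
```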
## cycles
- `cycles=<cycle count>`
- `cycles=<cycle min>..<cycle max>`
- _default_: same as `stride`
- _required_: no
- _dynamic_: no
The cycles parameter determines the starting and ending point for an
activity. It determines the range of values which will act as seed values
for each operation. For each cycle of the test, a statement is built from
a statement template and executed as an operation.
If you do not set the cycles parameter, then it will automatically be set
to the size of the sequence. The sequence is simply the length of the op
sequence that is constructed from the active statements and ratios in your
activity YAML.
You *should* set the cycles for every activity except for schema-like
activities, or activities which you run just as a sanity check of active
statements.
In the `cycles=<cycle count>` version, the count indicates the total
number of cycles, and is equivalent to `cycles=0..<cycle max>`. In both
cases, the max value is not the actual number of the last cycle. This is
because all cycle parameters define a closed-open interval. In other
words, the minimum value is either zero by default or the specified
minimum value, but the maximum value is the first value *not* included in
the interval. This means that you can easily stack intervals over
subsequent runs while knowing that you will cover all logical cycles
without gaps or duplicates. For example, given `cycles=1000` and then
`cycles=1000..2000`, and then `cycles=2000..5K`, you know that all cycles
between 0 (inclusive) and 5000 (exclusive) have been specified.
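As a sketch, the stacked ranges described above could be issued as three
consecutive runs of a hypothetical activity using the built-in diag driver:
```text
./nb run driver=diag alias=step1 cycles=1000
./nb run driver=diag alias=step2 cycles=1000..2000
./nb run driver=diag alias=step3 cycles=2000..5K
```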
## stride
- `stride=<stride>`
- _default_: same as op sequence length
- _required_: no
- _dynamic_: no
Usually, you don't want to provide a setting for stride, but it is still
important to understand what it does. Within nosqlbench, each time a
thread needs to allocate a set of cycles to operate on, it takes a
contiguous range of values from a shared atomic value. Thus, the stride is
the unit of micro-batching within nosqlbench. It also means that you can
use stride to optimize a workload by setting the value higher than the
default. For example, if you are running a single-statement workload at a
very high rate, it doesn't make sense for threads to allocate one op at a
time from a shared atomic value. You can simply set
`stride=1000` to cause (ballpark estimation) about 1000X less internal
contention.
The stride is initialized to the calculated sequence length. The sequence
length is simply the number of operations in the op sequence that is
planned from your active statements and their ratios.
You usually do not want to set the stride directly. If you do, make sure
it is a multiple of what it would normally be set to if you need to ensure
that sequences are not divided up differently. This can be important when
simulating the access patterns of applications.
**NOTE:**
When simulating multi-op access patterns in non-async mode, the stride
metric can tell you how long it took for a whole group of operations to
complete.
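For example, a sketch of raising the stride on a single-statement,
high-rate workload; the workload and host names are placeholders:
```text
./nb run driver=cql workload=cql-keyvalue tags=block:main host=<host-or-ip> stride=1000
```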
## async
- `async=<ops>`
- _default_: unset
- _required_: no
- _dynamic_: no
The `async=<ops>` parameter puts an activity into an asynchronous dispatch
mode and configures each thread to juggle a proportion of the operations
specified. If you specify `async=500 threads=10`, then each of 10 threads
will manage execution of 50 operations at a time. With async mode, a
thread will always prepare and send operations if there are fewer in
flight than it is allotted before servicing any pending responses.
Async mode also puts threads into a different sequencing behavior. When in
async mode, responses from an operation may arrive in a different order
than they are sent, and thus linearized operations can't be guaranteed as
with the non-async mode. This means that sometimes you want to avoid
async mode when you are intentionally simulating access patterns with
multiple linearized operations per user as you may see in your
application.
The absence of the async parameter leaves the activity in the default
non-async mode, where each thread works through a sequence of ops one
operation at a time.
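For example, a minimal sketch using the numbers from above; the workload
and host are placeholders:
```text
# 10 threads, each juggling 50 in-flight operations (500 total)
./nb run driver=cql workload=cql-keyvalue tags=block:main host=<host-or-ip> async=500 threads=10
```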
## cyclerate
- `cyclerate=<cycle_per_second>`
- `cyclerate=<cycles_per_second>,<burst_ratio>`
- _default_: unset
- _required_: no
- _dynamic_: yes
The cyclerate parameter sets a maximum op rate for individual cycles
within the activity, across the whole activity, irrespective of how many
threads are active.
**NOTE:**
The cyclerate is a rate limiter, and can thus only throttle an activity to
be slower than it would otherwise run. Rate limiting is also an invasive
element in a workload, and will always come at a cost. For extremely high
throughput testing, consider carefully whether your testing would benefit
more from concurrency-based throttling as with async or the striderate
described below.
When the cyclerate parameter is provided, two additional metrics are
tracked: the wait time and the response time. See the 'Reference|Timing
Terms' section for more details on these metrics.
_default_: None. When the cyclerate parameter is not provided, an activity
runs as fast as it can given how fast operations can complete.
Examples:
- `cyclerate=1000` - set the cycle rate limiter to 1000 ops/s and a
  default burst ratio of 1.1.
- `cyclerate=1000,1.0` - same as above, but with the burst ratio set to 1.0
  (use it or lose it, not usually desired)
- `cyclerate=1000,1.5` - same as above, with the burst ratio set to 1.5
  (aka 50% burst allowed)
Synonyms:
- `rate`
- `targetrate`
### burst ratio
This is only an optional part of the cyclerate as shown in examples above.
If you do not specify it when you initialize a cyclerate, then it defaults
to 1.1. The burst ratio is only valid as part of a rate limit and can not
be specified by itself.
* _default_: `1.1`
* _dynamic_: yes
The nosqlbench rate limiter provides a sliding scale between strict rate
limiting and average rate limiting. The difference between them is
controlled by a _burst ratio_ parameter. When the burst ratio is 1.0
(burst up to 100% relative rate), the rate limiter acts as a strict rate
limiter, disallowing faster operations from using time that was previously
forfeited by prior slower operations. This is a "use it or lose it" mode
that means things like GC events can steal throughput from a running
client as a necessary effect of losing time in a strict timing sense.
When the burst ratio is set to higher than 1.0, faster operations may
recover lost time from previously slower operations. For example, a burst
ratio of 1.3 means that the rate limiter will allow bursting up to 130% of
the base rate, but only until the average rate is back to 100% relative
speed. This means that any valleys created in the actual op rate of the
client can be converted into plateaus of throughput above the strict rate,
but only at a speed that fits within (op rate * burst ratio). This allows
for workloads to approximate the average target rate over time, with
controllable bursting rates. This ability allows for near-strict behavior
while allowing clients to still track truer to rate limit expectations, so
long as the overall workload is not saturating resources.
**NOTE:**
The default burst ratio of 1.1 makes testing results slightly more stable
on average, but can also hide some short-term slow-downs in system
throughput. It is set at the default to fit most testers' expectations for
averaging results, but it may not be strict enough for your testing
purposes. However, a strict setting of 1.0 nearly always adds cold/startup
time to the result, so if you are testing for steady state, be sure to
account for this across test runs.
## striderate
- `striderate=<strides per second>`
- `striderate=<strides per second>,<burst_ratio>`
- _default_: unset
- _required_: no
- _dynamic_: yes
The `striderate` parameter allows you to limit the start of a stride
according to some rate. This works almost exactly like the cyclerate
parameter, except that it blocks a whole group of operations from starting
instead of a single operation. The striderate can use a burst ratio just
as the cyclerate.
This sets the target rate for strides. In nosqlbench, a stride is a group
of operations that are dispatched and executed together within the same
thread. This is useful, for example, to emulate application behaviors in
which some outside request translates to multiple internal requests. It is
also a way to optimize a client runtime for more efficiency and
throughput. The stride rate limiter applies to the whole activity
irrespective of how many threads it has.
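For example, a sketch pacing whole strides rather than individual cycles;
the rate and burst ratio values here are hypothetical:
```text
./nb run driver=cql workload=cql-keyvalue tags=block:main host=<host-or-ip> striderate=100,1.1
```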
**WARNING:**
When using the cyclerate and striderate options together, operations are
delayed based on both rate limiters. If the relative rates are not
synchronized with the size of a stride, then one rate limiter will
artificially throttle the other. Thus, it usually doesn't make sense to
use both of these settings in the same activity.
## seq
- `seq=<bucket|concat|interval>`
- _default_: `seq=bucket`
- _required_: no
- _dynamic_: no
The `seq=<bucket|concat|interval>` parameter determines the type of
sequencing that will be used to plan the op sequence. The op sequence is a
look-up-table that is used for each stride to pick statement forms
according to the cycle offset. It is simply the sequence of statements
from your YAML that will be executed, but in a pre-planned, and highly
efficient form.
An op sequence is planned for every activity. With the default ratio on
every statement as 1, and the default bucket scheme, the basic result is
that each active statement will occur once in the order specified. Once
you start adding ratios to statements, the most obvious thing that you
might expect will happen: those statements will occur multiple times to
meet their ratio in the op mix. You can customize the op mix further by
changing the seq parameter to concat or interval.
**NOTE:**
The op sequence is a look up table of statement templates, *not*
individual statements or operations. Thus, the cycle still determines the
uniqueness of an operation as you would expect. For example, if statement
form ABC occurs 3x per sequence because you set its ratio to 3, then each
of these would manifest as a distinct operation with fields determined by
distinct cycle values.
There are three schemes to pick from:
### bucket
This is a round robin planner which draws operations from buckets in
circular fashion, removing each bucket as it is exhausted. For example,
the ratios A:4, B:2, C:1 would yield the sequence A B C A B A A. The
ratios A:1, B:5 would yield the sequence A B B B B B.
### concat
This simply takes each statement template as it occurs in order and
duplicates it in place to achieve the ratio. The ratios above (A:4, B:2,
C:1) would yield the sequence A A A A B B C for the concat sequencer.
### interval
This is arguably the most complex sequencer. It takes each ratio as a
frequency over a unit interval of time, and apportions the associated
operation to occur evenly over that time. When two operations would be
assigned the same time, then the order of appearance establishes
precedence. In other words, statements appearing first win ties for the
same time slot. The ratios A:4 B:2 C:1 would yield the sequence A B C A A
B A. This occurs because, over the unit interval (0.0,1.0), A is assigned
the positions `A: 0.0, 0.25, 0.5, 0.75`, B is assigned the
positions `B: 0.0, 0.5`, and C is assigned position `C: 0.0`. These
offsets are all sorted with a position-stable sort, and then the
associated ops are taken as the order.
In detail, the rendering appears
as `0.0(A), 0.0(B), 0.0(C), 0.25(A), 0.5(A), 0.5(B), 0.75(A)`, which
yields `A B C A A B A` as the op sequence.
This sequencer is most useful when you want a stable ordering of
operations from a rich mix of statement types, where each operation is
spaced as evenly as possible over time, and where it is not important to
control the cycle-by-cycle sequencing of statements.
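As a sketch, the A:4, B:2, C:1 ratios above could be written into a
workload YAML (statement bodies hypothetical) and then planned with any of
the three schemes:
```yaml
statements:
 - A: "statement A ..."
   ratio: 4
 - B: "statement B ..."
   ratio: 2
 - C: "statement C ..."
   ratio: 1
```
Running the activity with `seq=bucket`, `seq=concat`, or `seq=interval`
would then produce the respective sequences shown above.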
## hdr_digits
- `hdr_digits=3`
- _default_: `4`
- _required_: no
- _dynamic_: no
This parameter determines the number of significant digits used in all HDR
histograms for metrics collected from this activity. The default of 4
allows 4 significant digits, which means *up to* 10000 distinct histogram
buckets per named metric, per histogram interval. This does not mean that
there _will be_ 10000 distinct buckets, but it means there could be if
there is significant volume and variety in the measurements.
If you are running a scenario that creates many activities, then you can
set `hdr_digits=1` on some of them to save client resources.
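For example, a sketch of a secondary activity started with reduced
histogram precision; the alias and cycle count are hypothetical:
```text
start driver=diag alias=background hdr_digits=1 cycles=100k
```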


@ -1,113 +0,0 @@
---
title: Advanced Metrics
---
# Advanced Metrics
## Unit of Measure
All metrics collected from activities are recorded in nanoseconds and ops per second. All histograms are recorded with 4
digits of precision using HDR histograms.
## Metric Outputs
Metrics from a scenario run can be gathered in multiple ways:
- In the log output
- In CSV files
- In HDR histogram logs
- In Histogram Stats logs (CSV)
- To a monitoring system via graphite
- via the --docker-metrics option
With the exception of the `--docker-metrics` approach, these forms may be used in combination. The command
line options for enabling these are documented in the built-in help, although some examples of these may be found below.
## Metrics via Graphite
If you would like to have all of your testing data in one place, then you may be interested in reporting your
measurements to a monitoring system. For this, nosqlbench includes a
[Metrics Library](https://github.com/dropwizard/metrics). Graphite reporting is baked in as the default reporter.
In order to enable graphite reporting, use one of these option formats:
--report-graphite-to <host>
--report-graphite-to <host>:<port>
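For example, a sketch that assumes a hypothetical graphite host named
`graphitehost` listening on graphite's customary plaintext port:
```text
./nb run driver=cql workload=cql-keyvalue host=<host-or-ip> --report-graphite-to graphitehost:2003
```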
## Metric Naming
### Prefix
Core metrics use the prefix _nosqlbench_ by default. You can override this with the `--metrics-prefix` option:
--metrics-prefix myclient.group5
### Identifiers
Metrics associated with a specific activity will have the activity alias in their name. There is a set of core metrics
which are always present regardless of the activity type. The names and types of additional metrics provided for each
activity type vary.
Sometimes, an activity type will expose metrics on a per statement basis, measuring over all invocations of a given
statement as defined in the YAML. In these cases, you will see `--` separating the name components of the metric. At the
most verbose, a metric name could take on a form like
`<activity>.<docname>--<blockname>--<statementname>--<metricname>`, although this is rare when you name your statements,
which is recommended. Just keep in mind that the double dash connects an activity's alias with named statements *within*
that activity.
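As a hypothetical illustration of that form, an activity aliased `main` running a statement named `insert-user` in a
block named `writes` of a doc named `myworkload` could produce a timer named:
```text
main.myworkload--writes--insert-user--result
```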
## HDR Histograms
### Recording HDR Histogram Logs
You can record details of histograms from any compatible metric (histograms and timers) with an option like this:
--log-histograms hdrdata.log
If you want to record only certain metrics in this way, then use this form:
--log-histograms 'hdrdata.log:.*suffix'
Notice that the option is enclosed in single quotes. This is because the second part of the option value is a regex. The
'.*suffix' pattern matches any metric name that ends with "suffix". Effectively, leaving out the pattern is the same as
using '.*', which matches all metrics. Any valid regex is allowed here.
Metrics may be included in multiple logs, but care should be taken not to overdo this. Keeping higher fidelity histogram
reservoirs does come with a cost, so be sure to be specific in what you record as much as possible.
If you want to specify the recording interval, use this form:
--log-histograms 'hdrdata.log:.*suffix:5s'
If you want to specify the interval, you must use the third form above, although it is valid to leave the pattern empty,
such as 'hdrdata.log::5s'.
Each interval specified will be tracked in a discrete reservoir in memory, so they will not interfere with each other in
terms of accuracy.
### Recording HDR Histogram Stats
You can also record basic snapshots of histogram data on a periodic interval just like above with HDR histogram logs.
The option to do this is:
--log-histostats 'hdrstats.log:.*suffix:10s'
Everything works the same as for hdr histogram logging, except that the format is in CSV as shown in the example below:
~~~
#logging stats for session scenario-1479089852022
#[Histogram log format version 1.0]
#[StartTime: 1479089852.046 (seconds since epoch), Sun Nov 13 20:17:32 CST 2016]
#Tag,Interval_Start,Interval_Length,count,min,p25,p50,p75,p90,p95,p98,p99,p999,p9999,max
Tag=diag1.delay,0.457,0.044,1,16,31,31,31,31,31,31,31,31,31,31
Tag=diag1.cycles,0.48,0.021,31,4096,8191,8191,8191,8191,8191,8191,8191,8191,8191,2097151
Tag=diag1.delay,0.501,0.499,1,1,1,1,1,1,1,1,1,1,1,1
Tag=diag1.cycles,0.501,0.499,498,1024,2047,2047,4095,4095,4095,4095,4095,4095,4095,4194303
...
~~~
This includes the metric name (Tag), the interval start time and length (from the beginning of collection time), number
of metrics recorded (count), minimum magnitude, a number of percentile measurements, and the maximum value. Notice that
the format used is similar to that of the HDR logging, although instead of including the raw histogram data, common
percentiles are recorded directly.


@ -1,124 +0,0 @@
---
title: CLI Scripting
---
# Command-Line Scripting
Sometimes you want to run a set of workloads in a particular order, or call other specific test setup logic in
between phases or workloads. While the full scripting environment allows you to do this and more, it is not necessary to
write javascript for every scenario.
For more basic setup and sequencing needs, you can achieve a fair degree of flexibility on the command line. A few key
API calls are supported directly on the command line. This guide explains each of them, what they do, and how to use
them together.
## Script Construction
As the command line is parsed from left to right, the scenario script is built in an internal scripting buffer. Once
the command line is fully parsed, this script is executed. Each of the commands below is effectively a macro for a
snippet of script. It is important to remember that order matters.
## Command line format
Newlines are not allowed when building scripts from the command line. As long as you follow the allowed forms below, you
can simply string multiple commands together with spaces between. As usual, single word options without double dashes
are commands, key=value style parameters apply to the previous command, and all other commands with
--this-style
are non-scripting options.
## Concurrency & Control
All activities that run during a scenario run under the control of, but independently from, the scenario script. This
means that you can have a number of activities running while the scenario script is doing its own thing. The scenario
only completes when both the scenario script and the activities are finished.
### start an activity
`start driver=<activity type> alias=<alias> ...`
You can start an activity with this command. At the time this command is evaluated, the activity is started, and the
script continues without blocking. This is an asynchronous start of an activity. If you start multiple activities in
this way, they will run concurrently.
The type argument is required to identify the activity type to run. The alias parameter is not strictly required, unless
you want to be able to interact with the started activity later. In any case, it is a good idea to name all your
activities with a meaningful alias.
### stop an activity
`stop <alias>`
Stop an activity with the given alias. This is synchronous, and causes the scenario to pause until the activity is
stopped. This means that all threads for the activity have completed and signalled that they're in a stopped state.
You can stop a single activity alias, or any which match a regex pattern. If the value provided
contains spaces, semicolons or commas, then it is split into words and each word is used as if
the stop command had been called on it in turn.
If the alias name you supply matches the regular expression `[a-zA-Z_][a-zA-Z0-9_.]*`, i.e. an alphanumeric
name with dots or underscores and no leading digit, then it is taken as a literal name and only
matches the same literal alias. However, if there are any other characters, it is presumed to be
a regular expression and matched as such against all currently running activities.
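As a sketch, assuming activities aliased `load1`, `load2`, and `verify` are running:
```text
stop verify      # a literal alias; stops only 'verify'
stop 'load.*'    # contains a regex character; stops 'load1' and 'load2'
```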
### await an activity
`await <alias>`
Await the normal completion of an activity with the given alias. This causes the scenario script to pause while it waits
for the named activity to finish. This does not tell the activity to stop. It simply puts the scenario script into a
paused state until the named activity is complete.
### run an activity
`run driver=<activity type> alias=<alias> ...`
Run an activity to completion, waiting until it is complete before continuing with the scenario script. It is
effectively the same as
start driver=<activity type> ... alias=<alias>
await <alias>
### wait millis
`waitmillis <milliseconds>`
Pause the scenario script for this many milliseconds. This is useful for controlling workload run duration, etc.
### add a script
`script <script file>`
Add the contents of the named file to the scenario script buffer.
### add a fragment
`fragment <script text>`
Add the contents of the next argument to the scenario script buffer. If the last character is a
semicolon, then a newline is also added immediately after.
## An example CLI script
~~~text
./nb \
start driver=stdout alias=a cycles=100K workload=cql-iot tags=block:main\
start driver=stdout alias=b cycles=200K workload=cql-iot tags=block:main\
waitmillis 10000 \
await a \
stop b
~~~
In this CLI script, the backslashes are necessary in order to keep everything on the same command line. Here is a
narrative of what happens when it is run.
1. An activity named 'a' is started, with 100K cycles of work.
2. An activity named 'b' is started, with 200K cycles of work.
3. While these activities run, the scenario script waits for ten seconds.
4. If a is complete, the await returns immediately. If not, the
script waits for a to complete its 100K cycles.
5. b is immediately stopped.
6. Because all activities are stopped or complete, and the script is complete, the scenario exits.


@ -1,250 +0,0 @@
---
title: NoSQLBench CLI Options
weight: 01
---
# The NoSQLBench Command Line
This is the same documentation you get in markdown format with the
`nb --help` command.
---------------------------------------
### Command-Line Options ###
Help ( You're looking at it. )
--help
Short options, like '-v' represent simple options, like verbosity. Using multiples increases the
level of the option, like '-vvv'.
Long options, like '--help' are top-level options that may only be used once. These modify general
behavior, or allow you to get more details on how to use nosqlbench.
All other options are either commands, or named arguments to commands. Any single word without
dashes is a command that will be converted into script form. Any option that includes an equals sign
is a named argument to the previous command. The following example is a command line with a command
*start*, and two named arguments to that command.
./nb start driver=diag alias=example
### Discovery options ###
These options help you learn more about running nosqlbench, and about the plugins that are
present in your particular version.
Get a list of additional help topics that have more detailed documentation:
./nb help topics
Provide specific help for the named activity type:
./nb help <activity type>
List the available drivers:
--list-drivers
List the available scenarios:
--list-scenarios
List only the available workloads which contain the above scenarios:
--list-workloads
Copy a workload or other file to your local directory as a starting point:
--copy <name>
Provide the metrics that are available for scripting
--list-metrics <activity type> [ <activity name> ]
### Execution Options ###
This is how you actually tell nosqlbench what scenario to run. Each of these commands appends
script logic to the scenario that will be executed. These are considered as commands, and can occur
in any order and quantity. The only rule is that arguments in the arg=value form will apply to the
preceding script or activity.
Add the named script file to the scenario, interpolating named parameters:
script <script file> [arg=value]...
Add the named activity to the scenario, interpolating named parameters:
activity [arg=value]...
### General options ###
These options modify how the scenario is run.
Specify a directory for scenario log files:
--logs-dir <dirname>
Specify a limit on logfiles (old files will be purged):
--logs-max <count>
Specify the priority level of file logs:
--logs-level <level>
where `<level>` can be one of OFF, ERROR, WARN, INFO, DEBUG, TRACE, or ALL
Specify an override for one or more classes:
--log-level-override com.foobarbaz:DEBUG,com.barfoobaz:TRACE
Specify the logging pattern for console and logfile:
--logging-pattern '%date %level [%thread] %logger{10} [%file:%line] %msg%n'
--logging-pattern 'TERSE'
Specify the logging pattern for console only:
--console-pattern '%date %level [%thread] %logger{10} [%file:%line] %msg%n'
--console-pattern 'TERSE-ANSI'
Specify the logging pattern for logfile only:
--logfile-pattern '%date %level [%thread] %logger{10} [%file:%line] %msg%n'
--logfile-pattern 'VERBOSE'
# See https://logging.apache.org/log4j/2.x/manual/layouts.html#Pattern_Layout
# These shortcuts are allowed
TERSE %8r %-5level [%t] %-12logger{0} %msg%n%throwable
VERBOSE %d{DEFAULT}{GMT} [%t] %logger %-5level: %msg%n%throwable
TERSE-ANSI %8r %highlight{%-5level} %style{%C{1.} [%t] %-12logger{0}} %msg%n%throwable
VERBOSE-ANSI %d{DEFAULT}{GMT} [%t] %highlight{%logger %-5level}: %msg%n%throwable
# ANSI variants are auto promoted for console if --ansi=enable
# ANSI variants are auto demoted for logfile in any case
Explicitly enable or disable ANSI logging support:
(ANSI support is enabled if the TERM environment variable is defined)
--ansi=enabled
--ansi=disabled
Specify a directory and enable CSV reporting of metrics:
--report-csv-to <dirname>
Specify the graphite destination and enable reporting:
--report-graphite-to <addr>[:<port>]
Specify the interval for graphite or CSV reporting in seconds:
--report-interval 10
Specify the metrics name prefix for graphite reporting:
--metrics-prefix <metrics-prefix>
Log all HDR histogram data to a file:
--log-histograms histodata.log
--log-histograms 'histodata.log:.*'
--log-histograms 'histodata.log:.*:1m'
--log-histograms 'histodata.log:.*specialmetrics:10s'
Log HDR histogram stats to a CSV file:
--log-histostats stats.csv
--log-histostats 'stats.csv:.*' # same as above
--log-histostats 'stats.csv:.*:1m' # with 1-minute interval
--log-histostats 'stats.csv:.*specialmetrics:10s'
Adjust the HDR histogram precision:
--hdr-digits 3
The default is 3 digits, which creates 1000 equal-width histogram buckets for every named metric in
every reporting interval. For longer running tests, or for tests which require a finer grain of
precision in metrics, you can set this up to 4 or 5. Note that this only sets the global default.
Each activity can also override this value with the hdr_digits parameter. Be aware that each
increase in this number multiplies the amount of detail tracked on the client by 10x, so use
caution.
Adjust the progress reporting interval:
--progress console:1m
or
--progress logonly:5m
NOTE: The progress indicator on console is provided by default unless logging levels are turned up
or there is a script invocation on the command line.
If you want to add in classic time decaying histogram metrics for your histograms and timers, you
may do so with this option:
--classic-histograms prefix
--classic-histograms 'prefix:.*' # same as above
--classic-histograms 'prefix:.*specialmetrics' # subset of names
Name the current session, for logfile naming, etc. By default, this will be "scenario-TIMESTAMP", and
a logfile will be created for this name.
--session-name <name>
Enlist nosqlbench to stand up your metrics infrastructure using a local docker runtime:
--docker-metrics
When this option is set, nosqlbench will start graphite, prometheus, and grafana automatically on
your local docker, configure them to work together, and point nosqlbench to send metrics to the
system automatically. It also imports a base dashboard for nosqlbench and configures grafana
snapshot export to share with a central DataStax grafana instance (grafana can be found on
localhost:3000 with the default credentials admin/admin).
### Console Options ###
Increase console logging levels: (Default console logging level is *warning*)
-v (info)
-vv (debug)
-vvv (trace)
--progress console:1m (disables itself if -v options are used)
These levels affect *only* the console output level. Other logging level parameters affect logging
to the scenario log, stored by default in logs/...
Show version, long form, with artifact coordinates.
--version
### Summary Reporting
The classic metrics logging format is used to report results into the logfile for every scenario.
This format is not generally human-friendly, so a better summary report is provided by default to
the console and/or a specified summary file.
Examples:
# report to console if session ran more than 60 seconds
--report-summary-to stdout:60
# report to auto-named summary file for every session
--report-summary-to _LOGS_/_SESSION_.summary
# do both (the default)
--report-summary-to stdout:60,_LOGS_/_SESSION_.summary
Values of `stdout` or `stderr` send summaries directly to the console, and any other pattern is
taken as a file name.
You can use `_SESSION_` and `_LOGS_` to automatically name the file according to the current session
name and log directory.
The reason for the optional timing parameter is to allow the results of short scenario runs to be
squelched. Metrics for short runs are not generally accurate or meaningful. Spamming the console
with boiler-plate in such cases is undesirable. If the minimum session length is not specified, it
is assumed to be 0, meaning that a report will always show on that channel.


@ -1,115 +0,0 @@
---
title: Core Statement Params
weight: 06
---
# Core Statement Parameters
Some statement parameters are recognized by the nosqlbench runtime and can
be used on any statement in a YAML file.
## *ratio*
A statement parameter called _ratio_ is supported by every workload. It
can be attached to a statement, a block, or a document-level parameter
block. It sets the relative ratio of a statement in the op sequence before
an activity is started.
When an activity is initialized, all of the active statements are combined
into a sequence based on their relative ratios. By default, all statement
templates are initialized with a ratio of 1 if none is specified by the
user.
For example, consider the statements below:
```yaml
statements:
- s1: "select foo,bar from baz where ..."
ratio: 1
- s2: "select bar,baz from foo where ..."
ratio: 2
- s3: "select baz,foo from bar where ..."
ratio: 3
```
If all statements are activated (there is no tag filtering), then the
activity will be initialized with a sequence length of 6. In this case,
the relative ratio of statement "s3" will be 50% overall. If you filtered
out the first statement, then the sequence would be 5 operations long. In
this case, the relative ratio of statement "s3" would be 60% overall. It
is important to remember that statement ratios are always relative to the
total sum of the active statements' ratios.
**NOTE:**
Because the ratio works so closely with the activity parameter `seq`, the
description for that parameter is included below.
### *seq* (activity level - do not use on statements)
- `seq=<bucket|concat|interval>`
- _default_: `seq=bucket`
- _required_: no
- _dynamic_: no
The `seq=<bucket|concat|interval>` parameter determines the type of
sequencing that will be used to plan the op sequence. The op sequence is a
look-up-table that is used for each stride to pick statement forms
according to the cycle offset. It is simply the sequence of statements
from your YAML that will be executed, but in a pre-planned, and highly
efficient form.
An op sequence is planned for every activity. With the default ratio on
every statement as 1, and the default bucket scheme, the basic result is
that each active statement will occur once in the order specified. Once
you start adding ratios to statements, the most obvious thing that you
might expect will happen: those statements will occur multiple times to
meet their ratio in the op mix. You can customize the op mix further by
changing the seq parameter to concat or interval.
**NOTE:**
The op sequence is a look up table of statement templates, *not*
individual statements or operations. Thus, the cycle still determines the
uniqueness of an operation as you would expect. For example, if statement
form ABC occurs 3x per sequence because you set its ratio to 3, then each
of these would manifest as a distinct operation with fields determined by
distinct cycle values.
There are three schemes to pick from:
### bucket
This is a round robin planner which draws operations from buckets in
circular fashion, removing each bucket as it is exhausted. For example,
the ratios A:4, B:2, C:1 would yield the sequence A B C A B A A. The
ratios A:1, B:5 would yield the sequence A B B B B B.
### concat
This simply takes each statement template as it occurs in order and
duplicates it in place to achieve the ratio. The ratios above (A:4, B:2,
C:1) would yield the sequence A A A A B B C for the concat sequencer.
### interval
This is arguably the most complex sequencer. It takes each ratio as a
frequency over a unit interval of time, and apportions the associated
operation to occur evenly over that time. When two operations would be
assigned the same time, then the order of appearance establishes
precedence. In other words, statements appearing first win ties for the
same time slot. The ratios A:4 B:2 C:1 would yield the sequence A B C A A
B A. This occurs because, over the unit interval
(0.0,1.0), A is assigned the positions `A: 0.0, 0.25, 0.5, 0.75`, B is
assigned the positions `B: 0.0, 0.5`, and C is assigned position `C: 0.0`.
These offsets are all sorted with a position-stable sort, and then the
associated ops are taken as the order.
In detail, the rendering appears
as `0.0(A), 0.0(B), 0.0(C), 0.25(A), 0.5(A), 0.5(B), 0.75(A)`, which
yields `A B C A A B A` as the op sequence.
This sequencer is most useful when you want a stable ordering of operations
from a rich mix of statement types, where each operation is spaced as
evenly as possible over time, and where it is not important to control the
cycle-by-cycle sequencing of statements.
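To make the expansion concrete, here is a small self-contained sketch in
plain ECMAScript (not a NoSQLBench API; the function names are hypothetical)
that reproduces the bucket and concat expansions described above:
```javascript
// Expand [name, ratio] pairs into an op sequence with the 'bucket'
// scheme: round-robin over the buckets, dropping each as it empties.
function bucketSequence(ratios) {
    var remaining = ratios.map(function (r) { return { name: r[0], count: r[1] }; });
    var seq = [];
    while (remaining.length > 0) {
        // one round-robin pass over the buckets that still have ops
        remaining.forEach(function (b) { seq.push(b.name); b.count--; });
        remaining = remaining.filter(function (b) { return b.count > 0; });
    }
    return seq;
}

// Expand with the 'concat' scheme: duplicate each template in place.
function concatSequence(ratios) {
    var seq = [];
    ratios.forEach(function (r) {
        for (var i = 0; i < r[1]; i++) seq.push(r[0]);
    });
    return seq;
}

print(bucketSequence([['A', 4], ['B', 2], ['C', 1]]).join(' ')); // A B C A B A A
print(concatSequence([['A', 4], ['B', 2], ['C', 1]]).join(' ')); // A A A A B B C
```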

View File

@ -1,28 +0,0 @@
---
title: Grafana Metrics
weight: 2
---
# Grafana Metrics
NoSQLBench comes with a built-in helper to get you up and running quickly
with client-side testing metrics. This functionality is based on docker,
with a built-in method for bringing up a docker stack, automated by
NoSQLBench.
**WARNING:**
This feature requires that you have docker running on the local system and
that your user is in a group that is allowed to manage docker. Using
the `--docker-metrics` option *will* attempt to manage docker on your
local system.
To ask nosqlbench to stand up your metrics infrastructure using a local
docker runtime, use this command line option with any other nosqlbench
commands:
--docker-metrics
When this option is set, nosqlbench will start graphite, prometheus, and grafana automatically on your local docker,
configure them to work together, and send metrics to them automatically. It also imports a base dashboard for
nosqlbench and configures grafana snapshot export to share with a central DataStax grafana instance (grafana can be
found on localhost:3000 with the default credentials admin/admin).
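For example, to wrap an ordinary activity run with the dockerized metrics
stack (the workload name here is a placeholder):

    nb run driver=cql workload=myworkload.yaml --docker-metrics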

View File

@ -1,8 +0,0 @@
---
title: Reference
weight: 90
---
# Reference Section
This section contains additional reference details across a range of nosqlbench topics.

View File

@ -1,72 +0,0 @@
---
title: Parameter Types
weight: 03
---
# Parameter Types
To configure a NoSQLBench activity to do something meaningful, you have to provide parameters to it. This can occur in
one of several ways. This section is a guide on NoSQLBench parameters, how they layer together, and when to use one form
over another.
The command line is used to configure both the overall runtime (logging, etc) as well as the individual activities and
scripts. Global options can be distinguished from scenario commands and their parameters because global options always
start with a -single or --double-hyphen.
## Activity Parameters
Parameters for an activity always have the form of `<name>=<value>` on the command line. Activity parameters *must*
follow a command, such as `run` or `start`, for example. Scenario commands are always single words without any leading
hyphens. Every command-line argument that follows a scenario command in the form of `<name>=<value>` is a parameter to
that command.
Activity parameters can be provided by the core runtime or they can be provided by the activity type. It's not important
where they are provided from so long as you know what they do for your workloads, how to configure them, and where to
find the docs.
*Core* Parameters are those provided by the core runtime. They are part of the core API and used by every
activity type. Core activity params include *type*, *alias*, and *threads*, for example. These parameters are explained
individually under the next section.
*Driver* Parameters are those provided by an activity type. These parameters are documented for each activity type. You
can see them by running `nb help <activity type>`.
Driver parameters may be dynamic. *Dynamic* Activity Parameters are parameters which may be changed while an activity is
running. This means that scenario scripting logic may change some variables while an activity is running, and that the
runtime should dynamically adjust to match. Dynamic parameters are mainly used in more advanced scripting scenarios.
Parameters that are dynamic should be documented as such in the respective activity type's help page.
### Template Parameters
If you need to provide general-purpose overrides to a named section of the standard YAML, then you may use a mechanism
called _template parameters_. These are just like activity parameters, but they are set via macro and can have defaults.
This is a YAML format feature that allows you to easily template workload properties in a way that is easy to override
on the command line or via scripting. More details on template parameters are shared under 'Designing Workloads|Template
Params'.
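For a concrete sketch (the parameter name and statement are illustrative), a workload can template a
value with a default:
```yaml
statements:
  - s1: "select * from TEMPLATE(keyspace,baselines).iot limit 10"
```
Running the workload with `keyspace=test` on the command line then overrides the default `baselines`
when the YAML buffer is resolved.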
### Parameter Loading
Now that we've described all the parameter types, let's tie them together. When an activity is loaded from the command
line or script, the parameters are resolved in the following order:
1. The `type` parameter tells nosqlbench which activity type implementation to load.
2. The activity type implementation creates an activity.
3. The activity is initialized with the parameters provided.
4. The yaml parameter is used to load the workload definition into
a buffer without parsing the YAML.
5. Any template parameters in the file in `<<varname:default value>>` or `TEMPLATE(varname,default value)` form are resolved, taking override values from the provided params.
6. Finally, the activity is started.
## Statement Parameters
Some activities make use of parameters for statements. These are called _statement parameters_ and are completely
different than _activity parameters_. Statement parameters in a YAML allow you to affect *how* a statement is used in a
workload. Just as with activity level parameters, statement parameters may be supported by the core runtime or by an
activity type. These are also documented in the respective activity type's documentation included in the 'Activity
Types' section.
The core statement parameters are explained just below the core activity parameters in this section.

View File

@ -1,137 +0,0 @@
---
title: Scenario Scripting
---
# Scenario Scripting
## Motive
The NoSQLBench runtime is a combination of a scripting sandbox and a workload execution machine. This is not accidental.
With this particular arrangement, it should be possible to build sophisticated tests across a variety of scenarios. In
particular, logic which can observe and react to the system under test can be powerful. With this approach, it becomes
possible to break away from the conventional run-interpret-adjust cycle which is all too often done by human hands.
## Machinery, Controls & Instruments
All of the heavy lifting is left to Java and the core nosqlbench runtime. This includes the iterative workloads that are
meant to test the target system. This is combined with a control layer which is provided by Nashorn and eventually
GraalVM. This division of responsibility allows the high-level test logic to be "script" and the low-level activity
logic to be "machinery". While the scenario script has the most control, it also is the least busy relative to activity
workloads. The net effect is that you have the efficiency of the iterative test loads in conjunction with the open
design palette of a first-class scripting language.
Essentially, the ActivityType drivers are meant to handle the workload-specific machinery. They also provide dynamic
control points and parameters which are special to that activity type (driver). This exposes a full feedback loop between a
running scenario script and the activities that it runs. The scenario is free to read the performance metrics from a
running activity and make changes to it on the fly.
## Scripting Environment
The nosqlbench scripting environment has a few modifications meant to streamline understanding and usage of
nosqlbench dynamic parameters and metrics.
### Active Bindings
Active bindings are control variables which, when assigned to, cause an immediate change in the behavior of the runtime.
Each of the variables below is pre-wired into each script environment.
#### scenario
This is the __Scenario Controller__ object which manages the activity executors in the runtime. All the methods on this
Java type are provided to the scripting environment directly.
#### activities.&lt;alias&gt;.&lt;paramname&gt;
Each activity parameter for a given activity alias is available at this name within the scripting environment. Thus, you
can change the number of threads on an activity named foo (alias=foo) in the scripting environment by assigning a value
to it as in `activities.foo.threads=3`. Any assignments take effect synchronously before the next line of the script
continues executing.
#### metrics.&lt;alias&gt;.&lt;metric name&gt;
Each activity metric for a given activity alias is available at this name. This gives you access to the metrics objects
directly. Some metrics objects have also been enhanced with wrapper logic to provide simple getters and setters, like
`.p99ms` or `.p99ns`, for example.
Interaction with the nosqlbench runtime and the activities therein is made
easy by the above variables and objects. When an assignment is made to any
of these variables, the changes are propagated to internal listeners. For
changes to
_threads_, the thread pool responsible for the affected activity adjusts
the number of active threads (AKA slots). Other changes are further
propagated directly to the thread harnesses and components which implement
the ActivityType.
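As a minimal sketch of such a feedback loop (the alias `foo`, the workload, and the numbers
are hypothetical; `start`, `waitMillis`, and `stop` are assumed here to be among the scenario
controller methods mentioned above, with `start` taking a parameter map like the `run`
examples elsewhere in these docs):
```javascript
// The alias 'foo', the workload, and all numbers below are hypothetical.
scenario.start({
    driver: 'stdout',
    workload: 'test.yaml',
    alias: 'foo',
    threads: '1',
    cycles: '1000000'
});

activities.foo.threads = 4;    // takes effect before the next line runs

scenario.waitMillis(5000);     // let the activity run for five seconds
print('p99 cycle time: ' + metrics.foo.cycles.snapshot.p99ms + ' ms');

scenario.stop('foo');          // stop the activity by its alias
```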
**WARNING:**
Assignment to the _workload_ and _alias_ activity parameters has no
special effect, as you can't change an activity to a different driver once
it has been created.
You can make use of more extensive Java or Javascript libraries as needed,
mixing them with the runtime controls provided above.
## Enhanced Metrics for Scripting
The metrics available in nosqlbench are slightly different than the standard kit with dropwizard metrics. The key
differences are:
### HDR Histograms
All histograms use HDR histograms with *four* significant digits.
All histograms reset on snapshot, automatically keeping all data until you report the snapshot or access the snapshot
via scripting. (see below).
The metric types that use histograms have been replaced with nicer versions for scripting. You don't have to do anything
differently in your reporter config to use them. However, if you need to use the enhanced versions in your local
scripting, you can. This means that Timer and Histogram types are enhanced. If you do not use the scripting extensions,
then you will automatically get the standard behavior that you are used to, only with higher-resolution HDR and full
snapshots for each report to your downstream metrics systems.
### Scripting with Delta Snapshots
For both the timer and the histogram types, you can call getDeltaReader(), or access it simply as
&lt;metric&gt;.deltaReader. When you do this, the delta snapshotting behavior is maintained until you use the
deltaReader to access it. You can get a snapshot from the deltaReader by calling getDeltaSnapshot(10000), which causes
the snapshot to be reset for collection, but retains a cache of the snapshot for any other consumer of getSnapshot() for
that duration in milliseconds. If, for example, metrics reporters access the snapshot in the next 10 seconds, the
reported snapshot will be exactly what was used in the script.
This is important for using local scripting methods and calculations with aggregate views downstream. It means that the
histograms will match up between your local script output and your downstream dashboards, as they will both be using the
same frame of data, when done properly.
### Histogram Convenience Methods
All histogram snapshots have additional convenience methods for accessing every percentile in (P50, P75, P90, P95, P98,
P99, P999, P9999) and every time unit in (s, ms, us, ns). For example, getP99ms() is supported, as is getP50ns(), and
every other possible combination. This means that you can access the 99th percentile metric value in your scripts for
activity _foo_ as _metrics.foo.cycles.snapshot.p99ms_.
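Putting the delta reader and the convenience getters together, a script might isolate a
ten-second frame of histogram data (the activity alias `foo` is hypothetical, and
`waitMillis` is assumed as a scenario controller method):
```javascript
// Take over delta snapshotting for the 'cycles' timer of activity 'foo'.
var reader = metrics.foo.cycles.deltaReader;

scenario.waitMillis(10000);  // let the workload run for ten seconds

// Reset-and-read: the snapshot is cached for 10000 ms so that downstream
// metrics reporters see exactly the same frame of data the script used.
var snapshot = reader.getDeltaSnapshot(10000);
print('p50: ' + snapshot.getP50ms() + ' ms, p99: ' + snapshot.getP99ms() + ' ms');
```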
## Control Flow
When a script is run, it has absolute control over the scenario runtime while it is active. Once the script reaches its
end, however, it will only exit if all activities have completed. If you want to explicitly stop a script, you must stop
all activities.
## Strategies
You can use nosqlbench in the classic form with `run driver=<activity_type> param=value ...` command line syntax. There
are reasons, however, that you will sometimes want to customize and modify your scripts directly, such as:
- Permute test variables to cover many sub-conditions in a test.
- Automatically adjust load factors to identify the nominal capacity of a system.
- Adjust the rate of a workload in order to get a specific measurement of system behavior.
- React to changes in test or target system state in order to properly sequence a test.
## Script Input & Output
Internal buffers are kept for _stdin_, _stdout_, and _stderr_ for the scenario script execution. These are logged to the
logfile upon script completion, with markers showing the timestamp and file descriptor (stdin, stdout, or stderr) that
each line was recorded from.
## External Docs
- [Java Platform, Standard Edition Nashorn User's Guide (Java 8)](https://docs.oracle.com/javase/8/docs/technotes/guides/scripting/nashorn/api.html)
- [Nashorn extensions on OpenJDK Wiki](https://wiki.openjdk.java.net/display/Nashorn/Nashorn+extensions)
- [Scripting for the Java (8) Platform](http://docs.oracle.com/javase/8/docs/technotes/guides/scripting/)

View File

@ -1,38 +0,0 @@
---
title: Standard Metrics
---
# Standard Metrics
nosqlbench comes with a set of standard metrics that will be part of every activity type (driver). Each activity type
(driver) enhances the metrics available by adding their own metrics with the nosqlbench APIs. This section explains what
the standard metrics are, and how to interpret them.
## read-input
Within nosqlbench, a data stream provider called an _Input_ is responsible for providing the actual cycle number that
will be used by consumer threads. Because different _Input_ implementations may perform differently, a separate metric
is provided to track the performance in terms of client-side overhead. The **read-input** metric is a timer that only
measures the time it takes for a given activity thread to read the input value, nothing more.
## strides
A stride represents the work-unit for a thread within nosqlbench. It allows a set of cycles to be logically grouped
together for purposes of optimization -- or in some cases -- to simulate realistic client-side behavior over multiple
operations. The stride is the number of cycles that will be allocated to each thread before it starts iterating on them.
The **strides** timer measures the time each stride takes, including all cycles within the stride. It starts measuring
time before the cycle starts, and stops measuring after the last cycle in the stride has run.
## cycles
Within nosqlbench, each logical iteration of a statement is handled within a distinct cycle. A cycle represents an
iteration of a workload. This corresponds to a single operation executed according to some statement definition.
The **cycles** metric is a timer that starts counting at the start of a cycle, before any specific activity behavior has
control. It stops timing once the logical cycle is complete. This includes any additional phases that are executed by
multi-phase actions.

View File

@ -1,48 +0,0 @@
---
title: Timing Terms
---
# Timing Terms
Often, terms used to describe latency can create confusion. In fact, the term _latency_ is so overloaded in practice
that it is not useful by itself. Because of this, nosqlbench will avoid using the term latency _except in a specific
way_. Instead, the terms described in this section will be used.
nosqlbench is a client-centric testing tool. The measurement of operations occurs on the client, without visibility to
what happens in transport or on the server. This means that the client *can* see how long an operation takes, but it
*cannot see* how much of the operational time is spent in transport and otherwise. This has a bearing on the terms that
are adopted with nosqlbench.
Some terms are anchored by the context in which they are used. For latency terms, *service time* can be subjective. When
using this term to describe other effects in your system, what is included depends on the perspective of the requester.
The concept of service is universal, and every layer in a system can be seen as a service. Thus, the service time is
defined by the vantage point of the requester. This is the perspective taken by the nosqlbench approach for naming and
semantics below.
## responsetime
**The duration of time a user has to wait for a response from the time they submitted the request.** Response time is
the duration of time from when a request was expected to start, to the time at which the response is finally seen by the
user. A request is generally expected to start immediately when users make a request. For example, when a user enters a
URL into a browser, they expect the request to start immediately when they hit enter.
In nosqlbench, the response time for any operation can be calculated by adding its wait time and its service time
together. For example, an operation that waited 5ms to start and then took 2ms to complete has a response time of 7ms.
## waittime
**The duration of time between when an operation is intended to start and when it actually starts on a client.** This is
also called *scheduling delay* in some places. Wait time occurs because clients are not able to make all requests
instantaneously when expected. There is an ideal time at which the request would be made according to user demand. This
ideal time is always earlier than the actual time in practice. When there is a shortage of resources *of any kind* that
delays a client request, it must wait.
Wait time can accumulate when you are running something according to a dispatch rate, as with a rate limiter.
## servicetime
**The duration of time it takes a server or other system to fully process a request and send a response.** From the
perspective of a testing client, the _system_ includes the infrastructure as well as remote servers. As such, the
service time metrics in nosqlbench include any operational time that is external to the client, including transport
latency.

View File

@ -1,59 +0,0 @@
---
title: Scripting
weight: 95
---
# Scripting with NoSQLBench
NoSQLBench is designed to be used as both a turnkey testing system as well as a toolkit for advanced
testing. The approach that enables this is based on a few key principles:
1. NoSQLBench is packaged by default for users who want to use pre-built testing configurations.
2. The whole runtime is modular and designed for composition.
3. The default testing configurations are assembled from these modular components as needed.
4. Users can choose to build their own testing configurations from these modules.
5. When a user moves from using pre-built configurations to custom configurations,
   the transition is an incremental process.
Why base the internal logic on a scripting engine?
The principles described above apply all the way to the scripting layer. Every NoSQLBench
scenario is, after all, a script. For users who just need to run the pre-packaged
configurations, the fact that a scripting engine is at the core is an implementation detail that
doesn't matter. For others who need to create advanced testing logic, this feature
allows them to build on the self-same concepts and components that other NoSQLBench users
are already familiar with and using. This common ground pays for itself in terms of reusability,
clarity, and a shared approach to testing at different levels of detail.
## Unique Capabilities
Unlike some other systems which define what a user is allowed to do during a scenario with something
like a DSL, NoSQLBench comes with no limitations. In other words, rather than pick a set of behaviors
from a limited list of DSL verbs, you can do anything you want during a scenario as long as it can
be expressed in Javascript.
That said, if you want to use a DSL within NoSQLBench, it doesn't prevent you from doing so. It just
doesn't come with a DSL to tell you what you can (and can't) do. Instead, it comes with a set of
scripting libraries and extensions that have proven useful for advanced testing scenarios.
NoSQLBench scripting is supported with realtime interaction between the scripting environment
and the running scenario. Activities, metrics, and control variables that are needed to dynamically
interact with a running workload are all wired in and ready to go.
Contributors can add to the scripting runtime by adding extensions to NoSQLBench. These extensions
are generally added to the integrated tests with full-roundtrip content checking to ensure that
they perform exactly as expected.
## Getting Started
For users who want to tap into the programmatic power of NoSQLBench, it's easy to get started by
using the `--show-script` option. For any normal command line that you might use with NoSQLBench,
this option causes it to dump the scenario script to stdout and exit instead of running the scenario.
You can store this into a file with a `.js` extension, and then use a command line like
nosqlbench script myfile.js
to invoke it. This is exactly the same as running the original command line, only with a couple of
extra steps that let you see what it is doing directly in the scenario script.

View File

@ -1,59 +0,0 @@
---
title: Script Parameters
weight: 20
---
# Script Parameters
When running a script, it is sometimes necessary to pass parameters to it in the same way
that you would for an activity. For example, you might have a scenario script like this:
```javascript
// myscript.js
scenario.run({
    driver: 'stdout',
    workload: 'test.yaml',
    cycles: '1000'
});
```
This is what the script form of starting an activity might look like. It is
simply passing a parameter map with the activity parameters to the scenario controller.
You might invoke it like this:
nb script myscript
Suppose that you want to allow the user to run such an activity by calling the script directly,
but you also want to allow them to add their own parameters specifically to the
activity.
NoSQLBench supports this type of flexibility by providing any command-line arguments to the
script as a script object. It is possible to then combine the parameters that a user provides
with any templated parameters in your script. You can make either one the primary, while allowing
the other to backfill values. In either case, it's a matter of using helper methods that are
baked into the command line parameters object.
To force parameters to specific values while allowing user command line parameters to backfill,
use a pattern like this:
```javascript
myparams = params.withOverrides({
    myparam: 'myvalue'
});
```
This will force 'myparam' to the specified value irrespective of what the user has provided for
that value, and will add the value if it is not present already.
To force _unset_ a parameter, use a similar pattern, but with the value `UNSET` instead:
```javascript
myparams = params.withOverrides({
    myparam: 'UNSET'
});
```
If this form is used, then any parameter which has already been provided for `myparam` will be
removed from the resulting map.
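Putting the pieces together, a parameterized script (file and parameter names hypothetical)
could be invoked as `nb script myscript driver=cql cycles=5000`, with the script forcing
some values while the user's parameters backfill the rest:
```javascript
// myscript.js -- 'params' holds the user's command-line parameters.
var myparams = params.withOverrides({
    workload: 'test.yaml'   // always use this workload
});
// driver, cycles, and anything else backfill from what the user provided
scenario.run(myparams);
```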

View File

@ -1,43 +0,0 @@
---
title: Scripting Extensions
weight: 30
---
# Scripting Extensions
Extensions are injected into the scripting environment as plugins. They appear as service
objects in the script environment under a name determined by the plugin.
This section describes some of the scripting extensions available.
## csvmetrics
Allows a script to log some or all metrics to CSV files.
## files
Allows for convenient read access to local files.
## globalvars
Allows access to the shared variable state that can be populated from operations.
## histologger
Allows script control of HDR histogram interval logging.
## histostatslogger
Allows script control of histogram stats logging in CSV files.
## http
Easily use http get and post in scripts.
## optimos
Allows use of the BOBYQA optimizer in scripts.
## scriptingmetrics
Allows you to create and append metrics within your scenario scripts.

View File

@ -1,102 +0,0 @@
---
title: Advanced Testing
weight: 13
---
# Advanced Testing
**NOTE:**
Some of the features discussed here are only for advanced testing
scenarios. First-time users should become familiar with the basic options
first.
## Hybrid Rate Limiting
Rate limiting is a complicated endeavor, if you want to do it well. The
basic rub is that going fast means you have to be less accurate, and
vice-versa. As such, rate limiting is a parasitic drain on any system. The
act of rate limiting itself poses a limit to the maximum rate, regardless
of the settings you pick. This occurs as a side-effect of forcing your
system to interact with some hardware notion of time passing, which takes
CPU cycles that could be going to the thing you are limiting.
This means that in practice, rate limiters are often very featureless.
It's daunting enough to need rate limiting, and asking for anything more
than that is often wishful thinking. Not so in NoSQLBench.
The rate limiter in NoSQLBench provides a comparable degree of performance
and accuracy to others found in the Java ecosystem, but it *also* has
advanced features:
- It allows a sliding scale between average rate limiting and strict rate
  limiting, called _bursting_ (see the sketch after this list).
- It internally accumulates delay time, for coordinated omission (C.O.) friendly metrics which
are separately tracked for each and every operation.
- It is resettable and reconfigurable on the fly, including the bursting
rate.
- It provides its configured values in addition to performance data in
metrics, capturing your rate limiter settings as a simple matter of
metrics collection.
- It comes with advanced scripting helpers which allow you to read data
directly from histogram reservoirs, or control the reservoir window
programmatically.
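As an illustrative sketch only (the parameter name and the rate/burst form may differ
across drivers and versions), a bursting rate limit might be attached on the command line as:

    nb run driver=stdout workload=test.yaml cyclerate=5000,1.1

where `5000` is the target ops per second and `1.1` permits up to 10% bursting above the
average rate while the limiter catches up on accumulated delay.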
## Flexible Error Handling
An emergent facility in NoSQLBench is the way that errors are handled
within an activity. For example, with the CQL activity type, you are able
to route error handling for any of the known exception types. You can
count errors, you can log them. You can cause errored operations to
auto-retry if possible, up to a configurable number of tries.
This means that, as a user, you get to decide what your test is about. Is
it about measuring some nominal but anticipated level of errors due to
intentional over-saturation? If so, then count the errors, and look at
their histogram data for timing details within the available timeout.
Are you doing a basic stability test, where you want the test to error out
for even the slightest error? You can configure for that if you need.
## Cycle Logging
It is possible to record the result status of each and every cycle in a
NoSQLBench test run. If the results are mostly homogeneous, the RLE
encoding of the results will reduce the output file down to a small
fraction of the number of cycles. The errors are mapped to ordinals by
error type, and these ordinals are stored into a direct RLE-encoded log
file. For most testing where most of the results are simply success, this
file will be tiny. You can also convert the cycle log into textual form
for other testing and post-processing and vice-versa.
## Op Sequencing
The way that operations are planned for execution in NoSQLBench is based
on a stable ordering that is configurable. The statement forms are mixed
together based on their relative ratios. The three schemes currently
supported are round-robin with exhaustion (bucket), duplicate in order
(concat), and a way to spread each statement out over the unit interval
(interval). These account for most configuration scenarios without users
having to micro-manage their statement templates.
## Sync and Async
There are two distinct usage modes in NoSQLBench when it comes to
operation dispatch and thread management:
### Sync
Sync is the default form. In this mode, each thread reads its sequence and
dispatches one statement at a time, holding only one operation in flight
per thread. This is the mode you often use when you want to emulate an
application's request-per-thread model, as it implicitly linearizes the
order of operations within the computed sequence of statements.
### Async
In Async mode, each thread in an activity is responsible for juggling a
number of operations in-flight. This allows a NoSQLBench client to juggle
an arbitrarily high number of connections, limited primarily by how much
memory you have.
Internally, the Sync and Async modes have different code paths. It is
possible for an activity type to support one or both of these.

View File

@ -1,76 +0,0 @@
---
title: Core Concepts
weight: 2
---
# Refined Core Concepts
The core concepts that NoSQLBench is built on have been scrutinized,
replaced, refined, and hardened through several years of use by users with
various needs and backgrounds.
This level of refinement is important when trying to find a way to express
common patterns in what is often a highly fragmented practice. Testing is
hard. Scale testing is hard. Distributed testing is hard. Combined, the
challenge of executing realistic tests is often quite daunting to all but
seasoned test engineers. To make this worse, existing tools have only
skirmished with this problem enough to make dents, but none has tackled
full-on the lack of conceptual building blocks.
This has to change. We need a set of conceptual building blocks that can
span across workloads and system types, and machinery to put these
concepts to use. This is why it is important to focus on finding a useful
and robust set of concepts to use as the foundation for the rest of the
toolkit to be built on. Finding these building blocks is often one of the
most difficult tasks in systems design. Once you find and validate a
useful set of concepts, everything else gets easier.
We feel that the success that we've already had using NoSQLBench has been
strongly tied to the core concepts. Some concepts used in NoSQLBench are
shared below for illustration, but this is by no means an exhaustive list.
### The Cycle
Cycles in NoSQLBench are whole numbers on a number line. Each operation in
a NoSQLBench scenario is derived from a single cycle. It's a long value,
and a seed. The cycle determines not only which statement is selected for
execution, but also what synthetic payload data will be attached to it.
Cycles are specified as a closed-open `[min,max)` interval, just as slices
in some languages. That is, the min value is included in the range, but
the max value is not. This means that you can stack slices using common
numeric reference points without overlaps or gaps. It means you can have
exact awareness of what data is in your dataset, even incrementally.
You can think of a cycle as a single-valued coordinate system for data
that lives adjacent to that number on the number line. In this way,
virtual dataset functions are ways of converting coordinates into data.
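For example (assuming the `cycles=<min>..<max>` range form; the workload name is a
placeholder), two runs over adjacent closed-open slices cover a dataset exactly once,
with no overlap and no gap:

    nb run driver=cql workload=myworkload.yaml cycles=0..1000000
    nb run driver=cql workload=myworkload.yaml cycles=1000000..2000000

The first run uses cycles 0 through 999999; the second picks up at exactly 1000000.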
### The Activity
An activity is a multi-threaded flywheel of statements in some sequence
and ratio. Activities run over the numbers in a cycle range. Each activity
has a driver type which determines the native protocol that it speaks.
### The Driver Type
A driver type is a high level driver for a protocol. It is like a
statement-aware cartridge that knows how to take a basic statement
template and turn it into an operation for an activity to execute within
the scenario.
### The Scenario
The scenario is a runtime session that holds the activities while they
run. A NoSQLBench scenario is responsible for aggregating global runtime
settings, metrics reporting channels, log files, and so on. All activities
run within a scenario, under the control of the scenario script.
### The Scenario Script
Each scenario is governed by a script which runs single-threaded, asynchronously
from activities, but in control of activities. If needed, the scenario
script is automatically created for the user, and the user never knows it
is there. If the user has advanced testing requirements, then they may
take advantage of the scripting capability at such time. When the script
exits, *AND* all activities are complete, then the scenario is complete.

View File

@ -1,53 +0,0 @@
---
title: High Fidelity Metrics
weight: 12
---
# High-Fidelity Metrics
Since NoSQLBench has been built as a serious testing tool for all users,
some attention was necessary on the way metrics are used.
## Discrete Reservoirs
In NoSQLBench, we avoid the use of time-decaying metrics reservoirs.
Internally, we use HDR reservoirs with discrete time boundaries. This is
so that you can look at the min and max values and know that they apply
accurately to the whole sampling window.
## Metric Naming
All running activities have a symbolic alias that identifies them for the
purposes of automation and metrics. If you have multiple activities
running concurrently, they will have different names and will be
represented distinctly in the metrics flow.
## Precision and Units
By default, the internal HDR histogram reservoirs are kept at 4 digits of
precision. All timers are kept at nanosecond resolution.
## Metrics Reporting
Metrics can be reported via graphite as well as CSV, logs, HDR logs, and
HDR stats summary CSV files.
## Coordinated Omission
The metrics naming and semantics in NoSQLBench are set up so that you can
have coordinated omission metrics when they are appropriate, but there are
no other changes when they are not. This means that the metric names and
meanings remain stable in any case.
Particularly, NoSQLBench avoids the term "latency" altogether as it is
often overused and thus prone to confusing people.
Instead, the terms `service time`, `wait time`, and `response time` are
used. These are abbreviated in metrics as `servicetime`, `waittime`, and
`responsetime`.
The `servicetime` metric is the only one which is always present. When a
rate limiter is used, then additionally `waittime` and `responsetime` are
reported.

View File

@ -1,26 +0,0 @@
---
title: NoSQLBench Showcase
weight: 10
---
# NoSQLBench Showcase
Since NoSQLBench is new on the scene in its current form, you may be
wondering why you would want to use it over any other tool. That is what
this section is all about.
You don't have to read all of this! It is here for those who want to know
the answer to the question "So, what's the big deal??" Just remember it is
here for later if you want to skip to the next section and get started
testing.
NoSQLBench can do nearly everything that other testing tools can do, and
more. It achieves this by focusing on a scalable user experience in
combination with a modular internal architecture.
NoSQLBench is a workload construction and simulation tool for scalable
systems testing. That is an entirely different scope of endeavor than most
other tools.
The pages in this section all speak to a selection of advanced
capabilities that are unique to NoSQLBench.

View File

@ -1,27 +0,0 @@
---
title: Modular Architecture
weight: 11
---
# Modular Architecture
The internal architecture of NoSQLBench is modular throughout. Everything
from the scripting extensions to data generation is enumerated at compile
time into a service descriptor, and then discovered at runtime by the SPI
mechanism in Java.
This means that extending and customizing bundles and features is quite
manageable.
It also means that it is relatively easy to provide a suitable API for
multi-protocol support. In fact, there are several drivers available in
the current NoSQLBench distribution. You can list them out with `nb
--list-drivers`, and you can get help on how to use each of them with `nb
help <driver name>`.
This also is a way for us to encourage and empower other contributors to
help develop the capabilities and reach of NoSQLBench. By encouraging
others to help us build NoSQLBench modules and extensions, we can help
more users in the NoSQL community at large.

View File

@ -1,50 +0,0 @@
---
title: Portable Workloads
weight: 2
---
# Portable Workloads
All of the workloads that you can build with NoSQLBench are self-contained
in a workload file. This is a statement-oriented configuration file that
contains templates for the operations you want to run in a workload.
This defines part of an activity - the iterative flywheel part that is run
directly within an activity type. This file contains everything needed to
run a basic activity -- a set of statements in some ratio. It can be used
to start an activity, or as part of several activities within a scenario.
## Standard YAML Format
The format for describing statements in NoSQLBench is generic, but
specialized around describing statements for a
workload. That means that you can use the same YAML format to describe a
workload for kafka as you can for Apache Cassandra or DSE.
The YAML structure has been tailored to describing statements, their data
generation bindings, how they are grouped and selected, and the parameters
needed by drivers, like whether they should be prepared statements or not.
Further, the YAML format allows for defaults and overrides with a very
simple mechanism that reduces editing fatigue for frequent users.
You can also template document-wide macro parameters which are taken from
the command line just like any other parameter. This is a way of
templating a workload and making it multi-purpose or adjustable on the fly.
## Experimentation Friendly
Because the workload YAML format is generic across driver types, it is
possible to ask one driver type to interpret the statements that are meant
for another. This isn't generally a good idea, but it becomes extremely
handy when you want to have a high level driver type like `stdout`
interpret the syntax of another driver like `cql`. When you do this, the
stdout activity type _plays_ the statements to your console as they would
be executed in CQL, data bindings and all.
This means you can empirically and directly demonstrate and verify access
patterns, data skew, and other dataset details before you change back to
cql mode and turn up the settings for a higher scale test. It takes away
the guess work about what your test is actually doing, and it works for
all drivers.
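For example (the workload name is a placeholder), you can dry-run a CQL workload
through the `stdout` driver before running it for real:

    nb run driver=stdout workload=myworkload.yaml cycles=10
    nb run driver=cql workload=myworkload.yaml cycles=10

The first command plays ten fully-rendered statements to your console; the second
executes the same ten operations against a cluster.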

View File

@ -1,97 +0,0 @@
---
title: Scripting Environment
weight: 3
---
# Scripting Environment
The ability to write open-ended testing simulations is provided in
NoSQLBench by means of a scripted runtime, where each scenario is driven
from a control script that can do anything the user wants.
## Dynamic Parameters
Some configuration parameters of activities are designed to be assignable
while a workload is running. This makes it possible to adjust things like threads, rates, and
other workload dynamics in real time. The internal APIs work with the
scripting environment to expose these parameters directly to scenario
scripts. Drivers that are provided to NoSQLBench can also expose dynamic
parameters in the same way so that anything can be scripted dynamically
when needed.
## Scripting Automatons
When a NoSQLBench scenario is running, it is under the control of a
single-threaded script. Each activity that is started by this script is
run within its own thread pool, simultaneously and asynchronously.
The control script has executive control of the activities, as well as
full visibility into the metrics that are provided by each activity. The
way these two parts of the runtime meet is through the service objects
which are installed into the scripting runtime. These service objects
provide a named access point for each running activity and its metrics.
This means that the scenario script can do something simple, like starting
activities and waiting for them to complete, OR, it can do something more
sophisticated, like dynamically and iteratively scrutinizing the metrics and
making real-time adjustments to the workload while it runs.
## Analysis Methods
Scripting automatons that do feedback-oriented analysis of a target system
are called analysis methods in NoSQLBench. We have prototyped a couple of
these already, but there is nothing keeping the adventurous from coming up
with their own.
## Command Line Scripting
The command line has the form of basic test commands and parameters. These
commands get converted directly into scenario control script in the order
they appear. The user can choose whether to stay in high level executive
mode, with simple commands like `nb test-scenario ...`, or to drop down
directly into script design. They can look at the equivalent script for
any command line by running with `--show-script`. If you take the script that is
dumped to console and run it, it will do exactly the same thing as if you
hadn't even looked at it and just ran basic commands on the command line.
There are even ways to combine script fragments, full commands, and calls
to scripts on the command line. Since each variant is merely a way of
constructing scenario script, they all get composited together before the
scenario script is run.
New introductions to NoSQLBench should focus on the command line. Once a
user is familiar with this, it is up to them whether to tap into the
deeper functionality. If they don't need to know about scenario scripting,
then they shouldn't have to learn about it to be effective. This is what
we are calling a _scalable user experience_.
## Compared to DSLs
Other tools may claim that their DSL makes scenario "simulation" easier.
In practice, any DSL is generally dependent on a development tool to lay
the language out in front of a user in a fluent way. This means that DSLs
are almost always developer-targeted tools, and mostly useless for casual
users who don't want to break out an IDE.
One of the things a DSL proponent may tell you is that it tells you "all
the things you can do!". This is de-facto the same thing as it telling you
"all the things you can't do" because it's not part of the DSL. This is
not a win-win for the user. For DSL-based systems, the user has to use the
DSL whether or not it enhances their creative control, while in fact, most
DSLs aren't rich enough to do much that is interesting from a simulation
perspective.
In NoSQLBench, we don't force the user to use the programming abstractions
except at a very surface level -- the CLI. It is up to the user whether or
not to open the secret access panel for the more advanced functionality. If
they decide to do this, we give them a commodity language (ECMAScript),
and we wire it into all the things they were already using. We don't take
away their creative freedom by telling them what they can't do. This way,
users can pick their level of investment and reward as best fits their
individual needs, as it should be.
## Scripting Extensions
Also mentioned under the section on modularity, it is relatively easy for
a developer to add their own scripting extensions into NoSQLBench as named
service objects.

View File

@ -1,126 +0,0 @@
---
title: Virtual Datasets
weight: 1
---
# Virtual Datasets
The _Virtual Dataset_ capabilities within NoSQLBench allow you to generate
data on the fly. There are many reasons for using this technique in
testing, but it is often a topic that is overlooked or taken for granted.
This has multiple positive effects on the fidelity of the test:
1) It is much, much more efficient than interacting with storage systems and piping data around. Even loading
data from lightweight storage like NVMe will be more time intensive than simply generating it in most cases.
2) As such, it leaves significant headroom on the table for introducing other valuable capabilities into
the test system, like advanced rate metering, coordinated omission awareness, etc.
3) Changing the data which is generated is as easy as changing the recipe.
4) The efficiency of the client is often high enough to support single-client test setups without appreciable
loss of capacity.
5) Because of modern procedural generation techniques, the variety and shape of data available is significant.
Increasing the space of possibilities is a matter of adding new algorithms. There is no data bulk to manage.
6) Sophisticated test setups that are highly data dependent are portable. All you need is the test client.
The building blocks for data generation are included, and many pre-built testing scenarios are already
wired to use them.
Additional details of this approach are explained below.
## Industrial Strength
The algorithms used to generate data are based on advanced techniques in
the realm of variate sampling. The authors have gone to great lengths to
ensure that data generation is efficient and as much O(1) in processing
time as possible.
For example...
One technique that is used to achieve this is to initialize and cache data
in high resolution look-up tables for distributions which may otherwise
perform differently depending on their respective density functions. The
existing Apache Commons Math libraries have been adapted into a set of
interpolated Inverse Cumulative Distribution sampling functions. This
means that you can use them all in the same place as you would a Uniform
distribution, and once initialized, they sample with identical overhead.
This means that by changing your test definition, you don't accidentally
change the behavior of your test client, only the data as intended.
## A Purpose-Built Tool
Many other testing systems avoid building a dataset generation component.
It's a tough problem to solve, so it is often just skipped. Instead, they
use libraries like "faker" or other sources of data which weren't designed
for testing at scale. Faker is well named, no pun intended. It was meant
as a vignette and wire-framing library, not a source of test data for
realistic results. If you are using a testing tool for scale testing and
relying on a faker variant, then you will almost certainly get invalid
results that do not represent how a system would perform in production.
The virtual dataset component of NoSQLBench is a library that was designed
for high scale and realistic data streams. It uses the limits of the data
types in the JVM to simulate high cardinality datasets which approximate
production data distributions for realistic and reproducible results.
## Deterministic
The data that is generated by the virtual dataset libraries is
deterministic. This means that for a given cycle in a test, the operation
that is synthesized for that cycle will be the same from one session to
the next. This is intentional. If you want to perturb the test data from
one session to the next, then you can most easily do it by simply
selecting a different set of cycles as your basis.
This means that if you find something interesting in a test run, you can
go back to it just by specifying the cycles in question. It also means
that you aren't losing comparative value between tests with additional
randomness thrown in. The data you generate will still look random to the
human eye, but that doesn't mean that it can't be reproducible.
## Statistically Shaped
All this means is that the values you use to tie your dataset together can
be specific to any distribution that is appropriate. You can ask for a
stream of floating point values 1 trillion values long, in any order. You
can use discrete or continuous distributions, with whatever distribution
parameters you need.
## Best of Both Worlds
Some might worry that fully synthetic testing data is not realistic
enough. The devil is in the details on these arguments, but suffice it to
say that you can pick the level of real data you use as seed data with
NoSQLBench. You don't have to choose between realism and agility. The
procedural data generation approach allows you to have all the benefits of
testing agility of low-entropy testing tools while retaining nearly all of
the benefits of real testing data.
For example, using the alias sampling method and a published US census
(public domain) list of names and surnames that occurred more than 100x, we
can provide extremely accurate samples of names according to the published
labels and weights. The alias method allows us to sample accurately in
O(1) time from the entire dataset by turning a large number of weights
into two uniform samples. You will simply not find a better way to sample
realistic (US) names than this. (If you do, please file an issue!)
Actually, any data set that you have in CSV form with a weight column can
also be used this way, so you're not strictly limited to US census data.
## Java Idiomatic Extension
The way that the virtual dataset component works allows Java developers to
write any extension to the data generation functions simply in the form of
Java 8 or newer Functional interfaces. As long as they include the
annotation processor and annotate their classes, they will show up in the
runtime and be available to any workload by their class name.
Additionally, annotation based examples and annotation processing is used
to hoist function docs directly into the published docs that go along with
any version of NoSQLBench.
## Binding Recipes
It is possible to stitch data generation functions together directly in a
workload YAML. These are data-flow sketches of functions that can be
copied and pasted between workload descriptions to share or remix data
streams. This allows for the adventurous to build sophisticated virtual
datasets that emulate nuances of real datasets, but in a form that takes
up less space on the screen than this paragraph!
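As a small sketch of the idea (the chained functions shown exist in the virtual dataset
library, but this particular recipe is illustrative), a binding recipe in a workload YAML
looks like:
```yaml
bindings:
  user_id: Hash(); Mod(1000000); ToString()
```
Each cycle number flows left to right through the chain, producing a deterministic value
per cycle that statement templates can reference by the binding name.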

View File

@ -1,8 +0,0 @@
---
title: Testing Practices
weight: 5
---
# Testing Practices
This section contains some introductory details on setting up test scenarios for new NoSQLBench users.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 125 KiB

View File

@ -1,53 +0,0 @@
@startuml
header Layers of Messaging
hide footbox
box "User View" #white
actor Alice as user
collections "User\nAgent" as agent
collections "Client\nSide\nApp" as capp
end box
box "WebApp View" #white
boundary "App Server\nor node, ..." as sapp
boundary "Database\nDriver" as driver
end box
box "DB View" #white
database DB as db
boundary "Storage\nService" as store
entity "Storage\nDevice" as device
end box
user -> agent: user clicks link
activate agent
agent -> capp: event\nhandler
activate capp
capp -> sapp: [http\nrequest]
activate sapp
sapp -> driver: read op
activate driver
driver -> db: read data
activate db
db -> store: read data
activate store
store -> device: read data
activate device
store <- device: [data]
deactivate device
db <- store: [data]
deactivate store
driver <- db: [data]
deactivate db
sapp <- driver: [data]
deactivate driver
capp <- sapp: web content\n[http]
deactivate sapp
agent <- capp: web content\n[http]
deactivate capp
user <- agent: browser\nrenders page
deactivate agent
@enduml

View File

@ -1,430 +0,0 @@
[Rendered SVG export of the "Layers of Messaging" sequence diagram above, with User View, WebApp View, and DB View panels; 430 lines of generated markup omitted.]
x="544.375" y="365.7875"/>
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="198.1626"
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
x="651.875" y="405.42"/>
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="118.8976"
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
x="759.375" y="445.0526"/>
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="39.6325"
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
x="866.875" y="484.6851"/>
<polygon fill="#A80036"
points="151.25,197.6249,163.75,202.6249,151.25,207.6249,156.25,202.6249"
style="stroke: #A80036; stroke-width: 1.25;"/>
<line style="stroke: #A80036; stroke-width: 1.25;" x1="33.75"
x2="158.75" y1="202.6249" y2="202.6249"/>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="105" x="42.5"
y="195.3637">user clicks link
</text>
<polygon fill="#A80036"
points="287.5,259.39,300,264.39,287.5,269.39,292.5,264.39"
style="stroke: #A80036; stroke-width: 1.25;"/>
<line style="stroke: #A80036; stroke-width: 1.25;" x1="178.75"
x2="295" y1="264.39" y2="264.39"/>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="42.5" x="187.5"
y="234.9962">event
</text>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="57.5" x="187.5"
y="257.1287">handler
</text>
<polygon fill="#A80036"
points="423.75,321.155,436.25,326.155,423.75,331.155,428.75,326.155"
style="stroke: #A80036; stroke-width: 1.25;"/>
<line style="stroke: #A80036; stroke-width: 1.25;" x1="315"
x2="431.25" y1="326.155" y2="326.155"/>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="37.5" x="323.75"
y="296.7612">[http
</text>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="62.5" x="323.75"
y="318.8938">request]
</text>
<polygon fill="#A80036"
points="529.375,360.7875,541.875,365.7875,529.375,370.7875,534.375,365.7875"
style="stroke: #A80036; stroke-width: 1.25;"/>
<line style="stroke: #A80036; stroke-width: 1.25;" x1="451.25"
x2="536.875" y1="365.7875" y2="365.7875"/>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="57.5" x="460"
y="358.5263">read op
</text>
<polygon fill="#A80036"
points="636.875,400.42,649.375,405.42,636.875,410.42,641.875,405.42"
style="stroke: #A80036; stroke-width: 1.25;"/>
<line style="stroke: #A80036; stroke-width: 1.25;" x1="556.875"
x2="644.375" y1="405.42" y2="405.42"/>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="71.25" x="565.625"
y="398.1588">read data
</text>
<polygon fill="#A80036"
points="744.375,440.0526,756.875,445.0526,744.375,450.0526,749.375,445.0526"
style="stroke: #A80036; stroke-width: 1.25;"/>
<line style="stroke: #A80036; stroke-width: 1.25;" x1="664.375"
x2="751.875" y1="445.0526" y2="445.0526"/>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="71.25" x="673.125"
y="437.7913">read data
</text>
<polygon fill="#A80036"
points="851.875,479.6851,864.375,484.6851,851.875,489.6851,856.875,484.6851"
style="stroke: #A80036; stroke-width: 1.25;"/>
<line style="stroke: #A80036; stroke-width: 1.25;" x1="771.875"
x2="859.375" y1="484.6851" y2="484.6851"/>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="71.25" x="780.625"
y="477.4238">read data
</text>
<polygon fill="#A80036"
points="785.625,519.3176,773.125,524.3176,785.625,529.3176,780.625,524.3176"
style="stroke: #A80036; stroke-width: 1.25;"/>
<line style="stroke: #A80036; stroke-width: 1.25;" x1="778.125"
x2="871.875" y1="524.3176" y2="524.3176"/>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="43.75" x="793.125"
y="517.0564">[data]
</text>
<polygon fill="#A80036"
points="678.125,558.9501,665.625,563.9501,678.125,568.9501,673.125,563.9501"
style="stroke: #A80036; stroke-width: 1.25;"/>
<line style="stroke: #A80036; stroke-width: 1.25;" x1="670.625"
x2="764.375" y1="563.9501" y2="563.9501"/>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="43.75" x="685.625"
y="556.6889">[data]
</text>
<polygon fill="#A80036"
points="570.625,598.5826,558.125,603.5826,570.625,608.5826,565.625,603.5826"
style="stroke: #A80036; stroke-width: 1.25;"/>
<line style="stroke: #A80036; stroke-width: 1.25;" x1="563.125"
x2="656.875" y1="603.5826" y2="603.5826"/>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="43.75" x="578.125"
y="596.3214">[data]
</text>
<polygon fill="#A80036"
points="465,638.2152,452.5,643.2152,465,648.2152,460,643.2152"
style="stroke: #A80036; stroke-width: 1.25;"/>
<line style="stroke: #A80036; stroke-width: 1.25;" x1="457.5"
x2="549.375" y1="643.2152" y2="643.2152"/>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="43.75" x="472.5"
y="635.9539">[data]
</text>
<polygon fill="#A80036"
points="328.75,699.9802,316.25,704.9802,328.75,709.9802,323.75,704.9802"
style="stroke: #A80036; stroke-width: 1.25;"/>
<line style="stroke: #A80036; stroke-width: 1.25;" x1="321.25"
x2="443.75" y1="704.9802" y2="704.9802"/>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="93.75" x="336.25"
y="675.5864">web content
</text>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="42.5" x="336.25"
y="697.719">[http]
</text>
<polygon fill="#A80036"
points="192.5,761.7453,180,766.7453,192.5,771.7453,187.5,766.7453"
style="stroke: #A80036; stroke-width: 1.25;"/>
<line style="stroke: #A80036; stroke-width: 1.25;" x1="185" x2="307.5"
y1="766.7453" y2="766.7453"/>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="93.75" x="200"
y="737.3515">web content
</text>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="42.5" x="200"
y="759.484">[http]
</text>
<polygon fill="#A80036"
points="47.5,823.5103,35,828.5103,47.5,833.5103,42.5,828.5103"
style="stroke: #A80036; stroke-width: 1.25;"/>
<line style="stroke: #A80036; stroke-width: 1.25;" x1="40" x2="171.25"
y1="828.5103" y2="828.5103"/>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="61.25" x="55"
y="799.1165">browser
</text>
<text fill="#000000" font-family="sans-serif" font-size="16.25"
lengthAdjust="spacingAndGlyphs" textLength="98.75" x="55"
y="821.249">renders page
</text>
<text fill="#888888" font-family="sans-serif" font-size="12.5"
lengthAdjust="spacingAndGlyphs" textLength="122.5" x="793.125"
y="17.1125">Layers of Messaging
</text><!--MD5=[349deb3338ea8e90f753903e3c65ec3b]
@startuml
header Layers of Messaging
hide footbox
box "User View" #clear
actor Alice as user
collections "User\nAgent" as agent
collections "Client\nSide\nApp" as capp
end box
box "WebApp View" #clear
boundary "App Server\nor node, ..." as sapp
boundary "Database\nDriver" as driver
end box
box "DB View" #clear
database DB as db
boundary "Storage\nService" as store
entity "Storage\nDevice" as device
end box
user -> agent: user clicks link [[{tooltip}]]
activate agent
agent -> capp: event\nhandler
activate capp
capp -> sapp: [http\nrequest]
activate sapp
sapp -> driver: read op
activate driver
driver -> db: read data
activate db
db -> store: read data
activate store
store -> device: read data
activate device
store <- device: [data]
deactivate device
db <- store: [data]
deactivate store
driver <- db: [data]
deactivate db
sapp <- driver: [data]
deactivate driver
capp <- sapp: web content\n[http]
deactivate sapp
agent <- capp: web content\n[http]
deactivate capp
user <- agent: browser\nrenders page
deactivate agent
@enduml


View File

@ -1,99 +0,0 @@
---
title: Multiple Clients
weight: 9
---
# Multiple Clients (Q&A)
This page is a basic FAQ regarding multiple clients with NoSQLBench.
The details in this section will be absorbed into the docs unless users find this format more useful. (Please give feedback on the Q&A format!)
-----
**question**
What is the right approach to run multiple instances of NoSQLBench (nb) for a given test?
**answer**
NoSQLBench can generate a significant amount of traffic. If you are testing more than 5 nodes on the server side (for comparable hardware), then it may be necessary to add more clients if you actually want to generate a saturating workload. Otherwise, one client is nearly always enough. Of course, you may want to double-check the resource usage on your client and then decide. Generally speaking, if client CPU usage is over 50%, it's a good idea to add more clients.
If you need to add more clients, you can split the workload and ensure that each client uses different data by giving each one a different set of cycles. For example, with a total workload size of 100M cycles, you can split it by setting `cycles=0..50M` on the first client and `cycles=50M..100M` on the second. This approach can be used to split cycles among any number of clients.
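For example, a command sketch for two clients (the workload name and connection parameters are placeholders; substitute your own):

    # client 1 takes the first half of the cycle range
    nb cql-iot cycles=0..50M hosts=...
    # client 2 takes the second half
    nb cql-iot cycles=50M..100M hosts=...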
-----
**question**
I observed that nb creates exactly the same transactions each time it is run. I understand that this is a feature, not a bug, and that it supports reproducibility. I thought I could run nb from multiple drivers, but then I would need more randomized behavior in nb.
**answer**
Yes. The cycle range used in the test actually changes the data used in the data bindings. If you are generating pseudo-random data already, you can simply use a different cycle range. For example, `cycles=100M` (shorthand for `cycles=0..100M`) is one set of operations, and `cycles=100M..200M` is a different set of operations (also 100M total, but with different values used within the operations).
This is a common enough request that we are going to add a way to hash the
input differently for different tests when desired. This will not be applied
by default, but when needed it will become the easiest way to handle this type of scenario.
-----
**question**
Is there more detailed documentation on the syntax of the YAML files that describe a benchmark? A list of examples would be welcome, too.
**answer**
The section of the docs called "Building Workloads" is actually a detailed
explanation of the YAML format. The YAML format and the concepts needed to
understand it are woven together there, with detailed examples from start
to finish.
-----
**question**
I installed OpsCenter and used it to visualize metrics such as Read Requests, Read Request Latency, OS: CPU, and others. This works.
I also tried `--docker-metrics` on the nb command line. I was able to open Grafana on port 3000 and found some metrics, but not Read Requests, Write Requests, etc. It seems that OpsCenter has more information.
**answer**
The metrics recorded by NoSQLBench are client-side. OpsCenter looks at server-side metrics. You can have both in one place if you use dsemetricscollector and combine the configs, but it is not as easy as just using --docker-metrics. We will add better docs for this.
When looking at metrics, it is critical to know the vantage point of each one and what it means for the test results. A new section called "Vantage Points" has been added to this part of the docs as a primer.
There are generally four vantage points of significance in C* testing:
1. Application (same as nb in this case)
2. Driver/Data Layer (generally the same as nb in this case, but we do offer driver metrics separately if needed)
3. Coordinator (sometimes called Proxy)
4. Replica
The latter two are the only ones you will see in OpsCenter. It usually makes sense to look along the request path and deduce from the differences, say the difference in read latency at the client, proxy, and storage levels.
-----
**question**
I also checked the metrics at the end of the log file created by nb and didn't find a breakdown into read/write metrics either. I used Cassandra-stress in the past and remember that it provided such information in its log file.
**answer**
If you want to instrument your statements in NoSQLBench so that metrics are reported separately for each statement, you can do that by adding the `instrument: true` option to your statements in the YAML. This works for the CQL driver, and we will look at ways to support it in other drivers too.
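As a sketch, an instrumented statement in a workload YAML might look like this (the statement name, CQL text, and `{key}` binding are illustrative, and the exact layout can vary by NoSQLBench version):

    statements:
      - name: read-row
        stmt: select * from baselines.keyvalue where key={key}
        instrument: true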
-----
**question**
I'm looking for something that can be scripted so that I can run multiple variations and extract results automatically.
**answer**
NoSQLBench can definitely do that. It is what it was built for. For multiple variations, either use the cycle range setting as described above, or add a permutation function to the head of your binding recipes.
Getting down to the details of what you mean by "variation" might be a quick conversation, but it could also be in-depth, depending on your requirements. For simple cases, just putting a `Hash()` at the front will randomize the data. You can also consider the `Shuffle(...)` functions with different bank numbers.
The feature mentioned above for pre-hashing will be the easiest way to do this once it is implemented.
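As a minimal binding sketch (binding names are illustrative; `Hash()` and `ToString()` are standard binding functions):

    bindings:
      # deterministic sequence derived directly from the cycle number
      seq_id: ToString()
      # the same recipe with Hash() prepended, yielding pseudo-random values
      rand_id: Hash(); ToString()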

View File

@ -1,60 +0,0 @@
---
title: Random Data
weight: 5
---
# Random Data
This section touches on the use of randomized data within NoSQLBench tests.
## Benefits
The benefits of using procedural generation for load testing are taken as a given in
this section. For a more thorough discussion of the assumed merits, please see the
_Virtual Datasets_ section under _Showcase_.
## Basic Theory
In NoSQLBench, the data used for each operation is generated on the fly. However, the data is also deterministic
by default. That means, for a given activity, any numbered cycle will produce the same operation from test to test,
so long as the parameters are the same.
NoSQLBench runs each activity over a specific range of cycles. Each cycle is based on a specific number
from the cycle range. This cycle number is used as the seed value for that cycle. It determines not
only which operation is selected, but also what data is generated and bound to that operation for execution.
The data generation is initialized at the start, and optimized for rapid access during steady state operation.
This is by design. However, there are ways of selecting how much variation you have from one test scenario to another.
## Managing Variation
Sometimes you will want to run the same test with the same operations, access patterns, and data.
For certain types of testing and comparisons, this is the only way to shed light on a specific
issue or variation in performance. The ability to run the same test against different target
systems is extremely valuable.
### Selecting Cycles
You can cause an activity to run a different set of operations simply by changing the cycle range used
in the test.
For an activity that is configured with `cycles=100M`, 100 million independent cycles will be used.
These cycles will be automatically apportioned to the client threads as needed until they are all
used up.
If you want to run 100 million different cycles, all you have to do is specify a different set
of seeds. This is as simple as specifying `cycles=100M..200M`, since the first example above is
only shorthand for `cycles=0..100M`.
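For example, two runs of the same activity over disjoint cycle ranges (a command sketch; the workload name and other parameters are placeholders):

    # first run: cycles 0..100M
    nb cql-iot cycles=100M ...
    # second run: a different set of 100M operations
    nb cql-iot cycles=100M..200M ...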
### Selecting Bindings
The built-in workloads come with bindings which support the "rampup" and "main" phases appropriately. This means that the cycles for rampup will use a binding that lays data into a dataset incrementally, as you would build a log cabin. Each cycle adds to the data. The bindings are chosen for this effect so that the rampup phase is incremental with the cycle value.
The main phase is selected differently. In the main phase, you don't want to address the data in order. To emulate a real workload, you need to select the data pseudo-randomly so that storage devices don't get to cheat with read-ahead (more than they realistically would), and so on. That means that the main phase bindings are also specifically chosen for the "random" access patterns that you might expect in some workloads.
The distinction between these two types of bindings should tell you something about the binding capabilities. You can really do whatever you want, as long as you can stitch the right functions together to get there. Although the data produced by some of the functions (like `Hash()`, for example) looks random, it is not. It is, however, effectively random enough for most distributed systems performance testing.
If you need to add randomization to fields, it doesn't hurt to add an additional `Hash()` to the front. Just be advised that the same constructions from one binding recipe to the next will yield the same outputs, so season to taste.
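As a sketch of the two styles (binding names are illustrative; the functions are from the standard binding library, so check your version's docs for exact signatures):

    bindings:
      # rampup-style: incremental with the cycle value
      rampup_key: ToString()
      # main-style: pseudo-random access over a bounded range
      main_key: HashRange(0,100000000); ToString()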

View File

@ -1,65 +0,0 @@
---
title: Vantage Points
weight: 8
---
# Vantage Points
A successful test of a system results in a set of measurements. However, there are many ways to
take measurements and they all serve to answer different questions. Where you take your measurements
also determines what you measure.
Consider the following diagram:
![Layers](layers.svg =600x)
This diagram illustrates a prototypical set of services and their inter-service dependencies. This view only shows synchronous calls to keep the diagram simple.
## User Impact
The outermost layer of the onion is what the user interacts with. In most modern services, this is the browser. As well, in most modern applications there is an active client-side component which acts as part of the composed application, with local page state being pseudo-persistent except for cache controls and full reloads. This highlights how far designers will go to make interactions "local" for users, to avoid the cost of long request loops.
As such, the browser is subject to any response times incurred within the inner service layers. Still, the browser represents the outermost, and thus most authentic, vantage point from which to measure the user impact of service time. This is called the _User View_ in the diagram above.
## Looking Inward
Beyond the outer layer, you'll usually find more layers. There is a ton of subjectivity in what these layers are called: "endpoint", "service", "web app", "app server". Although the names change, the underlying mechanisms are generally the same; the naming conventions come more from local norms within a tech space or community of builders. One person's "App Server" is another's "RESTful endpoint". What is important to notice is how the layers form a cascade of dependencies down to some physical device which is responsible for storing data. This pattern holds in nearly every system you look at.
Between each layer is a type of messaging component, sometimes called "media" or "transport" in RFCs. Each connection between the layers carries a set of fundamental trade-offs that, if understood, can establish reasonably durable bounds on the minimum and maximum response times that are possible.
For example, a storage device using NVMe as the host bus will, all else being equal, perform better than one served by a SATA channel. The specifications for these "transports" say as much, but more importantly, real-world results back this up.
Understanding the connections between each layer of abstraction is essential. At the least,
knowing the theoretical and practical limits of the technology at each layer is useful. Not to fear: a good testing setup can help you find these limits in specific terms.
## Service Time Math
There will be a limit to how much data you can collect and which vantage points you
can get it from. That means that sometimes you need to do some sleuthing with the data you
have in order to tease out important details.
For example, say you have a good set of metrics for the app server in the diagram above. You know that the p95 service time is 121ms. Suppose you also know the p95 service time for _the same calls_ at the DB layer. That is 32ms. If you don't know _anything else_ about the calls, you can at least infer that the difference between these two layers is around 89ms (P95). That means that, for 5 out of every 100 operations, somewhere between your web app, your db driver, and your db service, you are spending at least 89ms doing *something*. This could be in the active processing, or in the passive transport of data -- the ethernet layer or otherwise. At least you can set book-end expectations between these layers.
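In short (a sketch of the inference; percentile differences like this are an estimate, not a strict identity):

    app_p95 - db_p95 = 121ms - 32ms = 89ms   # spent somewhere between the two layers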
## Applied Principles
**outside-in**
Generally speaking, to understand how service times impact users, you want to measure from the outer vantage points. To understand why users see these service times, look at the inner layers.
**detailed enough**
When constructing layered views of your metrics, it is useful to first add the elements you need
and can instrument for metrics. The diagram above goes to a degree of detail that may be too much to be useful in a practical analysis scenario. You could add placeholders to capture elements of the transport and inter-connections, additional internal subsystems of layers, and so on. This is only useful if it helps tell an important story about the details of your system, i.e. details that you can use to take action for an improvement or to help you focus effort in the right place.
**clear labeling**
When you are capturing metrics, make sure that the nesting and vantage points are very clear to observers. A little detail in naming goes a long way to keeping operators honest with each other about what is actually happening in the system.
**contextual views**
As you learn to build operational views of systems, be sure to tailor them to the user-impacting services that your business is measured by. This starts on the outside of your system and cuts through the critical paths, focusing on the areas with the highest variability in responsiveness or availability, and including the details that need the most attention. You can't arrive at this by starting from a rich dashboard that includes the kitchen sink. It is an art form that you must practice constantly in order to keep your operational views relevant. Yes, there will be long-standing themes and objectives, but the more ephemeral factors need to be treated as such.

View File

@ -1,63 +0,0 @@
---
title: Built-In Workloads
weight: 40
---
# Built-In Workloads
There are a few built-in workloads which you may want to run. These can be run from the command line
without configuring anything, or they can be tailored with their built-in parameters.
## Finding Workloads
To find the built-in scenarios, ask NoSQLBench like this:
    nb --list-workloads
This specifically lists the workloads which provide named scenarios; only named scenarios are
included. Workloads are contained in YAML files. If a YAML file is in the standard path and
contains a root `scenarios` element, then it is included in the listing above.
Each of these scenarios has a set of parameters which can be changed on the command line.
## Running Workloads
You can run them directly by name with `nb <workload> [<scenario>] [<params>...]`. If not provided,
the scenario is assumed to be `default`.
For example, the `cql-iot` workload is listed with the above command, and can be executed like this:
    # put your normal extra params in ... below, like hosts, for example
    nb cql-iot default ...
    # OR, with scenario name default
    nb cql-iot ...
You can add any parameters to the end, and these parameters will be passed automatically to each stage of the scenario
as needed. Within the scenario, designers have the ability to lock parameters so that overrides are used appropriately.
## Conventions
The built-in workloads follow a set of conventions so that they can be used interchangeably. This is more for users who
are using the stages of these workloads directly, or for users who are designing new scenarios to be included in the
built-ins.
### Phases
Each built-in contains the following tags that can be used to break the workload up into uniform phases (an example command follows the list):
- schema - selected with `tags=block:"schema.*"`
- rampup - selected with `tags=block:rampup`
- main - selected with `tags=block:main`
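For example, to run only the rampup phase of a workload directly (a command sketch; the workload name and connection parameters are placeholders, and exact parameter names can vary by version):

    nb run driver=cql workload=cql-iot tags=block:rampup cycles=10M hosts=...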
### Parameters
Each built-in has a set of adjustable parameters which is documented below per workload. For example, the cql-iot
workload has a `sources` parameter which determines the number of unique devices in the dataset.
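Such parameters can be set on the command line like any other, for example (the value shown is only illustrative):

    nb cql-iot sources=10000 ...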
## Adding Workloads
If you want to add your own workload to NoSQLBench, or request a specific type of workload, please
[Request a workload](https://github.com/nosqlbench/nosqlbench/issues) or
[Submit a pull request](https://github.com/nosqlbench/nosqlbench/pulls).

View File

@ -43,12 +43,6 @@
<version>${revision}</version>
</dependency>
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>engine-docs</artifactId>
<version>${revision}</version>
</dependency>
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>engine-core</artifactId>

View File

@ -45,7 +45,6 @@
<module.engine-extensions>engine-extensions</module.engine-extensions>
<module.engine-docker>engine-docker</module.engine-docker>
<module.engine-cli>engine-cli</module.engine-cli>
<module.engine-docs>engine-docs</module.engine-docs>
<module.nb5>nb5</module.nb5>
<module.nbr>nbr</module.nbr>
@ -96,7 +95,6 @@
<module>engine-core</module>
<module>engine-extensions</module>
<module>engine-docker</module>
<module>engine-docs</module>
<module>engine-clients</module>
<module>engine-cli</module>
<module>adapters-api</module>