mirror of
https://github.com/nosqlbench/nosqlbench.git
synced 2025-02-25 18:55:28 -06:00
remove duplicitous doc files
This commit is contained in:
parent
5d0a890e28
commit
2d1f23a8d8
@ -1 +0,0 @@
|
||||
[Slack Invite](https://docs.google.com/forms/d/e/1FAIpQLSdUOJ8iAPqyxsLfh1nBBsKShI53RAeuzYW4bKExmRMWjj4ufQ/viewform)
|
@ -1,83 +0,0 @@
|
||||
<!--
|
||||
~ Copyright (c) 2023 nosqlbench
|
||||
~
|
||||
~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~ you may not use this file except in compliance with the License.
|
||||
~ You may obtain a copy of the License at
|
||||
~
|
||||
~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~
|
||||
~ Unless required by applicable law or agreed to in writing, software
|
||||
~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~ See the License for the specific language governing permissions and
|
||||
~ limitations under the License.
|
||||
-->
|
||||
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<artifactId>mvn-defaults</artifactId>
|
||||
<groupId>io.nosqlbench</groupId>
|
||||
<version>${revision}</version>
|
||||
<relativePath>../mvn-defaults</relativePath>
|
||||
</parent>
|
||||
|
||||
<artifactId>engine-docs</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>CLI for nosqlbench.</description>
|
||||
|
||||
<properties>
|
||||
<javadoc.name>nosqlbench Docs</javadoc.name>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>io.nosqlbench</groupId>-->
|
||||
<!-- <artifactId>engine-vis</artifactId>-->
|
||||
<!-- <version>2.11.31-SNAPSHOT</version>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<dependency>
|
||||
<groupId>io.nosqlbench</groupId>
|
||||
<artifactId>docsys</artifactId>
|
||||
<version>${revision}</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>src/main/resources</directory>
|
||||
<filtering>true</filtering>
|
||||
</resource>
|
||||
</resources>
|
||||
</build>
|
||||
|
||||
<profiles>
|
||||
<profile>
|
||||
<id>assemble</id>
|
||||
<activation>
|
||||
<activeByDefault>true</activeByDefault>
|
||||
</activation>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<version>3.3.0</version>
|
||||
<configuration>
|
||||
<descriptorRefs>jar-with-dependencies</descriptorRefs>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</profile>
|
||||
</profiles>
|
||||
|
||||
|
||||
</project>
|
@ -1,31 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2022 nosqlbench
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.nosqlbench.engine.docs;
|
||||
|
||||
import io.nosqlbench.nb.annotations.Service;
|
||||
import io.nosqlbench.api.markdown.providers.DocsRootDirectory;
|
||||
import io.nosqlbench.api.markdown.providers.RawMarkdownSource;
|
||||
|
||||
@Service(value = RawMarkdownSource.class, selector = "docs-for-eb")
|
||||
public class NosqlBenchRawMarkdownSource extends DocsRootDirectory {
|
||||
|
||||
@Override
|
||||
protected String getRootPathName() {
|
||||
return "docs-for-eb";
|
||||
}
|
||||
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2022 nosqlbench
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.nosqlbench.engine.docs;
|
||||
|
||||
import io.nosqlbench.api.docsapi.Docs;
|
||||
import io.nosqlbench.api.docsapi.DocsBinder;
|
||||
import io.nosqlbench.docsys.api.DocsysDynamicManifest;
|
||||
import io.nosqlbench.nb.annotations.Service;
|
||||
|
||||
@Service(value = DocsysDynamicManifest.class, selector = "docs-for-eb")
|
||||
public class NosqlbenchMarkdownManifest implements DocsysDynamicManifest {
|
||||
@Override
|
||||
public DocsBinder getDocs() {
|
||||
return new Docs().namespace("docs-for-eb")
|
||||
.addFirstFoundPath(
|
||||
"engine-docs/src/main/resources/docs-for-nb/",
|
||||
"docs-for-nb/"
|
||||
).setEnabledByDefault(true)
|
||||
.asDocsBinder();
|
||||
}
|
||||
}
|
@ -1,79 +0,0 @@
|
||||
---
|
||||
title: 00 YAML Organization
|
||||
weight: 00
|
||||
---
|
||||
|
||||
# YAML Organization
|
||||
|
||||
It is best to keep every workload self-contained within a single YAML
|
||||
file, including schema, data rampup, and the main phase of testing. The
|
||||
phases of testing are controlled by tags as described below.
|
||||
|
||||
**NOTE:**
|
||||
The phase names described below have been adopted as a convention within
|
||||
the built-in workloads. It is strongly advised that new workload YAMLs use
|
||||
the same tagging scheme so that workload are more plugable across YAMLs.
|
||||
|
||||
## Schema phase
|
||||
|
||||
The schema phase is simply a phase of your test which creates the
|
||||
necessary schema on your target system. For CQL, this generally consists
|
||||
of a keyspace and one ore more table statements. There is no special
|
||||
schema layer in nosqlbench. All statements executed are simply statements.
|
||||
This provides the greatest flexibility in testing since every activity
|
||||
type is allowed to control its DDL and DML using the same machinery.
|
||||
|
||||
The schema phase is normally executed with defaults for most parameters.
|
||||
This means that statements will execute in the order specified in the
|
||||
YAML, in serialized form, exactly once. This is a welcome side-effect of
|
||||
how the initial parameters like _cycles_ is set from the statements which
|
||||
are activated by tagging.
|
||||
|
||||
You can mark statements as schema phase statements by adding this set of
|
||||
tags to the statements, either directly, or by block:
|
||||
|
||||
tags:
|
||||
block: schema
|
||||
|
||||
## Rampup phase
|
||||
|
||||
When you run a performance test, it is very important to be aware of how
|
||||
much data is present. Higher density tests are more realistic for systems
|
||||
which accumulate data over time, or which have a large working set of
|
||||
data. The amount of data on the system you are testing should recreate a
|
||||
realistic amount of data that you would run in production, ideally. In
|
||||
general, there is a triangular trade-off between service time, op rate,
|
||||
and data density.
|
||||
|
||||
It is the purpose of the _rampup_ phase to create the backdrop data on a
|
||||
target system that makes a test meaningful for some level of data density.
|
||||
Data density is normally discussed as average per node, but it is also
|
||||
important to consider distribution of data as it varies from the least
|
||||
dense to the most dense nodes.
|
||||
|
||||
Because it is useful to be able to add data to a target cluster in an
|
||||
incremental way, the bindings which are used with a _rampup_ phase may
|
||||
actually be different from the ones used for a _main_ phase. In most
|
||||
cases, you want the rampup phase to create data in a way that
|
||||
incrementally adds to the population of data in the cluster. This allows
|
||||
you to add some data to a cluster with `cycles=0..1M` and then decide
|
||||
whether to continue adding data using the next contiguous range of cycles,
|
||||
with `cycles=1M..2M` and so on.
|
||||
|
||||
You can mark statements as rampup phase statements by adding this set of
|
||||
tags to the statements, either directly, or by block:
|
||||
|
||||
tags:
|
||||
block: rampup
|
||||
|
||||
## Main phase
|
||||
|
||||
The main phase of a nosqlbench scenario is the one during which you really
|
||||
care about the metric. This is the actual test that everything else has
|
||||
prepared your system for.
|
||||
|
||||
You can mark statement as schema phase statements by adding this set of
|
||||
tags to the statements, either directly, or by block:
|
||||
|
||||
tags:
|
||||
block: main
|
@ -1,55 +0,0 @@
|
||||
---
|
||||
title: 01 Statement Templates
|
||||
weight: 01
|
||||
---
|
||||
|
||||
# Statement Templates
|
||||
|
||||
A valid config file for an activity consists of statement templates, parameters for them, bindings to generate the data
|
||||
to use with them, and tags for organizing them.
|
||||
|
||||
In essence, the config format is *all about configuring statements*. Every other element in the config format is in some
|
||||
way modifying or otherwise helping create statements to be used in an activity.
|
||||
|
||||
Statement templates are the single most important part of a YAML config.
|
||||
|
||||
```yaml
|
||||
# a single statement
|
||||
statements:
|
||||
- a single statement body
|
||||
```
|
||||
|
||||
This is a valid activity YAML file in and of itself. It has a single statement template.
|
||||
|
||||
It is up to the individual activity types like _cql_, or _stdout_ to interpret the statement template in some way. The
|
||||
example above is valid as a statement in the stdout activity, but it does not produce a valid CQL statement with the CQL
|
||||
activity type. The contents of the statement template are free form text. If the statement template is valid CQL, then
|
||||
the CQL activity type can use it without throwing an error. Each activity type determines what a statement means, and
|
||||
how it will be used.
|
||||
|
||||
You can provide multiple statements, and you can use the YAML pipe to put them on multiple lines, indented a little
|
||||
further in:
|
||||
|
||||
```yaml
|
||||
statements:
|
||||
- |
|
||||
This is a statement, and the file format doesn't
|
||||
know how statements will be used!
|
||||
- |
|
||||
submit job {alpha} on queue {beta} with options {gamma};
|
||||
```
|
||||
|
||||
Statements can be named:
|
||||
|
||||
```yaml
|
||||
statements:
|
||||
- s1: |
|
||||
This is a statement, and the file format doesn't
|
||||
know how statements will be used!
|
||||
- s2: |
|
||||
submit job {alpha} on queue {beta} with options {gamma};
|
||||
```
|
||||
|
||||
Actually, every statement in a YAML has a name. If you don't provide one, then a name is auto-generated for the
|
||||
statement based on its position in the YAML file.
|
||||
|
@ -1,118 +0,0 @@
|
||||
---
|
||||
title: 02 Data Bindings
|
||||
weight: 02
|
||||
---
|
||||
|
||||
# Data Bindings
|
||||
|
||||
Procedural data generation is built-in to the nosqlbench runtime by way of the
|
||||
[Virtual DataSet](http://virtdata.io/) library. This allows us to create named data generation recipes. These named
|
||||
recipes for generated data are called bindings. Procedural generation for test data has
|
||||
[many benefits](http://docs.virtdata.io/why_virtdata/why_virtdata/) over shipping bulk test data around, including speed
|
||||
and deterministic behavior. With the VirtData approach, most of the hard work is already done for us. We just have to
|
||||
pull in the recipes we want.
|
||||
|
||||
You can add a bindings section like this:
|
||||
|
||||
```yaml
|
||||
bindings:
|
||||
alpha: Identity()
|
||||
beta: NumberNameToString()
|
||||
gamma: Combinations('0-9A-F;0-9;A-Z;_;p;r;o;')
|
||||
delta: WeightedStrings('one:1;six:6;three:3;')
|
||||
```
|
||||
|
||||
This is a YAML map which provides names and function specifiers. The specifier named _alpha_ provides a function that
|
||||
takes an input value and returns the same value. Together, the name and value constitute a binding named alpha. All of
|
||||
the four bindings together are called a bindings set.
|
||||
|
||||
The above bindings block is also a valid activity YAML, at least for the _stdout_ activity type. The _stdout_ activity
|
||||
can construct a statement template from the provided bindings if needed, so this is valid:
|
||||
|
||||
```text
|
||||
[test]$ cat > stdout-test.yaml
|
||||
bindings:
|
||||
alpha: Identity()
|
||||
beta: NumberNameToString()
|
||||
gamma: Combinations('0-9A-F;0-9;A-Z;_;p;r;o;')
|
||||
delta: WeightedStrings('one:1;six:6;three:3;')
|
||||
# EOF (control-D in your terminal)
|
||||
|
||||
[test]$ ./nb run driver=stdout workload=stdout-test cycles=10
|
||||
0,zero,00A_pro,six
|
||||
1,one,00B_pro,six
|
||||
2,two,00C_pro,three
|
||||
3,three,00D_pro,three
|
||||
4,four,00E_pro,six
|
||||
5,five,00F_pro,six
|
||||
6,six,00G_pro,six
|
||||
7,seven,00H_pro,six
|
||||
8,eight,00I_pro,six
|
||||
9,nine,00J_pro,six
|
||||
```
|
||||
|
||||
Above, you can see that the stdout activity type is idea for experimenting with data generation recipes. It uses the
|
||||
default `format=csv` parameter above, but it also supports formats like json, inlinejson, readout, and assignments.
|
||||
|
||||
This is all you need to provide a formulaic recipe for converting an ordinal value to a set of field values. Each time
|
||||
nosqlbench needs to create a set of values as parameters to a statement, the functions are called with an input, known
|
||||
as the cycle. The functions produce a set of named values that, when combined with a statement template, can yield an
|
||||
individual statement for a database operation. In this way, each cycle represents a specific operation. Since the
|
||||
functions above are pure functions, the cycle number of an operation will always produce the same operation, thus making
|
||||
all nosqlbench workloads deterministic.
|
||||
|
||||
In the example above, you can see the cycle numbers down the left.
|
||||
|
||||
If you combine the statement section and the bindings sections above into one activity yaml, you get a slightly
|
||||
different result, as the bindings apply to the statements that are provided, rather than creating a default statement
|
||||
for the bindings. See the example below:
|
||||
|
||||
```text
|
||||
[test]$ cat > stdout-test.yaml
|
||||
statements:
|
||||
- |
|
||||
This is a statement, and the file format doesn't
|
||||
know how statements will be used!
|
||||
- |
|
||||
submit job {alpha} on queue {beta} with options {gamma};
|
||||
bindings:
|
||||
alpha: Identity()
|
||||
beta: NumberNameToString()
|
||||
gamma: Combinations('0-9A-F;0-9;A-Z;_;p;r;o;')
|
||||
delta: WeightedStrings('one:1;six:6;three:3;')
|
||||
# EOF (control-D in your terminal)
|
||||
|
||||
[test]$ ./nb run driver=stdout workload=stdout-test cycles=10
|
||||
This is a statement, and the file format doesn't
|
||||
know how statements will be used!
|
||||
submit job 1 on queue one with options 00B_pro;
|
||||
This is a statement, and the file format doesn't
|
||||
know how statements will be used!
|
||||
submit job 3 on queue three with options 00D_pro;
|
||||
This is a statement, and the file format doesn't
|
||||
know how statements will be used!
|
||||
submit job 5 on queue five with options 00F_pro;
|
||||
This is a statement, and the file format doesn't
|
||||
know how statements will be used!
|
||||
submit job 7 on queue seven with options 00H_pro;
|
||||
This is a statement, and the file format doesn't
|
||||
know how statements will be used!
|
||||
submit job 9 on queue nine with options 00J_pro;
|
||||
```
|
||||
|
||||
There are a few things to notice here. First, the statements that are executed are automatically alternated between. If
|
||||
you had 10 different statements listed, they would all get their turn with 10 cycles. Since there were two, each was run
|
||||
5 times.
|
||||
|
||||
Also, the statement that had named anchors acted as a template, whereas the other one was evaluated just as it was. In
|
||||
fact, they were both treated as templates, but one of them had no anchors.
|
||||
|
||||
On more minor but important detail is that the fourth binding *delta* was not referenced directly in the statements.
|
||||
Since the statements did not pair up an anchor with this binding name, it was not used. No values were generated for it.
|
||||
|
||||
This is how activities are expected to work when they are implemented correctly. This means that the bindings themselves
|
||||
are templates for data generation, only to be used when necessary. This means that the bindings that are defined around
|
||||
a statement are more like a menu for the statement. If the statement uses those bindings with `{named}` anchors, then
|
||||
the recipes will be used to construct data when that statement is selected for a specific cycle. The cycle number both
|
||||
selects the statement (via the op sequence) and also provides the input value at the left side of the binding functions.
|
||||
|
@ -1,28 +0,0 @@
|
||||
---
|
||||
title: 03 Statement Params
|
||||
weight: 03
|
||||
|
||||
---
|
||||
|
||||
# Statement Parameters
|
||||
|
||||
Statements within a YAML can be accessorized with parameters. These are known as _statement params_ and are different
|
||||
than the parameters that you use at the activity level. They apply specifically to a statement template, and are
|
||||
interpreted by an activity type when the statement template is used to construct a native statement form.
|
||||
|
||||
For example, the statement parameter `ratio` is used when an activity is initialized to construct the op sequence. In
|
||||
the _cql_ activity type, the statement parameter `prepared` is a boolean that can be used to designated when a CQL
|
||||
statement should be prepared or not.
|
||||
|
||||
As with the bindings, a params section can be added at the same level, setting additional parameters to be used with
|
||||
statements. Again, this is an example of modifying or otherwise creating a specific type of statement, but always in a
|
||||
way specific to the activity type. Params can be thought of as statement properties. As such, params don't really do
|
||||
much on their own, although they have the same basic map syntax as bindings:
|
||||
|
||||
```yaml
|
||||
params:
|
||||
ratio: 1
|
||||
```
|
||||
|
||||
As with statements, it is up to each activity type to interpret params in a useful way.
|
||||
|
@ -1,92 +0,0 @@
|
||||
---
|
||||
title: 04 Statement Tags
|
||||
weight: 04
|
||||
---
|
||||
|
||||
# Statement Tags
|
||||
|
||||
Tags are used to mark and filter groups of statements for controlling which ones get used in a given scenario. Tags are
|
||||
generally free-form, but there is a set of conventions that can make your testing easier.
|
||||
|
||||
An example:
|
||||
|
||||
```yaml
|
||||
tags:
|
||||
name: foxtrot
|
||||
unit: bravo
|
||||
```
|
||||
|
||||
### Tag Filtering
|
||||
|
||||
The tag filters provide a flexible set of conventions for filtering tagged statements. Tag filters are usually provided
|
||||
as an activity parameter when an activity is launched. The rules for tag filtering are (updated in version 3.12):
|
||||
|
||||
0. If no conjugate is specified, `all(...)` is assumed. This is in keeping with the previous default. If you do specify
|
||||
a conjugate wrapper around the tag filter, it must be in the above form. `all(...)`, `any(...)`, and `none(...)` are
|
||||
allowed.
|
||||
1. If no tag filter is specified, then the statement matches.
|
||||
2. A tag name predicate like `tags=name` asserts the presence of a specific tag name, regardless of its value.
|
||||
3. A tag value predicate like `tags=name:foxtrot` asserts the presence of a specific tag name and a specific value for it.
|
||||
4. A tag pattern predicate like `tags=name:'fox.*'` asserts the presence of a specific tag name and a value that matches
|
||||
the provided regular expression.
|
||||
5. Multiple tag predicates may be specified as in `tags=name:'fox.*',unit:bravo`
|
||||
6.
|
||||
0. If the `all` conjugate form is used (the default), then if any predicate fails to match a tagged element, then the
|
||||
whole tag filtering expression fails to match.
|
||||
1. If the `any` conjugate form is used, then if all predicates fail to match a tagged element, then the whole tag filtering
|
||||
expression fails to match.
|
||||
2. If the `none` conjugate form is used, then if any predicate _matches_, a tagged element, then the whole expression
|
||||
matches.
|
||||
|
||||
A demonstration:
|
||||
|
||||
```text
|
||||
[test]$ cat > stdout-test.yaml
|
||||
tags:
|
||||
name: foxtrot
|
||||
unit: bravo
|
||||
statements:
|
||||
- "I'm alive!\n"
|
||||
# EOF (control-D in your terminal)
|
||||
|
||||
# no tag filter matches any
|
||||
[test]$ ./nb run driver=stdout workload=stdout-test
|
||||
I'm alive!
|
||||
|
||||
# tag name assertion matches
|
||||
[test]$ ./nb run driver=stdout workload=stdout-test tags=name
|
||||
I'm alive!
|
||||
|
||||
# tag name assertion does not match
|
||||
[test]$ ./nb run driver=stdout workload=stdout-test tags=name2
|
||||
02:25:28.158 [scenarios:001] ERROR i.e.activities.stdout.StdoutActivity - Unable to create a stdout statement if you have no active statements or bindings configured.
|
||||
|
||||
# tag value assertion does not match
|
||||
[test]$ ./nb run driver=stdout workload=stdout-test tags=name:bravo
|
||||
02:25:42.584 [scenarios:001] ERROR i.e.activities.stdout.StdoutActivity - Unable to create a stdout statement if you have no active statements or bindings configured.
|
||||
|
||||
# tag value assertion matches
|
||||
[test]$ ./nb run driver=stdout workload=stdout-test tags=name:foxtrot
|
||||
I'm alive!
|
||||
|
||||
# tag pattern assertion matches
|
||||
[test]$ ./nb run driver=stdout workload=stdout-test tags=name:'fox.*'
|
||||
I'm alive!
|
||||
|
||||
# tag pattern assertion does not match
|
||||
[test]$ ./nb run driver=stdout workload=stdout-test tags=name:'tango.*'
|
||||
02:26:05.149 [scenarios:001] ERROR i.e.activities.stdout.StdoutActivity - Unable to create a stdout statement if you have no active statements or bindings configured.
|
||||
|
||||
# compound tag predicate matches every assertion
|
||||
[test]$ ./nb run driver=stdout workload=stdout-test tags='name=fox.*',unit=bravo
|
||||
I'm alive!
|
||||
|
||||
# compound tag predicate does not fully match
|
||||
[test]$ ./nb run driver=stdout workload=stdout-test tags='name=fox.*',unit=delta
|
||||
11:02:53.490 [scenarios:001] ERROR i.e.activities.stdout.StdoutActivity - Unable to create a stdout statement if you have no active statements or bindings configured.
|
||||
|
||||
# any(...) form will work as long as one of the tags match
|
||||
[test]$ ./nb run driver=stdout workload=stdout-test tags='any(name=fox.*,thisone:wontmatch)',unit=bravo
|
||||
I'm alive!
|
||||
```
|
||||
|
@ -1,48 +0,0 @@
|
||||
---
|
||||
title: 05 Statement Blocks
|
||||
weight: 05
|
||||
---
|
||||
|
||||
# Statement Blocks
|
||||
|
||||
All the basic primitives described above (names, statements, bindings, params, tags) can be used to describe and
|
||||
parameterize a set of statements in a yaml document. In some scenarios, however, you may need to structure your
|
||||
statements in a more sophisticated way. You might want to do this if you have a set of common statement forms or
|
||||
parameters that need to apply to many statements, or perhaps if you have several *different* groups of statements that
|
||||
need to be configured independently.
|
||||
|
||||
This is where blocks become useful:
|
||||
|
||||
```text
|
||||
[test]$ cat > stdout-test.yaml
|
||||
bindings:
|
||||
alpha: Identity()
|
||||
beta: Combinations('u;n;u;s;e;d;')
|
||||
blocks:
|
||||
- statements:
|
||||
- "{alpha},{beta}\n"
|
||||
bindings:
|
||||
beta: Combinations('b;l;o;c;k;1;-;COMBINATIONS;')
|
||||
- statements:
|
||||
- "{alpha},{beta}\n"
|
||||
bindings:
|
||||
beta: Combinations('b;l;o;c;k;2;-;COMBINATIONS;')
|
||||
# EOF (control-D in your terminal)
|
||||
|
||||
[test]$ ./nb run driver=stdout workload=stdout-test cycles=10
|
||||
0,block1-C
|
||||
1,block2-O
|
||||
2,block1-M
|
||||
3,block2-B
|
||||
4,block1-I
|
||||
5,block2-N
|
||||
6,block1-A
|
||||
7,block2-T
|
||||
8,block1-I
|
||||
9,block2-O
|
||||
```
|
||||
|
||||
This shows a couple of important features of blocks. All blocks inherit defaults for bindings, params, and tags from the
|
||||
root document level. Any of these values that are defined at the base document level apply to all blocks contained in
|
||||
that document, unless specifically overridden within a given block.
|
||||
|
@ -1,289 +0,0 @@
|
||||
---
|
||||
title: 06 More on Statements
|
||||
weight: 06
|
||||
---
|
||||
|
||||
# More on Statements
|
||||
|
||||
The template forms available in nosqlbench are very flexible. That means that there are multiple ways
|
||||
of expressing templates for statements or operations. Thankfully, in most cases, the forms look like
|
||||
what they do, and most of the ways you can imagine constructing a statement will simply work, as long
|
||||
as the required details are provided for which driver you are using.
|
||||
|
||||
## Statement Delimiting
|
||||
|
||||
Sometimes, you want to specify the text of a statement in different ways. Since statements are strings, the simplest way
|
||||
for small statements is in double quotes. If you need to express a much longer statement with special characters an
|
||||
newlines, then you can use YAML's literal block notation (signaled by the '|' character) to do so:
|
||||
|
||||
```yaml
|
||||
statements:
|
||||
- |
|
||||
This is a statement, and the file format doesn't
|
||||
know how statements will be used!
|
||||
- |
|
||||
submit job {alpha} on queue {beta} with options {gamma};
|
||||
```
|
||||
|
||||
Notice that the block starts on the following line after the pipe symbol. This is a very popular form in practice
|
||||
because it treats the whole block exactly as it is shown, except for the initial indentations, which are removed.
|
||||
|
||||
Statements in this format can be raw statements, statement templates, or anything that is appropriate for the specific
|
||||
activity type they are being used with. Generally, the statements should be thought of as a statement form that you want
|
||||
to use in your activity -- something that has place holders for data bindings. These place holders are called *named
|
||||
anchors*. The second line above is an example of a statement template, with anchors that can be replaced by data for
|
||||
each cycle of an activity.
|
||||
|
||||
There is a variety of ways to represent block statements, with folding, without, with the newline removed, with it
|
||||
retained, with trailing newlines trimmed or not, and so forth. For a more comprehensive guide on the YAML conventions
|
||||
regarding multi-line blocks, see
|
||||
[YAML Spec 1.2, Chapter 8, Block Styles](http://www.yaml.org/spec/1.2/spec.html#Block)
|
||||
|
||||
## Statement Sequences
|
||||
|
||||
To provide a degree of flexibility to the user for statement definitions, multiple statements may be provided together
|
||||
as a sequence.
|
||||
|
||||
```yaml
|
||||
# a list of statements
|
||||
statements:
|
||||
- "This a statement."
|
||||
- "The file format doesn't know how statements will be used."
|
||||
- "submit job {job} on queue {queue} with options {options};"
|
||||
|
||||
# an ordered map of statements by name
|
||||
statements:
|
||||
name1: statement one
|
||||
name2: "statement two"
|
||||
```
|
||||
|
||||
In the first form, the names are provided automatically by the YAML loader. In the second form, they are specified as
|
||||
ordered map keys.
|
||||
|
||||
## Statement Properties
|
||||
|
||||
You can also configure individual statements with named properties, using the **statement properties** form:
|
||||
|
||||
```yaml
|
||||
# a list of statements with properties
|
||||
statements:
|
||||
- name: name1
|
||||
stmt: statement one
|
||||
- name: name2
|
||||
stmt: statement two
|
||||
```
|
||||
|
||||
This is the most flexible configuration format at the statement level. It is also the most verbose. Because this format
|
||||
names each property of the statement, it allows for other properties to be defined at this level as well. This includes
|
||||
all of the previously described configuration elements: `name`, `bindings`, `params`, `tags`, and additionally `stmt`. A
|
||||
detailed example follows:
|
||||
|
||||
```yaml
|
||||
statements:
|
||||
- name: foostmt
|
||||
stmt: "{alpha},{beta}\n"
|
||||
bindings:
|
||||
beta: Combinations('COMBINATIONS;')
|
||||
params:
|
||||
parm1: pvalue1
|
||||
tags:
|
||||
tag1: tvalue1
|
||||
freeparam3: a value, as if it were assigned under the params block.
|
||||
```
|
||||
|
||||
In this case, the values for `bindings`, `params`, and `tags` take precedence, overriding those set by the enclosing
|
||||
block or document or activity when the names match. Parameters called **free parameters** are allowed here, such as
|
||||
`freeparam3`. These are simply values that get assigned to the params map once all other processing has completed.
|
||||
|
||||
## Named Statement form
|
||||
|
||||
It is possible to mix the **`<name>: <statement>`** form as above in the example for mapping statement by name, so long
|
||||
as some specific rules are followed. An example, which is equivalent to the above:
|
||||
|
||||
```yaml
|
||||
statements:
|
||||
- foostmt: "{alpha},{beta}\n"
|
||||
parm1: pvalue1
|
||||
bindings:
|
||||
beta: Combinations('COMBINATIONS;')
|
||||
tags:
|
||||
tag1: tvalue1
|
||||
```
|
||||
|
||||
The rules:
|
||||
|
||||
1. You must avoid using both the name property and the initial
|
||||
**`<name>: <statement>`** together. Doing so will cause an error to be thrown.
|
||||
2. Do not use the **`<name>: <statement>`** form in combination with a
|
||||
**`stmt: <statement>`** property. It is not possible to detect if this occurs. Use caution if you choose to mix these forms.
|
||||
|
||||
As explained above, `parm1: pvalue1` is a *free parameter*, and is simply short-hand for setting values in the params
|
||||
map for the statement.
|
||||
|
||||
|
||||
## Named Statement Maps
|
||||
|
||||
By combining all the forms together with a map in the middle, we get this form, which allows for the
|
||||
enumeration of multiple statements, each with an obvious name, and a set of properties:
|
||||
|
||||
```yaml
|
||||
statements:
|
||||
- foostmt:
|
||||
stmt: "{alpha},{beta}\n"
|
||||
parm1: pvalue1
|
||||
bindings:
|
||||
beta: Combinations('COMBINATIONS;')
|
||||
tags:
|
||||
tag1: tvalue1
|
||||
- barstmt:
|
||||
optype: setvar
|
||||
parm3: 42
|
||||
parm5: true
|
||||
userid: 2342
|
||||
```
|
||||
|
||||
This form is arguably the easiest to read, but retains all the expressive power of the other forms too.
|
||||
The distinction between this form and the named properties form is that the structure underneath the
|
||||
first value is a map rather than a single value. Particularly, under the 'foostmt' name above, all of
|
||||
the content contained within it is formatted as properties of it -- indented properties.
|
||||
|
||||
Here are the basic rules for using this form:
|
||||
|
||||
1. Each statement is indicated by a YAML list entry like '-'.
|
||||
2. Each entry is a map with a single key. This key is taken as the statement name.
|
||||
3. The properties of this map work exactly the same as for named properties above, but repeating
|
||||
the name will throw an error since this is ambiguous.
|
||||
4. If the template is being used for CQL or another driver type which expects a 'stmt' property,
|
||||
it must be provided as an explicitly named 'stmt' property as in the foostmt example above.
|
||||
|
||||
Notice in the 'barstmt' example above that there is no "stmt" property. Some drivers
|
||||
with more flexible op templates may not require this. This is just a property name that was chosen
|
||||
to represent the "main body" of a statement template in the shorter YAML forms. While the 'stmt'
|
||||
property is required for drivers like CQL which have a solid concept for "statement body", it isn't
|
||||
required for all driver types which may build their operations from other properties.
|
||||
|
||||
### Per-Statement Format
|
||||
|
||||
It is indeed possible to use any of the three statement formats within each entry of a statement sequence:
|
||||
|
||||
```yaml
|
||||
statements:
|
||||
- first statement body
|
||||
- name: statement3
|
||||
stmt: third statement body
|
||||
- second: second statement body
|
||||
  - fourth: fourth statement body
|
||||
freeparam1: freeparamvalue1
|
||||
tags:
|
||||
type: preload
|
||||
- fifth:
|
||||
stmt: fifth statement body
|
||||
freeparam2: freeparamvalue2
|
||||
tags:
|
||||
tag2: tagvalue2
|
||||
```
|
||||
|
||||
The above is valid nosqlbench YAML, although a reader would need to know about the rules explained above in order to
|
||||
really make sense of it. For most cases, it is best to follow one format convention, but there is flexibility for
|
||||
overrides and naming when you need it. The main thing to remember is that the statement form is determined on an
|
||||
element-by-element basis for maximum flexibility.
|
||||
|
||||
## Detailed Examples
|
||||
|
||||
The above examples are explained in detail below in JSON schematic form, to assist users and developers
|
||||
understanding of the structural rules:
|
||||
|
||||
```yaml
|
||||
statements:
|
||||
|
||||
# ---------------------------------------------------------------------------------------
|
||||
|
||||
# string form
|
||||
# detected when the element is a single string value
|
||||
|
||||
- first statement body
|
||||
|
||||
# read as:
|
||||
# {
|
||||
# name: 'stmt1', // a generated name is also added
|
||||
  #      stmt: 'first statement body'
|
||||
# }
|
||||
|
||||
# ---------------------------------------------------------------------------------------
|
||||
|
||||
# properties form
|
||||
|
||||
# detected when the element is a map and the value of the first entry is not a map
|
||||
|
||||
- name: statement3
|
||||
stmt: third statement body
|
||||
|
||||
# read as:
|
||||
# {
|
||||
# name: 'statement3',
|
||||
# stmt: 'third statement body'
|
||||
# }
|
||||
|
||||
# ---------------------------------------------------------------------------------------
|
||||
|
||||
# named statement form:
|
||||
# detected when reading properties form and the first property name is not a reserved
|
||||
# word, like stmt, name, params, bindings, tags, ...
|
||||
|
||||
- second: second statement body
|
||||
|
||||
# read as:
|
||||
# {
|
||||
# name: 'second',
|
||||
# stmt: 'second statement body'
|
||||
# }
|
||||
|
||||
# ---------------------------------------------------------------------------------------
|
||||
|
||||
# properties form with free parameters:
|
||||
# detected when properties are used which are not reserved words.
|
||||
# Unrecognized words are pushed into the parameters map automatically.
|
||||
|
||||
  - fourth: fourth statement body
|
||||
freeparam1: freeparamvalue1
|
||||
tags:
|
||||
type: preload
|
||||
|
||||
# read as:
|
||||
# {
|
||||
# name: 'fourth',
|
||||
# stmt: 'fourth statement body',
|
||||
# params: {
|
||||
# freeparam1: 'freeparamvalue1'
|
||||
# },
|
||||
# tags: {
|
||||
  #      type: 'preload'
|
||||
# }
|
||||
# }
|
||||
|
||||
# ---------------------------------------------------------------------------------------
|
||||
|
||||
# named statement maps
|
||||
# detected when the element is a map and the only entry is a map.
|
||||
|
||||
- fifth:
|
||||
stmt: fifth statement body
|
||||
freeparam2: freeparamvalue2
|
||||
tags:
|
||||
tag2: tagvalue2
|
||||
|
||||
# read as:
|
||||
# {
|
||||
# name: 'fifth',
|
||||
# stmt: 'fifth statement body'
|
||||
# params: {
|
||||
# freeparam2: 'freeparamvalue2'
|
||||
# },
|
||||
# tags: {
|
||||
# tag2: 'tagvalue2'
|
||||
# }
|
||||
# }
|
||||
|
||||
# ---------------------------------------------------------------------------------------
|
||||
```
|
||||
|
@ -1,58 +0,0 @@
|
||||
---
|
||||
title: 07 Multi-Docs
|
||||
weight: 07
|
||||
---
|
||||
|
||||
# Multi-Docs
|
||||
|
||||
The YAML spec allows for multiple yaml documents to be concatenated in the
|
||||
same file with a separator:
|
||||
|
||||
```yaml
|
||||
---
|
||||
```
|
||||
|
||||
This offers an additional convenience when configuring activities. If you
|
||||
want to parameterize or tag a set of statements with their own
|
||||
bindings, params, or tags, but alongside another set of uniquely
|
||||
configured statements, you need only put them in separate logical
|
||||
documents, separated by a triple-dash.
|
||||
|
||||
For example:
|
||||
|
||||
```text
|
||||
[test]$ cat > stdout-test.yaml
|
||||
bindings:
|
||||
docval: WeightedStrings('doc1.1:1;doc1.2:2;')
|
||||
statements:
|
||||
- "doc1.form1 {docval}\n"
|
||||
- "doc1.form2 {docval}\n"
|
||||
---
|
||||
bindings:
|
||||
numname: NumberNameToString()
|
||||
statements:
|
||||
- "doc2.number {numname}\n"
|
||||
# EOF (control-D in your terminal)
|
||||
[test]$ ./nb run driver=stdout workload=stdout-test cycles=10
|
||||
doc1.form1 doc1.1
|
||||
doc1.form2 doc1.2
|
||||
doc2.number two
|
||||
doc1.form1 doc1.2
|
||||
doc1.form2 doc1.1
|
||||
doc2.number five
|
||||
doc1.form1 doc1.2
|
||||
doc1.form2 doc1.2
|
||||
doc2.number eight
|
||||
doc1.form1 doc1.1
|
||||
```
|
||||
|
||||
This shows that you can use the power of blocks and tags together at one
|
||||
level and also allow statements to be broken apart into a whole other
|
||||
level of partitioning if desired.
|
||||
|
||||
**WARNING:**
|
||||
The multi-doc support is there as a ripcord when you need it. However, it
|
||||
is strongly advised that you keep your YAML workloads simple to start and
|
||||
only use features like the multi-doc when you absolutely need it. For
|
||||
this, blocks are generally a better choice. See examples in the standard
|
||||
workloads.
|
@ -1,37 +0,0 @@
|
||||
---
|
||||
title: 08 Template Params
|
||||
weight: 08
|
||||
---
|
||||
|
||||
# Template Params
|
||||
|
||||
All nosqlbench YAML formats support a parameter macro format that applies before YAML processing starts. It is a basic
|
||||
macro facility that allows named anchors to be placed in the document as a whole:
|
||||
|
||||
```text
|
||||
<<varname:defaultval>>
|
||||
# or
|
||||
TEMPLATE(varname,defaultval)
|
||||
```
|
||||
|
||||
In this example, the name of the parameter is `varname`. It is given a default value of `defaultval`. If an activity
|
||||
parameter named *varname* is provided, as in `varname=barbaz`, then this whole expression will be replaced with
|
||||
`barbaz`. If none is provided then the default value will be used instead. For example:
|
||||
|
||||
```text
|
||||
[test]$ cat > stdout-test.yaml
|
||||
statements:
|
||||
- "<<linetoprint:MISSING>>\n"
|
||||
# EOF (control-D in your terminal)
|
||||
|
||||
[test]$ ./nb run driver=stdout workload=stdout-test cycles=1
|
||||
MISSING
|
||||
|
||||
[test]$ ./nb run driver=stdout workload=stdout-test cycles=1 linetoprint="THIS IS IT"
|
||||
THIS IS IT
|
||||
```
|
||||
|
||||
If an empty value is desired by default, then simply use an empty string in your template, like `<<varname:>>` or
|
||||
`TEMPLATE(varname,)`.
|
||||
|
||||
|
@ -1,39 +0,0 @@
|
||||
---
|
||||
title: 09 Statement Naming
|
||||
weight: 09
|
||||
---
|
||||
|
||||
# Statement Naming
|
||||
|
||||
Docs, Blocks, and Statements can all have names:
|
||||
|
||||
```yaml
|
||||
name: doc1
|
||||
blocks:
|
||||
- name: block1
|
||||
statements:
|
||||
- stmt1: statement1
|
||||
- name: st2
|
||||
stmt: statement2
|
||||
---
|
||||
name: doc2
|
||||
...
|
||||
```
|
||||
|
||||
This provides a layered naming scheme for the statements themselves. It is
|
||||
not usually important to name things except for documentation or metric
|
||||
naming purposes.
|
||||
|
||||
If no names are provided, then names are automatically created for blocks
|
||||
and statements. Statements assigned at the document level are assigned
|
||||
to "block0". All other statements are named with the
|
||||
format `doc#--block#--stmt#`.
|
||||
|
||||
For example, the full name of statement1 above would
|
||||
be `doc1--block1--stmt1`.
|
||||
|
||||
**NOTE:**
|
||||
If you anticipate wanting to get metrics for a specific statement in
|
||||
addition to the other metrics, then you will want to adopt the habit of
|
||||
naming all your statements something basic and descriptive.
|
||||
|
@ -1,292 +0,0 @@
|
||||
---
|
||||
title: 10 Named Scenarios
|
||||
weight: 10
|
||||
---
|
||||
|
||||
# Named Scenarios
|
||||
|
||||
There is one final element of a yaml that you need to know about: _named
|
||||
scenarios_.
|
||||
|
||||
**Named Scenarios allow anybody to run your testing workflows with a
|
||||
single command.**
|
||||
|
||||
You can provide named scenarios for a workload like this:
|
||||
|
||||
```yaml
|
||||
# contents of myworkloads.yaml
|
||||
scenarios:
|
||||
default:
|
||||
- run driver=diag cycles=10 alias=first-ten
|
||||
- run driver=diag cycles=10..20 alias=second-ten
|
||||
longrun:
|
||||
- run driver=diag cycles=10M
|
||||
```
|
||||
|
||||
This provides a way to specify more detailed workflows that users may want
|
||||
to run without them having to build up a command line for themselves.
|
||||
|
||||
A couple of other forms are supported in the YAML, for terseness:
|
||||
|
||||
```yaml
|
||||
scenarios:
|
||||
oneliner: run driver=diag cycles=10
|
||||
mapform:
|
||||
  part1: run driver=diag cycles=10 alias=part1
|
||||
part2: run driver=diag cycles=20 alias=part2
|
||||
```
|
||||
|
||||
These forms simply provide finesse for common editing habits, but they are
|
||||
automatically read internally as a list. In the map form, the names are
|
||||
discarded, but they may be descriptive enough for use as inline docs for
|
||||
some users. The order is retained as listed, since the names have no
|
||||
bearing on the order.
|
||||
|
||||
## Scenario selection
|
||||
|
||||
When a named scenario is run, it is *always* named, so that it can be
|
||||
looked up in the list of named scenarios under your `scenarios:` property.
|
||||
The only exception to this is when an explicit scenario name is not found
|
||||
on the command line, in which case it is automatically assumed to be
|
||||
_default_.
|
||||
|
||||
Some examples may be more illustrative:
|
||||
|
||||
```text
|
||||
# runs the scenario named 'default' if it exists, or throws an error if it does not.
|
||||
nb myworkloads
|
||||
# or
|
||||
nb myworkloads default
|
||||
|
||||
# runs the named scenario 'longrun' if it exists, or throws an error if it does not.
|
||||
nb myworkloads longrun
|
||||
|
||||
# runs the named scenario 'longrun' if it exists, or throws an error if it does not.
|
||||
# this is simply the canonical form which is more verbose, but more explicit.
|
||||
nb scenario myworkloads longrun
|
||||
|
||||
# run multiple named scenarios from one workload, and then some from another
|
||||
nb scenario myworkloads longrun default longrun scenario another.yaml name1 name2
|
||||
# In this form ^ you may have to add the explicit form to avoid conflicts between
|
||||
# workload names and scenario names. That's why the explicit form is provided, after all.
|
||||
```
|
||||
|
||||
## Workload selection
|
||||
|
||||
The examples above contain no reference to a workload (formerly called
|
||||
_yaml_). They don't need to, as they refer to themselves implicitly. You
|
||||
may add a `workload=` parameter to the command templates if you like, but
|
||||
this is never needed for basic use, and it is error prone to keep the
|
||||
filename matched to the command template. Just leave it out by default.
|
||||
|
||||
_However_, if you are doing advanced scripting across multiple systems,
|
||||
you can actually provide a `workload=` parameter particularly to use
|
||||
another workload description in your test.
|
||||
|
||||
**NOTE:**
|
||||
This is a powerful feature for workload automation and organization.
|
||||
However, it can get unwieldy quickly. Caution is advised for deep-linking
|
||||
too many scenarios in a workspace, as there is no mechanism for keeping
|
||||
them in sync when small changes are made.
|
||||
|
||||
## Named Scenario Discovery
|
||||
|
||||
For named scenarios, there is a way for users to find all the named
|
||||
scenarios that are currently bundled or in view of their current
|
||||
directory. A couple simple rules must be followed by scenario publishers
|
||||
in order to keep things simple:
|
||||
|
||||
1. Workload files in the current directory `*.yaml` are considered.
|
||||
2. Workload files in the relative path `activities/` with
|
||||
name `*.yaml` are considered.
|
||||
3. The same rules are used when looking in the bundled nosqlbench, so
|
||||
built-ins come along for the ride.
|
||||
4. Any workload file that contains a `scenarios:` tag is included, but all
|
||||
others are ignored.
|
||||
|
||||
This doesn't mean that you can't use named scenarios for workloads in
|
||||
other locations. It simply means that when users use
|
||||
the `--list-scenarios` option, these are the only ones they will see
|
||||
listed.
|
||||
|
||||
## Parameter Overrides
|
||||
|
||||
You can override parameters that are provided by named scenarios. Any
|
||||
parameter that you specify on the command line after your workload and
|
||||
optional scenario name will be used to override or augment the commands
|
||||
that are provided for the named scenario.
|
||||
|
||||
This is powerful, but it also means that you can sometimes munge
|
||||
user-provided activity parameters on the command line with the named
|
||||
scenario commands in ways that may not make sense. To solve this, the
|
||||
parameters in the named scenario commands may be locked. You can lock them
|
||||
silently, or you can provide a verbose locking that will cause an error if
|
||||
the user even tries to adjust them.
|
||||
|
||||
Silent locking is provided with a form like `param==value`. Any silent
|
||||
locked parameters will reject overrides from the command line, but will
|
||||
not interrupt the user.
|
||||
|
||||
Verbose locking is provided with a form like `param===value`. Any time a
|
||||
user provides a parameter on the command line for the named parameter, an
|
||||
error is thrown and they are informed that this is not possible. This
|
||||
level is provided for cases in which you would not want the user to be
|
||||
unaware of an unset parameter which is germane and specific to the named
|
||||
scenario.
|
||||
|
||||
All other parameters provided by the user will take the place of the
|
||||
same-named parameters provided in *each* command templates, in the order
|
||||
they appear in the template. Any other parameters provided by the user
|
||||
will be added to *each*
|
||||
of the command templates in the order they appear on the command line.
|
||||
|
||||
This is a little counter-intuitive at first, but once you see some
|
||||
examples it should make sense.
|
||||
|
||||
## Parameter Override Examples
|
||||
|
||||
Consider a simple workload with three named scenarios:
|
||||
|
||||
```yaml
|
||||
# basics.yaml
|
||||
scenarios:
|
||||
s1: run driver=stdout cycles=10
|
||||
s2: run driver=stdout cycles==10
|
||||
s3: run driver=stdout cycles===10
|
||||
|
||||
bindings:
|
||||
c: Identity()
|
||||
|
||||
statements:
|
||||
- A: "cycle={c}\n"
|
||||
```
|
||||
|
||||
Running this with no options prompts the user to select one of the named
|
||||
scenarios:
|
||||
|
||||
```text
|
||||
$ nb basics
|
||||
ERROR: Unable to find named scenario 'default' in workload 'basics', but you can pick from s1,s2,s3
|
||||
$
|
||||
```
|
||||
|
||||
### Basic Override example
|
||||
|
||||
If you run the first scenario `s1` with your own value for `cycles=7`, it
|
||||
does as you ask:
|
||||
|
||||
```text
|
||||
$ nb basics s1 cycles=7
|
||||
Logging to logs/scenario_20200324_205121_554.log
|
||||
cycle=0
|
||||
cycle=1
|
||||
cycle=2
|
||||
cycle=3
|
||||
cycle=4
|
||||
cycle=5
|
||||
cycle=6
|
||||
$
|
||||
```
|
||||
|
||||
### Silent Locking example
|
||||
|
||||
If you run the second scenario `s2` with your own value for `cycles=7`,
|
||||
then it does what the locked parameter
|
||||
`cycles==10` requires, without telling you that it is ignoring the
|
||||
specified value on your command line.
|
||||
|
||||
```text
|
||||
$ nb basics s2 cycles=7
|
||||
Logging to logs/scenario_20200324_205339_486.log
|
||||
cycle=0
|
||||
cycle=1
|
||||
cycle=2
|
||||
cycle=3
|
||||
cycle=4
|
||||
cycle=5
|
||||
cycle=6
|
||||
cycle=7
|
||||
cycle=8
|
||||
cycle=9
|
||||
$
|
||||
```
|
||||
|
||||
Sometimes, this is appropriate, such as when specifying settings
|
||||
like `threads==` for schema phases.
|
||||
|
||||
### Verbose Locking example
|
||||
|
||||
If you run the third scenario `s3` with your own value for `cycles=7`,
|
||||
then you will get an error telling you that this is not possible.
|
||||
Sometimes you want to make sure that the user knows a parameter should not
|
||||
be changed, and that if they want to change it, they'll have to make their
|
||||
own custom version of the scenario in question.
|
||||
|
||||
```text
|
||||
$ nb basics s3 cycles=7
|
||||
ERROR: Unable to reassign value for locked param 'cycles===7'
|
||||
$
|
||||
```
|
||||
|
||||
Ultimately, it is up to the scenario designer when to lock parameters for
|
||||
users. The built-in workloads offer some examples on how to set these
|
||||
parameters so that the right values are locked in place without bothering the
|
||||
user, but some values are made very clear in how they should be set.
|
||||
Please look at these examples for inspiration when you need them.
|
||||
|
||||
## Forcing Undefined (default) Parameters
|
||||
|
||||
If you want to ensure that any parameter in a named scenario template
|
||||
remains unset in the generated scenario script, you can assign it a value
|
||||
of UNDEF. The locking behaviors described above apply to this one as well.
|
||||
Thus, for schema commands which rely on the default sequence length (which
|
||||
is based on the number of active statements), you can set cycles==UNDEF to
|
||||
ensure that when a user passes a cycles parameter the schema phase doesn't
|
||||
break with too many cycles.
|
||||
|
||||
## Automatic Parameters
|
||||
|
||||
Some parameters are already known due to the fact that you are using named
|
||||
scenarios.
|
||||
|
||||
### workload
|
||||
|
||||
The `workload` parameter is, by default, set to the logical path (fully
|
||||
qualified workload name) of the yaml file containing the named scenario.
|
||||
However, if the command template contains this parameter, it may be
|
||||
overridden by users as any other parameter depending on the assignment
|
||||
operators as explained above.
|
||||
|
||||
### alias
|
||||
|
||||
The `alias` parameter is, by default, set to the expanded name of
|
||||
WORKLOAD_SCENARIO_STEP, which means that each activity within the scenario
|
||||
has a distinct and symbolic name. This is important for distinguishing
|
||||
metrics from one another across workloads, named scenarios, and steps
|
||||
within a named scenario. The above words are interpolated into the alias
|
||||
as follows:
|
||||
|
||||
- WORKLOAD - The simple name part of the fully qualified workload name.
|
||||
For example, with a workload (yaml path) of foo/bar/baz.yaml, the
|
||||
WORKLOAD name used here would be `baz`.
|
||||
|
||||
- SCENARIO - The name of the scenario as provided on the command line.
|
||||
|
||||
- STEP - The name of the step in the named scenario. If you used the list
|
||||
or string forms to provide a command template, then the steps are
|
||||
automatically named as a zero-padded number representing the step in the
|
||||
named scenario, starting from `000`, per named scenario. (The numbers
|
||||
are not globally assigned)
|
||||
|
||||
Because it is important to have uniquely named activities for the sake of
|
||||
sane metrics and logging, any alias provided when using named scenarios
|
||||
which does not include the three tokens above will cause a warning to be
|
||||
issued to the user explaining why this is a bad idea.
|
||||
|
||||
**NOTE:**
|
||||
UNDEF is handled before alias expansion above, so it is possible to force
|
||||
the default activity naming behavior above with `alias===UNDEF`. This is
|
||||
generally recommended, and will inform users if they try to set the alias
|
||||
in an unsafe way.
|
||||
|
||||
|
@ -1,90 +0,0 @@
|
||||
---
|
||||
title: YAML Diagnostics
|
||||
weight: 99
|
||||
---
|
||||
|
||||
# YAML Diagnostics
|
||||
|
||||
This section describes errors that you might see if you have a YAML loading issue, and what you can do to fix them.
|
||||
|
||||
### Undefined Name-Statement Tuple
|
||||
|
||||
This exception is thrown when the statement body is not found in a statement definition in any of the supported formats.
|
||||
For example, the following block will cause an error:
|
||||
|
||||
```yaml
|
||||
statements:
|
||||
- name: statement-foo
|
||||
params:
|
||||
aparam: avalue
|
||||
```
|
||||
|
||||
This is because `name` and `params` are reserved property names -- removed from the list of name-value pairs before free
|
||||
parameters are read. If the statement is not defined before free parameters are read, then the first free parameter is
|
||||
taken as the name and statement in `name: statement` form.
|
||||
|
||||
To correct this error, supply a statement property in the map, or simply replace the `name: statement-foo` entry with a
|
||||
`statement-foo: statement body` at the top of the map:
|
||||
|
||||
Either of these will work:
|
||||
|
||||
```yaml
|
||||
statements:
|
||||
- name: statement-foo
|
||||
stmt: statement body
|
||||
params:
|
||||
aparam: avalue
|
||||
---
|
||||
statements:
|
||||
- statement-foo: statement body
|
||||
params:
|
||||
aparam: avalue
|
||||
```
|
||||
|
||||
In both cases, it is clear to the loader where the statement body should come from, and what (if any) explicit naming
|
||||
should occur.
|
||||
|
||||
### Redefined Name-Statement Tuple
|
||||
|
||||
This exception is thrown when the statement name is defined in multiple ways. This is an explicit exception to avoid
|
||||
possible ambiguity about which value the user intended. For example, the following statements definition will cause an
|
||||
error:
|
||||
|
||||
```yaml
|
||||
statements:
|
||||
- name: name1
|
||||
name2: statement body
|
||||
```
|
||||
|
||||
This is an error because the statement is not defined before free parameters are read, and the `name: statement` form
|
||||
includes a second definition for the statement name. In order to correct this, simply remove the separate `name` entry,
|
||||
or use the `stmt` property to explicitly set the statement body. Either of these will work:
|
||||
|
||||
```yaml
|
||||
statements:
|
||||
- name2: statement body
|
||||
---
|
||||
statements:
|
||||
- name: name1
|
||||
stmt: statement body
|
||||
```
|
||||
|
||||
In both cases, there is only one name defined for the statement according to the supported formats.
|
||||
|
||||
### YAML Parsing Error
|
||||
|
||||
This exception is thrown when the YAML format is not recognizable by the YAML parser. If you are not working from
|
||||
examples that are known to load cleanly, then please review your document for correctness according to the
|
||||
[YAML Specification](https://yaml.org/spec/).
|
||||
|
||||
If you are sure that the YAML should load, then please
|
||||
[submit a bug report](https://github.com/nosqlbench/nosqlbench/issues/new?labels=bug) with details on the type of YAML
|
||||
file you are trying to load.
|
||||
|
||||
### YAML Construction Error
|
||||
|
||||
This exception is thrown when the YAML was loaded, but the configuration object was not able to be constructed from the
|
||||
in-memory YAML document. If this error occurs, it may be a bug in the YAML loader implementation. Please
|
||||
[submit a bug report](https://github.com/nosqlbench/nosqlbench/issues/new?labels=bug) with details on the type of YAML
|
||||
file you are trying to load.
|
||||
|
@ -1,50 +0,0 @@
|
||||
---
|
||||
title: Designing Workloads
|
||||
weight: 40
|
||||
---
|
||||
|
||||
# Designing Workloads
|
||||
|
||||
Workloads in nosqlbench are always controlled by a workload definition.
|
||||
Even the built-in workloads are simply pre-configured and controlled
|
||||
from a single YAML file which is bundled internally.
|
||||
|
||||
With nosqlbench a standard YAML configuration format is provided that is
|
||||
used across all activity types. This makes it easy to specify
|
||||
statements, statement parameters, data bindings, and tags. This section
|
||||
describes the standard YAML format and how to use it.
|
||||
|
||||
It is recommended that you read through the examples in each of the
|
||||
design sections in order. This guide was designed to give you a detailed
|
||||
understanding of workload construction with nosqlbench. The examples
|
||||
will also give you better insight into how nosqlbench works at a
|
||||
fundamental level.
|
||||
|
||||
## Multi-Protocol Support
|
||||
|
||||
You will notice that this guide is not overly CQL-specific. That is
|
||||
because nosqlbench is a multi-protocol tool. All that is needed for you
|
||||
to use this guide with other protocols is the release of more activity
|
||||
types. Try to keep that in mind as you think about designing workloads.
|
||||
|
||||
## Advice for new builders
|
||||
|
||||
### Review existing examples
|
||||
|
||||
The built-in workloads that are included with nosqlbench are also easy to copy out as a starting point. You just need to
|
||||
use two commands:
|
||||
|
||||
# find a workload you want to copy
|
||||
nb --list-workloads
|
||||
|
||||
|
||||
# copy a workload to your local directory
|
||||
nb --copy cql-iot
|
||||
|
||||
### Follow the conventions
|
||||
|
||||
The tagging conventions described under the YAML Conventions section
|
||||
will make your testing go smoother. All of the baselines that we publish
|
||||
for nosqlbench will use this form.
|
||||
|
||||
|
@ -1,30 +0,0 @@
|
||||
---
|
||||
title: Driver Types
|
||||
weight: 50
|
||||
---
|
||||
|
||||
# Driver Types
|
||||
|
||||
Each nosqlbench scenario is comprised of one or more activities of a specific type. The types of activities available
|
||||
are provided by the version of nosqlbench.
|
||||
|
||||
You can see this list at any time by running the command:
|
||||
|
||||
nb --list-drivers
|
||||
|
||||
Each one comes with its own built-in documentation. It can be accessed with this command:
|
||||
|
||||
nb help <driver>
|
||||
|
||||
This section contains the per-driver documentation that you get when you run the above command. These driver docs are
|
||||
auto-populated when NoSQLBench is built, so they are exactly the same as you will see with the above command, only
|
||||
rendered in HTML.
|
||||
|
||||
There may be additional documentation related to a given driver. To see the list of help topics, you
|
||||
can run this command:
|
||||
|
||||
nb help topics
|
||||
|
||||
The help for any topic can be read this way:
|
||||
|
||||
nb help <topic>
|
@ -1,196 +0,0 @@
|
||||
---
|
||||
title: 01 Commands
|
||||
weight: 2
|
||||
---
|
||||
|
||||
# Example Commands
|
||||
|
||||
Let's run a simple test against a cluster to establish some basic
|
||||
familiarity with NoSQLBench.
|
||||
|
||||
## Create a Schema
|
||||
|
||||
We will start by creating a simple schema in the database. From your
|
||||
command line, go ahead and execute the following command, replacing
|
||||
the `host=<host-or-ip>` with that of one of your database nodes.
|
||||
|
||||
```text
|
||||
./nb run driver=cql workload=cql-keyvalue tags=block:"schema.*" host=<host-or-ip>
|
||||
```
|
||||
|
||||
This command is creating the following schema in your database:
|
||||
|
||||
```cql
|
||||
CREATE KEYSPACE baselines
|
||||
WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}
|
||||
AND durable_writes = true;
|
||||
|
||||
CREATE TABLE baselines.keyvalue (
|
||||
key text PRIMARY KEY,
|
||||
value text
|
||||
)
|
||||
```
|
||||
|
||||
Let's break down each of those command line options.
|
||||
|
||||
`run` tells nosqlbench to run an activity.
|
||||
|
||||
`driver=...` is used to specify the activity type (driver). In this case
|
||||
we are using `cql`, which tells nosqlbench to use the DataStax Java Driver
|
||||
and execute CQL statements against a database.
|
||||
|
||||
`workload=...` is used to specify the workload definition file that
|
||||
defines the activity.
|
||||
|
||||
In this example, we use `cql-keyvalue` which is a pre-built workload that
|
||||
is packaged with nosqlbench.
|
||||
|
||||
`tags=block:"schema.*"` tells nosqlbench to run the yaml block that has
|
||||
the `block:"schema.*"` defined as one of its tags.
|
||||
|
||||
In this example, that is the DDL portion of the `cql-keyvalue`
|
||||
workload. `host=...` tells nosqlbench how to connect to your database,
|
||||
only one host is necessary.
|
||||
|
||||
If you like, you can verify the result of this command by describing your
|
||||
keyspace in cqlsh or DataStax Studio with
|
||||
`DESCRIBE KEYSPACE baselines`.
|
||||
|
||||
## Load Some Data
|
||||
|
||||
Before running a test of typical access patterns where you want to capture
|
||||
the results, you need to make the test more interesting than loading an
|
||||
empty table. For this, we use the rampup phase.
|
||||
|
||||
Before sending our test writes to the database, we will use the `stdout`
|
||||
activity type so we can see what nosqlbench is generating for CQL
|
||||
statements.
|
||||
|
||||
Go ahead and execute the following command:
|
||||
|
||||
./nb run driver=stdout workload=cql-keyvalue tags=block:rampup cycles=10
|
||||
|
||||
You should see 10 of the following statements in your console
|
||||
|
||||
```cql
|
||||
insert into baselines.keyvalue (key, value) values (0,382062539);
|
||||
insert into baselines.keyvalue (key, value) values (1,774912474);
|
||||
insert into baselines.keyvalue (key, value) values (2,949364593);
|
||||
insert into baselines.keyvalue (key, value) values (3,352527683);
|
||||
insert into baselines.keyvalue (key, value) values (4,351686621);
|
||||
insert into baselines.keyvalue (key, value) values (5,114304900);
|
||||
insert into baselines.keyvalue (key, value) values (6,439790106);
|
||||
insert into baselines.keyvalue (key, value) values (7,564330072);
|
||||
insert into baselines.keyvalue (key, value) values (8,296173906);
|
||||
insert into baselines.keyvalue (key, value) values (9,97405552);
|
||||
```
|
||||
|
||||
NoSQLBench deterministically generates data, so the generated values will
|
||||
be the same from run to run.
|
||||
|
||||
Now we are ready to write some data to our database. Go ahead and execute
|
||||
the following from your command line:
|
||||
|
||||
./nb run driver=cql workload=cql-keyvalue tags=block:rampup host=<host-or-ip> cycles=100k --progress console:1s
|
||||
|
||||
Note the differences between this and the command that we used to generate
|
||||
the schema.
|
||||
|
||||
`tags=block:rampup` is running the yaml block in `cql-keyvalue` that has
|
||||
only INSERT statements.
|
||||
|
||||
`cycles=100k` will run a total of 100,000 operations, in this case,
|
||||
100,000 writes. You will want to pick an appropriately large number of
|
||||
cycles in actual testing to make your main test meaningful.
|
||||
|
||||
**NOTE:**
|
||||
The cycles parameter is not just a quantity. It is a range of values.
|
||||
The `cycles=n` format is short for
|
||||
`cycles=0..n`, which makes cycles a zero-based range. For example,
|
||||
cycles=5 means that the activity will use cycles 0,1,2,3,4, but not 5. The
|
||||
reason for this is explained in detail in the Activity Parameters section.
|
||||
|
||||
These parameters are explained in detail in the section on _Activity
|
||||
Parameters_.
|
||||
|
||||
`--progress console:1s` will print the progression of the run to the
|
||||
console every 1 second.
|
||||
|
||||
You should see output that looks like this
|
||||
|
||||
```text
|
||||
cql-keyvalue: 0.00%/Running (details: min=0 cycle=1 max=100000)
|
||||
cql-keyvalue: 0.00%/Running (details: min=0 cycle=1 max=100000)
|
||||
cql-keyvalue: 0.32%/Running (details: min=0 cycle=325 max=100000)
|
||||
cql-keyvalue: 1.17%/Running (details: min=0 cycle=1171 max=100000)
|
||||
cql-keyvalue: 2.36%/Running (details: min=0 cycle=2360 max=100000)
|
||||
cql-keyvalue: 3.65%/Running (details: min=0 cycle=3648 max=100000)
|
||||
cql-keyvalue: 4.61%/Running (details: min=0 cycle=4613 max=100000)
|
||||
cql-keyvalue: 5.59%/Running (details: min=0 cycle=5593 max=100000)
|
||||
cql-keyvalue: 7.14%/Running (details: min=0 cycle=7138 max=100000)
|
||||
cql-keyvalue: 8.87%/Running (details: min=0 cycle=8868 max=100000)
|
||||
...
|
||||
cql-keyvalue: 100.00%/Finished (details: min=0 cycle=100000 max=100000)
|
||||
```
|
||||
|
||||
## Run the main test phase
|
||||
|
||||
Now that we have a base dataset of 100k rows in the database, we will now
|
||||
run a mixed read / write workload, by default this runs a 50% read / 50%
|
||||
write workload.
|
||||
|
||||
./nb run driver=cql workload=cql-keyvalue tags=block:main host=<host-or-ip> cycles=100k cyclerate=5000 threads=50 --progress console:1s
|
||||
|
||||
You should see output that looks like this:
|
||||
|
||||
```text
|
||||
Logging to logs/scenario_20190812_154431_028.log
|
||||
cql-keyvalue: 0.50%/Running (details: min=0 cycle=500 max=100000)
|
||||
cql-keyvalue: 2.50%/Running (details: min=0 cycle=2500 max=100000)
|
||||
cql-keyvalue: 6.70%/Running (details: min=0 cycle=6700 max=100000)
|
||||
cql-keyvalue: 11.16%/Running (details: min=0 cycle=11160 max=100000)
|
||||
cql-keyvalue: 14.25%/Running (details: min=0 cycle=14250 max=100000)
|
||||
cql-keyvalue: 18.41%/Running (details: min=0 cycle=18440 max=100000)
|
||||
cql-keyvalue: 22.76%/Running (details: min=0 cycle=22760 max=100000)
|
||||
cql-keyvalue: 27.27%/Running (details: min=0 cycle=27300 max=100000)
|
||||
cql-keyvalue: 31.81%/Running (details: min=0 cycle=31810 max=100000)
|
||||
cql-keyvalue: 36.34%/Running (details: min=0 cycle=36340 max=100000)
|
||||
cql-keyvalue: 40.90%/Running (details: min=0 cycle=40900 max=100000)
|
||||
cql-keyvalue: 45.48%/Running (details: min=0 cycle=45480 max=100000)
|
||||
cql-keyvalue: 50.05%/Running (details: min=0 cycle=50050 max=100000)
|
||||
cql-keyvalue: 54.36%/Running (details: min=0 cycle=54360 max=100000)
|
||||
cql-keyvalue: 58.91%/Running (details: min=0 cycle=58920 max=100000)
|
||||
cql-keyvalue: 63.40%/Running (details: min=0 cycle=63400 max=100000)
|
||||
cql-keyvalue: 66.96%/Running (details: min=0 cycle=66970 max=100000)
|
||||
cql-keyvalue: 71.61%/Running (details: min=0 cycle=71610 max=100000)
|
||||
cql-keyvalue: 76.11%/Running (details: min=0 cycle=76130 max=100000)
|
||||
cql-keyvalue: 80.66%/Running (details: min=0 cycle=80660 max=100000)
|
||||
cql-keyvalue: 85.22%/Running (details: min=0 cycle=85220 max=100000)
|
||||
cql-keyvalue: 89.80%/Running (details: min=0 cycle=89800 max=100000)
|
||||
cql-keyvalue: 94.46%/Running (details: min=0 cycle=94460 max=100000)
|
||||
cql-keyvalue: 98.93%/Running (details: min=0 cycle=98930 max=100000)
|
||||
cql-keyvalue: 100.00%/Finished (details: min=0 cycle=100000 max=100000)
|
||||
```
|
||||
|
||||
We have a few new command line options here:
|
||||
|
||||
`tags=block:main` is using a new block in our activity's yaml that
|
||||
contains both read and write queries.
|
||||
|
||||
`threads=50` is an important one. The default for nosqlbench is to run
|
||||
with a single thread. This is not adequate for workloads that will be
|
||||
running many operations, so threads is used as a way to increase
|
||||
concurrency on the client side.
|
||||
|
||||
`cyclerate=5000` is used to control the operations per second that are
|
||||
initiated by nosqlbench. This command line option is the primary means to
|
||||
rate limit the workload and here we are running at 5000 ops/sec.
|
||||
|
||||
## Now What?
|
||||
|
||||
Note in the above output, we
|
||||
see `Logging to logs/scenario_20190812_154431_028.log`.
|
||||
|
||||
By default nosqlbench records the metrics from the run in this file. We
|
||||
will go into detail about these metrics in the next section Viewing
|
||||
Results.
|
@ -1,58 +0,0 @@
|
||||
---
|
||||
title: 02 Results
|
||||
weight: 3
|
||||
---
|
||||
|
||||
# Example Results
|
||||
|
||||
We just ran a very simple workload against our database. In that example,
|
||||
we saw that nosqlbench writes to a log file and it is in that log file
|
||||
where the most basic form of metrics are displayed.
|
||||
|
||||
## Log File Metrics
|
||||
|
||||
For our previous run, we saw that nosqlbench was writing
|
||||
to `logs/scenario_20190812_154431_028.log`
|
||||
|
||||
Even when you don't configure nosqlbench to write its metrics to another
|
||||
location, it will periodically report all the metrics to the log file. At
|
||||
the end of a scenario, before nosqlbench shuts down, it will flush the
|
||||
partial reporting interval again to the logs. This means you can always
|
||||
look in the logs for metrics information.
|
||||
|
||||
**WARNING:**
|
||||
If you look in the logs for metrics, be aware that the last report will
|
||||
only contain a partial interval of results. When looking at the last
|
||||
partial window, only metrics which average over time or which compute the
|
||||
mean for the whole test will be meaningful.
|
||||
|
||||
Below is a sample of the log that gives us our basic metrics. There is a
|
||||
lot to digest here, for now we will only focus a subset of the most
|
||||
important metrics.
|
||||
|
||||
```text
|
||||
2019-08-12 15:46:00,274 INFO [main] i.e.c.ScenarioResult [ScenarioResult.java:48] -- BEGIN METRICS DETAIL --
|
||||
2019-08-12 15:46:00,294 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=GAUGE, name=cql-keyvalue.cycles.config.burstrate, value=5500.0
|
||||
2019-08-12 15:46:00,295 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=GAUGE, name=cql-keyvalue.cycles.config.cyclerate, value=5000.0
|
||||
2019-08-12 15:46:00,295 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=GAUGE, name=cql-keyvalue.cycles.waittime, value=3898782735
|
||||
2019-08-12 15:46:00,298 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=HISTOGRAM, name=cql-keyvalue.resultset-size, count=100000, min=0, max=1, mean=8.0E-5, stddev=0.008943914131967056, median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0
|
||||
2019-08-12 15:46:01,703 INFO [main] i.e.c.ScenarioResult [ScenarioResult.java:56] -- END METRICS DETAIL --
|
||||
```
|
||||
|
||||
The log contains lots of information on metrics, but this is obviously
|
||||
_not_ the most desirable way to consume metrics from nosqlbench.
|
||||
|
||||
We recommend that you use one of these methods, according to your
|
||||
environment or tooling available:
|
||||
|
||||
1. `--docker-metrics` with a local docker-based grafana dashboard (See the
|
||||
section on Docker Based Metrics)
|
||||
2. Send your metrics to a dedicated graphite server
|
||||
with `--report-graphite-to graphitehost`
|
||||
3. Record your metrics to local CSV files
|
||||
with `--report-csv-to my_metrics_dir`
|
||||
4. Record your metrics to HDR logs
|
||||
with `--log-histograms my_hdr_metrics.log`
|
||||
|
||||
See the command line reference for details on how to route your metrics to
|
||||
a metrics collector or format of your preference.
|
@ -1,87 +0,0 @@
|
||||
---
|
||||
title: 03 Metrics
|
||||
weight: 4
|
||||
---
|
||||
|
||||
# Example Metrics
|
||||
|
||||
A set of core metrics are provided for every workload that runs with nosqlbench, regardless of the activity type and
|
||||
protocol used. This section explains each of these metrics and shows an example of them from the log file.
|
||||
|
||||
## metric: result
|
||||
|
||||
This is the primary metric that should be used to get a quick idea of the throughput and latency for a given run. It
|
||||
encapsulates the entire operation life cycle ( ie. bind, execute, get result back ).
|
||||
|
||||
For this example we see that we averaged 3732 operations / second with 3.6ms 75th percentile latency and 23.9ms 99th
|
||||
percentile latency. Note the raw metrics are in microseconds. This duration_unit may change depending on how a user
|
||||
configures nosqlbench, so always double-check it.
|
||||
|
||||
```text
|
||||
2019-08-12 15:46:01,310 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=TIMER,
|
||||
name=cql-keyvalue.result, count=100000, min=233.48, max=358596.607, mean=3732.00338612, stddev=10254.850416061185,
|
||||
median=1874.815, p75=3648.767, p95=10115.071, p98=15855.615, p99=23916.543, p999=111292.415,
|
||||
mean_rate=4024.0234405430424, m1=3514.053841156124, m5=3307.431472596865, m15=3268.6786509004132,
|
||||
rate_unit=events/second, duration_unit=microseconds
|
||||
```
|
||||
|
||||
## metric: result-success
|
||||
|
||||
This metric shows whether there were any errors during the run. You can confirm that the count is equal to the number of
|
||||
cycles for the run if you are expecting or requiring zero failed operations.
|
||||
|
||||
Here we see that all 100k of our cycles succeeded. Note that the metrics for throughput and latency here are slightly
|
||||
different from the `result` metric simply because this is a separate timer that only includes operations which
|
||||
completed with no exceptions.
|
||||
|
||||
```text
|
||||
2019-08-12 15:46:01,452 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=TIMER,
|
||||
name=cql-keyvalue.result-success, count=100000, min=435.168, max=358645.759, mean=3752.40990808,
|
||||
stddev=10251.524945886964, median=1889.791, p75=3668.479, p95=10154.495, p98=15884.287, p99=24280.063,
|
||||
p999=111443.967, mean_rate=4003.3090048756894, m1=3523.40328629036, m5=3318.8463896065778, m15=3280.480326762243,
|
||||
rate_unit=events/second, duration_unit=microseconds
|
||||
```
|
||||
|
||||
## metric: resultset-size
|
||||
|
||||
For read workloads, this metric shows the size of result sent back to nosqlbench from the server. This is useful to
|
||||
confirm that you are reading rows that already exist in the database.
|
||||
|
||||
```text
|
||||
2019-08-12 15:46:00,298 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=HISTOGRAM,
|
||||
name=cql-keyvalue.resultset-size, count=100000, min=0, max=1, mean=8.0E-5, stddev=0.008943914131967056,
|
||||
median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0
|
||||
```
|
||||
|
||||
#### metric: tries
|
||||
|
||||
NoSQLBench will retry failures 10 times by default, this is configurable via the `maxtries` command line option for the
|
||||
cql activity type. This metric shows a histogram of the number of tries that each operation required, in this example,
|
||||
there were no retries as the `count` is 100k.
|
||||
|
||||
```text
|
||||
2019-08-12 15:46:00,341 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=HISTOGRAM,
|
||||
name=cql-keyvalue.tries, count=100000, min=1, max=1, mean=1.0, stddev=0.0, median=1.0,
|
||||
p75=1.0, p95=1.0, p98=1.0, p99=1.0, p999=1.0
|
||||
```
|
||||
|
||||
### More Metrics
|
||||
|
||||
nosqlbench offers many ways to report the metrics from a run, including:
|
||||
|
||||
- Built-in Docker Dashboard
|
||||
- Reporting to CSV
|
||||
- Reporting to Graphite
|
||||
- Reporting to HDR
|
||||
|
||||
To get more information on these options, see the output of
|
||||
|
||||
./nb --help
|
||||
|
||||
### Congratulations
|
||||
|
||||
You have completed your first run with nosqlbench!
|
||||
|
||||
In the 'Next Steps' section, you'll find options for how to continue, whether you are looking for basic testing or
|
||||
something more advanced.
|
||||
|
@ -1,42 +0,0 @@
|
||||
---
|
||||
title: Next Steps
|
||||
weight: 5
|
||||
---
|
||||
|
||||
# Next Steps
|
||||
|
||||
Now that you've run nosqlbench for the first time and seen what it does, you can choose what level of customization you
|
||||
want for further testing.
|
||||
|
||||
The sections below describe key areas that users typically customize when working with nosqlbench.
|
||||
|
||||
Everyone who uses nosqlbench will want to get familiar with the 'NoSQLBench Basics' section below. This is essential
|
||||
reading for new and experienced testers alike.
|
||||
|
||||
## High-Level Users
|
||||
|
||||
Several canonical workloads are already baked-in to nosqlbench for immediate use. If you are simply wanting to drive
|
||||
workloads from nosqlbench without building a custom workload, then you'll want to learn about the available workloads
|
||||
and their options.
|
||||
|
||||
Recommended reading for high-level testing workflow:
|
||||
1. 'Built-In Workloads'
|
||||
2. 'nosqlbench Basics'
|
||||
|
||||
## Workload Builders
|
||||
|
||||
If you want to use nosqlbench to build a tailored workload that closely emulates what a specific application would do,
|
||||
then you can build a YAML file that specifies all of the details of an iterative workload. You can specify the access
|
||||
patterns, data distributions, and more.
|
||||
|
||||
The recommended reading for this is:
|
||||
|
||||
1. 'NoSQLBench Basics'
|
||||
2. All of the 'Designing Workloads' section.
|
||||
3. The online examples (find the links in the Designing Workloads section.)
|
||||
|
||||
## Scenario Developers
|
||||
|
||||
For advanced scenario designs, iterative testing models, or analysis methods, you can use
|
||||
ECMAScript to control the scenario from start to finish. This is an advanced feature that is not recommended for
|
||||
first-time users. A guide for scenario developers will be released in increments.
|
@ -1,88 +0,0 @@
|
||||
---
|
||||
title: Quick Start Example
|
||||
weight: 20
|
||||
---
|
||||
|
||||
# Quick Start Example
|
||||
|
||||
## Getting NoSQLBench
|
||||
|
||||
NoSQLBench is packaged directly as a Linux binary named `nb` and as an executable Java jar named `nb.jar`.
|
||||
|
||||
## Downloading
|
||||
|
||||
The Linux binary is recommended, since it comes with its own JVM and eliminates the need to manage Java downloads. Both
|
||||
can be obtained at the releases section of the main NoSQLBench project:
|
||||
|
||||
- [NoSQLBench Releases](https://github.com/nosqlbench/nosqlbench/releases)
|
||||
|
||||
**NOTE:**
|
||||
Once you download the binary, you may need to `chmod +x nb` to make it
|
||||
executable. In order to run AppImage binaries, like nb, you need to have
|
||||
fuse support on your system. This is already provided on most
|
||||
distributions. If after downloading and executing nb, you get an error,
|
||||
please consult the
|
||||
[AppImage troubleshooting page](https://docs.appimage.org/user-guide/run-appimages.html#troubleshooting)
|
||||
.
|
||||
|
||||
This documentation assumes you are using the Linux binary initiating
|
||||
NoSqlBench commands with `./nb`. If you are using the jar, just
|
||||
replace `./nb` with `java -jar nb.jar` when running commands. If you are
|
||||
using the jar version, Java 15 is recommended, and will be required soon.
|
||||
|
||||
## Run a cluster
|
||||
|
||||
This section requires you to have a CQL system to connect to.
|
||||
If you don’t already have one, you can start an instance of DSE with this one-liner:
|
||||
|
||||
docker run -e DS_LICENSE=accept --name my-dse -p 9042:9042 -d datastax/dse-server:6.7.7
|
||||
|
||||
or consult the instructions at the
|
||||
[Apache Cassandra docker hub landing page](https://hub.docker.com/_/cassandra).
|
||||
|
||||
## Running
|
||||
|
||||
To run a simple built-in workload run:
|
||||
|
||||
./nb cql-iot
|
||||
|
||||
To get a list of built-in scenarios run:
|
||||
|
||||
# Get a list of all named scenarios and parameters
|
||||
./nb --list-scenarios
|
||||
|
||||
If you want a simple list of yamls which contain named scenarios, run:
|
||||
|
||||
# Get a simple list of yamls containing named scenarios
|
||||
./nb --list-workloads
|
||||
|
||||
**NOTE:**
|
||||
These commands will include workloads that were shipped with nb and
|
||||
workloads in your local directory. To learn more about how to design
|
||||
custom workloads see
|
||||
[designing workloads](/index.html#/docs/designing_workloads.html)
|
||||
|
||||
To provide your own contact points (comma separated), add the `hosts=`
|
||||
parameter
|
||||
|
||||
./nb cql-iot hosts=host1,host2
|
||||
|
||||
Additionally, if you have docker installed on your local system, and your user has permissions to use it, you can use
|
||||
`--docker-metrics` to stand up a live metrics dashboard at port 3000.
|
||||
|
||||
./nb cql-iot --docker-metrics
|
||||
|
||||
This example doesn't go into much detail about what it is doing. It is here to show you how quickly you can start
|
||||
running real workloads without having to learn much about the machinery that makes it happen.
|
||||
|
||||
The rest of this section has a more elaborate example that exposes some of
|
||||
the basic options you may want to adjust for your first serious test.
|
||||
|
||||
**NOTE:**
|
||||
If you want to see system-level metrics from your cluster, it is possible
|
||||
to get these as well as Apache Cassandra level metrics by using the DSE
|
||||
Metrics Collector (if using DSE), or by setting up a metrics feed to the
|
||||
Prometheus instance in your local docker stack. You can find the DSE
|
||||
Metrics Collector docs
|
||||
[here](https://docs.datastax.com/en/monitoring/doc/monitoring/metricsCollector/mcExportMetricsDocker.html)
|
||||
.
|
@ -1,65 +0,0 @@
|
||||
---
|
||||
title: Getting Support
|
||||
weight: 10
|
||||
---
|
||||
|
||||
# Getting Support
|
||||
|
||||
In general, our goals with NoSQLBench are to make the help systems and
|
||||
examples wrap around the users like a suit of armor, so that they feel
|
||||
capable of doing most things without having to ask for help. Please keep
|
||||
this in mind when looking for personal support from our community, and
|
||||
help us find those places where the docs are lacking. Maybe you can help
|
||||
us by adding some missing docs!
|
||||
|
||||
## Built-In Docs
|
||||
|
||||
The documentation for NoSQLBench is quite thorough. On the command line,
|
||||
you can see a list of built-in docs with the command:
|
||||
|
||||
nb help topics
|
||||
|
||||
To read any topic, simply use the command:
|
||||
|
||||
nb help <topic>
|
||||
|
||||
The documentation system you are looking at right now includes the same
|
||||
docs you can find above already and more. They are automatically included
|
||||
when NoSQLBench is built.
|
||||
|
||||
## NoSQLBench Discord Server
|
||||
|
||||
We have a discord server. This is where users and developers can discuss
|
||||
anything about NoSQLBench and support each other.
|
||||
Please [join us](https://discord.gg/dBHRakusMN) there if you are a new
|
||||
user of NoSQLBench!
|
||||
|
||||
## General Feedback
|
||||
|
||||
These guidelines are mirrored at the
|
||||
[Submitting Feedback](https://github.com/nosqlbench/nosqlbench/wiki/Submitting-Feedback)
|
||||
wiki page at the nosqlbench project site, which is also where
|
||||
any `[Submit Feedback]` links should take you.
|
||||
|
||||
## Bug Fixes
|
||||
|
||||
If you think you have found a bug, please
|
||||
[file a bug report](https://github.com/nosqlbench/nosqlbench/issues/new?labels=bug)
|
||||
. nosqlbench is actively used within DataStax, and verified bugs will get
|
||||
attention as resources permit. Bugs reports which are more detailed, or
|
||||
bug reports which include steps to reproduce will get attention first.
|
||||
|
||||
## Feature Requests
|
||||
|
||||
If you would like to see something in nosqlbench that is not there
|
||||
yet, please
|
||||
[submit a feature request](https://github.com/nosqlbench/nosqlbench/issues/new?labels=feature)
|
||||
.
|
||||
|
||||
## Documentation Requests
|
||||
|
||||
If you would like to see a specific nosqlbench or testing topic added to
|
||||
the guidebook, please
|
||||
[request docs content](https://github.com/nosqlbench/nosqlbench/issues/new?labels=docs)
|
||||
.
|
||||
|
@ -1,63 +0,0 @@
|
||||
---
|
||||
title: NoSQLBench Intro
|
||||
weight: 0
|
||||
---
|
||||
|
||||
# Welcome to NoSQLBench
|
||||
|
||||
Welcome to the documentation for NoSQLBench. This is a power tool that emulates real application workloads. This means
|
||||
that you can fast-track performance, sizing and data model testing without writing your own testing harness.
|
||||
|
||||
To get started right away, jump to the
|
||||
[Quick Start Example](/index.html#/docs/getting_started) from the menu on the left.
|
||||
|
||||
To see the ways you can get NoSQLBench, check out the project site
|
||||
[DOWNLOADS.md](https://github.com/nosqlbench/nosqlbench/blob/master/DOWNLOADS.md).
|
||||
|
||||
## What is NoSQLBench?
|
||||
|
||||
NoSQLBench is a serious performance testing tool for the NoSQL ecosystem.
|
||||
|
||||
**NoSQLBench brings advanced testing capabilities into one tool that are not found in other testing tools.**
|
||||
|
||||
- You can run common testing workloads directly from the command line. You can start doing this within 5 minutes of
|
||||
reading this.
|
||||
- You can generate virtual data sets of arbitrary size, with deterministic data and statistically shaped values.
|
||||
- You can design custom workloads that emulate your application, contained in a single file, based on statement
|
||||
templates - no IDE or coding required.
|
||||
- You can immediately plot your results in a docker and grafana stack on Linux with a single command line option.
|
||||
- When needed, you can open the access panels and rewire the runtime behavior of NoSQLBench to do advanced testing,
|
||||
including a full scripting environment with Javascript.
|
||||
|
||||
The core machinery of NoSQLBench has been built with attention to detail. It has been battle tested within DataStax as a
|
||||
way to help users validate their data models, baseline system performance, and qualify system designs for scale.
|
||||
|
||||
In short, NoSQLBench wishes to be a programmable power tool for performance testing. However, it is somewhat generic. It
|
||||
doesn't know directly about a particular type of system, or protocol. It simply provides a suitable machine harness in
|
||||
which to put your drivers and testing logic. If you know how to build a client for a particular kind of system, NoSQLBench will
|
||||
let you load it like a plugin and control it dynamically.
|
||||
|
||||
Initially, NoSQLBench comes with support for CQL, but we would like to see this expanded with contributions from others.
|
||||
|
||||
## Origins
|
||||
|
||||
The code in this project comes from multiple sources. The procedural data generation capability was known before as
|
||||
'Virtual Data Set'. The core runtime and scripting harness was from the 'EngineBlock' project. The CQL support was
|
||||
previously used within DataStax. In March of 2020, DataStax and the project maintainers for these projects decided to
|
||||
put everything into one OSS project in order to make contributions and sharing easier for everyone. Thus, the new
|
||||
project name and structure was launched as nosqlbench.io. NoSQLBench is an independent project that is primarily
|
||||
sponsored by DataStax.
|
||||
|
||||
We offer NoSQLBench as a new way of thinking about testing systems. It is not limited to testing only one type of
|
||||
system. It is our wish to build a community of users and practice around this project so that everyone in the NoSQL
|
||||
ecosystem can benefit from common concepts and understanding and reliable patterns of use.
|
||||
|
||||
## Scalable User Experience
|
||||
|
||||
NoSQLBench endeavors to be valuable to all users. We do this by making it easy for you, our user, to do just what you
|
||||
need without worrying about the rest. If you need to do something simple, it should be simple to find the right settings
|
||||
and just do it. If you need something more sophisticated, then you should be able to find what you need with a
|
||||
reasonable amount of effort and no surprises.
|
||||
|
||||
That is the core design principle behind NoSQLBench. We hope you like it.
|
||||
|
@ -1,403 +0,0 @@
|
||||
---
|
||||
title: Activity Parameters
|
||||
weight: 05
|
||||
---
|
||||
|
||||
# Activity Parameters
|
||||
|
||||
Activity parameters are passed as named arguments for an activity, either
|
||||
on the command line or via a scenario script. On the command line, these
|
||||
take the form of
|
||||
|
||||
<paramname>=<paramvalue>
|
||||
|
||||
Some activity parameters are universal in that they can be used with any
|
||||
driver type. These parameters are recognized by nosqlbench whether or not
|
||||
they are recognized by a particular driver implementation. These are
|
||||
called _core parameters_. Only core activity parameters are documented
|
||||
here.
|
||||
|
||||
**NOTE:**
|
||||
To see what activity parameters are valid for a given activity type, see
|
||||
the documentation for that activity type with
|
||||
`nb help <activity type>`.
|
||||
|
||||
When starting out, you want to familiarize yourself with these parameters.
|
||||
The most important ones to learn about first are driver, cycles and
|
||||
threads.
|
||||
|
||||
## driver
|
||||
|
||||
For historic reasons, you can also use `type`. They both mean the same
|
||||
thing for now, but `driver` is more descriptive. The `type` parameter will
|
||||
continue to be supported in this major version (3.x), but it will be an
|
||||
error to use it in 4.x and newer.
|
||||
|
||||
- `driver=<activity type>`
|
||||
- _default_: inferred from `alias` or `yaml` parameters, or unset
|
||||
- _required_: yes, unless inferred
|
||||
- _dynamic_: no
|
||||
|
||||
Every activity is powered by a named ActivityType. Thus, you must set
|
||||
the `type` parameter. If you do not specify this parameter, it will be
|
||||
inferred from a substring match against the alias and/or yaml parameters.
|
||||
If there is more than one valid match for a valid type value, then you
|
||||
must set the type parameter directly.
|
||||
|
||||
Telling nosqlbench what type of an activity will be run also determines
|
||||
what other parameters are considered valid and how they will be used. So
|
||||
in this way, the type parameter is actually the base parameter for any
|
||||
activity. When used with scenario commands like `run` or `start`, an
|
||||
activity of the named type will be initialized, and then further activity
|
||||
parameters on the command line will be used to configure it before it is
|
||||
started.
|
||||
|
||||
## alias
|
||||
|
||||
- `alias=<alias>`
|
||||
- _default_: inferred from yaml, or 'UNSET'
|
||||
- _required_: no
|
||||
- _dynamic_: no
|
||||
|
||||
You *should* set the _alias_ parameter when you have multiple activities,
|
||||
when you want to name metrics per-activity, or when you want to control
|
||||
activities via scripting.
|
||||
|
||||
Each activity can be given a symbolic name known as an _alias_. It is good
|
||||
practice to give all your activities an alias, since this determines the
|
||||
name used in logging, metrics, and even scripting control.
|
||||
|
||||
_default value_ : The name of any provided YAML filename is used as the
|
||||
basis for the default alias. Otherwise, the activity type name is used.
|
||||
This is a convenience for simple test scenarios only.
|
||||
|
||||
## threads
|
||||
|
||||
- `threads=<threads>`
|
||||
- _default_: 1
|
||||
- _required_: no
|
||||
- _dynamic_: yes
|
||||
|
||||
You *should* set the _threads_ parameter when you need to ramp up a
|
||||
workload.
|
||||
|
||||
Each activity can be created with a number of threads. It is important to
|
||||
adjust this setting to the system types used by nosqlbench.
|
||||
|
||||
_default value_ : For now, the default is simply *1*. Users must be aware
|
||||
of this setting and adjust it to a reasonable value for their workloads.
|
||||
|
||||
`threads=auto` : When you set `threads=auto`, it will set the number of
|
||||
threads to 10x the number of cores in your system. There is no distinction
|
||||
here between full cores and hardware threads. This is generally a
|
||||
reasonable number of threads to tap into the processing power of a client
|
||||
system.
|
||||
|
||||
`threads=__x` : When you set `threads=5x` or `threads=10x`, you will set
|
||||
the number of threads to some multiplier of the logical CPUs in the local
|
||||
system.
|
||||
|
||||
**NOTE:**
|
||||
The threads parameter will work slightly differently for activities using
|
||||
the async parameter. For example, when `async=500` is provided, then the
|
||||
number of async operations is split between all configured threads, and
|
||||
each thread will juggle a number of in-flight operations asynchronously.
|
||||
Without the async parameter, threads determines the logical concurrency
|
||||
level of nosqlbench in the classic 'request-per-thread' mode. Neither mode
|
||||
is strictly correct, and both modes can be used for more accurate testing
|
||||
depending on the constraints of your environment.
|
||||
|
||||
A good rule of thumb for setting threads for maximum effect is to set it
|
||||
relatively high, such as 10XvCPU when running synchronous workloads
|
||||
(when not providing the async parameter), and to 5XvCPU for all async
|
||||
workloads. Variation in system dynamics make it difficult to peg an ideal
|
||||
number, so experimentation is encouraged while you dial in your settings
|
||||
initially.
|
||||
|
||||
## cycles
|
||||
|
||||
- `cycles=<cycle count>`
|
||||
- `cycles=<cycle min>..<cycle max>`
|
||||
- _default_: same as `stride`
|
||||
- _required_: no
|
||||
- _dynamic_: no
|
||||
|
||||
The cycles parameter determines the starting and ending point for an
|
||||
activity. It determines the range of values which will act as seed values
|
||||
for each operation. For each cycle of the test, a statement is built from
|
||||
a statement template and executed as an operation.
|
||||
|
||||
If you do not set the cycles parameter, then it will automatically be set
|
||||
to the size of the sequence. The sequence is simply the length of the op
|
||||
sequence that is constructed from the active statements and ratios in your
|
||||
activity YAML.
|
||||
|
||||
You *should* set the cycles for every activity except for schema-like
|
||||
activities, or activities which you run just as a sanity check of active
|
||||
statements.
|
||||
|
||||
In the `cycles=<cycle count>` version, the count indicates the total
|
||||
number of cycles, and is equivalent to `cycles=0..<cycle max>`. In both
|
||||
cases, the max value is not the actual number of the last cycle. This is
|
||||
because all cycle parameters define a closed-open interval. In other
|
||||
words, the minimum value is either zero by default or the specified
|
||||
minimum value, but the maximum value is the first value *not* included in
|
||||
the interval. This means that you can easily stack intervals over
|
||||
subsequent runs while knowing that you will cover all logical cycles
|
||||
without gaps or duplicates. For example, given `cycles=1000` and then
|
||||
`cycles=1000..2000`, and then `cycles=2000..5K`, you know that all cycles
|
||||
between 0 (inclusive) and 5000 (exclusive) have been specified.
|
||||
|
||||
## stride
|
||||
|
||||
- `stride=<stride>`
|
||||
- _default_: same as op sequence length
|
||||
- _required_: no
|
||||
- _dynamic_: no
|
||||
|
||||
Usually, you don't want to provide a setting for stride, but it is still
|
||||
important to understand what it does. Within nosqlbench, each time a
|
||||
thread needs to allocate a set of cycles to operate on, it takes a
|
||||
contiguous range of values from a shared atomic value. Thus, the stride is
|
||||
the unit of micro-batching within nosqlbench. It also means that you can
|
||||
use stride to optimize a workload by setting the value higher than the
|
||||
default. For example if you are running a single-statement workload at a
|
||||
very high rate, it doesn't make sense for threads to allocate one op at a
|
||||
time from a shared atomic value. You can simply set
|
||||
`stride=1000` to cause (ballpark estimation) about 1000X less internal
|
||||
contention.
|
||||
|
||||
The stride is initialized to the calculated sequence length. The sequence
|
||||
length is simply the number of operations in the op sequence that is
|
||||
planned from your active statements and their ratios.
|
||||
|
||||
You usually do not want to set the stride directly. If you do, make sure
|
||||
it is a multiple of what it would normally be set to if you need to ensure
|
||||
that sequences are not divided up differently. This can be important when
|
||||
simulating the access patterns of applications.
|
||||
|
||||
**NOTE:**
|
||||
When simulating multi-op access patterns in non-async mode, the stride
|
||||
metric can tell you how long it took for a whole group of operations to
|
||||
complete.
|
||||
|
||||
## async
|
||||
|
||||
- `async=<ops>`
|
||||
- _default_: unset
|
||||
- _required_: no
|
||||
- _dynamic_: no
|
||||
|
||||
The `async=<ops>` parameter puts an activity into an asynchronous dispatch
|
||||
mode and configures each thread to juggle a proportion of the operations
|
||||
specified. If you specify `async=500 threads=10`, then each of 10 threads
|
||||
will manage execution of 50 operations at a time. With async mode, a
|
||||
thread will always prepare and send operations if there are fewer in
|
||||
flight than it is allotted before servicing any pending responses.
|
||||
|
||||
Async mode also puts threads into a different sequencing behavior. When in
|
||||
async mode, responses from an operation may arrive in a different order
|
||||
than they are sent, and thus linearized operations can't be guaranteed as
|
||||
with the non-async mode. This means that sometimes you want to avoid
|
||||
async mode when you are intentionally simulating access patterns with
|
||||
multiple linearized operations per user as you may see in your
|
||||
application.
|
||||
|
||||
The absence of the async parameter leaves the activity in the default
|
||||
non-async mode, where each thread works through a sequence of ops one
|
||||
operation at a time.
|
||||
|
||||
## cyclerate
|
||||
|
||||
- `cyclerate=<cycle_per_second>`
|
||||
- `cyclerate=<cycles_per_second>,<burst_ratio>`
|
||||
- _default_: unset
|
||||
- _required_: no
|
||||
- _dynamic_: yes
|
||||
|
||||
The cyclerate parameter sets a maximum op rate for individual cycles
|
||||
within the activity, across the whole activity, irrespective of how many
|
||||
threads are active.
|
||||
|
||||
**NOTE:**
|
||||
The cyclerate is a rate limiter, and can thus only throttle an activity to
|
||||
be slower than it would otherwise run. Rate limiting is also an invasive
|
||||
element in a workload, and will always come at a cost. For extremely high
|
||||
throughput testing, consider carefully whether your testing would benefit
|
||||
more from concurrency-based throttling as with async or the striderate
|
||||
described below.
|
||||
|
||||
When the cyclerate parameter is provided, two additional metrics are
|
||||
tracked: the wait time and the response time. See the 'Reference|Timing
|
||||
Terms' section for more details on these metrics.
|
||||
|
||||
_default_: None. When the cyclerate parameter is not provided, an activity
|
||||
runs as fast as it can given how fast operations can complete.
|
||||
|
||||
Examples:
|
||||
|
||||
- `cyclerate=1000` - set the cycle rate limiter to 1000 ops/s and a
|
||||
default burst ratio of 1.1.
|
||||
- `cyclerate=1000,1.0` - same as above, but with burstrate set to 1.0
|
||||
(use it or lose it, not usually desired)
|
||||
- `cyclerate=1000,1.5` - same as above, with burst rate set to 1.5 (aka
|
||||
50% burst allowed)
|
||||
|
||||
Synonyms:
|
||||
|
||||
- `rate`
|
||||
- `targetrate`
|
||||
|
||||
### burst ratio
|
||||
|
||||
This is only an optional part of the cyclerate as shown in examples above.
|
||||
If you do not specify it when you initialize a cyclerate, then it defaults to
|
||||
1.1. The burst ratio is only valid as part of a rate limit and can not be
|
||||
specified by itself.
|
||||
|
||||
* _default_: `1.1`
|
||||
* _dynamic_: yes
|
||||
|
||||
The nosqlbench rate limiter provides a sliding scale between strict rate
|
||||
limiting and average rate limiting. The difference between them is
|
||||
controlled by a _burst ratio_ parameter. When the burst ratio is 1.0
|
||||
(burst up to 100% relative rate), the rate limiter acts as a strict rate
|
||||
limiter, disallowing faster operations from using time that was previously
|
||||
forfeited by prior slower operations. This is a "use it or lose it" mode
|
||||
that means things like GC events can steal throughput from a running
|
||||
client as a necessary effect of losing time in a strict timing sense.
|
||||
|
||||
When the burst ratio is set to higher than 1.0, faster operations may
|
||||
recover lost time from previously slower operations. For example, a burst
|
||||
ratio of 1.3 means that the rate limiter will allow bursting up to 130% of
|
||||
the base rate, but only until the average rate is back to 100% relative
|
||||
speed. This means that any valleys created in the actual op rate of the
|
||||
client can be converted into plateaus of throughput above the strict rate,
|
||||
but only at a speed that fits within (op rate * burst ratio). This allows
|
||||
for workloads to approximate the average target rate over time, with
|
||||
controllable bursting rates. This ability allows for near-strict behavior
|
||||
while allowing clients to still track truer to rate limit expectations, so
|
||||
long as the overall workload is not saturating resources.
|
||||
|
||||
**NOTE:**
|
||||
The default burst ratio of 1.1 makes testing results slightly more stable
|
||||
on average, but can also hide some short-term slow-downs in system
|
||||
throughput. It is set at the default to fit most tester's expectations for
|
||||
averaging results, but it may not be strict enough for your testing
|
||||
purposes. However, a strict setting of 1.0 nearly always adds cold/startup
|
||||
time to the result, so if you are testing for steady state, be sure to
|
||||
account for this across test runs.
|
||||
|
||||
## striderate
|
||||
|
||||
- `striderate=<strides per second>`
|
||||
- `striderate=<strides per second>,<burst_ratio>`
|
||||
- _default_: unset
|
||||
- _required_: no
|
||||
- _dynamic_: yes
|
||||
|
||||
The `striderate` parameter allows you to limit the start of a stride
|
||||
according to some rate. This works almost exactly like the cyclerate
|
||||
parameter, except that it blocks a whole group of operations from starting
|
||||
instead of a single operation. The striderate can use a burst ratio just
|
||||
as the cyclerate.
|
||||
|
||||
This sets the target rate for strides. In nosqlbench, a stride is a group
|
||||
of operations that are dispatched and executed together within the same
|
||||
thread. This is useful, for example, to emulate application behaviors in
|
||||
which some outside request translates to multiple internal requests. It is
|
||||
also a way to optimize a client runtime for more efficiency and
|
||||
throughput. The stride rate limiter applies to the whole activity
|
||||
irrespective of how many threads it has.
|
||||
|
||||
**WARNING:**
|
||||
When using the cyclerate and striderate options together, operations are
|
||||
delayed based on both rate limiters. If the relative rates are not
|
||||
synchronised with the size of a stride, then one rate limiter will
|
||||
artificially throttle the other. Thus, it usually doesn't make sense to
|
||||
use both of these settings in the same activity.
|
||||
|
||||
## seq
|
||||
|
||||
- `seq=<bucket|concat|interval>`
|
||||
- _default_: `seq=bucket`
|
||||
- _required_: no
|
||||
- _dynamic_: no
|
||||
|
||||
The `seq=<bucket|concat|interval>` parameter determines the type of
|
||||
sequencing that will be used to plan the op sequence. The op sequence is a
|
||||
look-up-table that is used for each stride to pick statement forms
|
||||
according to the cycle offset. It is simply the sequence of statements
|
||||
from your YAML that will be executed, but in a pre-planned, and highly
|
||||
efficient form.
|
||||
|
||||
An op sequence is planned for every activity. With the default ratio on
|
||||
every statement as 1, and the default bucket scheme, the basic result is
|
||||
that each active statement will occur once in the order specified. Once
|
||||
you start adding ratios to statements, the most obvious thing that you
|
||||
might expect will happen: those statements will occur multiple times to
|
||||
meet their ratio in the op mix. You can customize the op mix further by
|
||||
changing the seq parameter to concat or interval.
|
||||
|
||||
**NOTE:**
|
||||
The op sequence is a look up table of statement templates, *not*
|
||||
individual statements or operations. Thus, the cycle still determines the
|
||||
uniqueness of an operation as you would expect. For example, if statement
|
||||
form ABC occurs 3x per sequence because you set its ratio to 3, then each
|
||||
of these would manifest as a distinct operation with fields determined by
|
||||
distinct cycle values.
|
||||
|
||||
There are three schemes to pick from:
|
||||
|
||||
### bucket
|
||||
|
||||
This is a round robin planner which draws operations from buckets in
|
||||
circular fashion, removing each bucket as it is exhausted. For example,
|
||||
the ratios A:4, B:2, C:1 would yield the sequence A B C A B A A. The
|
||||
ratios A:1, B:5 would yield the sequence A B B B B B.
|
||||
|
||||
### concat
|
||||
|
||||
This simply takes each statement template as it occurs in order and
|
||||
duplicates it in place to achieve the ratio. The ratios above (A:4, B:2,
|
||||
C:1) would yield the sequence A A A A B B C for the concat sequencer.
|
||||
|
||||
### interval
|
||||
|
||||
This is arguably the most complex sequencer. It takes each ratio as a
|
||||
frequency over a unit interval of time, and apportions the associated
|
||||
operation to occur evenly over that time. When two operations would be
|
||||
assigned the same time, then the order of appearance establishes
|
||||
precedence. In other words, statements appearing first win ties for the
|
||||
same time slot. The ratios A:4 B:2 C:1 would yield the sequence A B C A A
|
||||
B A. This occurs because, over the unit interval (0.0,1.0), A is assigned
|
||||
the positions `A: 0.0, 0.25, 0.5, 0.75`, B is assigned the
|
||||
positions `B: 0.0, 0.5`, and C is assigned position `C: 0.0`. These
|
||||
offsets are all sorted with a position-stable sort, and then the
|
||||
associated ops are taken as the order.
|
||||
|
||||
In detail, the rendering appears
|
||||
as `0.0(A), 0.0(B), 0.0(C), 0.25(A), 0.5(A), 0.5(B), 0.75(A)`, which
|
||||
yields `A B C A A B A` as the op sequence.
|
||||
|
||||
This sequencer is most useful when you want a stable ordering of operation
|
||||
from a rich mix of statement types, where each operation is spaced as
|
||||
evenly as possible over time, and where it is not important to control the
|
||||
cycle-by-cycle sequencing of statements.
|
||||
|
||||
## hdr_digits
|
||||
|
||||
- `hdr_digits=3`
|
||||
- _default_: `4`
|
||||
- _required_: no
|
||||
- _dynamic_: no
|
||||
|
||||
This parameter determines the number of significant digits used in all HDR
|
||||
histograms for metrics collected from this activity. The default of 4
|
||||
allows 4 significant digits, which means *up to* 10000 distinct histogram
|
||||
buckets per named metric, per histogram interval. This does not mean that
|
||||
there _will be_ 10000 distinct buckets, but it means there could be if
|
||||
there is significant volume and variety in the measurements.
|
||||
|
||||
If you are running a scenario that creates many activities, then you can
|
||||
set `hdr_digits=1` on some of them to save client resources.
|
@ -1,113 +0,0 @@
|
||||
---
|
||||
title: Advanced Metrics
|
||||
---
|
||||
|
||||
# Advanced Metrics
|
||||
|
||||
## Unit of Measure
|
||||
|
||||
All metrics collected from activities are recorded in nanoseconds and ops per second. All histograms are recorded with 4
|
||||
digits of precision using HDR histograms.
|
||||
|
||||
## Metric Outputs
|
||||
|
||||
Metrics from a scenario run can be gathered in multiple ways:
|
||||
|
||||
- In the log output
|
||||
- In CSV files
|
||||
- In HDR histogram logs
|
||||
- In Histogram Stats logs (CSV)
|
||||
- To a monitoring system via graphite
|
||||
- via the --docker-metrics option
|
||||
|
||||
With the exception of the `--docker-metrics` approach, these forms may be combined and used in combination. The command
|
||||
line options for enabling these are documented in the built-in help, although some examples of these may be found below.
|
||||
|
||||
## Metrics via Graphite
|
||||
|
||||
If you like to have all of your testing data in one place, then you may be interested in reporting your measurements to
|
||||
a monitoring system. For this, nosqlbench includes a
|
||||
[Metrics Library](https://github.com/dropwizard/metrics). Graphite reporting is baked in as the default reporter.
|
||||
|
||||
In order to enable graphite reporting, use one of these options formats:
|
||||
|
||||
--report-graphite-to <host>
|
||||
--report-graphite-to <host>:<port>
|
||||
|
||||
## Metric Naming
|
||||
|
||||
## Prefix
|
||||
|
||||
Core metrics use the prefix _nosqlbench_ by default. You can override this with the `--metrics-prefix` option:
|
||||
|
||||
--metrics-prefix myclient.group5
|
||||
|
||||
## Identifiers
|
||||
|
||||
Metrics associated with a specific activity will have the activity alias in their name. There is a set of core metrics
|
||||
which are always present regardless of the activity type. The names and types of additional metrics provided for each
|
||||
activity type vary.
|
||||
|
||||
Sometimes, an activity type will expose metrics on a per statement basis, measuring over all invocations of a given
|
||||
statement as defined in the YAML. In these cases, you will see `--` separating the name components of the metric. At the
|
||||
most verbose, a metric name could take on the form like
|
||||
`<activity>.<docname>--<blockname>--<statementname>--<metricname>`, although this is rare when you name your statements,
|
||||
which is recommended. Just keep in mind that the double dash connects an activity's alias with named statements *within*
|
||||
that activity.
|
||||
|
||||
## HDR Histograms
|
||||
|
||||
### Recording HDR Histogram Logs
|
||||
|
||||
You can record details of histograms from any compatible metric (histograms and timers) with an option like this:
|
||||
|
||||
--log-histograms hdrdata.log
|
||||
|
||||
If you want to record only certain metrics in this way, then use this form:
|
||||
|
||||
--log-histograms 'hdrdata.log:.*suffix'
|
||||
|
||||
|
||||
Notice that the option is enclosed in single quotes. This is because the second part of the option value is a regex. The
|
||||
'.*suffix' pattern matches any metric name that ends with "suffix". Effectively, leaving out the pattern is the same as
|
||||
using '.\*', which matches all metrics. Any valid regex is allowed here.
|
||||
|
||||
Metrics may be included in multiple logs, but care should be taken not to overdo this. Keeping higher fidelity histogram
|
||||
reservoirs does come with a cost, so be sure to be specific in what you record as much as possible.
|
||||
|
||||
If you want to specify the recording interval, use this form:
|
||||
|
||||
--log-histograms 'hdrdata.log:.*suffix:5s'
|
||||
|
||||
If you want to specify the interval, you must use the third form above, although it is valid to leave the pattern empty,
|
||||
such as 'hdrdata.log::5s'.
|
||||
|
||||
Each interval specified will be tracked in a discrete reservoir in memory, so they will not interfere with each other in
|
||||
terms of accuracy.
|
||||
|
||||
### Recording HDR Histogram Stats
|
||||
|
||||
You can also record basic snapshots of histogram data on a periodic interval just like above with HDR histogram logs.
|
||||
The option to do this is:
|
||||
|
||||
--log-histostats 'hdrstats.log:.*suffix:10s'
|
||||
|
||||
Everything works the same as for hdr histogram logging, except that the format is in CSV as shown in the example below:
|
||||
|
||||
~~~
|
||||
#logging stats for session scenario-1479089852022
|
||||
#[Histogram log format version 1.0]
|
||||
#[StartTime: 1479089852.046 (seconds since epoch), Sun Nov 13 20:17:32 CST 2016]
|
||||
#Tag,Interval_Start,Interval_Length,count,min,p25,p50,p75,p90,p95,p98,p99,p999,p9999,max
|
||||
Tag=diag1.delay,0.457,0.044,1,16,31,31,31,31,31,31,31,31,31,31
|
||||
Tag=diag1.cycles,0.48,0.021,31,4096,8191,8191,8191,8191,8191,8191,8191,8191,8191,2097151
|
||||
Tag=diag1.delay,0.501,0.499,1,1,1,1,1,1,1,1,1,1,1,1
|
||||
Tag=diag1.cycles,0.501,0.499,498,1024,2047,2047,4095,4095,4095,4095,4095,4095,4095,4194303
|
||||
...
|
||||
~~~
|
||||
|
||||
This includes the metric name (Tag), the interval start time and length (from the beginning of collection time), number
|
||||
of metrics recorded (count), minimum magnitude, a number of percentile measurements, and the maximum value. Notice that
|
||||
the format used is similar to that of the HDR logging, although instead of including the raw histogram data, common
|
||||
percentiles are recorded directly.
|
||||
|
@ -1,124 +0,0 @@
|
||||
---
|
||||
title: CLI Scripting
|
||||
---
|
||||
|
||||
# Command-Line Scripting
|
||||
|
||||
Sometimes you want to run a set of workloads in a particular order, or call other specific test setup logic in
|
||||
between phases or workloads. While the full scripting environment allows you to do this and more, it is not necessary to
|
||||
write javascript for every scenario.
|
||||
|
||||
For more basic setup and sequencing needs, you can achieve a fair degree of flexibility on the command line. A few key
|
||||
API calls are supported directly on the command line. This guide explains each of them, what they do, and how to use them
|
||||
together.
|
||||
|
||||
## Script Construction
|
||||
|
||||
As the command line is parsed, from left to right, the scenario script is built in an internal scripting buffer. Once
|
||||
the command line is fully parsed, this script is executed. Each of the commands below is effectively a macro for a
|
||||
snippet of script. It is important to remember that order is important.
|
||||
|
||||
## Command line format
|
||||
|
||||
Newlines are not allowed when building scripts from the command line. As long as you follow the allowed forms below, you
|
||||
can simply string multiple commands together with spaces between. As usual, single word options without double dashes
|
||||
are commands, key=value style parameters apply to the previous command, and all other commands with
|
||||
|
||||
--this-style
|
||||
|
||||
are non-scripting options.
|
||||
|
||||
## Concurrency & Control
|
||||
|
||||
All activities that run during a scenario run under the control of, but independently from the scenario script. This
|
||||
means that you can have a number of activities running while the scenario script is doing its own thing. The scenario
|
||||
only completes when both the scenario script and the activities are finished.
|
||||
|
||||
### start an activity
|
||||
|
||||
`start driver=<activity type> alias=<alias> ...`
|
||||
|
||||
You can start an activity with this command. At the time this command is evaluated, the activity is started, and the
|
||||
script continues without blocking. This is an asynchronous start of an activity. If you start multiple activities in
|
||||
this way, they will run concurrently.
|
||||
|
||||
The type argument is required to identify the activity type to run. The alias parameter is not strictly required, unless
|
||||
you want to be able to interact with the started activity later. In any case, it is a good idea to name all your
|
||||
activities with a meaningful alias.
|
||||
|
||||
### stop an activity
|
||||
|
||||
`stop <alias>`
|
||||
|
||||
Stop an activity with the given alias. This is synchronous, and causes the scenario to pause until the activity is
|
||||
stopped. This means that all threads for the activity have completed and signalled that they're in a stopped state.
|
||||
|
||||
You can stop a single activity alias, or any which match a regex pattern. If the value provided
|
||||
contains spaces, semicolons or commas, then it is split into words and each word is used as if
|
||||
the stop command had been called on it in turn.
|
||||
|
||||
If the alias name you supply matches the regular expression `[a-zA-Z_][a-zA-Z0-9_.]*`, i.e. an alphanumeric
|
||||
name with dots or underscores and no leading digit, then it is taken as a literal name and only
|
||||
matches the same literal alias. However, if there are any other characters, it is presumed to be
|
||||
a regular expression and matched as such against all currently running activities.
|
||||
|
||||
### await an activity
|
||||
|
||||
`await <alias>`
|
||||
|
||||
Await the normal completion of an activity with the given alias. This causes the scenario script to pause while it waits
|
||||
for the named activity to finish. This does not tell the activity to stop. It simply puts the scenario script into a
|
||||
paused state until the named activity is complete.
|
||||
|
||||
### run an activity
|
||||
|
||||
`run driver=<activity type> alias=<alias> ...`
|
||||
|
||||
Run an activity to completion, waiting until it is complete before continuing with the scenario script. It is
|
||||
effectively the same as
|
||||
|
||||
start driver=<activity type> ... alias=<alias>
|
||||
await <alias>
|
||||
|
||||
### wait millis
|
||||
|
||||
`waitmillis <milliseconds>`
|
||||
|
||||
Pause the scenario script for this many milliseconds. This is useful for controlling workload run duration, etc.
|
||||
|
||||
### add a script
|
||||
|
||||
`script <script file>`
|
||||
|
||||
Add the contents of the named file to the scenario script buffer.
|
||||
|
||||
### add a fragment
|
||||
|
||||
`fragment <script text>`
|
||||
|
||||
Add the contents of the next argument to the scenario script buffer. If the last character is a
|
||||
semicolon, then a newline is also added immediately after.
|
||||
|
||||
# An example CLI script
|
||||
|
||||
~~~text
|
||||
./nb \
|
||||
start driver=stdout alias=a cycles=100K workload=cql-iot tags=block:main\
|
||||
start driver=stdout alias=b cycles=200K workload=cql-iot tags=block:main\
|
||||
waitmillis 10000 \
|
||||
await a \
|
||||
stop b
|
||||
~~~
|
||||
|
||||
In this CLI script, the backslashes are necessary in order to keep everything on the same command line. Here is a narrative
|
||||
of what happens when it is run.
|
||||
|
||||
1. An activity named 'a' is started, with 100K cycles of work.
|
||||
2. An activity named 'b' is started, with 200K cycles of work.
|
||||
3. While these activities run, the scenario script waits for ten seconds.
|
||||
4. If a is complete, the await returns immediately. If not, the
|
||||
script waits for a to complete its 100K cycles.
|
||||
5. b is immediately stopped.
|
||||
6. Because all activities are stopped or complete, and the script is complete, the scenario exits.
|
||||
|
||||
|
@ -1,250 +0,0 @@
|
||||
---
|
||||
title: NoSQLBench CLI Options
|
||||
weight: 01
|
||||
---
|
||||
|
||||
# The NoSQLBench Command Line
|
||||
|
||||
This is the same documentation you get in markdown format with the
|
||||
`nb --help` command.
|
||||
|
||||
---------------------------------------
|
||||
|
||||
### Command-Line Options ###
|
||||
|
||||
Help ( You're looking at it. )
|
||||
|
||||
--help
|
||||
|
||||
Short options, like '-v' represent simple options, like verbosity. Using multiples increases the
|
||||
level of the option, like '-vvv'.
|
||||
|
||||
Long options, like '--help' are top-level options that may only be used once. These modify general
|
||||
behavior, or allow you to get more details on how to use nosqlbench.
|
||||
|
||||
All other options are either commands, or named arguments to commands. Any single word without
|
||||
dashes is a command that will be converted into script form. Any option that includes an equals sign
|
||||
is a named argument to the previous command. The following example is a commandline with a command *
|
||||
start*, and two named arguments to that command.
|
||||
|
||||
./nb start driver=diag alias=example
|
||||
|
||||
### Discovery options ###
|
||||
|
||||
These options help you learn more about running nosqlbench, and about the plugins that are
|
||||
present in your particular version.
|
||||
|
||||
Get a list of additional help topics that have more detailed documentation:
|
||||
|
||||
./nb help topics
|
||||
|
||||
Provide specific help for the named activity type:
|
||||
|
||||
./nb help <activity type>
|
||||
|
||||
List the available drivers:
|
||||
|
||||
--list-drivers
|
||||
|
||||
List the available scenarios:
|
||||
|
||||
--list-scenarios
|
||||
|
||||
List only the available workloads which contain the above scenarios:
|
||||
|
||||
--list-workloads
|
||||
|
||||
Copy a workload or other file to your local directory as a starting point:
|
||||
|
||||
--copy <name>
|
||||
|
||||
Provide the metrics that are available for scripting
|
||||
|
||||
--list-metrics <activity type> [ <activity name> ]
|
||||
|
||||
### Execution Options ###
|
||||
|
||||
This is how you actually tell nosqlbench what scenario to run. Each of these commands appends
|
||||
script logic to the scenario that will be executed. These are considered as commands, can occur in any order and
|
||||
quantity. The only rule is that arguments in the arg=value form will apply to the preceding script
|
||||
or activity.
|
||||
|
||||
Add the named script file to the scenario, interpolating named parameters:
|
||||
|
||||
script <script file> [arg=value]...
|
||||
|
||||
Add the named activity to the scenario, interpolating named parameters
|
||||
|
||||
activity [arg=value]...
|
||||
|
||||
### General options ###
|
||||
|
||||
These options modify how the scenario is run.
|
||||
|
||||
Specify a directory for scenario log files:
|
||||
|
||||
--logs-dir <dirname>
|
||||
|
||||
Specify a limit on logfiles (old files will be purged):
|
||||
|
||||
--logs-max <count>
|
||||
|
||||
Specify the priority level of file logs:
|
||||
|
||||
--logs-level <level>
|
||||
|
||||
where `<level>` can be one of OFF, ERROR, WARN, INFO, DEBUG, TRACE, or ALL
|
||||
|
||||
Specify an override for one or more classes:
|
||||
|
||||
--log-level-override com.foobarbaz:DEBUG,com.barfoobaz:TRACE
|
||||
|
||||
Specify the logging pattern for console and logfile:
|
||||
|
||||
--logging-pattern '%date %level [%thread] %logger{10} [%file:%line] %msg%n'
|
||||
--logging-pattern 'TERSE'
|
||||
|
||||
Specify the logging pattern for console only:
|
||||
|
||||
--console-pattern '%date %level [%thread] %logger{10} [%file:%line] %msg%n'
|
||||
--console-pattern 'TERSE-ANSI'
|
||||
|
||||
Specify the logging pattern for logfile only:
|
||||
|
||||
--logfile-pattern '%date %level [%thread] %logger{10} [%file:%line] %msg%n'
|
||||
--logfile-pattern 'VERBOSE'
|
||||
|
||||
# See https://logging.apache.org/log4j/2.x/manual/layouts.html#Pattern_Layout
|
||||
# These shortcuts are allowed
|
||||
TERSE %8r %-5level [%t] %-12logger{0} %msg%n%throwable
|
||||
VERBOSE %d{DEFAULT}{GMT} [%t] %logger %-5level: %msg%n%throwable
|
||||
TERSE-ANSI %8r %highlight{%-5level} %style{%C{1.} [%t] %-12logger{0}} %msg%n%throwable
|
||||
VERBOSE-ANSI %d{DEFAULT}{GMT} [%t] %highlight{%logger %-5level}: %msg%n%throwable
|
||||
# ANSI variants are auto promoted for console if --ansi=enable
|
||||
# ANSI variants are auto demoted for logfile in any case
|
||||
|
||||
Explicitly enable or disable ANSI logging support:
|
||||
(ANSI support is enabled if the TERM environment variable is defined)
|
||||
|
||||
--ansi=enabled
|
||||
--ansi=disabled
|
||||
|
||||
Specify a directory and enable CSV reporting of metrics:
|
||||
|
||||
--report-csv-to <dirname>
|
||||
|
||||
Specify the graphite destination and enable reporting
|
||||
|
||||
--report-graphite-to <addr>[:<port>]
|
||||
|
||||
Specify the interval for graphite or CSV reporting in seconds:
|
||||
|
||||
--report-interval 10
|
||||
|
||||
Specify the metrics name prefix for graphite reporting:
|
||||
|
||||
--metrics-prefix <metrics-prefix>
|
||||
|
||||
Log all HDR histogram data to a file:
|
||||
|
||||
--log-histograms histodata.log
|
||||
--log-histograms 'histodata.log:.*'
|
||||
--log-histograms 'histodata.log:.*:1m'
|
||||
--log-histograms 'histodata.log:.*specialmetrics:10s'
|
||||
|
||||
Log HDR histogram stats to a CSV file:
|
||||
|
||||
--log-histostats stats.csv
|
||||
--log-histostats 'stats.csv:.*' # same as above
|
||||
--log-histostats 'stats.csv:.*:1m' # with 1-minute interval
|
||||
--log-histostats 'stats.csv:.*specialmetrics:10s'
|
||||
|
||||
Adjust the HDR histogram precision:
|
||||
|
||||
--hdr-digits 3
|
||||
|
||||
The default is 3 digits, which creates 1000 equal-width histogram buckets for every named metric in
|
||||
every reporting interval. For longer running test or for test which require a finer grain of
|
||||
precision in metrics, you can set this up to 4 or 5. Note that this only sets the global default.
|
||||
Each activity can also override this value with the hdr_digits parameter. Be aware that each
|
||||
increase in this number multiples the amount of detail tracked on the client by 10x, so use
|
||||
caution.
|
||||
|
||||
Adjust the progress reporting interval:
|
||||
|
||||
--progress console:1m
|
||||
|
||||
or
|
||||
|
||||
--progress logonly:5m
|
||||
|
||||
NOTE: The progress indicator on console is provided by default unless logging levels are turned up
|
||||
or there is a script invocation on the command line.
|
||||
|
||||
If you want to add in classic time decaying histogram metrics for your histograms and timers, you
|
||||
may do so with this option:
|
||||
|
||||
--classic-histograms prefix
|
||||
--classic-histograms 'prefix:.*' # same as above
|
||||
--classic-histograms 'prefix:.*specialmetrics' # subset of names
|
||||
|
||||
Name the current session, for logfile naming, etc. By default, this will be "scenario-TIMESTAMP", and
|
||||
a logfile will be created for this name.
|
||||
|
||||
--session-name <name>
|
||||
|
||||
Enlist nosqlbench to stand up your metrics infrastructure using a local docker runtime:
|
||||
|
||||
--docker-metrics
|
||||
|
||||
When this option is set, nosqlbench will start graphite, prometheus, and grafana automatically on
|
||||
your local docker, configure them to work together, and point nosqlbench to send metrics to the system
|
||||
automatically. It also imports a base dashboard for nosqlbench and configures grafana snapshot
|
||||
export to share with a central DataStax grafana instance (grafana can be found on localhost:3000
|
||||
with the default credentials admin/admin).
|
||||
|
||||
### Console Options ###
|
||||
|
||||
Increase console logging levels: (Default console logging level is *warning*)
|
||||
|
||||
-v (info)
|
||||
-vv (debug)
|
||||
-vvv (trace)
|
||||
|
||||
--progress console:1m (disables itself if -v options are used)
|
||||
|
||||
These levels affect *only* the console output level. Other logging level parameters affect logging
|
||||
to the scenario log, stored by default in logs/...
|
||||
|
||||
Show version, long form, with artifact coordinates.
|
||||
|
||||
--version
|
||||
|
||||
### Summary Reporting
|
||||
|
||||
The classic metrics logging format is used to report results into the logfile for every scenario.
|
||||
This format is not generally human-friendly, so a better summary report is provided by default to
|
||||
the console and/or a specified summary file by default.
|
||||
|
||||
Examples:
|
||||
|
||||
# report to console if session ran more than 60 seconds
|
||||
--report-summary-to stdout:60
|
||||
|
||||
# report to auto-named summary file for every session
|
||||
--report-summary-to _LOGS_/_SESSION_.summary
|
||||
|
||||
# do both (the default)
|
||||
--report-summary-to stdout:60,_LOGS_/_SESSION_.summary
|
||||
|
||||
Values of `stdout` or `stderr` send summaries directly to the console, and any other pattern is
|
||||
taken as a file name.
|
||||
|
||||
You can use `_SESSION_` and `_LOGS_` to automatically name the file according to the current session
|
||||
name and log directory.
|
||||
|
||||
The reason for the optional timing parameter is to allow for results of short scenario runs to be
|
||||
squelched. Metrics for short runs are not generally accurate nor meaningful. Spamming the console
|
||||
with boiler-plate in such cases is undesirable. If the minimum session length is not specified, it
|
||||
is assumed to be 0, meaning that a report will always show on that channel.
|
||||
|
@ -1,115 +0,0 @@
|
||||
---
|
||||
title: Core Statement Params
|
||||
weight: 06
|
||||
---
|
||||
|
||||
# Core Statement Parameters
|
||||
|
||||
Some statement parameters are recognized by the nosqlbench runtime and can
|
||||
be used on any statement in a YAML file.
|
||||
|
||||
## *ratio*
|
||||
|
||||
A statement parameter called _ratio_ is supported by every workload. It
|
||||
can be attached to a statement, or a block or a document level parameter
|
||||
block. It sets the relative ratio of a statement in the op sequence before
|
||||
an activity is started.
|
||||
|
||||
When an activity is initialized, all of the active statements are combined
|
||||
into a sequence based on their relative ratios. By default, all statement
|
||||
templates are initialized with a ratio of 1 if none is specified by the
|
||||
user.
|
||||
|
||||
For example, consider the statements below:
|
||||
|
||||
```yaml
|
||||
statements:
|
||||
- s1: "select foo,bar from baz where ..."
|
||||
ratio: 1
|
||||
- s2: "select bar,baz from foo where ..."
|
||||
ratio: 2
|
||||
- s3: "select baz,foo from bar where ..."
|
||||
ratio: 3
|
||||
```
|
||||
|
||||
If all statements are activated (there is no tag filtering), then the
|
||||
activity will be initialized with a sequence length of 6. In this case,
|
||||
the relative ratio of statement "s3" will be 50% overall. If you filtered
|
||||
out the first statement, then the sequence would be 5 operations long. In
|
||||
this case, the relative ratio of statement "s3" would be 60% overall. It
|
||||
is important to remember that statement ratios are always relative to the
|
||||
total sum of the active statements' ratios.
|
||||
|
||||
**NOTE:**
|
||||
Because the ratio works so closely with the activity parameter `seq`, the
|
||||
description for that parameter is included below.
|
||||
|
||||
### *seq* (activity level - do not use on statements)
|
||||
|
||||
- `seq=<bucket|concat|interval>`
|
||||
- _default_: `seq=bucket`
|
||||
- _required_: no
|
||||
- _dynamic_: no
|
||||
|
||||
The `seq=<bucket|concat|interval>` parameter determines the type of
|
||||
sequencing that will be used to plan the op sequence. The op sequence is a
|
||||
look-up-table that is used for each stride to pick statement forms
|
||||
according to the cycle offset. It is simply the sequence of statements
|
||||
from your YAML that will be executed, but in a pre-planned, and highly
|
||||
efficient form.
|
||||
|
||||
An op sequence is planned for every activity. With the default ratio on
|
||||
every statement as 1, and the default bucket scheme, the basic result is
|
||||
that each active statement will occur once in the order specified. Once
|
||||
you start adding ratios to statements, the most obvious thing that you
|
||||
might expect will happen: those statements will occur multiple times to
|
||||
meet their ratio in the op mix. You can customize the op mix further by
|
||||
changing the seq parameter to concat or interval.
|
||||
|
||||
**NOTE:**
|
||||
The op sequence is a look up table of statement templates, *not*
|
||||
individual statements or operations. Thus, the cycle still determines the
|
||||
uniqueness of an operation as you would expect. For example, if statement
|
||||
form ABC occurs 3x per sequence because you set its ratio to 3, then each
|
||||
of these would manifest as a distinct operation with fields determined by
|
||||
distinct cycle values.
|
||||
|
||||
There are three schemes to pick from:
|
||||
|
||||
### bucket
|
||||
|
||||
This is a round robin planner which draws operations from buckets in
|
||||
circular fashion, removing each bucket as it is exhausted. For example,
|
||||
the ratios A:4, B:2, C:1 would yield the sequence A B C A B A A. The
|
||||
ratios A:1, B:5 would yield the sequence A B B B B B.
|
||||
|
||||
### concat
|
||||
|
||||
This simply takes each statement template as it occurs in order and
|
||||
duplicates it in place to achieve the ratio. The ratios above (A:4, B:2,
|
||||
C:1) would yield the sequence A A A A B B C for the concat sequencer.
|
||||
|
||||
### interval
|
||||
|
||||
This is arguably the most complex sequencer. It takes each ratio as a
|
||||
frequency over a unit interval of time, and apportions the associated
|
||||
operation to occur evenly over that time. When two operations would be
|
||||
assigned the same time, then the order of appearance establishes
|
||||
precedence. In other words, statements appearing first win ties for the
|
||||
same time slot. The ratios A:4 B:2 C:1 would yield the sequence A B C A A
|
||||
B A. This occurs because, over the unit interval
|
||||
(0.0,1.0), A is assigned the positions `A: 0.0, 0.25, 0.5, 0.75`, B is
|
||||
assigned the positions `B: 0.0, 0.5`, and C is assigned position `C: 0.0`.
|
||||
These offsets are all sorted with a position-stable sort, and then the
|
||||
associated ops are taken as the order.
|
||||
|
||||
In detail, the rendering appears
|
||||
as `0.0(A), 0.0(B), 0.0(C), 0.25(A), 0.5(A), 0.5(B), 0.75(A)`, which
|
||||
yields `A B C A A B A` as the op sequence.
|
||||
|
||||
This sequencer is most useful when you want a stable ordering of operation
|
||||
from a rich mix of statement types, where each operation is spaced as
|
||||
evenly as possible over time, and where it is not important to control the
|
||||
cycle-by-cycle sequencing of statements.
|
||||
|
||||
|
@ -1,28 +0,0 @@
|
||||
---
|
||||
title: Grafana Metrics
|
||||
weight: 2
|
||||
---
|
||||
|
||||
# Grafana Metrics
|
||||
|
||||
NoSQLBench comes with a built-in helper to get you up and running quickly
|
||||
with client-side testing metrics. This functionality is based on docker,
|
||||
and a built-in method for bringing up a docker stack, automated by
|
||||
NoSQLBench.
|
||||
|
||||
**WARNING:**
|
||||
This feature requires that you have docker running on the local system and
|
||||
that your user is in a group that is allowed to manage docker. Using
|
||||
the `--docker-metrics` command *will* attempt to manage docker on your
|
||||
local system.
|
||||
|
||||
To ask nosqlbench to stand up your metrics infrastructure using a local
|
||||
docker runtime, use this command line option with any other nosqlbench
|
||||
commands:
|
||||
|
||||
--docker-metrics
|
||||
|
||||
When this option is set, nosqlbench will start graphite, prometheus, and grafana automatically on your local docker,
|
||||
configure them to work together, and to send metrics to the system automatically. It also imports a base dashboard for
|
||||
nosqlbench and configures grafana snapshot export to share with a central DataStax grafana instance (grafana can be
|
||||
found on localhost:3000 with the default credentials admin/admin).
|
@ -1,8 +0,0 @@
|
||||
---
|
||||
title: Reference
|
||||
weight: 90
|
||||
---
|
||||
|
||||
# Reference Section
|
||||
|
||||
This section contains additional reference details across a range of nosqlbench topics.
|
@ -1,72 +0,0 @@
|
||||
---
|
||||
title: Parameter Types
|
||||
weight: 03
|
||||
---
|
||||
|
||||
# Parameter Types
|
||||
|
||||
To configure a NoSQLBench activity to do something meaningful, you have to provide parameters to it. This can occur in
|
||||
one of several ways. This section is a guide on NoSQLBench parameters, how they layer together, and when to use one form
|
||||
over another.
|
||||
|
||||
The command line is used to configure both the overall runtime (logging, etc) as well as the individual activities and
|
||||
scripts. Global options can be distinguished from scenario commands and their parameters because global options always
|
||||
start with a -single or --double-hyphen.
|
||||
|
||||
## Activity Parameters
|
||||
|
||||
Parameters for an activity always have the form of `<name>=<value>` on the command line. Activity parameters *must*
|
||||
follow a command, such as `run` or `start`, for example. Scenario commands are always single words without any leading
|
||||
hyphens. Every command-line argument that follows a scenario command in the form of `<name>=<value>` is a parameter to
|
||||
that command.
|
||||
|
||||
Activity parameters can be provided by the core runtime or they can be provided by the activity type. It's not important
|
||||
where they are provided from so long as you know what they do for your workloads, how to configure them, and where to
|
||||
find the docs. Core parameters are documented below.
|
||||
|
||||
*Core* Parameters are those provided by the core runtime. They are part of the core API and used by every
|
||||
activity type. Core activity params include *type*, *alias*, and *threads*, for example. These parameters are explained
|
||||
individually under the next section.
|
||||
|
||||
*Driver* Parameters are those provided by an activity type. These parameters are documented for each activity type. You
|
||||
can see them by running `nb help <activity type>`.
|
||||
|
||||
Driver parameters may be dynamic. *Dynamic* Activity Parameters are parameters which may be changed while an activity is
|
||||
running. This means that scenario scripting logic may change some variables while an activity is running, and that the
|
||||
runtime should dynamically adjust to match. Dynamic parameters are mainly used in more advanced scripting scenarios.
|
||||
|
||||
Parameters that are dynamic should be documented as such in the respective activity type's help page.
|
||||
|
||||
### Template Parameters
|
||||
|
||||
If you need to provide general-purpose overrides to a named section of the standard YAML, then you may use a mechanism
|
||||
called _template parameters_. These are just like activity parameters, but they are set via macro and can have defaults.
|
||||
This is a YAML format feature that allows you to easily template workload properties in a way that is easy to override
|
||||
on the command line or via scripting. More details on template parameters are shared under 'Designing Workloads|Template
|
||||
Params'.
|
||||
|
||||
### Parameter Loading
|
||||
|
||||
Now that we've described all the parameter types, let's tie them together. When an activity is loaded from the command
|
||||
line or script, the parameters are resolved in the following order:
|
||||
|
||||
1. The `type` parameter tells nosqlbench which activity type implementation to load.
|
||||
2. The activity type implementation creates an activity.
|
||||
3. The activity is initialized with the parameters provided.
|
||||
4. The yaml parameter is used to load the workload definition into
|
||||
a buffer without parsing the YAML.
|
||||
5. Any template parameters in the file in `<<varname:default value>>` or `TEMPLATE(varname,default value)` form are resolved, taking override values from the provided params.
|
||||
6. Finally, the activity is started.
|
||||
|
||||
## Statement Parameters
|
||||
|
||||
Some activities make use of parameters for statements. These are called _statement parameters_ and are completely
|
||||
different than _activity parameters_. Statement parameters in a YAML allow you to affect *how* a statement is used in a
|
||||
workload. Just as with activity level parameters, statement parameters may be supported by the core runtime or by an
|
||||
activity type. These are also documented in the respective activity type's documentation included in the 'Activity
|
||||
Types' section.
|
||||
|
||||
The core statement parameters are explained just below the core activity parameters in this section.
|
||||
|
||||
|
||||
|
@ -1,137 +0,0 @@
|
||||
---
|
||||
title: Scenario Scripting
|
||||
---
|
||||
|
||||
# Scenario Scripting
|
||||
|
||||
## Motive
|
||||
|
||||
The NoSQLBench runtime is a combination of a scripting sandbox and a workload execution machine. This is not accidental.
|
||||
With this particular arrangement, it should be possible to build sophisticated tests across a variety of scenarios. In
|
||||
particular, logic which can observe and react to the system under test can be powerful. With this approach, it becomes
|
||||
possible to break away from the conventional run-interpret-adjust cycle which is all too often done by human hands.
|
||||
|
||||
## Machinery, Controls & Instruments
|
||||
|
||||
All of the heavy lifting is left to Java and the core nosqlbench runtime. This includes the iterative workloads that are
|
||||
meant to test the target system. This is combined with a control layer which is provided by Nashorn and eventually
|
||||
GraalVM. This division of responsibility allows the high-level test logic to be "script" and the low-level activity
|
||||
logic to be "machinery". While the scenario script has the most control, it also is the least busy relative to activity
|
||||
workloads. The net effect is that you have the efficiency of the iterative test loads in conjunction with the open
|
||||
design palette of a first-class scripting language.
|
||||
|
||||
Essentially, the ActivityType drivers are meant to handle the workload-specific machinery. They also provide dynamic
|
||||
control points and parameters which special to that activity type (driver). This exposes a full feedback loop between a
|
||||
running scenario script and the activities that it runs. The scenario is free to read the performance metrics from a
|
||||
running activity and make changes to it on the fly.
|
||||
|
||||
## Scripting Environment
|
||||
|
||||
The nosqlbench scripting environment provided has a few modifications meant to streamline understanding and usage of
|
||||
nosqlbench dynamic parameters and metrics.
|
||||
|
||||
### Active Bindings
|
||||
|
||||
Active bindings are control variables which, when assigned to, cause an immediate change in the behavior of the runtime.
|
||||
Each of the variables below is pre-wired into each script environment.
|
||||
|
||||
#### scenario
|
||||
|
||||
This is the __Scenario Controller__ object which manages the activity executors in the runtime. All the methods on this
|
||||
Java type are provided to the scripting environment directly.
|
||||
|
||||
#### activities.<alias>.<paramname>
|
||||
|
||||
Each activity parameter for a given activity alias is available at this name within the scripting environment. Thus, you
|
||||
can change the number of threads on an activity named foo (alias=foo) in the scripting environment by assigning a value
|
||||
to it as in `activities.foo.threads=3`. Any assignments take effect synchronously before the next line of the script
|
||||
continues executing.
|
||||
|
||||
#### __metrics__.<alias>.<metric name>
|
||||
|
||||
Each activity metric for a given activity alias is available at this name. This gives you access to the metrics objects
|
||||
directly. Some metrics objects have also been enhanced with wrapper logic to provide simple getters and setters, like
|
||||
`.p99ms` or `.p99ns`, for example.
|
||||
|
||||
Interaction with the nosqlbench runtime and the activities therein is made
|
||||
easy by the above variables and objects. When an assignment is made to any
|
||||
of these variables, the changes are propagated to internal listeners. For
|
||||
changes to
|
||||
_threads_, the thread pool responsible for the affected activity adjusts
|
||||
the number of active threads (AKA slots). Other changes are further
|
||||
propagated directly to the thread harnesses and components which implement
|
||||
the ActivityType.
|
||||
|
||||
**WARNING:**
|
||||
Assignment to the _workload_ and _alias_ activity parameters has no
|
||||
special effect, as you can't change an activity to a different driver once
|
||||
it has been created.
|
||||
|
||||
You can make use of more extensive Java or Javascript libraries as needed,
|
||||
mixing them with the runtime controls provided above.
|
||||
|
||||
## Enhanced Metrics for Scripting
|
||||
|
||||
The metrics available in nosqlbench are slightly different than the standard kit with dropwizard metrics. The key
|
||||
differences are:
|
||||
|
||||
### HDR Histograms
|
||||
|
||||
All histograms use HDR histograms with *four* significant digits.
|
||||
|
||||
All histograms reset on snapshot, automatically keeping all data until you report the snapshot or access the snapshot
|
||||
via scripting. (see below).
|
||||
|
||||
The metric types that use histograms have been replaced with nicer version for scripting. You don't have to do anything
|
||||
differently in your reporter config to use them. However, if you need to use the enhanced versions in your local
|
||||
scripting, you can. This means that Timer and Histogram types are enhanced. If you do not use the scripting extensions,
|
||||
then you will automatically get the standard behavior that you are used to, only with higher-resolution HDR and full
|
||||
snapshots for each report to your downstream metrics systems.
|
||||
|
||||
### Scripting with Delta Snapshots
|
||||
|
||||
For both the timer and the histogram types, you can call getDeltaReader(), or access it simply as
|
||||
<metric>.deltaReader. When you do this, the delta snapshotting behavior is maintained until you use the
|
||||
deltaReader to access it. You can get a snapshot from the deltaReader by calling getDeltaSnapshot(10000), which causes
|
||||
the snapshot to be reset for collection, but retains a cache of the snapshot for any other consumer of getSnapshot() for
|
||||
that duration in milliseconds. If, for example, metrics reporters access the snapshot in the next 10 seconds, the
|
||||
reported snapshot will be exactly what was used in the script.
|
||||
|
||||
This is important for using local scripting methods and calculations with aggregate views downstream. It means that the
|
||||
histograms will match up between your local script output and your downstream dashboards, as they will both be using the
|
||||
same frame of data, when done properly.
|
||||
|
||||
### Histogram Convenience Methods
|
||||
|
||||
All histogram snapshots have additional convenience methods for accessing every percentile in (P50, P75, P90, P95, P98,
|
||||
P99, P999, P9999) and every time unit in (s, ms, us, ns). For example, getP99ms() is supported, as is getP50ns(), and
|
||||
every other possible combination. This means that you can access the 99th percentile metric value in your scripts for
|
||||
activity _foo_ as _metrics.foo.cycles.snapshot.p99ms_.
|
||||
|
||||
## Control Flow
|
||||
|
||||
When a script is run, it has absolute control over the scenario runtime while it is active. Once the script reaches its
|
||||
end, however, it will only exit if all activities have completed. If you want to explicitly stop a script, you must stop
|
||||
all activities.
|
||||
|
||||
## Strategies
|
||||
|
||||
You can use nosqlbench in the classic form with `run driver=<activity_type> param=value ...` command line syntax. There
|
||||
are reasons, however, that you will sometimes want customize and modify your scripts directly, such as:
|
||||
|
||||
- Permute test variables to cover many sub-conditions in a test.
|
||||
- Automatically adjust load factors to identify the nominal capacity of a system.
|
||||
- Adjust rate of a workload in order to get a specific measurement of system behavior.
|
||||
- React to changes in test or target system state in order to properly sequence a test.
|
||||
|
||||
## Script Input & Output
|
||||
|
||||
Internal buffers are kept for _stdin_, _stdout_, and _stderr_ for the scenario script execution. These are logged to the
|
||||
logfile upon script completion, with markers showing the timestamp and file descriptor (stdin, stdout, or stderr) that
|
||||
each line was recorded from.
|
||||
|
||||
## External Docs
|
||||
|
||||
- [Java Platform, Standard Edition Nashorn User's Guide (Java 8)](https://docs.oracle.com/javase/8/docs/technotes/guides/scripting/nashorn/api.html)
|
||||
- [Nashorn extensions on OpenJDK Wiki](https://wiki.openjdk.java.net/display/Nashorn/Nashorn+extensions)
|
||||
- [Scripting for the Java (8) Platform](http://docs.oracle.com/javase/8/docs/technotes/guides/scripting/)
|
@ -1,38 +0,0 @@
|
||||
---
|
||||
title: Standard Metrics
|
||||
---
|
||||
|
||||
# Standard Metrics
|
||||
|
||||
nosqlbench comes with a set of standard metrics that will be part of every activity type (driver). Each activity type
|
||||
(driver) enhances the metrics available by adding their own metrics with the nosqlbench APIs. This section explains what
|
||||
the standard metrics are, and how to interpret them.
|
||||
|
||||
## read-input
|
||||
|
||||
Within nosqlbench, a data stream provider called an _Input_ is responsible for providing the actual cycle number that
|
||||
will be used by consumer threads. Because different _Input_ implementations may perform differently, a separate metric
|
||||
is provided to track the performance in terms of client-side overhead. The **read-input** metric is a timer that only
|
||||
measures the time it takes for a given activity thread to read the input value, nothing more.
|
||||
|
||||
## strides
|
||||
|
||||
A stride represents the work-unit for a thread within nosqlbench. It allows a set of cycles to be logically grouped
|
||||
together for purposes of optimization -- or in some cases -- to simulate realistic client-side behavior over multiple
|
||||
operations. The stride is the number of cycles that will be allocated to each thread before it starts iterating on them.
|
||||
|
||||
The **strides** timer measures the time each stride takes, including all cycles within the stride. It starts measuring
|
||||
time before the cycle starts, and stops measuring after the last cycle in the stride has run.
|
||||
|
||||
## cycles
|
||||
|
||||
Within nosqlbench, each logical iteration of a statement is handled within a distinct cycle. A cycle represents an
|
||||
iteration of a workload. This corresponds to a single operation executed according to some statement definition.
|
||||
|
||||
The **cycles** metric is a timer that starts counting at the start of a cycle, before any specific activity behavior has
|
||||
control. It stops timing once the logical cycle is complete. This includes any additional phases that are executed by
|
||||
multi-phase actions.
|
||||
|
||||
|
||||
|
||||
|
@ -1,48 +0,0 @@
|
||||
---
|
||||
title: Timing Terms
|
||||
---
|
||||
|
||||
# Timing Terms
|
||||
|
||||
Often, terms used to describe latency can create confusion. In fact, the term _latency_ is so overloaded in practice
|
||||
that it is not useful by itself. Because of this, nosqlbench will avoid using the term latency _except in a specific
|
||||
way_. Instead, the terms described in this section will be used.
|
||||
|
||||
nosqlbench is a client-centric testing tool. The measurement of operations occurs on the client, without visibility to
|
||||
what happens in transport or on the server. This means that the client *can* see how long an operation takes, but it
|
||||
*cannot see* how much of the operational time is spent in transport and otherwise. This has a bearing on the terms that
|
||||
are adopted with nosqlbench.
|
||||
|
||||
Some terms are anchored by the context in which they are used. For latency terms, *service time* can be subjective. When
|
||||
using this term to describe other effects in your system, what is included depends on the perspective of the requester.
|
||||
The concept of service is universal, and every layer in a system can be seen as a service. Thus, the service time is
|
||||
defined by the vantage point of the requester. This is the perspective taken by the nosqlbench approach for naming and
|
||||
semantics below.
|
||||
|
||||
## responsetime
|
||||
|
||||
**The duration of time a user has to wait for a response from the time they submitted the request.** Response time is
|
||||
the duration of time from when a request was expected to start, to the time at which the response is finally seen by the
|
||||
user. A request is generally expected to start immediately when users make a request. For example, when a user enters a
|
||||
URL into a browser, they expect the request to start immediately when they hit enter.
|
||||
|
||||
In nosqlbench, the response time for any operation can be calculated by adding its wait time and its service time
|
||||
together.
|
||||
|
||||
## waittime
|
||||
|
||||
**The duration of time between when an operation is intended to start and when it actually starts on a client.** This is
|
||||
also called *scheduling delay* in some places. Wait time occurs because clients are not able to make all requests
|
||||
instantaneously when expected. There is an ideal time at which the request would be made according to user demand. This
|
||||
ideal time is always earlier than the actual time in practice. When there is a shortage of resources *of any kind* that
|
||||
delays a client request, it must wait.
|
||||
|
||||
Wait time can accumulate when you are running something according to a dispatch rate, as with a rate limiter.
|
||||
|
||||
## servicetime
|
||||
|
||||
**The duration of time it takes a server or other system to fully process a request and send a response.** From the
|
||||
perspective of a testing client, the _system_ includes the infrastructure as well as remote servers. As such, the
|
||||
service time metrics in nosqlbench include any operational time that is external to the client, including transport
|
||||
latency.
|
||||
|
@ -1,59 +0,0 @@
|
||||
---
|
||||
title: Scripting
|
||||
weight: 95
|
||||
---
|
||||
|
||||
# Scripting with NoSQLBench
|
||||
|
||||
NoSQLBench is designed to be used as both a turnkey testing system as well as a toolkit for advanced
|
||||
testing. The approach that enables this is based on a few key principles:
|
||||
|
||||
1. NoSQLBench is packaged by default for users who want to use pre-built testing configurations.
|
||||
2. The whole runtime is modular and designed for composition.
|
||||
3. The default testing configurations are assembled from the modules components as needed.
|
||||
4. Users can choose to build their own testing configurations from these modules.
|
||||
5. When a user moves from using pre-built configurations to custom configurations,
|
||||
it is an incremental process.
|
||||
|
||||
Why base the internal logic on a scripting engine?
|
||||
|
||||
The principles described above apply all the way to the scripting layer. Every NoSQLBench
|
||||
scenario is, after all, a script. For users who just need to run the pre-packaged
|
||||
configurations, the fact a scripting engine is at the core is an implementation detail that
|
||||
doesn't matter. For others who need to create advanced testing logic, this feature
|
||||
allows them to build on the self-same concepts and components that other NoSQLBench users
|
||||
are already familiar with and using. This common ground pays for itself in terms of reusability,
|
||||
clarity, and a shared approach to testing at different levels of detail.
|
||||
|
||||
## Unique Capabilities
|
||||
|
||||
Unlike some other systems which define what a user is allowed to do during a scenario with something
|
||||
like a DSL, NoSQLBench comes with no limitations. In other words, rather than pick a set of behaviors
|
||||
from a limited list of DSL verbs, you can do anything you want during a scenario as long as it can
|
||||
be expressed in Javascript.
|
||||
|
||||
That said, if you want to use a DSL within NoSQLBench, it doesn't prevent you from doing so. It just
|
||||
doesn't come with a DSL to tell you what you can (and can't) do. Instead, it comes with a set of
|
||||
scripting libraries and extensions that have proven useful for advanced testing scenarios.
|
||||
|
||||
NoSQLBench scripting is supported with realtime interaction between the scripting environment
|
||||
and the running scenario. Activities, metrics, and control variables that are needed to dynamically
|
||||
interact with a running workload are all wired in and ready to go.
|
||||
|
||||
Contributors can add to the scripting runtime by adding extensions to NoSQLBench. These extensions
|
||||
are generally added to the integrated tests with full-roundtrip content checking to ensure that
|
||||
they perform exactly as expected.
|
||||
|
||||
## Getting Started
|
||||
|
||||
For users who want to tap into the programmatic power of NoSQLBench, it's easy to get started by
|
||||
using the `--show-script` option. For any normal command line that you might use with NoSQLBench,
|
||||
this option causes it to dump the scenario script to stdout and exit instead of running the scenario.
|
||||
|
||||
You can store this into a file with a `.js` extension, and then use a command line like
|
||||
|
||||
nosqlbench script myfile.js
|
||||
|
||||
to invoke it. This is exactly the same as running the original command line, only with a couple of
|
||||
extra steps that let you see what it is doing directly in the scenario script.
|
||||
|
@ -1,59 +0,0 @@
|
||||
---
|
||||
title: Script Parameters
|
||||
weight: 20
|
||||
---
|
||||
|
||||
# Script Parameters
|
||||
|
||||
When running a script, it is sometimes necessary to pass parameters to it in the same way
|
||||
that you would for an activity. For example, you might have a scenario script like this:
|
||||
|
||||
# myscript.js
|
||||
scenario.run({
|
||||
driver: 'stdout',
|
||||
workload: 'test.yaml',
|
||||
cycles: '1000'
|
||||
});
|
||||
|
||||
This is what the script form of starting an activity might look like. It is
|
||||
simply passing a parameter map with the activity parameters to the scenario controller.
|
||||
|
||||
You might invoke it like this:
|
||||
|
||||
nb script myscript
|
||||
|
||||
Suppose that you want to allow the user to run such an activity by calling the script directly,
|
||||
but you also want to allow them to add their own parameters specifically to the
|
||||
activity.
|
||||
|
||||
NoSQLBench supports this type of flexibility by providing any command-line arguments to the
|
||||
script as a script object. It is possible to then combine the parameters that a user provides
|
||||
with any templated parameters in your script. You can make either one the primary, while allowing
|
||||
the other to backfill values. In either case, it's a matter of using helper methods that are
|
||||
baked into the command line parameters object.
|
||||
|
||||
To force parameters to specific values while allowing user command line parameters to backfill,
|
||||
use a pattern like this:
|
||||
|
||||
myparams = params.withOverrides(
|
||||
{
|
||||
myparam: 'myvalue'
|
||||
}
|
||||
);
|
||||
|
||||
This will force 'myparam' to the specified values irrespective of what the user has provided for
|
||||
that value, and will add the value if it is not present already.
|
||||
|
||||
To force _unset_ a parameter, use a similar pattern, but with the value `UNSET` instead:
|
||||
|
||||
|
||||
myparams = params.withOverrides(
|
||||
{
|
||||
myparam: 'UNSET'
|
||||
}
|
||||
);
|
||||
|
||||
If this form is used, then any parameter which has already been provided for `myparam` will be
|
||||
removed from the resulting map.
|
||||
|
||||
|
@ -1,43 +0,0 @@
|
||||
---
|
||||
title: Scripting Extensions
|
||||
weight: 30
|
||||
---
|
||||
|
||||
# Scripting Extensions
|
||||
|
||||
Extensions are injected into the scripting environment as plugins. They appear as service
|
||||
objects in the script environment under a name determined by the plugin.
|
||||
|
||||
This section describes some of the scripting extensions available.
|
||||
|
||||
## csvmetrics
|
||||
|
||||
Allows a script to log some or all metrics to CSV files.
|
||||
|
||||
## files
|
||||
|
||||
Allows for convenient read access to local files.
|
||||
|
||||
## globalvars
|
||||
|
||||
Allows access to the shared variable state that can be populated from operations.
|
||||
|
||||
## histologger
|
||||
|
||||
Allows script control of HDR histogram interval logging.
|
||||
|
||||
## histostatslogger
|
||||
|
||||
Allows script control of histogram stats logging in CSV files.
|
||||
|
||||
## http
|
||||
|
||||
Easily use http get and post in scripts.
|
||||
|
||||
## optimos
|
||||
|
||||
Allows use of the BOBYQA optimizer in scripts.
|
||||
|
||||
## scriptingmetrics
|
||||
|
||||
Allows you to create and append metrics within your scenario scripts
|
@ -1,102 +0,0 @@
|
||||
---
|
||||
title: Advanced Testing
|
||||
weight: 13
|
||||
---
|
||||
|
||||
# Advanced Testing
|
||||
|
||||
**NOTE:**
|
||||
Some of the features discussed here are only for advanced testing
|
||||
scenarios. First-time users should become familiar with the basic options
|
||||
first.
|
||||
|
||||
## Hybrid Rate Limiting
|
||||
|
||||
Rate limiting is a complicated endeavor, if you want to do it well. The
|
||||
basic rub is that going fast means you have to be less accurate, and
|
||||
vice-versa. As such, rate limiting is a parasitic drain on any system. The
|
||||
act of rate limiting itself poses a limit to the maximum rate, regardless
|
||||
of the settings you pick. This occurs as a side-effect of forcing your
|
||||
system to interact with some hardware notion of time passing, which takes
|
||||
CPU cycles that could be going to the thing you are limiting.
|
||||
|
||||
This means that in practice, rate limiters are often very featureless.
|
||||
It's daunting enough to need rate limiting, and asking for anything more
|
||||
than that is often wishful thinking. Not so in NoSQLBench.
|
||||
|
||||
The rate limiter in NoSQLBench provides a comparable degree of performance
|
||||
and accuracy to others found in the Java ecosystem, but it *also* has
|
||||
advanced features:
|
||||
|
||||
- It allows a sliding scale between average rate limiting and strict rate
|
||||
limiting, called _bursting_.
|
||||
- It internally accumulates delay time, for C.O. friendly metrics which
|
||||
are separately tracked for each and every operation.
|
||||
- It is resettable and reconfigurable on the fly, including the bursting
|
||||
rate.
|
||||
- It provides its configured values in addition to performance data in
|
||||
metrics, capturing your rate limiter settings as a simple matter of
|
||||
metrics collection.
|
||||
- It comes with advanced scripting helpers which allow you to read data
|
||||
directly from histogram reservoirs, or control the reservoir window
|
||||
programmatically.
|
||||
|
||||
## Flexible Error Handling
|
||||
|
||||
An emergent facility in NoSQLBench is the way that errors are handled
|
||||
within an activity. For example, with the CQL activity type, you are able
|
||||
to route error handling for any of the known exception types. You can
|
||||
count errors, you can log them. You can cause errored operations to
|
||||
auto-retry if possible, up to a configurable number of tries.
|
||||
|
||||
This means, that as a user, you get to decide what your test is about. Is
|
||||
it about measuring some nominal but anticipated level of errors due to
|
||||
intentional over-saturation? If so, then count the errors, and look at
|
||||
their histogram data for timing details within the available timeout.
|
||||
|
||||
Are you doing a basic stability test, where you want the test to error out
|
||||
for even the slightest error? You can configure for that if you need.
|
||||
|
||||
## Cycle Logging
|
||||
|
||||
It is possible to record the result status of each and every cycle in a
|
||||
NoSQLBench test run. If the results are mostly homogeneous, the RLE
|
||||
encoding of the results will reduce the output file down to a small
|
||||
fraction of the number of cycles. The errors are mapped to ordinals by
|
||||
error type, and these ordinals are stored into a direct RLE-encoded log
|
||||
file. For most testing where most of the results are simply success, this
|
||||
file will be tiny. You can also convert the cycle log into textual form
|
||||
for other testing and post-processing and vice-versa.
|
||||
|
||||
## Op Sequencing
|
||||
|
||||
The way that operations are planned for execution in NoSQLBench is based
|
||||
on a stable ordering that is configurable. The statement forms are mixed
|
||||
together based on their relative ratios. The three schemes currently
|
||||
supported are round-robin with exhaustion (bucket), duplicate in order
|
||||
(concat), and a way to spread each statement out over the unit interval
|
||||
(interval). These account for most configuration scenarios without users
|
||||
having to micro-manage their statement templates.
|
||||
|
||||
## Sync and Async
|
||||
|
||||
There are two distinct usage modes in NoSQLBench when it comes to
|
||||
operation dispatch and thread management:
|
||||
|
||||
### Sync
|
||||
|
||||
Sync is the default form. In this mode, each thread reads its sequence and
|
||||
dispatches one statement at a time, holding only one operation in flight
|
||||
per thread. This is the mode you often use when you want to emulate an
|
||||
application's request-per-thread model, as it implicitly linearizes the
|
||||
order of operations within the computed sequence of statements.
|
||||
|
||||
### Async
|
||||
|
||||
In Async mode, each thread in an activity is responsible for juggling a
|
||||
number of operations in-flight. This allows a NoSQLBench client to juggle
|
||||
an arbitrarily high number of connections, limited primarily by how much
|
||||
memory you have.
|
||||
|
||||
Internally, the Sync and Async modes have different code paths. It is
|
||||
possible for an activity type to support one or both of these.
|
@ -1,76 +0,0 @@
|
||||
---
|
||||
title: Core Concepts
|
||||
weight: 2
|
||||
---
|
||||
|
||||
# Refined Core Concepts
|
||||
|
||||
The core concepts that NoSQLBench is built on have been scrutinized,
|
||||
replaced, refined, and hardened through several years of use by users of
|
||||
various needs and backgrounds.
|
||||
|
||||
This level of refinement is important when trying to find a way to express
|
||||
common patterns in what is often a highly fragmented practice. Testing is
|
||||
hard. Scale testing is hard. Distributed testing is hard. Combined, the
|
||||
challenge of executing realistic tests is often quite daunting to all but
|
||||
seasoned test engineers. To make this worse, existing tools have only
|
||||
skirmished with this problem enough to make dents, but none has tackled
|
||||
full-on the lack of conceptual building blocks.
|
||||
|
||||
This has to change. We need a set of conceptual building blocks that can
|
||||
span across workloads and system types, and machinery to put these
|
||||
concepts to use. This is why it is important to focus on finding a useful
|
||||
and robust set of concepts to use as the foundation for the rest of the
|
||||
toolkit to be built on. Finding these building blocks is often one of the
|
||||
most difficult tasks in systems design. Once you find and validate a
|
||||
useful set of concepts, everything else gets easier.
|
||||
|
||||
We feel that the success that we've already had using NoSQLBench has been
|
||||
strongly tied to the core concepts. Some concepts used in NoSQLBench are
|
||||
shared below for illustration, but this is by no means an exhaustive list.
|
||||
|
||||
### The Cycle
|
||||
|
||||
Cycles in NoSQLBench are whole numbers on a number line. Each operation in
|
||||
a NoSQLBench scenario is derived from a single cycle. It's a long value,
|
||||
and a seed. The cycle determines not only which statement is selected for
|
||||
execution, but also what synthetic payload data will be attached to it.
|
||||
|
||||
Cycles are specified as a closed-open `[min,max)` interval, just as slices
|
||||
in some languages. That is, the min value is included in the range, but
|
||||
the max value is not. This means that you can stack slices using common
|
||||
numeric reference points without overlaps or gaps. It means you can have
|
||||
exact awareness of what data is in your dataset, even incrementally.
|
||||
|
||||
You can think of a cycle as a single-valued coordinate system for data
|
||||
that lives adjacent to that number on the number line. In this way,
|
||||
virtual dataset functions are ways of converting coordinates into data.
|
||||
|
||||
### The Activity
|
||||
|
||||
An activity is a multi-threaded flywheel of statements in some sequence
|
||||
and ratio. Activities run over the numbers in a cycle range. Each activity
|
||||
has a driver type which determines the native protocol that it speaks.
|
||||
|
||||
### The Driver Type
|
||||
|
||||
A driver type is a high level driver for a protocol. It is like a
|
||||
statement-aware cartridge that knows how to take a basic statement
|
||||
template and turn it into an operation for an activity to execute within
|
||||
the scenario.
|
||||
|
||||
### The Scenario
|
||||
|
||||
The scenario is a runtime session that holds the activities while they
|
||||
run. A NoSQLBench scenario is responsible for aggregating global runtime
|
||||
settings, metrics reporting channels, log files, and so on. All activities
|
||||
run within a scenario, under the control of the scenario script.
|
||||
|
||||
### The Scenario Script
|
||||
|
||||
Each scenario is governed by a script that runs single-threaded, asynchronously
|
||||
from activities, but in control of activities. If needed, the scenario
|
||||
script is automatically created for the user, and the user never knows it
|
||||
is there. If the user has advanced testing requirements, then they may
|
||||
take advantage of the scripting capability at such time. When the script
|
||||
exits, *AND* all activities are complete, then the scenario is complete.
|
@ -1,53 +0,0 @@
|
||||
---
|
||||
title: High Fidelity Metrics
|
||||
weight: 12
|
||||
---
|
||||
|
||||
# High-Fidelity Metrics
|
||||
|
||||
Since NoSQLBench has been built as a serious testing tool for all users,
|
||||
some attention was necessary on the way metrics are used.
|
||||
|
||||
## Discrete Reservoirs
|
||||
|
||||
In NoSQLBench, we avoid the use of time-decaying metrics reservoirs.
|
||||
Internally, we use HDR reservoirs with discrete time boundaries. This is
|
||||
so that you can look at the min and max values and know that they apply
|
||||
accurately to the whole sampling window.
|
||||
|
||||
## Metric Naming
|
||||
|
||||
All running activities have a symbolic alias that identifies them for the
|
||||
purposes of automation and metrics. If you have multiple activities
|
||||
running concurrently, they will have different names and will be
|
||||
represented distinctly in the metrics flow.
|
||||
|
||||
## Precision and Units
|
||||
|
||||
By default, the internal HDR histogram reservoirs are kept at 4 digits of
|
||||
precision. All timers are kept at nanosecond resolution.
|
||||
|
||||
## Metrics Reporting
|
||||
|
||||
Metrics can be reported via graphite as well as CSV, logs, HDR logs, and
|
||||
HDR stats summary CSV files.
|
||||
|
||||
## Coordinated Omission
|
||||
|
||||
The metrics naming and semantics in NoSQLBench are setup so that you can
|
||||
have coordinated omission metrics when they are appropriate, but there are
|
||||
no other changes when they are not. This means that the metric names and
|
||||
meanings remain stable in any case.
|
||||
|
||||
Particularly, NoSQLBench avoids the term "latency" altogether as it is
|
||||
often overused and thus prone to confusing people.
|
||||
|
||||
Instead, the terms `service time`, `wait time`, and `response time` are
|
||||
used. These are abbreviated in metrics as `servicetime`, `waittime`, and
|
||||
`responsetime`.
|
||||
|
||||
The `servicetime` metric is the only one which is always present. When a
|
||||
rate limiter is used, then additionally `waittime` and `responsetime` are
|
||||
reported.
|
||||
|
||||
|
@ -1,26 +0,0 @@
|
||||
---
|
||||
title: NoSQLBench Showcase
|
||||
weight: 10
|
||||
---
|
||||
|
||||
# NoSQLBench Showcase
|
||||
|
||||
Since NoSQLBench is new on the scene in its current form, you may be
|
||||
wondering why you would want to use it over any other tool. That is what
|
||||
this section is all about.
|
||||
|
||||
You don't have to read all of this! It is here for those who want to know
|
||||
the answer to the question "So, what's the big deal??" Just remember it is
|
||||
here for later if you want to skip to the next section and get started
|
||||
testing.
|
||||
|
||||
NoSQLBench can do nearly everything that other testing tools can do, and
|
||||
more. It achieves this by focusing on a scalable user experience in
|
||||
combination with a modular internal architecture.
|
||||
|
||||
NoSQLBench is a workload construction and simulation tool for scalable
|
||||
systems testing. That is an entirely different scope of endeavor than most
|
||||
other tools.
|
||||
|
||||
The pages in this section all speak to a selection of advanced
|
||||
capabilities that are unique to NoSQLBench.
|
@ -1,27 +0,0 @@
|
||||
---
|
||||
title: Modular Architecture
|
||||
weight: 11
|
||||
---
|
||||
|
||||
# Modular Architecture
|
||||
|
||||
The internal architecture of NoSQLBench is modular throughout. Everything
|
||||
from the scripting extensions to data generation is enumerated at compile
|
||||
time into a service descriptor, and then discovered at runtime by the SPI
|
||||
mechanism in Java.
|
||||
|
||||
This means that extending and customizing bundles and features is quite
|
||||
manageable.
|
||||
|
||||
It also means that it is relatively easy to provide a suitable API for
|
||||
multi-protocol support. In fact, there are several drivers available in
|
||||
the current NoSQLBench distribution. You can list them out with `nb
|
||||
--list-drivers`, and you can get help on how to use each of them with `nb
|
||||
help <driver name>`.
|
||||
|
||||
This also is a way for us to encourage and empower other contributors to
|
||||
help develop the capabilities and reach of NoSQLBench. By encouraging
|
||||
others to help us build NoSQLBench modules and extensions, we can help
|
||||
more users in the NoSQL community at large.
|
||||
|
||||
|
@ -1,50 +0,0 @@
|
||||
---
|
||||
title: Portable Workloads
|
||||
weight: 2
|
||||
---
|
||||
|
||||
# Portable Workloads
|
||||
|
||||
All of the workloads that you can build with NoSQLBench are self-contained
|
||||
in a workload file. This is a statement-oriented configuration file that
|
||||
contains templates for the operations you want to run in a workload.
|
||||
|
||||
This defines part of an activity - the iterative flywheel part that is run
|
||||
directly within an activity type. This file contains everything needed to
|
||||
run a basic activity -- A set of statements in some ratio. It can be used
|
||||
to start an activity, or as part of several activities within a scenario.
|
||||
|
||||
## Standard YAML Format
|
||||
|
||||
The format for describing statements in NoSQLBench is generic, but in a
|
||||
particular way that is specialized around describing statements for a
|
||||
workload. That means that you can use the same YAML format to describe a
|
||||
workload for kafka as you can for Apache Cassandra or DSE.
|
||||
|
||||
The YAML structure has been tailored to describing statements, their data
|
||||
generation bindings, how they are grouped and selected, and the parameters
|
||||
needed by drivers, like whether they should be prepared statements or not.
|
||||
|
||||
Further, the YAML format allows for defaults and overrides with a very
|
||||
simple mechanism that reduces editing fatigue for frequent users.
|
||||
|
||||
You can also template document-wide macro parameters which are taken from
|
||||
the command line just like any other parameter. This is a way of
|
||||
templating a workload and making it multi-purpose or adjustable on the fly.
|
||||
|
||||
## Experimentation Friendly
|
||||
|
||||
Because the workload YAML format is generic across driver types, it is
|
||||
possible to ask one driver type to interpret the statements that are meant
|
||||
for another. This isn't generally a good idea, but it becomes extremely
|
||||
handy when you want to have a high level driver type like `stdout`
|
||||
interpret the syntax of another driver like `cql`. When you do this, the
|
||||
stdout activity type _plays_ the statements to your console as they would
|
||||
be executed in CQL, data bindings and all.
|
||||
|
||||
This means you can empirically and directly demonstrate and verify access
|
||||
patterns, data skew, and other dataset details before you change back to
|
||||
cql mode and turn up the settings for a higher scale test. It takes away
|
||||
the guess work about what your test is actually doing, and it works for
|
||||
all drivers.
|
||||
|
@ -1,97 +0,0 @@
|
||||
---
|
||||
title: Scripting Environment
|
||||
weight: 3
|
||||
---
|
||||
|
||||
# Scripting Environment
|
||||
|
||||
The ability to write open-ended testing simulations is provided in
|
||||
NoSQLBench by means of a scripted runtime, where each scenario is driven
|
||||
from a control script that can do anything the user wants.
|
||||
|
||||
## Dynamic Parameters
|
||||
|
||||
Some configuration parameters of activities are designed to be assignable
|
||||
while a workload is running. This makes it possible to adjust things like threads, rates, and
|
||||
other workload dynamics in real-time. The internal APIs work with the
|
||||
scripting environment to expose these parameters directly to scenario
|
||||
scripts. Drivers that are provided to NoSQLBench can also expose dynamic
|
||||
parameters in the same way so that anything can be scripted dynamically
|
||||
when needed.
|
||||
|
||||
## Scripting Automatons
|
||||
|
||||
When a NoSQLBench scenario is running, it is under the control of a
|
||||
single-threaded script. Each activity that is started by this script is
|
||||
run within its own thread pool, simultaneously and asynchronously.
|
||||
|
||||
The control script has executive control of the activities, as well as
|
||||
full visibility into the metrics that are provided by each activity. The
|
||||
way these two parts of the runtime meet is through the service objects
|
||||
which are installed into the scripting runtime. These service objects
|
||||
provide a named access point for each running activity and its metrics.
|
||||
|
||||
This means that the scenario script can do something simple, like start
|
||||
activities and wait for them to complete, OR, it can do something more
|
||||
sophisticated like dynamically and iteratively scrutinize the metrics and
|
||||
make real-time adjustments to the workload while it runs.
|
||||
|
||||
## Analysis Methods
|
||||
|
||||
Scripting automatons that do feedback-oriented analysis of a target system
|
||||
are called analysis methods in NoSQLBench. We have prototyped a couple of
|
||||
these already, but there is nothing keeping the adventurous from coming up
|
||||
with their own.
|
||||
|
||||
## Command Line Scripting
|
||||
|
||||
The command line has the form of basic test commands and parameters. These
|
||||
command get converted directly into scenario control script in the order
|
||||
they appear. The user can choose whether to stay in high level executive
|
||||
mode, with simple commands like `nb test-scenario ...`, or to drop down
|
||||
directly into script design. They can look at the equivalent script for
|
||||
any command line by running --show-script. If you take the script that is
|
||||
dumped to console and run it, it will do exactly the same thing as if you
|
||||
hadn't even looked at it and just ran basic commands on the command line.
|
||||
|
||||
There are even ways to combine script fragments, full commands, and calls
|
||||
to scripts on the command line. Since each variant is merely a way of
|
||||
constructing scenario script, they all get composited together before the
|
||||
scenario script is run.
|
||||
|
||||
New introductions to NoSQLBench should focus on the command line. Once a
|
||||
user is familiar with this, it is up to them whether to tap into the
|
||||
deeper functionality. If they don't need to know about scenario scripting,
|
||||
then they shouldn't have to learn about it to be effective. This is what
|
||||
we are calling a _scalable user experience_.
|
||||
|
||||
## Compared to DSLs
|
||||
|
||||
Other tools may claim that their DSL makes scenario "simulation" easier.
|
||||
In practice, any DSL is generally dependent on a development tool to lay
|
||||
the language out in front of a user in a fluent way. This means that DSLs
|
||||
are almost always developer-targeted tools, and mostly useless for casual
|
||||
users who don't want to break out an IDE.
|
||||
|
||||
One of the things a DSL proponent may tell you is that it tells you "all
|
||||
the things you can do!". This is de-facto the same thing as it telling you
|
||||
"all the things you can't do" because it's not part of the DSL. This is
|
||||
not a win-win for the user. For DSL-based systems, the user has to use the
|
||||
DSL whether or not it enhances their creative control, while in fact, most
|
||||
DSLs aren't rich enough to do much that is interesting from a simulation
|
||||
perspective.
|
||||
|
||||
In NoSQLBench, we don't force the user to use the programming abstractions
|
||||
except at a very surface level -- the CLI. It is up to the user whether or
|
||||
not to open the secret access panel for the more advance functionality. If
|
||||
they decide to do this, we give them a commodity language (ECMAScript),
|
||||
and we wire it into all the things they were already using. We don't take
|
||||
away their creative freedom by telling them what they can't do. This way,
|
||||
users can pick their level of investment and reward as best fits their
|
||||
individual needs, as it should be.
|
||||
|
||||
## Scripting Extensions
|
||||
|
||||
Also mentioned under the section on modularity, it is relatively easy for
|
||||
a developer to add their own scripting extensions into NoSQLBench as named
|
||||
service objects.
|
@ -1,126 +0,0 @@
|
||||
---
|
||||
title: Virtual Datasets
|
||||
weight: 1
|
||||
---
|
||||
|
||||
# Virtual Datasets
|
||||
|
||||
The _Virtual Dataset_ capabilities within NoSQLBench allow you to generate
|
||||
data on the fly. There are many reasons for using this technique in
|
||||
testing, but it is often a topic that is overlooked or taken for granted.
|
||||
|
||||
This has multiple positive effects on the fidelity of the test:
|
||||
|
||||
1) It is much much more efficient than interacting with storage systems and piping data around. Even loading
|
||||
data from lightweight storage like NVMe will be more time intensive than simply generating it in most cases.
|
||||
2) As such, it leaves significant headroom on the table for introducing other valuable capabilities into
|
||||
the test system, like advanced rate metering, coordinated omission awareness, etc.
|
||||
3) Changing the data which is generated is as easy as changing the recipe.
|
||||
4) The efficiency of the client is often high enough to support single-client test setups without appreciable
|
||||
loss of capacity.
|
||||
5) Because of modern procedural generation techniques, the variety and shape of data available is significant.
|
||||
Increasing the space of possibilities is a matter of adding new algorithms. There is no data bulk to manage.
|
||||
6) Sophisticated test setups that are highly data dependent are portable. All you need is the test client.
|
||||
The building blocks for data generation are included, and many pre-built testing scenarios are already
|
||||
wired to use them.
|
||||
|
||||
Additional details of this approach are explained below.
|
||||
|
||||
## Industrial Strength
|
||||
|
||||
The algorithms used to generate data are based on advanced techniques in
|
||||
the realm of variate sampling. The authors have gone to great lengths to
|
||||
ensure that data generation is efficient and as much O(1) in processing
|
||||
time as possible.
|
||||
|
||||
For example...
|
||||
|
||||
One technique that is used to achieve this is to initialize and cache data
|
||||
in high resolution look-up tables for distributions which may otherwise
|
||||
perform differently depending on their respective density functions. The
|
||||
existing Apache Commons Math libraries have been adapted into a set of
|
||||
interpolated Inverse Cumulative Distribution sampling functions. This
|
||||
means that you can use them all in the same place as you would a Uniform
|
||||
distribution, and once initialized, they sample with identical overhead.
|
||||
This means that by changing your test definition, you don't accidentally
|
||||
change the behavior of your test client, only the data as intended.
|
||||
|
||||
## A Purpose-Built Tool
|
||||
|
||||
Many other testing systems avoid building a dataset generation component.
|
||||
It's a tough problem to solve, so it's often just avoided. Instead, they
|
||||
use libraries like "faker" or other sources of data which weren't designed
|
||||
for testing at scale. Faker is well named, no pun intended. It was meant
|
||||
as a vignette and wire-framing library, not a source of test data for
|
||||
realistic results. If you are using a testing tool for scale testing and
|
||||
relying on a faker variant, then you will almost certainly get invalid
|
||||
results that do not represent how a system would perform in production.
|
||||
|
||||
The virtual dataset component of NoSQLBench is a library that was designed
|
||||
for high scale and realistic data streams. It uses the limits of the data
|
||||
types in the JVM to simulate high cardinality datasets which approximate
|
||||
production data distributions for realistic and reproducible results.
|
||||
|
||||
## Deterministic
|
||||
|
||||
The data that is generated by the virtual dataset libraries is
|
||||
deterministic. This means that for a given cycle in a test, the operation
|
||||
that is synthesized for that cycle will be the same from one session to
|
||||
the next. This is intentional. If you want to perturb the test data from
|
||||
one session to the next, then you can most easily do it by simply
|
||||
selecting a different set of cycles as your basis.
|
||||
|
||||
This means that if you find something interesting in a test run, you can
|
||||
go back to it just by specifying the cycles in question. It also means
|
||||
that you aren't losing comparative value between tests with additional
|
||||
randomness thrown in. The data you generate will still look random to the
|
||||
human eye, but that doesn't mean that it can't be reproducible.
|
||||
|
||||
## Statistically Shaped
|
||||
|
||||
All this means is that the values you use to tie your dataset together can
|
||||
be specific to any distribution that is appropriate. You can ask for a
|
||||
stream of floating point values 1 trillion values long, in any order. You
|
||||
can use discrete or continuous distributions, with whatever distribution
|
||||
parameters you need.
|
||||
|
||||
## Best of Both Worlds
|
||||
|
||||
Some might worry that fully synthetic testing data is not realistic
|
||||
enough. The devil is in the details on these arguments, but suffice it to
|
||||
say that you can pick the level of real data you use as seed data with
|
||||
NoSQLBench. You don't have to choose between realism and agility. The
|
||||
procedural data generation approach allows you to have all the benefits of
|
||||
testing agility of low-entropy testing tools while retaining nearly all of
|
||||
the benefits of real testing data.
|
||||
|
||||
For example, using the alias sampling method and a published US census
|
||||
(public domain) list of names and surnames that occurred more than 100x, we
|
||||
can provide extremely accurate samples of names according to the published
|
||||
labels and weights. The alias method allows us to sample accurately in
|
||||
O(1) time from the entire dataset by turning a large number of weights
|
||||
into two uniform samples. You will simply not find a better way to sample
|
||||
realistic (US) names than this. (If you do, please file an issue!)
|
||||
Actually, any data set that you have in CSV form with a weight column can
|
||||
also be used this way, so you're not strictly limited to US census data.
|
||||
|
||||
## Java Idiomatic Extension
|
||||
|
||||
The way that the virtual dataset component works allows Java developers to
|
||||
write any extension to the data generation functions simply in the form of
|
||||
Java 8 or newer Functional interfaces. As long as they include the
|
||||
annotation processor and annotate their classes, they will show up in the
|
||||
runtime and be available to any workload by their class name.
|
||||
|
||||
Additionally, annotation based examples and annotation processing is used
|
||||
to hoist function docs directly into the published docs that go along with
|
||||
any version of NoSQLBench.
|
||||
|
||||
## Binding Recipes
|
||||
|
||||
It is possible to stitch data generation functions together directly in a
|
||||
workload YAML. These are data-flow sketches of functions that can be
|
||||
copied and pasted between workload descriptions to share or remix data
|
||||
streams. This allows for the adventurous to build sophisticated virtual
|
||||
datasets that emulate nuances of real datasets, but in a form that takes
|
||||
up less space on the screen than this paragraph!
|
@ -1,8 +0,0 @@
|
||||
---
|
||||
title: Testing Practices
|
||||
weight: 5
|
||||
---
|
||||
|
||||
# Testing Practices
|
||||
|
||||
This section contains some introductory details on setting up test scenarios for new NoSQLBench users.
|
Binary file not shown.
Before Width: | Height: | Size: 125 KiB |
@ -1,53 +0,0 @@
|
||||
@startuml
|
||||
header Layers of Messaging
|
||||
hide footbox
|
||||
|
||||
box "User View" #white
|
||||
actor Alice as user
|
||||
collections "User\nAgent" as agent
|
||||
collections "Client\nSide\nApp" as capp
|
||||
end box
|
||||
|
||||
box "WebApp View" #white
|
||||
boundary "App Server\nor node, ..." as sapp
|
||||
boundary "Database\nDriver" as driver
|
||||
end box
|
||||
|
||||
box "DB View" #white
|
||||
database DB as db
|
||||
boundary "Storage\nService" as store
|
||||
entity "Storage\nDevice" as device
|
||||
end box
|
||||
|
||||
user -> agent: user clicks link
|
||||
activate agent
|
||||
agent -> capp: event\nhandler
|
||||
activate capp
|
||||
capp -> sapp: [http\nrequest]
|
||||
activate sapp
|
||||
sapp -> driver: read op
|
||||
activate driver
|
||||
driver -> db: read data
|
||||
activate db
|
||||
db -> store: read data
|
||||
activate store
|
||||
store -> device: read data
|
||||
activate device
|
||||
|
||||
store <- device: [data]
|
||||
deactivate device
|
||||
db <- store: [data]
|
||||
deactivate store
|
||||
driver <- db: [data]
|
||||
deactivate db
|
||||
sapp <- driver: [data]
|
||||
deactivate driver
|
||||
capp <- sapp: web content\n[http]
|
||||
deactivate sapp
|
||||
agent <- capp: web content\n[http]
|
||||
deactivate capp
|
||||
user <- agent: browser\nrenders page
|
||||
deactivate agent
|
||||
|
||||
|
||||
@enduml
|
@ -1,430 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<svg xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
contentScriptType="application/ecmascript"
|
||||
contentStyleType="text/css" height="865px" preserveAspectRatio="none"
|
||||
style="width:930px;height:865px;" version="1.1" viewBox="0 0 930 865"
|
||||
width="930px" zoomAndPan="magnify">
|
||||
<defs>
|
||||
<filter height="300%" id="f6q2vew7ewgyq" width="300%" x="-1" y="-1">
|
||||
<feGaussianBlur result="blurOut" stdDeviation="2.5"/>
|
||||
<feColorMatrix in="blurOut" result="blurOut2" type="matrix"
|
||||
values="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 .4 0"/>
|
||||
<feOffset dx="5.0" dy="5.0" in="blurOut2" result="blurOut3"/>
|
||||
<feBlend in="SourceGraphic" in2="blurOut3" mode="normal"/>
|
||||
</filter>
|
||||
</defs>
|
||||
<g>
|
||||
<rect fill="#FFFFFF" height="817.7353"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="346.875"
|
||||
x="5" y="25.775"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
font-weight="bold" lengthAdjust="spacingAndGlyphs"
|
||||
textLength="80" x="138.4375" y="43.1463">User View
|
||||
</text>
|
||||
<rect fill="#FFFFFF" height="817.7353"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="208.75"
|
||||
x="390" y="25.775"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
font-weight="bold" lengthAdjust="spacingAndGlyphs"
|
||||
textLength="110" x="439.375" y="43.1463">WebApp View
|
||||
</text>
|
||||
<rect fill="#FFFFFF" height="817.7353"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="283.75"
|
||||
x="630.625" y="25.775"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
font-weight="bold" lengthAdjust="spacingAndGlyphs"
|
||||
textLength="66.25" x="739.375" y="43.1463">DB View
|
||||
</text>
|
||||
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="625.8854"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
|
||||
x="166.25" y="202.6249"/>
|
||||
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="502.3553"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
|
||||
x="302.5" y="264.39"/>
|
||||
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="378.8252"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
|
||||
x="438.75" y="326.155"/>
|
||||
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="277.4276"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
|
||||
x="544.375" y="365.7875"/>
|
||||
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="198.1626"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
|
||||
x="651.875" y="405.42"/>
|
||||
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="118.8976"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
|
||||
x="759.375" y="445.0526"/>
|
||||
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="39.6325"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
|
||||
x="866.875" y="484.6851"/>
|
||||
<line
|
||||
style="stroke: #A80036; stroke-width: 1.25; stroke-dasharray: 5.0,5.0;"
|
||||
x1="33.75" x2="33.75" y1="160.4924" y2="851.0103"/>
|
||||
<line
|
||||
style="stroke: #A80036; stroke-width: 1.25; stroke-dasharray: 5.0,5.0;"
|
||||
x1="172.5" x2="172.5" y1="160.4924" y2="851.0103"/>
|
||||
<line
|
||||
style="stroke: #A80036; stroke-width: 1.25; stroke-dasharray: 5.0,5.0;"
|
||||
x1="308.125" x2="308.125" y1="160.4924" y2="851.0103"/>
|
||||
<line
|
||||
style="stroke: #A80036; stroke-width: 1.25; stroke-dasharray: 5.0,5.0;"
|
||||
x1="445" x2="445" y1="160.4924" y2="851.0103"/>
|
||||
<line
|
||||
style="stroke: #A80036; stroke-width: 1.25; stroke-dasharray: 5.0,5.0;"
|
||||
x1="550" x2="550" y1="160.4924" y2="851.0103"/>
|
||||
<line
|
||||
style="stroke: #A80036; stroke-width: 1.25; stroke-dasharray: 5.0,5.0;"
|
||||
x1="658.125" x2="658.125" y1="160.4924" y2="851.0103"/>
|
||||
<line
|
||||
style="stroke: #A80036; stroke-width: 1.25; stroke-dasharray: 5.0,5.0;"
|
||||
x1="765.625" x2="765.625" y1="160.4924" y2="851.0103"/>
|
||||
<line
|
||||
style="stroke: #A80036; stroke-width: 1.25; stroke-dasharray: 5.0,5.0;"
|
||||
x1="873.125" x2="873.125" y1="160.4924" y2="851.0103"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="17.5"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="40" x="10"
|
||||
y="155.3649">Alice
|
||||
</text>
|
||||
<ellipse cx="33.75" cy="65.4075" fill="#FEFECE"
|
||||
filter="url(#f6q2vew7ewgyq)" rx="10" ry="10"
|
||||
style="stroke: #A80036; stroke-width: 2.5;"/>
|
||||
<path
|
||||
d="M33.75,75.4075 L33.75,109.1575 M17.5,85.4075 L50,85.4075 M33.75,109.1575 L17.5,127.9075 M33.75,109.1575 L50,127.9075 "
|
||||
fill="none" filter="url(#f6q2vew7ewgyq)"
|
||||
style="stroke: #A80036; stroke-width: 2.5;"/>
|
||||
<rect fill="#FEFECE" filter="url(#f6q2vew7ewgyq)" height="65.1698"
|
||||
style="stroke: #A80036; stroke-width: 1.875;" width="67.5"
|
||||
x="138.75" y="84.0726"/>
|
||||
<rect fill="#FEFECE" filter="url(#f6q2vew7ewgyq)" height="65.1698"
|
||||
style="stroke: #A80036; stroke-width: 1.875;" width="67.5"
|
||||
x="133.75" y="89.0726"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="17.5"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="38.75" x="148.125"
|
||||
y="116.53">User
|
||||
</text>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="17.5"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="50" x="142.5"
|
||||
y="140.3649">Agent
|
||||
</text>
|
||||
<rect fill="#FEFECE" filter="url(#f6q2vew7ewgyq)" height="89.0047"
|
||||
style="stroke: #A80036; stroke-width: 1.875;" width="66.25"
|
||||
x="275.625" y="60.2377"/>
|
||||
<rect fill="#FEFECE" filter="url(#f6q2vew7ewgyq)" height="89.0047"
|
||||
style="stroke: #A80036; stroke-width: 1.875;" width="66.25"
|
||||
x="270.625" y="65.2377"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="17.5"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="48.75" x="279.375"
|
||||
y="92.6951">Client
|
||||
</text>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="17.5"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="36.25" x="285.625"
|
||||
y="116.53">Side
|
||||
</text>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="17.5"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="33.75" x="286.875"
|
||||
y="140.3649">App
|
||||
</text>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="17.5"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="92.5" x="395"
|
||||
y="131.53">App Server
|
||||
</text>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="17.5"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="90" x="396.25"
|
||||
y="155.3649">or node, ...
|
||||
</text>
|
||||
<path
|
||||
d="M419.375,77.8226 L419.375,107.8226 M419.375,92.8226 L440.625,92.8226 "
|
||||
fill="none" filter="url(#f6q2vew7ewgyq)"
|
||||
style="stroke: #A80036; stroke-width: 2.5;"/>
|
||||
<ellipse cx="455.625" cy="92.8226" fill="#FEFECE"
|
||||
filter="url(#f6q2vew7ewgyq)" rx="15" ry="15"
|
||||
style="stroke: #A80036; stroke-width: 2.5;"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="17.5"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="78.75" x="507.5"
|
||||
y="131.53">Database
|
||||
</text>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="17.5"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="51.25" x="521.25"
|
||||
y="155.3649">Driver
|
||||
</text>
|
||||
<path d="M525,77.8226 L525,107.8226 M525,92.8226 L546.25,92.8226 "
|
||||
fill="none" filter="url(#f6q2vew7ewgyq)"
|
||||
style="stroke: #A80036; stroke-width: 2.5;"/>
|
||||
<ellipse cx="561.25" cy="92.8226" fill="#FEFECE"
|
||||
filter="url(#f6q2vew7ewgyq)" rx="15" ry="15"
|
||||
style="stroke: #A80036; stroke-width: 2.5;"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="17.5"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="23.75" x="642.5"
|
||||
y="155.3649">DB
|
||||
</text>
|
||||
<path
|
||||
d="M635.625,91.6575 C635.625,79.1575 658.125,79.1575 658.125,79.1575 C658.125,79.1575 680.625,79.1575 680.625,91.6575 L680.625,124.1575 C680.625,136.6575 658.125,136.6575 658.125,136.6575 C658.125,136.6575 635.625,136.6575 635.625,124.1575 L635.625,91.6575 "
|
||||
fill="#FEFECE" filter="url(#f6q2vew7ewgyq)"
|
||||
style="stroke: #000000; stroke-width: 1.875;"/>
|
||||
<path
|
||||
d="M635.625,91.6575 C635.625,104.1575 658.125,104.1575 658.125,104.1575 C658.125,104.1575 680.625,104.1575 680.625,91.6575 "
|
||||
fill="none" style="stroke: #000000; stroke-width: 1.875;"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="17.5"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="65" x="729.375"
|
||||
y="131.53">Storage
|
||||
</text>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="17.5"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="60" x="731.875"
|
||||
y="155.3649">Service
|
||||
</text>
|
||||
<path d="M740,77.8226 L740,107.8226 M740,92.8226 L761.25,92.8226 "
|
||||
fill="none" filter="url(#f6q2vew7ewgyq)"
|
||||
style="stroke: #A80036; stroke-width: 2.5;"/>
|
||||
<ellipse cx="776.25" cy="92.8226" fill="#FEFECE"
|
||||
filter="url(#f6q2vew7ewgyq)" rx="15" ry="15"
|
||||
style="stroke: #A80036; stroke-width: 2.5;"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="17.5"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="65" x="836.875"
|
||||
y="131.53">Storage
|
||||
</text>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="17.5"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="55" x="841.875"
|
||||
y="155.3649">Device
|
||||
</text>
|
||||
<ellipse cx="873.125" cy="92.8226" fill="#FEFECE"
|
||||
filter="url(#f6q2vew7ewgyq)" rx="15" ry="15"
|
||||
style="stroke: #A80036; stroke-width: 2.5;"/>
|
||||
<line style="stroke: #A80036; stroke-width: 2.5;" x1="858.125"
|
||||
x2="888.125" y1="110.3226" y2="110.3226"/>
|
||||
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="625.8854"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
|
||||
x="166.25" y="202.6249"/>
|
||||
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="502.3553"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
|
||||
x="302.5" y="264.39"/>
|
||||
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="378.8252"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
|
||||
x="438.75" y="326.155"/>
|
||||
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="277.4276"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
|
||||
x="544.375" y="365.7875"/>
|
||||
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="198.1626"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
|
||||
x="651.875" y="405.42"/>
|
||||
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="118.8976"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
|
||||
x="759.375" y="445.0526"/>
|
||||
<rect fill="#FFFFFF" filter="url(#f6q2vew7ewgyq)" height="39.6325"
|
||||
style="stroke: #A80036; stroke-width: 1.25;" width="12.5"
|
||||
x="866.875" y="484.6851"/>
|
||||
<polygon fill="#A80036"
|
||||
points="151.25,197.6249,163.75,202.6249,151.25,207.6249,156.25,202.6249"
|
||||
style="stroke: #A80036; stroke-width: 1.25;"/>
|
||||
<line style="stroke: #A80036; stroke-width: 1.25;" x1="33.75"
|
||||
x2="158.75" y1="202.6249" y2="202.6249"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="105" x="42.5"
|
||||
y="195.3637">user clicks link
|
||||
</text>
|
||||
<polygon fill="#A80036"
|
||||
points="287.5,259.39,300,264.39,287.5,269.39,292.5,264.39"
|
||||
style="stroke: #A80036; stroke-width: 1.25;"/>
|
||||
<line style="stroke: #A80036; stroke-width: 1.25;" x1="178.75"
|
||||
x2="295" y1="264.39" y2="264.39"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="42.5" x="187.5"
|
||||
y="234.9962">event
|
||||
</text>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="57.5" x="187.5"
|
||||
y="257.1287">handler
|
||||
</text>
|
||||
<polygon fill="#A80036"
|
||||
points="423.75,321.155,436.25,326.155,423.75,331.155,428.75,326.155"
|
||||
style="stroke: #A80036; stroke-width: 1.25;"/>
|
||||
<line style="stroke: #A80036; stroke-width: 1.25;" x1="315"
|
||||
x2="431.25" y1="326.155" y2="326.155"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="37.5" x="323.75"
|
||||
y="296.7612">[http
|
||||
</text>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="62.5" x="323.75"
|
||||
y="318.8938">request]
|
||||
</text>
|
||||
<polygon fill="#A80036"
|
||||
points="529.375,360.7875,541.875,365.7875,529.375,370.7875,534.375,365.7875"
|
||||
style="stroke: #A80036; stroke-width: 1.25;"/>
|
||||
<line style="stroke: #A80036; stroke-width: 1.25;" x1="451.25"
|
||||
x2="536.875" y1="365.7875" y2="365.7875"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="57.5" x="460"
|
||||
y="358.5263">read op
|
||||
</text>
|
||||
<polygon fill="#A80036"
|
||||
points="636.875,400.42,649.375,405.42,636.875,410.42,641.875,405.42"
|
||||
style="stroke: #A80036; stroke-width: 1.25;"/>
|
||||
<line style="stroke: #A80036; stroke-width: 1.25;" x1="556.875"
|
||||
x2="644.375" y1="405.42" y2="405.42"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="71.25" x="565.625"
|
||||
y="398.1588">read data
|
||||
</text>
|
||||
<polygon fill="#A80036"
|
||||
points="744.375,440.0526,756.875,445.0526,744.375,450.0526,749.375,445.0526"
|
||||
style="stroke: #A80036; stroke-width: 1.25;"/>
|
||||
<line style="stroke: #A80036; stroke-width: 1.25;" x1="664.375"
|
||||
x2="751.875" y1="445.0526" y2="445.0526"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="71.25" x="673.125"
|
||||
y="437.7913">read data
|
||||
</text>
|
||||
<polygon fill="#A80036"
|
||||
points="851.875,479.6851,864.375,484.6851,851.875,489.6851,856.875,484.6851"
|
||||
style="stroke: #A80036; stroke-width: 1.25;"/>
|
||||
<line style="stroke: #A80036; stroke-width: 1.25;" x1="771.875"
|
||||
x2="859.375" y1="484.6851" y2="484.6851"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="71.25" x="780.625"
|
||||
y="477.4238">read data
|
||||
</text>
|
||||
<polygon fill="#A80036"
|
||||
points="785.625,519.3176,773.125,524.3176,785.625,529.3176,780.625,524.3176"
|
||||
style="stroke: #A80036; stroke-width: 1.25;"/>
|
||||
<line style="stroke: #A80036; stroke-width: 1.25;" x1="778.125"
|
||||
x2="871.875" y1="524.3176" y2="524.3176"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="43.75" x="793.125"
|
||||
y="517.0564">[data]
|
||||
</text>
|
||||
<polygon fill="#A80036"
|
||||
points="678.125,558.9501,665.625,563.9501,678.125,568.9501,673.125,563.9501"
|
||||
style="stroke: #A80036; stroke-width: 1.25;"/>
|
||||
<line style="stroke: #A80036; stroke-width: 1.25;" x1="670.625"
|
||||
x2="764.375" y1="563.9501" y2="563.9501"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="43.75" x="685.625"
|
||||
y="556.6889">[data]
|
||||
</text>
|
||||
<polygon fill="#A80036"
|
||||
points="570.625,598.5826,558.125,603.5826,570.625,608.5826,565.625,603.5826"
|
||||
style="stroke: #A80036; stroke-width: 1.25;"/>
|
||||
<line style="stroke: #A80036; stroke-width: 1.25;" x1="563.125"
|
||||
x2="656.875" y1="603.5826" y2="603.5826"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="43.75" x="578.125"
|
||||
y="596.3214">[data]
|
||||
</text>
|
||||
<polygon fill="#A80036"
|
||||
points="465,638.2152,452.5,643.2152,465,648.2152,460,643.2152"
|
||||
style="stroke: #A80036; stroke-width: 1.25;"/>
|
||||
<line style="stroke: #A80036; stroke-width: 1.25;" x1="457.5"
|
||||
x2="549.375" y1="643.2152" y2="643.2152"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="43.75" x="472.5"
|
||||
y="635.9539">[data]
|
||||
</text>
|
||||
<polygon fill="#A80036"
|
||||
points="328.75,699.9802,316.25,704.9802,328.75,709.9802,323.75,704.9802"
|
||||
style="stroke: #A80036; stroke-width: 1.25;"/>
|
||||
<line style="stroke: #A80036; stroke-width: 1.25;" x1="321.25"
|
||||
x2="443.75" y1="704.9802" y2="704.9802"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="93.75" x="336.25"
|
||||
y="675.5864">web content
|
||||
</text>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="42.5" x="336.25"
|
||||
y="697.719">[http]
|
||||
</text>
|
||||
<polygon fill="#A80036"
|
||||
points="192.5,761.7453,180,766.7453,192.5,771.7453,187.5,766.7453"
|
||||
style="stroke: #A80036; stroke-width: 1.25;"/>
|
||||
<line style="stroke: #A80036; stroke-width: 1.25;" x1="185" x2="307.5"
|
||||
y1="766.7453" y2="766.7453"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="93.75" x="200"
|
||||
y="737.3515">web content
|
||||
</text>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="42.5" x="200"
|
||||
y="759.484">[http]
|
||||
</text>
|
||||
<polygon fill="#A80036"
|
||||
points="47.5,823.5103,35,828.5103,47.5,833.5103,42.5,828.5103"
|
||||
style="stroke: #A80036; stroke-width: 1.25;"/>
|
||||
<line style="stroke: #A80036; stroke-width: 1.25;" x1="40" x2="171.25"
|
||||
y1="828.5103" y2="828.5103"/>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="61.25" x="55"
|
||||
y="799.1165">browser
|
||||
</text>
|
||||
<text fill="#000000" font-family="sans-serif" font-size="16.25"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="98.75" x="55"
|
||||
y="821.249">renders page
|
||||
</text>
|
||||
<text fill="#888888" font-family="sans-serif" font-size="12.5"
|
||||
lengthAdjust="spacingAndGlyphs" textLength="122.5" x="793.125"
|
||||
y="17.1125">Layers of Messaging
|
||||
</text><!--MD5=[349deb3338ea8e90f753903e3c65ec3b]
|
||||
@startuml
|
||||
header Layers of Messaging
|
||||
hide footbox
|
||||
|
||||
box "User View" #clear
|
||||
actor Alice as user
|
||||
collections "User\nAgent" as agent
|
||||
collections "Client\nSide\nApp" as capp
|
||||
end box
|
||||
|
||||
box "WebApp View" #clear
|
||||
boundary "App Server\nor node, ..." as sapp
|
||||
boundary "Database\nDriver" as driver
|
||||
end box
|
||||
|
||||
box "DB View" #clear
|
||||
database DB as db
|
||||
boundary "Storage\nService" as store
|
||||
entity "Storage\nDevice" as device
|
||||
end box
|
||||
|
||||
user -> agent: user clicks link [[{tooltip}]]
|
||||
activate agent
|
||||
agent -> capp: event\nhandler
|
||||
activate capp
|
||||
capp -> sapp: [http\nrequest]
|
||||
activate sapp
|
||||
sapp -> driver: read op
|
||||
activate driver
|
||||
driver -> db: read data
|
||||
activate db
|
||||
db -> store: read data
|
||||
activate store
|
||||
store -> device: read data
|
||||
activate device
|
||||
|
||||
store <- device: [data]
|
||||
deactivate device
|
||||
db <- store: [data]
|
||||
deactivate store
|
||||
driver <- db: [data]
|
||||
deactivate db
|
||||
sapp <- driver: [data]
|
||||
deactivate driver
|
||||
capp <- sapp: web content\n[http]
|
||||
deactivate sapp
|
||||
agent <- capp: web content\n[http]
|
||||
deactivate capp
|
||||
user <- agent: browser\nrenders page
|
||||
deactivate agent
|
||||
|
||||
|
||||
@enduml
|
||||
|
||||
PlantUML version 1.2020.09(Sun May 10 05:51:06 CDT 2020)
|
||||
(GPL source distribution)
|
||||
Java Runtime: OpenJDK Runtime Environment
|
||||
JVM: OpenJDK 64-Bit Server VM
|
||||
Java Version: 11.0.8+10-b944.31
|
||||
Operating System: Linux
|
||||
Default Encoding: UTF-8
|
||||
Language: en
|
||||
Country: US
|
||||
-->
|
||||
</g>
|
||||
</svg>
|
Before Width: | Height: | Size: 20 KiB |
@ -1,99 +0,0 @@
|
||||
---
|
||||
title: Multiple Clients
|
||||
weight: 9
|
||||
---
|
||||
|
||||
# Multiple Clients (Q&A)
|
||||
|
||||
This page is a basic FAQ regarding multiple clients with NoSQLBench.
|
||||
The details in this section will be absorbed into the docs unless users find this format more useful. (Please give feedback on the Q&A format!)
|
||||
|
||||
-----
|
||||
|
||||
**question**
|
||||
|
||||
What is the right approach to run multiple instances of NoSQLBench (nb) for a given test?
|
||||
|
||||
**answer**
|
||||
|
||||
NoSQLBench can generate a significant amount of traffic. If you are testing with more than 5 nodes on the server side (for comparable hardware) then it may be necessary to add more clients if you are indeed wanting to generate a saturating workload. Otherwise, one client is nearly always enough. Of course, you may want to double check the resource usage on your client and then decide. Generally speaking, if your CPU is over 50% on the client, then it's a good idea to add more clients.
|
||||
|
||||
If you need to add more clients, then you can make sure they are using different
|
||||
data and thus splitting the workload by ensuring that they each operate on a
|
||||
different set of cycles. For example, with a total workload size of 100M cycles,
|
||||
you can split it by setting `cycles=0..50M` on the first client, and then `cycles=50M..100M` on the second. This approach can be used to split cycles among any number of clients.
|
||||
|
||||
-----
|
||||
|
||||
**question**
|
||||
|
||||
I observed that nb is creating exactly the same transactions each time it is run. I do understand that this is a feature and not a bug and supports reproducibility. I thought I could run nb from multiple drivers, but then I would need a more randomized behavior in nb.
|
||||
|
||||
**answer**
|
||||
|
||||
Yes, The cycle range used in the test actually changes the data used in the data bindings. If you are generating pseudo-random data already, you can simply use a different cycle range. For example cycles=100M (shorthand for cycles=0..100M) is one set of operations, and cycles=100M..200M is a different set of operations (also 100M total, but different values are used within the operations)
|
||||
|
||||
This is a common enough request that we are going to add a way to hash the
|
||||
input different for different tests when desired. This will not be applied
|
||||
by default, but when needed it will become the easiest way to handle this type of scenario.
|
||||
|
||||
-----
|
||||
|
||||
**question**
|
||||
|
||||
Is there a more verbose documentation on the syntax of the yaml files that describe the benchmark. A list of examples would be welcome, too.
|
||||
|
||||
**answer**
|
||||
|
||||
The section of the docs called "Building Workloads" is actually a detailed
|
||||
explanation of the yaml format. The YAML format and the concepts that
|
||||
one needs to understand it are woven together here with detailed examples
|
||||
from start to end.
|
||||
|
||||
-----
|
||||
|
||||
**question**
|
||||
|
||||
I installed ops center and used it to visualize metrics such as Read Requests, Read Request Latency, OS: CPU and others. This works.
|
||||
|
||||
I also tried --docker-metrics on the nb command line. I was able to open Grafana on port 3000 and found some metrics, but not Read Request, Write Request etc. It seems to be that ops center has more information.
|
||||
|
||||
**answer**
|
||||
|
||||
The metrics recorded by NoSQLBench are client-side. OpsCenter looks at server-side metrics. You can have both in one place if you use dsemetricscollector and combine the configs, but it is not as easy as just using --docker-metrics. We will add better docs for this.
|
||||
|
||||
When looking at metrics, it is critical that you know what the vantage point is for each one, and what it means for the test results. A new section has been added to this section of the docs called "Vantage Points" as a primer for this.
|
||||
|
||||
There are generally 4 vantage points of some significance used in C* testing:
|
||||
|
||||
1. Application (same as nb in this case)
|
||||
2. Driver/Data Layer (generally the same as nb in this case, but we do offer driver metrics separately if needed)
|
||||
3. Coordinator (sometimes called Proxy)
|
||||
4. Replica
|
||||
|
||||
The latter 2 are the only ones you will see in OpsCenter. It usually makes sense to look at the path and do some deduction about the differences, say the difference in read latency from the client, proxy, or storage levels.
|
||||
|
||||
|
||||
-----
|
||||
|
||||
**question**
|
||||
|
||||
I also checked the metrics at end of the log file that are created by nb and didn’t find a breakdown into read/write metrics either. I used Cassandra-stress in the past and remember that it provided such information in their log file.
|
||||
|
||||
**answer**
|
||||
|
||||
If you want to instrument your statements in nosqlbench so that metrics are provided separately for each statement, you can do that by throwing the `instrument: true` option on your statements in the yaml. This works for the CQL driver and we will look at ways to support it in other drivers too.
|
||||
|
||||
-----
|
||||
|
||||
**question**
|
||||
|
||||
I’m looking for something that can be scripted so that I can run multiple variations and extract results automatically.
|
||||
|
||||
**answer**
|
||||
|
||||
NoSQLBench can definitely do that. It is what it was built for. For multiple variations either use the cycle range setting as described above, or add a permutation function to the head of your binding recipes.
|
||||
|
||||
Getting down to the details of what you mean by "variation" might be a quick conversation, but it could also be in-depth depending on your requirements. For simple cases, just throwing a Hash() into the front will cause the data to be randomized. You can also consider the Shuffle(...) functions with different bank numbers.
|
||||
|
||||
The feature mentioned above for pre-hashing will be the easiest way to do this once it is implemented.
|
@ -1,60 +0,0 @@
|
||||
---
|
||||
title: Random Data
|
||||
weight: 5
|
||||
---
|
||||
|
||||
# Random Data
|
||||
|
||||
This section touches on topics of using randomized data within NoSQLBench tests.
|
||||
|
||||
## Benefits
|
||||
|
||||
The benefits of using procedural generation for the purposes of load testing are taken for granted in
|
||||
this section. For a more thorough discussion on the assumed merits, please see _Showcase_, _Virtual Datasets_
|
||||
section.
|
||||
|
||||
## Basic Theory
|
||||
|
||||
In NoSQLBench, the data used for each operation is generated on the fly. However, the data is also deterministic
|
||||
by default. That means, for a given activity, any numbered cycle will produce the same operation from test to test,
|
||||
so long as the parameters are the same.
|
||||
|
||||
NoSQLBench runs each activity over a specific range of cycles. Each cycle is based on a specific number
|
||||
from the cycle range. This cycle number is used as the seed value for that cycle. It determines not
|
||||
only which operation is selected, but also what data is generated and bound to that operation for execution.
|
||||
The data generation is initialized at the start, and optimized for rapid access during steady state operation.
|
||||
|
||||
This is by-design. However, there are ways of selecting how much variation you have from one test scenario to another.
|
||||
|
||||
## Managing Variation
|
||||
|
||||
Sometimes you will want to run the same test with the same operations, access patterns, and data.
|
||||
For certain types of testing and comparisons, this is the only way to shed a light on a specific
|
||||
issue, or variation in performance. The ability to run the same test between different target systems
|
||||
is extremely valuable.
|
||||
|
||||
### Selecting Cycles
|
||||
|
||||
You can cause an activity to run a different set of operations simply by changing the cycle range used
|
||||
in the test.
|
||||
|
||||
For an activity that is configured with `cycles=100M`, 100 million independent cycles will be used.
|
||||
These cycles will be automatically apportioned to the client threads as needed until they are all
|
||||
used up.
|
||||
|
||||
If you want to run 100 million different cycles, all you have to do is specify a different set
|
||||
of seeds. This is as simple as specifying `cycles=100M..200M`, as the first example above is only short-hand
|
||||
for `cycles=0..100M`.
|
||||
|
||||
### Selecting Bindings
|
||||
|
||||
The built-in workloads come with bindings which support the "rampup" and "main" phases appropriately. This means that the cycles for rampup will use a binding that lays data into a dataset incrementally, as you would build a log cabin. Each cycle adds to the data. The bindings are chosen for this effect so that the rampup phase is incremental with the cycle value.
|
||||
|
||||
The main phase is selected differently. In the main phase, you don't want to address over the data in order. To emulate a real workload, you need to select the data pseudo-randomly so that storage devices don't get to cheat with read-ahead (more than they would realistically) and so on. That means that the main phase bindings are also specifically chosen for the "random" access patterns that you might expect in some workloads.
|
||||
|
||||
The distinction between these two types of bindings should tell you something about the binding capabilities. You can really do whatever you want as long as you can stitch the right functions together to get there. Although the data produced by some of the functions (like `Hash()` for example) looks random, it is not. It is, however, effectively random enough for most distributed systems performance testing.
|
||||
|
||||
If you need to add randomization to fields, it doesn't hurt to add an additional `Hash()` to the front. Just be advised that the same constructions from one binding recipe to the next will yield the same outputs, so season to taste.
|
||||
|
||||
|
||||
|
@ -1,65 +0,0 @@
|
||||
---
|
||||
title: Vantage Points
|
||||
weight: 8
|
||||
---
|
||||
|
||||
# Vantage Points
|
||||
|
||||
A successful test of a system results in a set of measurements. However, there are many ways to
|
||||
take measurements and they all serve to answer different questions. Where you take your measurements
|
||||
also determines what you measure.
|
||||
|
||||
Consider the following diagram:
|
||||
|
||||

|
||||
|
||||
This diagram illustrates a prototypical set of services and their inner service dependencies. This view only shows synchronous calls to keep the diagram simple.
|
||||
|
||||
## User Impact
|
||||
|
||||
The outer-most layer of the onion is what the user interacts with. In most modern services, this is the browser. As well, in most modern applications, there is an active client-side component which acts as part of the composed application, with local page state being pseudo-persistent except for cache controls and full reloads. This highlights how far designers will go to make interactions "local" for users to avoid the cost of long request loops.
|
||||
|
||||
As such, the browser is subject to any response times included within the inner service layers. Still, the browser represents the outer-most and thus most authentic vantage point from which to measure user impact of service time. This is called the _User View_ in the above diagram.
|
||||
|
||||
## Looking Inward
|
||||
|
||||
Beyond the outer layer, you'll usually find more layers. In terms of what these layers are called: "endpoint", "service", "web app", "app server", there is a ton of subjectivity. Although the names change, the underlying mechanisms are generally the same. The naming conventions come more from local norms within a tech space or community of builders. One person's "App Server" is another's "RESTful endpoint". What is important to notice is how the layers form a cascade of dependencies down to some physical device which is responsible for storing data. This pattern will be durable in nearly every system you look at.
|
||||
|
||||
Between each layer is a type of messaging component. These are sometimes called "media", or "transport" in RFCs. Each connection between the layers carries with it a set of fundamental trade-offs that, if understood, can establish reasonably durable minimum and maximum response times in the realm of possibilities.
|
||||
|
||||
For example, a storage device that is using NVMe as the host bus will, all else being equal, perform better than one served by a SATA channel. The specifications for these "transports" say as much, but more importantly, real-world results back this up.
|
||||
|
||||
Understanding of the connections between each layer of abstraction is essential. At least,
|
||||
knowing the limits of technology at each layer, theoretical and practical, is useful. Not to fear, a good testing setup can help you find these limits in specific terms.
|
||||
|
||||
## Service Time Math
|
||||
|
||||
There will be a limit to how much data you can collect, and from which vantage points you
|
||||
can get it from. That means that sometimes you need to do some sleuthing with the data you
|
||||
have in order to tease out important details.
|
||||
|
||||
For example, say you have a good set of metrics for the app server in the diagram above. You know that the p95 service time is 121ms. Suppose you also know the p95 service time for _the same calls_ at the DB layer. That is 32ms. If you don't know _anything else_ about the calls, you can at least infer that the difference between these two layers is around 89ms (P95). That means that, for 5 out of every 100 operations, somewhere between your web app, your db driver, and your db service, you are spending at least 89ms doing *something*. This could be in the active processing, or in the passive transport of data -- the ethernet layer or otherwise. At least you can set book-end expectations between these layers.
|
||||
|
||||
## Applied Principles
|
||||
|
||||
**outside-in**
|
||||
|
||||
Generally speaking, to understand how service times impact users, you generally want to measure from outer vantage points. To understand why the user sees these service times, you look at the inner layers.
|
||||
|
||||
**detailed enough**
|
||||
|
||||
When constructing layered views of your metrics, it is useful to add the elements you need and
|
||||
can instrument for metrics first. The above diagram goes to a degree of detail that may be too much to be useful in a practical analysis scenario. You could add place holders to capture elements of the transport and inter-connections, additional internal subsystems of layers, etc. This is only useful if it helps tell an important story about the details of your system, i.e. details that you can use to take action for an improvement or to help you focus effort in the right place.
|
||||
|
||||
**clear labeling**
|
||||
|
||||
When you are capturing metrics, make sure that the nesting and vantage points are very clear to observers. A little detail in naming goes a long way to keeping operators honest with each other about what is actually happening in the system.
|
||||
|
||||
**contextual views**
|
||||
|
||||
As you learn to build operational views of systems, be sure to tailor them to the user-impacting services that your business is measured by. This starts on the outside of your system, and cuts through critical paths, focusing on those areas which have the highest variability in responsiveness or availability. It includes the details that need the most attention. You can't start from a rich dashboard of data that includes the kitchen sink to arrive at this. It is an art form that you must constantly practice in order to keep your views operationally relevant. Yes, there will be long-standing themes and objectives, but the more ephemeral factors need to be treated as such.
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -1,63 +0,0 @@
|
||||
---
|
||||
title: Built-In Workloads
|
||||
weight: 40
|
||||
---
|
||||
|
||||
# Built-In Workloads
|
||||
|
||||
There are a few built-in workloads which you may want to run. These can be run from a command without having to
|
||||
configure anything, or they can be tailored with their built-in parameters.
|
||||
|
||||
## Finding Workloads
|
||||
|
||||
To find the built-in scenarios, ask NoSQLBench like this:
|
||||
|
||||
nb --list-workloads
|
||||
|
||||
This specifically lists the workloads which provide named scenarios. Only named scenarios are included. Workloads are
|
||||
contained in yaml files. If a yaml file is in the standard path and contains a root `scenarios` element, then it is
|
||||
included in the listing above.
|
||||
|
||||
Each of these scenarios has a set of parameters which can be changed on the command line.
|
||||
|
||||
## Running Workloads
|
||||
|
||||
You can run them directly, by name with `nb <workload> [<scenario>] [<params>...]`. If not provided, scenario is assumed
|
||||
to be `default`.
|
||||
|
||||
For example, the `cql-iot` workload is listed with the above command, and can be executed like this:
|
||||
|
||||
# put your normal extra params in ... below, like hosts, for example
|
||||
nb cql-iot default ...
|
||||
|
||||
# OR, with scenario name default
|
||||
nb cql-iot ...
|
||||
|
||||
You can add any parameters to the end, and these parameters will be passed automatically to each stage of the scenario
|
||||
as needed. Within the scenario, designers have the ability to lock parameters so that overrides are used appropriately.
|
||||
|
||||
## Conventions
|
||||
|
||||
The built-in workloads follow a set of conventions so that they can be used interchangeably. This is more for users who
|
||||
are using the stages of these workloads directly, or for users who are designing new scenarios to be included in the
|
||||
built-ins.
|
||||
|
||||
### Phases
|
||||
|
||||
Each built-in contains the following tags that can be used to break the workload up into uniform phases:
|
||||
|
||||
- schema - selected with `tags=block:"schema.*"`
|
||||
- rampup - selected with `tags=block:rampup`
|
||||
- main - selected with `tags=block:main`
|
||||
|
||||
### Parameters
|
||||
|
||||
Each built-in has a set of adjustable parameters which is documented below per workload. For example, the cql-iot
|
||||
workload has a `sources` parameter which determines the number of unique devices in the dataset.
|
||||
|
||||
## Adding Workloads
|
||||
|
||||
If you want to add your own workload to NoSQLBench, or request a specific type of workload, please
|
||||
[Request a workload](https://github.com/nosqlbench/nosqlbench/issues) or
|
||||
[Submit a pull request](https://github.com/nosqlbench/nosqlbench/pulls).
|
||||
|
@ -43,12 +43,6 @@
|
||||
<version>${revision}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.nosqlbench</groupId>
|
||||
<artifactId>engine-docs</artifactId>
|
||||
<version>${revision}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.nosqlbench</groupId>
|
||||
<artifactId>engine-core</artifactId>
|
||||
|
2
pom.xml
2
pom.xml
@ -45,7 +45,6 @@
|
||||
<module.engine-extensions>engine-extensions</module.engine-extensions>
|
||||
<module.engine-docker>engine-docker</module.engine-docker>
|
||||
<module.engine-cli>engine-cli</module.engine-cli>
|
||||
<module.engine-docs>engine-docs</module.engine-docs>
|
||||
|
||||
<module.nb5>nb5</module.nb5>
|
||||
<module.nbr>nbr</module.nbr>
|
||||
@ -96,7 +95,6 @@
|
||||
<module>engine-core</module>
|
||||
<module>engine-extensions</module>
|
||||
<module>engine-docker</module>
|
||||
<module>engine-docs</module>
|
||||
<module>engine-clients</module>
|
||||
<module>engine-cli</module>
|
||||
<module>adapters-api</module>
|
||||
|
Loading…
Reference in New Issue
Block a user