diff --git a/engine-docs/docs/slack_invite.md b/engine-docs/docs/slack_invite.md deleted file mode 100644 index ef50cb1db..000000000 --- a/engine-docs/docs/slack_invite.md +++ /dev/null @@ -1 +0,0 @@ -[Slack Invite](https://docs.google.com/forms/d/e/1FAIpQLSdUOJ8iAPqyxsLfh1nBBsKShI53RAeuzYW4bKExmRMWjj4ufQ/viewform) diff --git a/engine-docs/pom.xml b/engine-docs/pom.xml deleted file mode 100644 index 88ae32cc2..000000000 --- a/engine-docs/pom.xml +++ /dev/null @@ -1,83 +0,0 @@ - - - - 4.0.0 - - - mvn-defaults - io.nosqlbench - ${revision} - ../mvn-defaults - - - engine-docs - jar - ${project.artifactId} - CLI for nosqlbench. - - - nosqlbench Docs - - - - - - - - - - - - io.nosqlbench - docsys - ${revision} - - - - - - - - src/main/resources - true - - - - - - - assemble - - true - - - - - org.apache.maven.plugins - maven-assembly-plugin - 3.3.0 - - jar-with-dependencies - - - - - - - - - diff --git a/engine-docs/src/main/java/io/nosqlbench/engine/docs/NosqlBenchRawMarkdownSource.java b/engine-docs/src/main/java/io/nosqlbench/engine/docs/NosqlBenchRawMarkdownSource.java deleted file mode 100644 index 6007a0734..000000000 --- a/engine-docs/src/main/java/io/nosqlbench/engine/docs/NosqlBenchRawMarkdownSource.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2022 nosqlbench - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package io.nosqlbench.engine.docs; - -import io.nosqlbench.nb.annotations.Service; -import io.nosqlbench.api.markdown.providers.DocsRootDirectory; -import io.nosqlbench.api.markdown.providers.RawMarkdownSource; - -@Service(value = RawMarkdownSource.class, selector = "docs-for-eb") -public class NosqlBenchRawMarkdownSource extends DocsRootDirectory { - - @Override - protected String getRootPathName() { - return "docs-for-eb"; - } - -} diff --git a/engine-docs/src/main/java/io/nosqlbench/engine/docs/NosqlbenchMarkdownManifest.java b/engine-docs/src/main/java/io/nosqlbench/engine/docs/NosqlbenchMarkdownManifest.java deleted file mode 100644 index 7b7613afa..000000000 --- a/engine-docs/src/main/java/io/nosqlbench/engine/docs/NosqlbenchMarkdownManifest.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2022 nosqlbench - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package io.nosqlbench.engine.docs; - -import io.nosqlbench.api.docsapi.Docs; -import io.nosqlbench.api.docsapi.DocsBinder; -import io.nosqlbench.docsys.api.DocsysDynamicManifest; -import io.nosqlbench.nb.annotations.Service; - -@Service(value = DocsysDynamicManifest.class, selector = "docs-for-eb") -public class NosqlbenchMarkdownManifest implements DocsysDynamicManifest { - @Override - public DocsBinder getDocs() { - return new Docs().namespace("docs-for-eb") - .addFirstFoundPath( - "engine-docs/src/main/resources/docs-for-nb/", - "docs-for-nb/" - ).setEnabledByDefault(true) - .asDocsBinder(); - } -} diff --git a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/00_yaml_org.md b/engine-docs/src/main/resources/docs-for-nb/designing_workloads/00_yaml_org.md deleted file mode 100644 index 2acdef628..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/00_yaml_org.md +++ /dev/null @@ -1,79 +0,0 @@ ---- -title: 00 YAML Organization -weight: 00 ---- - -# YAML Organization - -It is best to keep every workload self-contained within a single YAML -file, including schema, data rampup, and the main phase of testing. The -phases of testing are controlled by tags as described below. - -**NOTE:** -The phase names described below have been adopted as a convention within -the built-in workloads. It is strongly advised that new workload YAMLs use -the same tagging scheme so that workload are more plugable across YAMLs. - -## Schema phase - -The schema phase is simply a phase of your test which creates the -necessary schema on your target system. For CQL, this generally consists -of a keyspace and one ore more table statements. There is no special -schema layer in nosqlbench. All statements executed are simply statements. -This provides the greatest flexibility in testing since every activity -type is allowed to control its DDL and DML using the same machinery. - -The schema phase is normally executed with defaults for most parameters. 
-This means that statements will execute in the order specified in the -YAML, in serialized form, exactly once. This is a welcome side-effect of -how the initial parameters like _cycles_ is set from the statements which -are activated by tagging. - -You can mark statements as schema phase statements by adding this set of -tags to the statements, either directly, or by block: - - tags: - block: schema - -## Rampup phase - -When you run a performance test, it is very important to be aware of how -much data is present. Higher density tests are more realistic for systems -which accumulate data over time, or which have a large working set of -data. The amount of data on the system you are testing should recreate a -realistic amount of data that you would run in production, ideally. In -general, there is a triangular trade-off between service time, op rate, -and data density. - -It is the purpose of the _rampup_ phase to create the backdrop data on a -target system that makes a test meaningful for some level of data density. -Data density is normally discussed as average per node, but it is also -important to consider distribution of data as it varies from the least -dense to the most dense nodes. - -Because it is useful to be able to add data to a target cluster in an -incremental way, the bindings which are used with a _rampup_ phase may -actually be different from the ones used for a _main_ phase. In most -cases, you want the rampup phase to create data in a way that -incrementally adds to the population of data in the cluster. This allows -you to add some data to a cluster with `cycles=0..1M` and then decide -whether to continue adding data using the next contiguous range of cycles, -with `cycles=1M..2M` and so on. 
- -You can mark statements as rampup phase statements by adding this set of -tags to the statements, either directly, or by block: - - tags: - block: rampup - -## Main phase - -The main phase of a nosqlbench scenario is the one during which you really -care about the metric. This is the actual test that everything else has -prepared your system for. - -You can mark statement as schema phase statements by adding this set of -tags to the statements, either directly, or by block: - - tags: - block: main diff --git a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/01_statement_templates.md b/engine-docs/src/main/resources/docs-for-nb/designing_workloads/01_statement_templates.md deleted file mode 100644 index 7c823a1d7..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/01_statement_templates.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: 01 Statement Templates -weight: 01 ---- - -# Statement Templates - -A valid config file for an activity consists of statement templates, parameters for them, bindings to generate the data -to use with them, and tags for organizing them. - -In essence, the config format is *all about configuring statements*. Every other element in the config format is in some -way modifying or otherwise helping create statements to be used in an activity. - -Statement templates are the single most important part of a YAML config. - -```yaml -# a single statement -statements: - - a single statement body -``` - -This is a valid activity YAML file in and of itself. It has a single statement template. - -It is up to the individual activity types like _cql_, or _stdout_ to interpret the statement template in some way. The -example above is valid as a statement in the stdout activity, but it does not produce a valid CQL statement with the CQL -activity type. The contents of the statement template are free form text. 
If the statement template is valid CQL, then -the CQL activity type can use it without throwing an error. Each activity type determines what a statement means, and -how it will be used. - -You can provide multiple statements, and you can use the YAML pipe to put them on multiple lines, indented a little -further in: - -```yaml -statements: - - | - This is a statement, and the file format doesn't - know how statements will be used! - - | - submit job {alpha} on queue {beta} with options {gamma}; -``` - -Statements can be named: - -```yaml -statements: - - s1: | - This is a statement, and the file format doesn't - know how statements will be used! - - s2: | - submit job {alpha} on queue {beta} with options {gamma}; -``` - -Actually, every statement in a YAML has a name. If you don't provide one, then a name is auto-generated for the -statement based on its position in the YAML file. - diff --git a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/02_data_bindings.md b/engine-docs/src/main/resources/docs-for-nb/designing_workloads/02_data_bindings.md deleted file mode 100644 index bfd125ad4..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/02_data_bindings.md +++ /dev/null @@ -1,118 +0,0 @@ ---- -title: 02 Data Bindings -weight: 02 ---- - -# Data Bindings - -Procedural data generation is built-in to the nosqlbench runtime by way of the -[Virtual DataSet](http://virtdata.io/) library. This allows us to create named data generation recipes. These named -recipes for generated data are called bindings. Procedural generation for test data has -[many benefits](http://docs.virtdata.io/why_virtdata/why_virtdata/) over shipping bulk test data around, including speed -and deterministic behavior. With the VirtData approach, most of the hard work is already done for us. We just have to -pull in the recipes we want. 
- -You can add a bindings section like this: - -```yaml -bindings: - alpha: Identity() - beta: NumberNameToString() - gamma: Combinations('0-9A-F;0-9;A-Z;_;p;r;o;') - delta: WeightedStrings('one:1;six:6;three:3;') -``` - -This is a YAML map which provides names and function specifiers. The specifier named _alpha_ provides a function that -takes an input value and returns the same value. Together, the name and value constitute a binding named alpha. All of -the four bindings together are called a bindings set. - -The above bindings block is also a valid activity YAML, at least for the _stdout_ activity type. The _stdout_ activity -can construct a statement template from the provided bindings if needed, so this is valid: - -```text -[test]$ cat > stdout-test.yaml - bindings: - alpha: Identity() - beta: NumberNameToString() - gamma: Combinations('0-9A-F;0-9;A-Z;_;p;r;o;') - delta: WeightedStrings('one:1;six:6;three:3;') -# EOF (control-D in your terminal) - -[test]$ ./nb run driver=stdout workload=stdout-test cycles=10 -0,zero,00A_pro,six -1,one,00B_pro,six -2,two,00C_pro,three -3,three,00D_pro,three -4,four,00E_pro,six -5,five,00F_pro,six -6,six,00G_pro,six -7,seven,00H_pro,six -8,eight,00I_pro,six -9,nine,00J_pro,six -``` - -Above, you can see that the stdout activity type is idea for experimenting with data generation recipes. It uses the -default `format=csv` parameter above, but it also supports formats like json, inlinejson, readout, and assignments. - -This is all you need to provide a formulaic recipe for converting an ordinal value to a set of field values. Each time -nosqlbench needs to create a set of values as parameters to a statement, the functions are called with an input, known -as the cycle. The functions produce a set of named values that, when combined with a statement template, can yield an -individual statement for a database operation. In this way, each cycle represents a specific operation. 
Since the -functions above are pure functions, the cycle number of an operation will always produce the same operation, thus making -all nosqlbench workloads deterministic. - -In the example above, you can see the cycle numbers down the left. - -If you combine the statement section and the bindings sections above into one activity yaml, you get a slightly -different result, as the bindings apply to the statements that are provided, rather than creating a default statement -for the bindings. See the example below: - -```text -[test]$ cat > stdout-test.yaml -statements: - - | - This is a statement, and the file format doesn't - know how statements will be used! - - | - submit job {alpha} on queue {beta} with options {gamma}; -bindings: - alpha: Identity() - beta: NumberNameToString() - gamma: Combinations('0-9A-F;0-9;A-Z;_;p;r;o;') - delta: WeightedStrings('one:1;six:6;three:3;') -# EOF (control-D in your terminal) - -[test]$ ./nb run driver=stdout workload=stdout-test cycles=10 -This is a statement, and the file format doesn't -know how statements will be used! -submit job 1 on queue one with options 00B_pro; -This is a statement, and the file format doesn't -know how statements will be used! -submit job 3 on queue three with options 00D_pro; -This is a statement, and the file format doesn't -know how statements will be used! -submit job 5 on queue five with options 00F_pro; -This is a statement, and the file format doesn't -know how statements will be used! -submit job 7 on queue seven with options 00H_pro; -This is a statement, and the file format doesn't -know how statements will be used! -submit job 9 on queue nine with options 00J_pro; -``` - -There are a few things to notice here. First, the statements that are executed are automatically alternated between. If -you had 10 different statements listed, they would all get their turn with 10 cycles. Since there were two, each was run -5 times. 
- -Also, the statement that had named anchors acted as a template, whereas the other one was evaluated just as it was. In -fact, they were both treated as templates, but one of them had no anchors. - -On more minor but important detail is that the fourth binding *delta* was not referenced directly in the statements. -Since the statements did not pair up an anchor with this binding name, it was not used. No values were generated for it. - -This is how activities are expected to work when they are implemented correctly. This means that the bindings themselves -are templates for data generation, only to be used when necessary. This means that the bindings that are defined around -a statement are more like a menu for the statement. If the statement uses those bindings with `{named}` anchors, then -the recipes will be used to construct data when that statement is selected for a specific cycle. The cycle number both -selects the statement (via the op sequence) and also provides the input value at the left side of the binding functions. - diff --git a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/03_stmt_params.md b/engine-docs/src/main/resources/docs-for-nb/designing_workloads/03_stmt_params.md deleted file mode 100644 index 2e580842d..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/03_stmt_params.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: 03 Statement Params -weight: 03 - ---- - -# Statement Parameters - -Statements within a YAML can be accessorized with parameters. These are known as _statement params_ and are different -than the parameters that you use at the activity level. They apply specifically to a statement template, and are -interpreted by an activity type when the statement template is used to construct a native statement form. - -For example, the statement parameter `ratio` is used when an activity is initialized to construct the op sequence. 
In -the _cql_ activity type, the statement parameter `prepared` is a boolean that can be used to designated when a CQL -statement should be prepared or not. - -As with the bindings, a params section can be added at the same level, setting additional parameters to be used with -statements. Again, this is an example of modifying or otherwise creating a specific type of statement, but always in a -way specific to the activity type. Params can be thought of as statement properties. As such, params don't really do -much on their own, although they have the same basic map syntax as bindings: - -```yaml -params: - ratio: 1 -``` - -As with statements, it is up to each activity type to interpret params in a useful way. - diff --git a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/04_stmt_tags.md b/engine-docs/src/main/resources/docs-for-nb/designing_workloads/04_stmt_tags.md deleted file mode 100644 index bea327599..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/04_stmt_tags.md +++ /dev/null @@ -1,92 +0,0 @@ ---- -title: 04 Statement Tags -weight: 04 ---- - -# Statement Tags - -Tags are used to mark and filter groups of statements for controlling which ones get used in a given scenario. Tags are -generally free-form, but there is a set of conventions that can make your testing easier. - -An example: - -```yaml -tags: - name: foxtrot - unit: bravo -``` - -### Tag Filtering - -The tag filters provide a flexible set of conventions for filtering tagged statements. Tag filters are usually provided -as an activity parameter when an activity is launched. The rules for tag filtering are (updated in version 3.12): - -0. If no conjugate is specified, `all(...)` is assumed. This is in keeping with the previous default. If you do specify - a conjugate wrapper around the tag filter, it must be in the above form. `all(...)`, `any(...)`, and `none(...)` are - allowed. -1. If no tag filter is specified, then the statement matches. -2. 
A tag name predicate like `tags=name` asserts the presence of a specific tag name, regardless of its value. -3. A tag value predicate like `tags=name:foxtrot` asserts the presence of a specific tag name and a specific value for it. -4. A tag pattern predicate like `tags=name:'fox.*'` asserts the presence of a specific tag name and a value that matches - the provided regular expression. -5. Multiple tag predicates may be specified as in `tags=name:'fox.*',unit:bravo` -6. - 0. If the `all` conjugate form is used (the default), then if any predicate fails to match a tagged element, then the - whole tag filtering expression fails to match. - 1. If the `any` conjugate form is used, then if all predicates fail to match a tagged element, then the whole tag filtering - expression fails to match. - 2. If the `none` conjugate form is used, then if any predicate _matches_, a tagged element, then the whole expression - matches. - -A demonstration: - -```text -[test]$ cat > stdout-test.yaml -tags: - name: foxtrot - unit: bravo -statements: - - "I'm alive!\n" -# EOF (control-D in your terminal) - -# no tag filter matches any -[test]$ ./nb run driver=stdout workload=stdout-test -I'm alive! - -# tag name assertion matches -[test]$ ./nb run driver=stdout workload=stdout-test tags=name -I'm alive! - -# tag name assertion does not match -[test]$ ./nb run driver=stdout workload=stdout-test tags=name2 -02:25:28.158 [scenarios:001] ERROR i.e.activities.stdout.StdoutActivity - Unable to create a stdout statement if you have no active statements or bindings configured. - -# tag value assertion does not match -[test]$ ./nb run driver=stdout workload=stdout-test tags=name:bravo -02:25:42.584 [scenarios:001] ERROR i.e.activities.stdout.StdoutActivity - Unable to create a stdout statement if you have no active statements or bindings configured. - -# tag value assertion matches -[test]$ ./nb run driver=stdout workload=stdout-test tags=name:foxtrot -I'm alive! 
- -# tag pattern assertion matches -[test]$ ./nb run driver=stdout workload=stdout-test tags=name:'fox.*' -I'm alive! - -# tag pattern assertion does not match -[test]$ ./nb run driver=stdout workload=stdout-test tags=name:'tango.*' -02:26:05.149 [scenarios:001] ERROR i.e.activities.stdout.StdoutActivity - Unable to create a stdout statement if you have no active statements or bindings configured. - -# compound tag predicate matches every assertion -[test]$ ./nb run driver=stdout workload=stdout-test tags='name=fox.*',unit=bravo -I'm alive! - -# compound tag predicate does not fully match -[test]$ ./nb run driver=stdout workload=stdout-test tags='name=fox.*',unit=delta -11:02:53.490 [scenarios:001] ERROR i.e.activities.stdout.StdoutActivity - Unable to create a stdout statement if you have no active statements or bindings configured. - -# any(...) form will work as long as one of the tags match -[test]$ ./nb run driver=stdout workload=stdout-test tags='any(name=fox.*,thisone:wontmatch)',unit=bravo -I'm alive! -``` - diff --git a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/05_stmt_blocks.md b/engine-docs/src/main/resources/docs-for-nb/designing_workloads/05_stmt_blocks.md deleted file mode 100644 index f19783f94..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/05_stmt_blocks.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -title: 05 Statement Blocks -weight: 05 ---- - -# Statement Blocks - -All the basic primitives described above (names, statements, bindings, params, tags) can be used to describe and -parameterize a set of statements in a yaml document. In some scenarios, however, you may need to structure your -statements in a more sophisticated way. You might want to do this if you have a set of common statement forms or -parameters that need to apply to many statements, or perhaps if you have several *different* groups of statements that -need to be configured independently. 
- -This is where blocks become useful: - -```text -[test]$ cat > stdout-test.yaml -bindings: - alpha: Identity() - beta: Combinations('u;n;u;s;e;d;') -blocks: - - statements: - - "{alpha},{beta}\n" - bindings: - beta: Combinations('b;l;o;c;k;1;-;COMBINATIONS;') - - statements: - - "{alpha},{beta}\n" - bindings: - beta: Combinations('b;l;o;c;k;2;-;COMBINATIONS;') -# EOF (control-D in your terminal) - -[test]$ ./nb run driver=stdout workload=stdout-test cycles=10 -0,block1-C -1,block2-O -2,block1-M -3,block2-B -4,block1-I -5,block2-N -6,block1-A -7,block2-T -8,block1-I -9,block2-O -``` - -This shows a couple of important features of blocks. All blocks inherit defaults for bindings, params, and tags from the -root document level. Any of these values that are defined at the base document level apply to all blocks contained in -that document, unless specifically overridden within a given block. - diff --git a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/06_more_statements.md b/engine-docs/src/main/resources/docs-for-nb/designing_workloads/06_more_statements.md deleted file mode 100644 index c6a75a230..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/06_more_statements.md +++ /dev/null @@ -1,289 +0,0 @@ ---- -title: 06 More on Statements -weight: 06 ---- - -# More on Statements - -The template forms available in nosqlbench are very flexible. That means that there are multiple ways -of expressing templates for statements or operations. Thankfully, in most cases, the forms look like -what they do, and most of the ways you can imagine constructing a statement will simply work, as long -as the required details are provided for which driver you are using. - -## Statement Delimiting - -Sometimes, you want to specify the text of a statement in different ways. Since statements are strings, the simplest way -for small statements is in double quotes. 
If you need to express a much longer statement with special characters an -newlines, then you can use YAML's literal block notation (signaled by the '|' character) to do so: - -```yaml -statements: - - | - This is a statement, and the file format doesn't - know how statements will be used! - - | - submit job {alpha} on queue {beta} with options {gamma}; -``` - -Notice that the block starts on the following line after the pipe symbol. This is a very popular form in practice -because it treats the whole block exactly as it is shown, except for the initial indentations, which are removed. - -Statements in this format can be raw statements, statement templates, or anything that is appropriate for the specific -activity type they are being used with. Generally, the statements should be thought of as a statement form that you want -to use in your activity -- something that has place holders for data bindings. These place holders are called *named -anchors*. The second line above is an example of a statement template, with anchors that can be replaced by data for -each cycle of an activity. - -There is a variety of ways to represent block statements, with folding, without, with the newline removed, with it -retained, with trailing newlines trimmed or not, and so forth. For a more comprehensive guide on the YAML conventions -regarding multi-line blocks, see -[YAML Spec 1.2, Chapter 8, Block Styles](http://www.yaml.org/spec/1.2/spec.html#Block) - -## Statement Sequences - -To provide a degree of flexibility to the user for statement definitions, multiple statements may be provided together -as a sequence. - -```yaml -# a list of statements -statements: - - "This a statement." - - "The file format doesn't know how statements will be used." 
- - "submit job {job} on queue {queue} with options {options};" - -# an ordered map of statements by name -statements: - name1: statement one - name2: "statement two" -``` - -In the first form, the names are provided automatically by the YAML loader. In the second form, they are specified as -ordered map keys. - -## Statement Properties - -You can also configure individual statements with named properties, using the **statement properties** form: - -```yaml -# a list of statements with properties -statements: - - name: name1 - stmt: statement one - - name: name2 - stmt: statement two -``` - -This is the most flexible configuration format at the statement level. It is also the most verbose. Because this format -names each property of the statement, it allows for other properties to be defined at this level as well. This includes -all of the previously described configuration elements: `name`, `bindings`, `params`, `tags`, and additionally `stmt`. A -detailed example follows: - -```yaml -statements: - - name: foostmt - stmt: "{alpha},{beta}\n" - bindings: - beta: Combinations('COMBINATIONS;') - params: - parm1: pvalue1 - tags: - tag1: tvalue1 - freeparam3: a value, as if it were assigned under the params block. -``` - -In this case, the values for `bindings`, `params`, and `tags` take precedence, overriding those set by the enclosing -block or document or activity when the names match. Parameters called **free parameters** are allowed here, such as -`freeparam3`. These are simply values that get assigned to the params map once all other processing has completed. - -## Named Statement form - -It is possible to mix the **`: `** form as above in the example for mapping statement by name, so long -as some specific rules are followed. An example, which is equivalent to the above: - -```yaml -statements: - - foostmt: "{alpha},{beta}\n" - parm1: pvalue1 - bindings: - beta: Combinations('COMBINATIONS;') - tags: - tag1: tvalue1 -``` - -The rules: - -1. 
You must avoid using both the name property and the initial - **`: `** together. Doing so will cause an error to be thrown. -2. Do not use the **`: `** form in combination with a - **`stmt: `** property. It is not possible to detect if this occurs. Use caution if you choose to mix these forms. - -As explained above, `parm1: pvalue1` is a *free parameter*, and is simply short-hand for setting values in the params -map for the statement. - - -## Named Statement Maps - -By combining all the forms together with a map in the middle, we get this form, which allows for the -enumeration of multiple statements, each with an obvious name, and a set of properties: - -```yaml -statements: - - foostmt: - stmt: "{alpha},{beta}\n" - parm1: pvalue1 - bindings: - beta: Combinations('COMBINATIONS;') - tags: - tag1: tvalue1 - - barstmt: - optype: setvar - parm3: 42 - parm5: true - userid: 2342 -``` - -This form is arguably the easiest to read, but retains all the expressive power of the other forms too. -The distinction between this form and the named properties form is that the structure underneath the -first value is a map rather than a single value. Particularly, under the 'foostmt' name above, all of -content contained within it is formatted as properties of it -- indented properties. - -Here are the basic rules for using this form: - -1. Each statement is indicated by a YAML list entry like '-'. -2. Each entry is a map with a single key. This key is taken as the statement name. -3. The properties of this map work exactly the same as for named properties above, but repeating - the name will throw an error since this is ambiguous. -4. If the template is being used for CQL or another driver type which expects a 'stmt' property, - it must be provided as an explicitly named 'stmt' property as in the foostmt example above. - -Notice in the 'barstmt' example above that there is no "stmt" property. Some drivers -have more flexible op templates may not require this. 
This is just a property name that was chosen -to represent the "main body" of a statement template in the shorter YAML forms. While the 'stmt' -property is required for drivers like CQL which have a solid concept for "statement body", it isn't -required for all driver types which may build their operations from other properties. - -### Per-Statement Format - -It is indeed possible to use any of the three statement formats within each entry of a statement sequence: - -```yaml -statements: - - first statement body - - name: statement3 - stmt: third statement body - - second: second statement body - - forth: fourth statement body - freeparam1: freeparamvalue1 - tags: - type: preload - - fifth: - stmt: fifth statement body - freeparam2: freeparamvalue2 - tags: - tag2: tagvalue2 -``` - -The above is valid nosqlbench YAML, although a reader would need to know about the rules explained above in order to -really make sense of it. For most cases, it is best to follow one format convention, but there is flexibility for -overrides and naming when you need it. The main thing to remember is that the statement form is determined on an -element-by-element basis for maximum flexibility. 
- -## Detailed Examples - -The above examples are explained in detail below in JSON schematic form, to assist users and developers -understanding of the structural rules: - -```yaml -statements: - - # --------------------------------------------------------------------------------------- - - # string form - # detected when the element is a single string value - - - first statement body - - # read as: - # { - # name: 'stmt1', // a generated name is also added - # stmt: 'first stmt body' - # } - - # --------------------------------------------------------------------------------------- - - # properties form - - # detected when the element is a map and the value of the first entry is not a map - - - name: statement3 - stmt: third statement body - - # read as: - # { - # name: 'statement3', - # stmt: 'third statement body' - # } - - # --------------------------------------------------------------------------------------- - - # named statement form: - # detected when reading properties form and the first property name is not a reserved - # word, like stmt, name, params, bindings, tags, ... - - - second: second statement body - - # read as: - # { - # name: 'second', - # stmt: 'second statement body' - # } - - # --------------------------------------------------------------------------------------- - - # properties form with free parameters: - # detected when properties are used which are not reserved words. - # Unrecognized words are pushed into the parameters map automatically. - - - forth: fourth statement body - freeparam1: freeparamvalue1 - tags: - type: preload - - # read as: - # { - # name: 'fourth', - # stmt: 'fourth statement body', - # params: { - # freeparam1: 'freeparamvalue1' - # }, - # tags: { - # tag2: 'tagvalue2' - # } - # } - - # --------------------------------------------------------------------------------------- - - # named statement maps - # detected when the element is a map and the only entry is a map. 
- - - fifth: - stmt: fifth statement body - freeparam2: freeparamvalue2 - tags: - tag2: tagvalue2 - - # read as: - # { - # name: 'fifth', - # stmt: 'fifth statement body' - # params: { - # freeparam2: 'freeparamvalue2' - # }, - # tags: { - # tag2: 'tagvalue2' - # } - # } - - # --------------------------------------------------------------------------------------- -``` - diff --git a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/07_multi_docs.md b/engine-docs/src/main/resources/docs-for-nb/designing_workloads/07_multi_docs.md deleted file mode 100644 index ea0aef88b..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/07_multi_docs.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -title: 07 Multi-Docs -weight: 07 ---- - -# Multi-Docs - -The YAML spec allows for multiple yaml documents to be concatenated in the -same file with a separator: - -```yaml ---- -``` - -This offers an additional convenience when configuring activities. If you -want to parameterize or tag some a set of statements with their own -bindings, params, or tags, but alongside another set of uniquely -configured statements, you need only put them in separate logical -documents, separated by a triple-dash. - -For example: - -```text -[test]$ cat > stdout-test.yaml -bindings: - docval: WeightedStrings('doc1.1:1;doc1.2:2;') -statements: - - "doc1.form1 {docval}\n" - - "doc1.form2 {docval}\n" ---- -bindings: - numname: NumberNameToString() -statements: - - "doc2.number {numname}\n" -# EOF (control-D in your terminal) -[test]$ ./nb run driver=stdout workload=stdout-test cycles=10 -doc1.form1 doc1.1 -doc1.form2 doc1.2 -doc2.number two -doc1.form1 doc1.2 -doc1.form2 doc1.1 -doc2.number five -doc1.form1 doc1.2 -doc1.form2 doc1.2 -doc2.number eight -doc1.form1 doc1.1 -``` - -This shows that you can use the power of blocks and tags together at one -level and also allow statements to be broken apart into a whole other -level of partitioning if desired. 
- -**WARNING:** -The multi-doc support is there as a ripcord when you need it. However, it -is strongly advised that you keep your YAML workloads simple to start and -only use features like the multi-doc when you absolutely need it. For -this, blocks are generally a better choice. See examples in the standard -workloads. diff --git a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/08_template_params.md b/engine-docs/src/main/resources/docs-for-nb/designing_workloads/08_template_params.md deleted file mode 100644 index 467b315b7..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/08_template_params.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: 08 Template Params -weight: 08 ---- - -# Template Params - -All nosqlbench YAML formats support a parameter macro format that applies before YAML processing starts. It is a basic -macro facility that allows named anchors to be placed in the document as a whole: - -```text -<<varname:defaultval>> -# or -TEMPLATE(varname,defaultval) -``` - -In this example, the name of the parameter is `varname`. It is given a default value of `defaultval`. If an activity -parameter named *varname* is provided, as in `varname=barbaz`, then this whole expression will be replaced with -`barbaz`. If none is provided then the default value will be used instead. For example: - -```text -[test]$ cat > stdout-test.yaml -statements: - - "<<linetoprint:MISSING>>\n" -# EOF (control-D in your terminal) - -[test]$ ./nb run driver=stdout workload=stdout-test cycles=1 -MISSING - -[test]$ ./nb run driver=stdout workload=stdout-test cycles=1 linetoprint="THIS IS IT" -THIS IS IT -``` - -If an empty value is desired by default, then simply use an empty string in your template, like `<<varname:>>` or -`TEMPLATE(varname,)`.
- - diff --git a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/09_stmt_naming.md b/engine-docs/src/main/resources/docs-for-nb/designing_workloads/09_stmt_naming.md deleted file mode 100644 index 8f7eedf65..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/09_stmt_naming.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: 09 Statement Naming -weight: 09 ---- - -# Statement Naming - -Docs, Blocks, and Statements can all have names: - -```yaml -name: doc1 -blocks: - - name: block1 - statements: - - stmt1: statement1 - - name: st2 - stmt: statement2 ---- -name: doc2 -... -``` - -This provides a layered naming scheme for the statements themselves. It is -not usually important to name things except for documentation or metric -naming purposes. - -If no names are provided, then names are automatically created for blocks -and statements. Statements assigned at the document level are assigned -to "block0". All other statements are named with the -format `doc#--block#--stmt#`. - -For example, the full name of statement1 above would -be `doc1--block1--stmt1`. - -**NOTE:** -If you anticipate wanting to get metrics for a specific statement in -addition to the other metrics, then you will want to adopt the habit of -naming all your statements something basic and descriptive. - diff --git a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/10_named_scenarios.md b/engine-docs/src/main/resources/docs-for-nb/designing_workloads/10_named_scenarios.md deleted file mode 100644 index 4f50e084d..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/10_named_scenarios.md +++ /dev/null @@ -1,292 +0,0 @@ ---- -title: 10 Named Scenarios -weight: 10 ---- - -# Named Scenarios - -There is one final element of a yaml that you need to know about: _named -scenarios_. 
- -**Named Scenarios allow anybody to run your testing workflows with a -single command.** - -You can provide named scenarios for a workload like this: - -```yaml -# contents of myworkloads.yaml -scenarios: - default: - - run driver=diag cycles=10 alias=first-ten - - run driver=diag cycles=10..20 alias=second-ten - longrun: - - run driver=diag cycles=10M -``` - -This provides a way to specify more detailed workflows that users may want -to run without them having to build up a command line for themselves. - -A couple of other forms are supported in the YAML, for terseness: - -```yaml -scenarios: - oneliner: run driver=diag cycles=10 - mapform: - part1: run driver=diag cycles=10 alias=part2 - part2: run driver=diag cycles=20 alias=part2 -``` - -These forms simply provide finesse for common editing habits, but they are -automatically read internally as a list. In the map form, the names are -discarded, but they may be descriptive enough for use as inline docs for -some users. The order is retained as listed, since the names have no -bearing on the order. - -## Scenario selection - -When a named scenario is run, it is *always* named, so that it can be -looked up in the list of named scenarios under your `scenarios:` property. -The only exception to this is when an explicit scenario name is not found -on the command line, in which case it is automatically assumed to be _ -default_. - -Some examples may be more illustrative: - -```text -# runs the scenario named 'default' if it exists, or throws an error if it does not. -nb myworkloads -# or -nb myworkloads default - -# runs the named scenario 'longrun' if it exists, or throws an error if it does not. -nb myworkloads longrun - -# runs the named scenario 'longrun' if it exists, or throws an error if it does not. -# this is simply the canonical form which is more verbose, but more explicit. 
-nb scenario myworkloads longrun - -# run multiple named scenarios from one workload, and then some from another -nb scenario myworkloads longrun default longrun scenario another.yaml name1 name2 -# In this form ^ you may have to add the explicit form to avoid conflicts between -# workload names and scenario names. That's why the explicit form is provided, after all. -``` - -## Workload selection - -The examples above contain no reference to a workload (formerly called -_yaml_). They don't need to, as they refer to themselves implicitly. You -may add a `workload=` parameter to the command templates if you like, but -this is never needed for basic use, and it is error prone to keep the -filename matched to the command template. Just leave it out by default. - -_However_, if you are doing advanced scripting across multiple systems, -you can actually provide a `workload=` parameter particularly to use -another workload description in your test. - -**NOTE:** -This is a powerful feature for workload automation and organization. -However, it can get unwieldy quickly. Caution is advised for deep-linking -too many scenarios in a workspace, as there is no mechanism for keeping -them in sync when small changes are made. - -## Named Scenario Discovery - -For named scenarios, there is a way for users to find all the named -scenarios that are currently bundled or in view of their current -directory. A couple of simple rules must be followed by scenario publishers -in order to keep things simple: - -1. Workload files in the current directory `*.yaml` are considered. -2. Workload files under the relative path `activities/` with - name `*.yaml` are considered. -3. The same rules are used when looking in the bundled nosqlbench, so - built-ins come along for the ride. -4. Any workload file that contains a `scenarios:` tag is included, but all - others are ignored. - -This doesn't mean that you can't use named scenarios for workloads in -other locations.
It simply means that when users use -the `--list-scenarios` option, these are the only ones they will see -listed. - -## Parameter Overrides - -You can override parameters that are provided by named scenarios. Any -parameter that you specify on the command line after your workload and -optional scenario name will be used to override or augment the commands -that are provided for the named scenario. - -This is powerful, but it also means that you can sometimes munge -user-provided activity parameters on the command line with the named -scenario commands in ways that may not make sense. To solve this, the -parameters in the named scenario commands may be locked. You can lock them -silently, or you can provide a verbose locking that will cause an error if -the user even tries to adjust them. - -Silent locking is provided with a form like `param==value`. Any silent -locked parameters will reject overrides from the command line, but will -not interrupt the user. - -Verbose locking is provided with a form like `param===value`. Any time a -user provides a parameter on the command line for the named parameter, an -error is thrown and they are informed that this is not possible. This -level is provided for cases in which you would not want the user to be -unaware of an unset parameter which is germane and specific to the named -scenario. - -All other parameters provided by the user will take the place of the -same-named parameters provided in *each* command template, in the order -they appear in the template. Any other parameters provided by the user -will be added to *each* -of the command templates in the order they appear on the command line. - -This is a little counter-intuitive at first, but once you see some -examples it should make sense.
- -## Parameter Override Examples - -Consider a simple workload with three named scenarios: - -```yaml -# basics.yaml -scenarios: - s1: run driver=stdout cycles=10 - s2: run driver=stdout cycles==10 - s3: run driver=stdout cycles===10 - -bindings: - c: Identity() - -statements: - - A: "cycle={c}\n" -``` - -Running this with no options prompts the user to select one of the named -scenarios: - -```text -$ nb basics -ERROR: Unable to find named scenario 'default' in workload 'basics', but you can pick from s1,s2,s3 -$ -``` - -### Basic Override example - -If you run the first scenario `s1` with your own value for `cycles=7`, it -does as you ask: - -```text -$ nb basics s1 cycles=7 -Logging to logs/scenario_20200324_205121_554.log -cycle=0 -cycle=1 -cycle=2 -cycle=3 -cycle=4 -cycle=5 -cycle=6 -$ -``` - -### Silent Locking example - -If you run the second scenario `s2` with your own value for `cycles=7`, -then it does what the locked parameter -`cycles==10` requires, without telling you that it is ignoring the -specified value on your command line. - -```text -$ nb basics s2 cycles=7 -Logging to logs/scenario_20200324_205339_486.log -cycle=0 -cycle=1 -cycle=2 -cycle=3 -cycle=4 -cycle=5 -cycle=6 -cycle=7 -cycle=8 -cycle=9 -$ -``` - -Sometimes, this is appropriate, such as when specifying settings -like `threads==` for schema phases. - -### Verbose Locking example - -If you run the third scenario `s3` with your own value for `cycles=7`, -then you will get an error telling you that this is not possible. -Sometimes you want to make sure tha the user knows a parameter should not -be changed, and that if they want to change it, they'll have to make their -own custom version of the scenario in question. - -```text -$ nb basics s3 cycles=7 -ERROR: Unable to reassign value for locked param 'cycles===7' -$ -``` - -Ultimately, it is up to the scenario designer when to lock parameters for -users. 
The built-in workloads offer some examples on how to set these -parameters so that the right value are locked in place without bother the -user, but some values are made very clear in how they should be set. -Please look at these examples for inspiration when you need. - -## Forcing Undefined (default) Parameters - -If you want to ensure that any parameter in a named scenario template -remains unset in the generated scenario script, you can assign it a value -of UNDEF. The locking behaviors described above apply to this one as well. -Thus, for schema commands which rely on the default sequence length (which -is based on the number of active statements), you can set cycles==UNDEF to -ensure that when a user passes a cycles parameter the schema phase doesn't -break with too many cycles. - -## Automatic Parameters - -Some parameters are already known due to the fact that you are using named -scenarios. - -### workload - -The `workload` parameter is, by default, set to the logical path (fully -qualified workload name) of the yaml file containing the named scenario. -However, if the command template contains this parameter, it may be -overridden by users as any other parameter depending on the assignment -operators as explained above. - -### alias - -The `alias` parameter is, by default, set to the expanded name of -WORKLOAD_SCENARIO_STEP, which means that each activity within the scenario -has a distinct and symbolic name. This is important for distinguishing -metrics from one another across workloads, named scenarios, and steps -within a named scenario. The above words are interpolated into the alias -as follows: - -- WORKLOAD - The simple name part of the fully qualified workload name. - For example, with a workload (yaml path) of foo/bar/baz.yaml, the - WORKLOAD name used here would be `baz`. - -- SCENARIO - The name of the scenario as provided on the command line. - -- STEP - The name of the step in the named scenario. 
If you used the list - or string forms to provide a command template, then the steps are - automatically named as a zero-padded number representing the step in the - named scenario, starting from `000`, per named scenario. (The numbers - are not globally assigned) - -Because it is important to have uniquely named activities for the sake of -sane metrics and logging, any alias provided when using named scenarios -which does not include the three tokens above will cause a warning to be -issued to the user explaining why this is a bad idea. - -**NOTE:** -UNDEF is handled before alias expansion above, so it is possible to force -the default activity naming behavior above with `alias===UNDEF`. This is -generally recommended, and will inform users if they try to set the alias -in an unsafe way. - - diff --git a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/99_yaml_diagnostics.md b/engine-docs/src/main/resources/docs-for-nb/designing_workloads/99_yaml_diagnostics.md deleted file mode 100644 index 38903ac83..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/99_yaml_diagnostics.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: YAML Diagnostics -weight: 99 ---- - -# YAML Diagnostics - -This section describes errors that you might see if you have a YAML loading issue, and what you can do to fix them. - -### Undefined Name-Statement Tuple - -This exception is thrown when the statement body is not found in a statement definition in any of the supported formats. -For example, the following block will cause an error: - -```yaml -statements: - - name: statement-foo - params: - aparam: avalue -``` - -This is because `name` and `params` are reserved property names -- removed from the list of name-value pairs before free -parameters are read. If the statement is not defined before free parameters are read, then the first free parameter is -taken as the name and statement in `name: statement` form. 
- -To correct this error, supply a statement property in the map, or simply replace the `name: statement-foo` entry with a -`statement-foo: statement body` at the top of the map: - -Either of these will work: - -```yaml -statements: - - name: statement-foo - stmt: statement body - params: - aparam: avalue ---- -statements: - - statement-foo: statement body - params: - aparam: avalue -``` - -In both cases, it is clear to the loader where the statement body should come from, and what (if any) explicit naming -should occur. - -### Redefined Name-Statement Tuple - -This exception is thrown when the statement name is defined in multiple ways. This is an explicit exception to avoid -possible ambiguity about which value the user intended. For example, the following statements definition will cause an -error: - -```yaml -statements: - - name: name1 - name2: statement body -``` - -This is an error because the statement is not defined before free parameters are read, and the `name: statement` form -includes a second definition for the statement name. In order to correct this, simply remove the separate `name` entry, -or use the `stmt` property to explicitly set the statement body. Either of these will work: - -```yaml -statements: - - name2: statement body ---- -statements: - - name: name1 - stmt: statement body -``` - -In both cases, there is only one name defined for the statement according to the supported formats. - -### YAML Parsing Error - -This exception is thrown when the YAML format is not recognizable by the YAML parser. If you are not working from -examples that are known to load cleanly, then please review your document for correctness according to the -[YAML Specification](). - -If you are sure that the YAML should load, then please -[submit a bug report](https://github.com/nosqlbench/nosqlbench/issues/new?labels=bug) with details on the type of YAML -file you are trying to load. 
- -### YAML Construction Error - -This exception is thrown when the YAML was loaded, but the configuration object was not able to be constructed from the -in-memory YAML document. If this error occurs, it may be a bug in the YAML loader implementation. Please -[submit a bug report](https://github.com/nosqlbench/nosqlbench/issues/new?labels=bug) with details on the type of YAML -file you are trying to load. - diff --git a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/index.md b/engine-docs/src/main/resources/docs-for-nb/designing_workloads/index.md deleted file mode 100644 index fc8b76a0e..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/designing_workloads/index.md +++ /dev/null @@ -1,50 +0,0 @@ ---- -title: Designing Workloads -weight: 40 ---- - -# Designing Workloads - -Workloads in nosqlbench are always controlled by a workload definition. -Even the built-in workloads are simply pre-configured and controlled -from a single YAML file which is bundled internally. - -With nosqlbench a standard YAML configuration format is provided that is -used across all activity types. This makes it easy to specify -statements, statement parameters, data bindings, and tags. This section -describes the standard YAML format and how to use it. - -It is recommended that you read through the examples in each of the -design sections in order. This guide was designed to give you a detailed -understanding of workload construction with nosqlbench. The examples -will also give you better insight into how nosqlbench works at a -fundamental level. - -## Multi-Protocol Support - -You will notice that this guide is not overly CQL-specific. That is -because nosqlbench is a multi-protocol tool. All that is needed for you -to use this guide with other protocols is the release of more activity -types. Try to keep that in mind as you think about designing workloads. 
- -## Advice for new builders - -### Review existing examples - -The built-in workloads that are included with nosqlbench are also easy to copy out as a starting point. You just need to -use two commands: - - # find a workload you want to copy - nb --list-workloads - - - # copy a workload to your local directory - nb --copy cql-iot - -### Follow the conventions - -The tagging conventions described under the YAML Conventions section -will make your testing go smoother. All of the baselines that we publish -for nosqlbench will use this form. - - diff --git a/engine-docs/src/main/resources/docs-for-nb/drivers/index.md b/engine-docs/src/main/resources/docs-for-nb/drivers/index.md deleted file mode 100644 index 08599d00e..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/drivers/index.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -title: Driver Types -weight: 50 ---- - -# Driver Types - -Each nosqlbench scenario is comprised of one or more activities of a specific type. The types of activities available -are provided by the version of nosqlbench. - -You can see this list at any time by running the command: - - nb --list-drivers - -Each one comes with its own built-in documentation. It can be accessed with this command: - - nb help <driver> - -This section contains the per-driver documentation that you get when you run the above command. These driver docs are -auto-populated when NoSQLBench is built, so they are exactly the same as you will see with the above command, only -rendered in HTML. - -There may be additional documentation related to a given driver.
To see the list of help topics, you -can run this command: - - nb help topics - -The help for any topic can be read this way: - - nb help diff --git a/engine-docs/src/main/resources/docs-for-nb/getting_started/01_example_commands.md b/engine-docs/src/main/resources/docs-for-nb/getting_started/01_example_commands.md deleted file mode 100644 index 270635dc4..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/getting_started/01_example_commands.md +++ /dev/null @@ -1,196 +0,0 @@ ---- -title: 01 Commands -weight: 2 ---- - -# Example Commands - -Let's run a simple test against a cluster to establish some basic -familiarity with the NoSQLBench. - -## Create a Schema - -We will start by creating a simple schema in the database. From your -command line, go ahead and execute the following command, replacing -the `host=` with that of one of your database nodes. - -```text -./nb run driver=cql workload=cql-keyvalue tags=block:"schema.*" host= -``` - -This command is creating the following schema in your database: - -```cql -CREATE KEYSPACE baselines - WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'} - AND durable_writes = true; - -CREATE TABLE baselines.keyvalue ( - key text PRIMARY KEY, - value text -) -``` - -Let's break down each of those command line options. - -`run` tells nosqlbench to run an activity. - -`driver=...` is used to specify the activity type (driver). In this case -we are using `cql`, which tells nosqlbench to use the DataStax Java Driver -and execute CQL statements against a database. - -`workload=...` is used to specify the workload definition file that -defines the activity. - -In this example, we use `cql-keyvalue` which is a pre-built workload that -is packaged with nosqlbench. - -`tags=block:"schema.*"` tells nosqlbench to run the yaml block that has -the `block:"schema.*"` defined as one of its tags. - -In this example, that is the DDL portion of the `cql-keyvalue` -workload. 
`host=...` tells nosqlbench how to connect to your database, -only one host is necessary. - -If you like, you can verify the result of this command by decribing your -keyspace in cqlsh or DataStax Studio with -`DESCRIBE KEYSPACE baselines`. - -## Load Some Data - -Before running a test of typical access patterns where you want to capture -the results, you need to make the test more interesting than loading an -empty table. For this, we use the rampup phase. - -Before sending our test writes to the database, we will use the `stdout` -activity type so we can see what nosqlbench is generating for CQL -statements. - -Go ahead and execute the following command: - - ./nb run driver=stdout workload=cql-keyvalue tags=block:rampup cycles=10 - -You should see 10 of the following statements in your console - -```cql -insert into baselines.keyvalue (key, value) values (0,382062539); -insert into baselines.keyvalue (key, value) values (1,774912474); -insert into baselines.keyvalue (key, value) values (2,949364593); -insert into baselines.keyvalue (key, value) values (3,352527683); -insert into baselines.keyvalue (key, value) values (4,351686621); -insert into baselines.keyvalue (key, value) values (5,114304900); -insert into baselines.keyvalue (key, value) values (6,439790106); -insert into baselines.keyvalue (key, value) values (7,564330072); -insert into baselines.keyvalue (key, value) values (8,296173906); -insert into baselines.keyvalue (key, value) values (9,97405552); -``` - -NoSQLBench deterministically generates data, so the generated values will -be the same from run to run. - -Now we are ready to write some data to our database. Go ahead and execute -the following from your command line: - - ./nb run driver=cql workload=cql-keyvalue tags=block:rampup host= cycles=100k --progress console:1s - -Note the differences between this and the command that we used to generate -the schema. 
- -`tags=block:rampup` is running the yaml block in `cql-keyvalue` that has -only INSERT statements. - -`cycles=100k` will run a total of 100,000 operations, in this case, -100,000 writes. You will want to pick an appropriately large number of -cycles in actual testing to make your main test meaningful. - -**NOTE:** -The cycles parameter is not just a quantity. It is a range of values. -The `cycles=n` format is short for -`cycles=0..n`, which makes cycles a zero-based range. For example, -cycles=5 means that the activity will use cycles 0,1,2,3,4, but not 5. The -reason for this is explained in detail in the Activity Parameters section. - -These parameters are explained in detail in the section on _Activity -Parameters_. - -`--progress console:1s` will print the progression of the run to the -console every 1 second. - -You should see output that looks like this - -```text -cql-keyvalue: 0.00%/Running (details: min=0 cycle=1 max=100000) -cql-keyvalue: 0.00%/Running (details: min=0 cycle=1 max=100000) -cql-keyvalue: 0.32%/Running (details: min=0 cycle=325 max=100000) -cql-keyvalue: 1.17%/Running (details: min=0 cycle=1171 max=100000) -cql-keyvalue: 2.36%/Running (details: min=0 cycle=2360 max=100000) -cql-keyvalue: 3.65%/Running (details: min=0 cycle=3648 max=100000) -cql-keyvalue: 4.61%/Running (details: min=0 cycle=4613 max=100000) -cql-keyvalue: 5.59%/Running (details: min=0 cycle=5593 max=100000) -cql-keyvalue: 7.14%/Running (details: min=0 cycle=7138 max=100000) -cql-keyvalue: 8.87%/Running (details: min=0 cycle=8868 max=100000) -... -cql-keyvalue: 100.00%/Finished (details: min=0 cycle=100000 max=100000) -``` - -## Run the main test phase - -Now that we have a base dataset of 100k rows in the database, we will now -run a mixed read / write workload, by default this runs a 50% read / 50% -write workload. 
- - ./nb run driver=cql workload=cql-keyvalue tags=block:main host= cycles=100k cyclerate=5000 threads=50 --progress console:1s - -You should see output that looks like this: - -```text -Logging to logs/scenario_20190812_154431_028.log -cql-keyvalue: 0.50%/Running (details: min=0 cycle=500 max=100000) -cql-keyvalue: 2.50%/Running (details: min=0 cycle=2500 max=100000) -cql-keyvalue: 6.70%/Running (details: min=0 cycle=6700 max=100000) -cql-keyvalue: 11.16%/Running (details: min=0 cycle=11160 max=100000) -cql-keyvalue: 14.25%/Running (details: min=0 cycle=14250 max=100000) -cql-keyvalue: 18.41%/Running (details: min=0 cycle=18440 max=100000) -cql-keyvalue: 22.76%/Running (details: min=0 cycle=22760 max=100000) -cql-keyvalue: 27.27%/Running (details: min=0 cycle=27300 max=100000) -cql-keyvalue: 31.81%/Running (details: min=0 cycle=31810 max=100000) -cql-keyvalue: 36.34%/Running (details: min=0 cycle=36340 max=100000) -cql-keyvalue: 40.90%/Running (details: min=0 cycle=40900 max=100000) -cql-keyvalue: 45.48%/Running (details: min=0 cycle=45480 max=100000) -cql-keyvalue: 50.05%/Running (details: min=0 cycle=50050 max=100000) -cql-keyvalue: 54.36%/Running (details: min=0 cycle=54360 max=100000) -cql-keyvalue: 58.91%/Running (details: min=0 cycle=58920 max=100000) -cql-keyvalue: 63.40%/Running (details: min=0 cycle=63400 max=100000) -cql-keyvalue: 66.96%/Running (details: min=0 cycle=66970 max=100000) -cql-keyvalue: 71.61%/Running (details: min=0 cycle=71610 max=100000) -cql-keyvalue: 76.11%/Running (details: min=0 cycle=76130 max=100000) -cql-keyvalue: 80.66%/Running (details: min=0 cycle=80660 max=100000) -cql-keyvalue: 85.22%/Running (details: min=0 cycle=85220 max=100000) -cql-keyvalue: 89.80%/Running (details: min=0 cycle=89800 max=100000) -cql-keyvalue: 94.46%/Running (details: min=0 cycle=94460 max=100000) -cql-keyvalue: 98.93%/Running (details: min=0 cycle=98930 max=100000) -cql-keyvalue: 100.00%/Finished (details: min=0 cycle=100000 max=100000) -``` - -We have a 
few new command line options here: - -`tags=block:main` is using a new block in our activity's yaml that -contains both read and write queries. - -`threads=50` is an important one. The default for nosqlbench is to run -with a single thread. This is not adequate for workloads that will be -running many operations, so threads is used as a way to increase -concurrency on the client side. - -`cyclerate=5000` is used to control the operations per second that are -initiated by nosqlbench. This command line option is the primary means to -rate limit the workload and here we are running at 5000 ops/sec. - -## Now What? - -Note in the above output, we -see `Logging to logs/scenario_20190812_154431_028.log`. - -By default nosqlbench records the metrics from the run in this file, we -will go into detail about these metrics in the next section Viewing -Results. diff --git a/engine-docs/src/main/resources/docs-for-nb/getting_started/02_example_results.md b/engine-docs/src/main/resources/docs-for-nb/getting_started/02_example_results.md deleted file mode 100644 index 86a28b03e..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/getting_started/02_example_results.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -title: 02 Results -weight: 3 ---- - -# Example Results - -We just ran a very simple workload against our database. In that example, -we saw that nosqlbench writes to a log file and it is in that log file -where the most basic form of metrics are displayed. - -## Log File Metrics - -For our previous run, we saw that nosqlbench was writing -to `logs/scenario_20190812_154431_028.log` - -Even when you don't configure nosqlbench to write its metrics to another -location, it will periodically report all the metrics to the log file. At -the end of a scenario, before nosqlbench shuts down, it will flush the -partial reporting interval again to the logs. This means you can always -look in the logs for metrics information. 
- -**WARNING:** -If you look in the logs for metrics, be aware that the last report will -only contain a partial interval of results. When looking at the last -partial window, only metrics which average over time or which compute the -mean for the whole test will be meaningful. - -Below is a sample of the log that gives us our basic metrics. There is a -lot to digest here, for now we will only focus a subset of the most -important metrics. - -```text -2019-08-12 15:46:00,274 INFO [main] i.e.c.ScenarioResult [ScenarioResult.java:48] -- BEGIN METRICS DETAIL -- -2019-08-12 15:46:00,294 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=GAUGE, name=cql-keyvalue.cycles.config.burstrate, value=5500.0 -2019-08-12 15:46:00,295 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=GAUGE, name=cql-keyvalue.cycles.config.cyclerate, value=5000.0 -2019-08-12 15:46:00,295 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=GAUGE, name=cql-keyvalue.cycles.waittime, value=3898782735 -2019-08-12 15:46:00,298 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=HISTOGRAM, name=cql-keyvalue.resultset-size, count=100000, min=0, max=1, mean=8.0E-5, stddev=0.008943914131967056, median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0 -2019-08-12 15:46:01,703 INFO [main] i.e.c.ScenarioResult [ScenarioResult.java:56] -- END METRICS DETAIL -- -``` - -The log contains lots of information on metrics, but this is obviously _ -not_ the most desirable way to consume metrics from nosqlbench. - -We recommend that you use one of these methods, according to your -environment or tooling available: - -1. `--docker-metrics` with a local docker-based grafana dashboard (See the - section on Docker Based Metrics) -2. Send your metrics to a dedicated graphite server - with `--report-graphite-to graphitehost` -3. Record your metrics to local CSV files - with `--report-csv-to my_metrics_dir` -4. 
Record your metrics to HDR logs - with `--log-histograms my_hdr_metrics.log` - -See the command line reference for details on how to route your metrics to -a metrics collector or format of your preference. diff --git a/engine-docs/src/main/resources/docs-for-nb/getting_started/03_reading_metrics.md b/engine-docs/src/main/resources/docs-for-nb/getting_started/03_reading_metrics.md deleted file mode 100644 index ff739d3dc..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/getting_started/03_reading_metrics.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: 03 Metrics -weight: 4 ---- - -# Example Metrics - -A set of core metrics are provided for every workload that runs with nosqlbench, regardless of the activity type and -protocol used. This section explains each of these metrics and shows an example of them from the log file. - -## metric: result - -This is the primary metric that should be used to get a quick idea of the throughput and latency for a given run. It -encapsulates the entire operation life cycle ( ie. bind, execute, get result back ). - -For this example we see that we averaged 3732 operations / second with 3.6ms 75th percentile latency and 23.9ms 99th -percentile latency. Note the raw metrics are in microseconds. This duration_unit may change depending on how a user -configures nosqlbench, so always double-check it. - -```text -2019-08-12 15:46:01,310 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=TIMER, - name=cql-keyvalue.result, count=100000, min=233.48, max=358596.607, mean=3732.00338612, stddev=10254.850416061185, - median=1874.815, p75=3648.767, p95=10115.071, p98=15855.615, p99=23916.543, p999=111292.415, - mean_rate=4024.0234405430424, m1=3514.053841156124, m5=3307.431472596865, m15=3268.6786509004132, - rate_unit=events/second, duration_unit=microseconds -``` - -## metric: result-success - -This metric shows whether there were any errors during the run. 
You can confirm that the count is equal to the number of -cycles for the run if you are expecting or requiring zero failed operations. - -Here we see that all 100k of our cycles succeeded. Note that the metrics for throughput and latency here are slightly -different than the `results` metric simply because this is a separate timer that only includes operations which -completed with no exceptions. - -```text -2019-08-12 15:46:01,452 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=TIMER, - name=cql-keyvalue.result-success, count=100000, min=435.168, max=358645.759, mean=3752.40990808, - stddev=10251.524945886964, median=1889.791, p75=3668.479, p95=10154.495, p98=15884.287, p99=24280.063, - p999=111443.967, mean_rate=4003.3090048756894, m1=3523.40328629036, m5=3318.8463896065778, m15=3280.480326762243, - rate_unit=events/second, duration_unit=microseconds -``` - -## metric: resultset-size - -For read workloads, this metric shows the size of result sent back to nosqlbench from the server. This is useful to -confirm that you are reading rows that already exist in the database. - -```text -2019-08-12 15:46:00,298 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=HISTOGRAM, - name=cql-keyvalue.resultset-size, count=100000, min=0, max=1, mean=8.0E-5, stddev=0.008943914131967056, - median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0 -``` - -#### metric: tries - -NoSQLBench will retry failures 10 times by default, this is configurable via the `maxtries` command line option for the -cql activity type. This metric shows a histogram of the number of tries that each operation required, in this example, -there were no retries as the `count` is 100k. 
- -```text -2019-08-12 15:46:00,341 INFO [main] i.e.c.ScenarioResult [Slf4jReporter.java:373] type=HISTOGRAM, - name=cql-keyvalue.tries, count=100000, min=1, max=1, mean=1.0, stddev=0.0, median=1.0, - p75=1.0, p95=1.0, p98=1.0, p99=1.0, p999=1.0 -``` - -### More Metrics - -nosqlbench extends many ways to report the metrics from a run, including: - -- Built-in Docker Dashboard -- Reporting to CSV -- Reporting to Graphite -- Reporting to HDR - -To get more information on these options, see the output of - - ./nb --help - -### Congratulations - -You have completed your first run with nosqlbench! - -In the 'Next Steps' section, you'll find options for how to continue, whether you are looking for basic testing or -something more advanced. - diff --git a/engine-docs/src/main/resources/docs-for-nb/getting_started/04_next_steps.md b/engine-docs/src/main/resources/docs-for-nb/getting_started/04_next_steps.md deleted file mode 100644 index 7103945e5..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/getting_started/04_next_steps.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Next Steps -weight: 5 ---- - -# Next Steps - -Now that you've run nosqlbench for the first time and seen what it does, you can choose what level of customization you -want for further testing. - -The sections below describe key areas that users typically customize when working with nosqlbench. - -Everyone who uses nosqlbench will want to get familiar with the 'NoSQLBench Basics' section below. This is essential -reading for new and experienced testers alike. - -## High-Level Users - -Several canonical workloads are already baked-in to nosqlbench for immediate use. If you are simply wanting to drive -workloads from nosqlbench without building a custom workload, then you'll want to learn about the available workloads -and their options. - -Recommended reading for high-level testing workflow: -1. 'Built-In Workloads' -2. 
'NoSQLBench Basics'
If after downloading and executing nb, you get an error, -please consult the -[AppImage troubleshooting page](https://docs.appimage.org/user-guide/run-appimages.html#troubleshooting) -. - -This documentation assumes you are using the Linux binary initiating -NoSqlBench commands with `./nb`. If you are using the jar, just -replace `./nb` with `java -jar nb.jar` when running commands. If you are -using the jar version, Java 15 is recommended, and will be required soon. - -## Run a cluster - -This section requires you to have a CQL system to connect to. -If you don’t already have one, you can start an instance of DSE with this one-liner: - - docker run -e DS_LICENSE=accept --name my-dse -p 9042:9042 -d datastax/dse-server:6.7.7 - -or consult the instructions at the -[Apache Cassandra docker hub landing page](https://hub.docker.com/_/cassandra). - -## Running - -To run a simple built-in workload run: - - ./nb cql-iot - -To get a list of built-in scenarios run: - - # Get a list of all named scenarios and parameters - ./nb --list-scenarios - -If you want a simple list of yamls which contain named scenarios, run: - - # Get a simple list of yamls containing named scenarios - ./nb --list-workloads - -**NOTE:** -Note: These commands will include workloads that were shipped with nb and -workloads in your local directory. To learn more about how to design -custom workloads see -[designing workloads](/index.html#/docs/designing_workloads.html) - -To provide your own contact points (comma separated), add the `hosts=` -parameter - - ./nb cql-iot hosts=host1,host2 - -Additionally, if you have docker installed on your local system, and your user has permissions to use it, you can use -`--docker-metrics` to stand up a live metrics dashboard at port 3000. - - ./nb cql-iot --docker-metrics - -This example doesn't go into much detail about what it is doing. 
It is here to show you how quickly you can start -running real workloads without having to learn much about the machinery that makes it happen. - -The rest of this section has a more elaborate example that exposes some of -the basic options you may want to adjust for your first serious test. - -**NOTE:** -If you want to see system-level metrics from your cluster, it is possible -to get these as well as Apache Cassandra level metrics by using the DSE -Metrics Collector (if using DSE), or by setting up a metrics feed to the -Prometheus instance in your local docker stack. You can find the DSE -Metrics Collector docs -[here](https://docs.datastax.com/en/monitoring/doc/monitoring/metricsCollector/mcExportMetricsDocker.html) -. diff --git a/engine-docs/src/main/resources/docs-for-nb/nosqlbench/getting_support.md b/engine-docs/src/main/resources/docs-for-nb/nosqlbench/getting_support.md deleted file mode 100644 index fe9ee2a1e..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/nosqlbench/getting_support.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: Getting Support -weight: 10 ---- - -# Getting Support - -In general, our goals with NoSQLBench are to make the help systems and -examples wrap around the users like a suit of armor, so that they feel -capable of doing most things without having to ask for help. Please keep -this in mind when looking for personal support form our community, and -help us find those places where the docs are lacking. Maybe you can help -us by adding some missing docs! - -## Built-In Docs - -The documentation for NoSQLBench is quite thorough. On the command line, -you can see a list of built-in docs with the command: - - nb help topics - -To read any topic, simply use the command: - - nb help - -The documentation system you are looking at right now includes the same -docs you can find above already and more. They are automatically included -when NoSQLBench is built. - -## NoSQLBench Discord Server - -We have a discord server. 
This is where users and developers can discuss -anything about NoSQLBench and support each other. -Please [join us](https://discord.gg/dBHRakusMN) there if you are a new -user of NoSQLBench! - -## General Feedback - -These guidelines are mirrored at the -[Submitting Feedback](https://github.com/nosqlbench/nosqlbench/wiki/Submitting-Feedback) -wiki page at the nosqlbench project site, which is also where -any `[Submit Feedback]` links should will take you. - -## Bug Fixes - -If you think you have found a bug, please -[file a bug report](https://github.com/nosqlbench/nosqlbench/issues/new?labels=bug) -. nosqlbench is actively used within DataStax, and verified bugs will get -attention as resources permit. Bugs reports which are more detailed, or -bug reports which include steps to reproduce will get attention first. - -## Feature Requests - -If you would like to see something in nosqlbench that is not there -yet,please -[submit a feature request](https://github.com/nosqlbench/nosqlbench/issues/new?labels=feature) -. - -## Documentation Requests - -If you would like to see a specific nosqlbench or testing topic added to -the guidebook, please -[request docs content](https://github.com/nosqlbench/nosqlbench/issues/new?labels=docs) -. - diff --git a/engine-docs/src/main/resources/docs-for-nb/nosqlbench/index.md b/engine-docs/src/main/resources/docs-for-nb/nosqlbench/index.md deleted file mode 100644 index 53d20f4ea..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/nosqlbench/index.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: NoSQLBench Intro -weight: 0 ---- - -# Welcome to NoSQLBench - -Welcome to the documentation for NoSQLBench. This is a power tool that emulates real application workloads. This means -that you can fast-track performance, sizing and data model testing without writing your own testing harness. - -To get started right away, jump to the -[Quick Start Example](/index.html#/docs/getting_started) from the menu on the left. 
- -To see the ways you can get NoSQLBench, check out the project site -[DOWNLOADS.md](https://github.com/nosqlbench/nosqlbench/blob/master/DOWNLOADS.md). - -## What is NoSQLBench? - -NoSQLBench is a serious performance testing tool for the NoSQL ecosystem. - -**NoSQLBench brings advanced testing capabilities into one tool that are not found in other testing tools.** - -- You can run common testing workloads directly from the command line. You can start doing this within 5 minutes of - reading this. -- You can generate virtual data sets of arbitrary size, with deterministic data and statistically shaped values. -- You can design custom workloads that emulate your application, contained in a single file, based on statement - templates - no IDE or coding required. -- You can immediately plot your results in a docker and grafana stack on Linux with a single command line option. -- When needed, you can open the access panels and rewire the runtime behavior of NoSQLBench to do advanced testing, - including a full scripting environment with Javascript. - -The core machinery of NoSQLBench has been built with attention to detail. It has been battle tested within DataStax as a -way to help users validate their data models, baseline system performance, and qualify system designs for scale. - -In short, NoSQLBench wishes to be a programmable power tool for performance testing. However, it is somewhat generic. It -doesn't know directly about a particular type of system, or protocol. It simply provides a suitable machine harness in -which to put your drivers and testing logic. If you know how to build a client for a particular kind of system, EB will -let you load it like a plugin and control it dynamically. - -Initially, NoSQLBench comes with support for CQL, but we would like to see this expanded with contributions from others. - -## Origins - -The code in this project comes from multiple sources. 
The procedural data generation capability was known before as -'Virtual Data Set'. The core runtime and scripting harness was from the 'EngineBlock' project. The CQL support was -previously used within DataStax. In March of 2020, DataStax and the project maintainers for these projects decided to -put everything into one OSS project in order to make contributions and sharing easier for everyone. Thus, the new -project name and structure was launched as nosqlbench.io. NoSQLBench is an independent project that is primarily -sponsored by DataStax. - -We offer NoSQLBench as a new way of thinking about testing systems. It is not limited to testing only one type of -system. It is our wish to build a community of users and practice around this project so that everyone in the NoSQL -ecosystem can benefit from common concepts and understanding and reliable patterns of use. - -## Scalable User Experience - -NoSQLBench endeavors to be valuable to all users. We do this by making it easy for you, our user, to do just what you -need without worrying about the rest. If you need to do something simple, it should be simple to find the right settings -and just do it. If you need something more sophisticated, then you should be able to find what you need with a -reasonable amount of effort and no surprises. - -That is the core design principle behind NoSQLBench. We hope you like it. - diff --git a/engine-docs/src/main/resources/docs-for-nb/reference/activity_parameters.md b/engine-docs/src/main/resources/docs-for-nb/reference/activity_parameters.md deleted file mode 100644 index f2e9cb316..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/reference/activity_parameters.md +++ /dev/null @@ -1,403 +0,0 @@ ---- -title: Activity Parameters -weight: 05 ---- - -# Activity Parameters - -Activity parameters are passed as named arguments for an activity, either -on the command line or via a scenario script. 
On the command line, these -take the form of - - = - -Some activity parameters are universal in that they can be used with any -driver type. These parameters are recognized by nosqlbench whether or not -they are recognized by a particular driver implementation. These are -called _core parameters_. Only core activity parameters are documented -here. - -**NOTE:** -To see what activity parameters are valid for a given activity type, see -the documentation for that activity type with -`nb help `. - -When starting out, you want to familiarize yourself with these parameters. -The most important ones to learn about first are driver, cycles and -threads. - -## driver - -For historic reasons, you can also use `type`. They both mean the same -thing for now, but `driver` is more descriptive. The `type` parameter will -continue to be supported in this major version (3.x), but it will be an -error to use it in 4.x and newer. - -- `driver=` -- _default_: inferred from `alias` or `yaml` parameters, or unset -- _required_: yes, unless inferred -- _dynamic_: no - -Every activity is powered by a named ActivityType. Thus, you must set -the `type` parameter. If you do not specify this parameter, it will be -inferred from a substring match against the alias and/or yaml parameters. -If there is more than one valid match for a valid type value, then you -must set the type parameter directly. - -Telling nosqlbench what type of an activity will be run also determines -what other parameters are considered valid and how they will be used. So -in this way, the type parameter is actually the base parameter for any -activity. When used with scenario commands like `run` or `start`, an -activity of the named type will be initialized, and then further activity -parameters on the command line will be used to configure it before it is -started. 
- -## alias - -- `alias=` -- _default_: inferred from yaml, or 'UNSET' -- _required_: no -- _dynamic_: no - -You *should* set the _alias_ parameter when you have multiple activities, -when you want to name metrics per-activity, or when you want to control -activities via scripting. - -Each activity can be given a symbolic name known as an _alias_. It is good -practice to give all your activities an alias, since this determines the -named used in logging, metrics, and even scripting control. - -_default value_ : The name of any provided YAML filename is used as the -basis for the default alias. Otherwise, the activity type name is used. -This is a convenience for simple test scenarios only. - -## threads - -- `threads=` -- _default_: 1 -- _required_: no -- _dynamic_: yes - -You *should* set the _threads_ parameter when you need to ramp up a -workload. - -Each activity can be created with a number of threads. It is important to -adjust this setting to the system types used by nosqlbench. - -_default value_ : For now, the default is simply *1*. Users must be aware -of this setting and adjust it to a reasonable value for their workloads. - -`threads=auto` : When you set `threads=auto`, it will set the number of -threads to 10x the number of cores in your system. There is no distinction -here between full cores and hardware threads. This is generally a -reasonable number of threads to tap into the procesing power of a client -system. - -`threads=__x` : When you set `threads=5x` or `threads=10x`, you will set -the number of threads to some multiplier of the logical CPUs in the local -system. - -**NOTE:** -The threads parameter will work slightly differently for activities using -the async parameter. For example, when `async=500` is provided, then the -number of async operations is split between all configured threads, and -each thread will juggle a number of in-flight operations asynchronously. 
-Without the async parameter, threads determines the logical concurrency -level of nosqlbench in the classic 'request-per-thread' mode. Neither mode -is strictly correct, and both modes can be used for more accurate testing -depending on the constraints of your environment. - -A good rule of thumb for setting threads for maximum effect is to set it -relatively high, such as 10XvCPU when running synchronous workloads -(when not providing the async parameter), and to 5XvCPU for all async -workloads. Variation in system dynamics make it difficult to peg an ideal -number, so experimentation is encouraged while you dial in your settings -initially. - -## cycles - -- `cycles=` -- `cycles=..` -- _default_: same as `stride` -- _required_: no -- _dynamic_: no - -The cycles parameter determines the starting and ending point for an -activity. It determines the range of values which will act as seed values -for each operation. For each cycle of the test, a statement is built from -a statement template and executed as an operation. - -If you do not set the cycles parameter, then it will automatically be set -to the size of the sequence. The sequence is simply the length of the op -sequence that is constructed from the active statements and ratios in your -activity YAML. - -You *should* set the cycles for every activity except for schema-like -activities, or activities which you run just as a sanity check of active -statements. - -In the `cycles=` version, the count indicates the total -number of cycles, and is equivalent to `cycles=0..`. In both -cases, the max value is not the actual number of the last cycle. This is -because all cycle parameters define a closed-open interval. In other -words, the minimum value is either zero by default or the specified -minimum value, but the maximum value is the first value *not* included in -the interval. 
This means that you can easily stack intervals over -subsequent runs while knowing that you will cover all logical cycles -without gaps or duplicates. For example, given `cycles=1000` and then -`cycles=1000..2000`, and then `cycles=2000..5K`, you know that all cycles -between 0 (inclusive) and 5000 (exclusive) have been specified. - -## stride - -- `stride=` -- _default_: same as op sequence length -- _required_: no -- _dynamic_: no - -Usually, you don't want to provide a setting for stride, but it is still -important to understand what it does. Within nosqlbench, each time a -thread needs to allocate a set of cycles to operate on, it takes a -contiguous range of values from a shared atomic value. Thus, the stride is -the unit of micro-batching within nosqlbench. It also means that you can -use stride to optimize a workload by setting the value higher than the -default. For example if you are running a single-statement workload at a -very high rate, it doesn't make sense for threads to allocate one op at a -time from a shared atomic value. You can simply set -`stride=1000` to cause (ballpark estimation) about 1000X less internal -contention. - -The stride is initialized to the calculated sequence length. The sequence -length is simply the number of operations in the op sequence that is -planned from your active statements and their ratios. - -You usually do not want to set the stride directly. If you do, make sure -it is a multiple of what it would normally be set to if you need to ensure -that sequences are not divided up differently. This can be important when -simulating the access patterns of applications. - -**NOTE:** -When simulating multi-op access patterns in non-async mode, the stride -metric can tell you how long it took for a whole group of operations to -complete. 
- -## async - -- `async=` -- _default_: unset -- _required_: no -- _dynamic_: no - -The `async=` parameter puts an activity into an asynchronous dispatch -mode and configures each thread to juggle a proportion of the operations -specified. If you specify `async=500 threads=10`, then each of 10 threads -will manage execution of 50 operations at a time. With async mode, a -thread will always prepare and send operations if there are fewer in -flight than it is allotted before servicing any pending responses. - -Async mode also puts threads into a different sequencing behavior. When in -async mode, responses from an operation may arrive in a different order -than they are sent, and thus linearized operations can't be guaranteed as -with the non-async mode. This means that sometimes you use want to avoid -async mode when you are intentionally simulating access patterns with -multiple linearized operations per user as you may see in your -application. - -The absence of the async parameter leaves the activity in the default -non-async mode, where each thread works through a sequence of ops one -operation at a time. - -## cyclerate - -- `cyclerate=` -- `cyclerate=,` -- _default_: unset -- _required_: no -- _dynamic_: yes - -The cyclerate parameter sets a maximum op rate for individual cycles -within the activity, across the whole activity, irrespective of how many -threads are active. - -**NOTE:** -The cyclerate is a rate limiter, and can thus only throttle an activity to -be slower than it would otherwise run. Rate limiting is also an invasive -element in a workload, and will always come at a cost. For extremely high -throughput testing, consider carefully whether your testing would benefit -more from concurrency-based throttling as with async or the striderate -described below. - -When the cyclerate parameter is provided, two additional metrics are -tracked: the wait time and the response time. See the 'Reference|Timing -Terms' section for more details on these metrics. 
- -_default_: None. When the cyclerate parameter is not provided, an activity -runs as fast as it can given how fast operations can complete. - -Examples: - -- `cyclerate=1000` - set the cycle rate limiter to 1000 ops/s and a - default burst ratio of 1.1. -- `cyclerate=1000,1.0` - same as above, but with burstrate set to 1.0 - (use it or lose it, not usually desired) -- `cyclerate=1000,1.5` - same as above, with burst rate set to 1.5 (aka - 50% burst allowed) - -Synonyms: - -- `rate` -- `targetrate` - -### burst ratio - -This is only an optional part of the cyclerate as shown in examples above. -If you do not specify it when you initialize a cyclerate, then it defaults -1.1. The burst ratio is only valid as part of a rate limit and can not be -specified by itself. - -* _default_: `1.1` -* _dynamic_: yes - -The nosqlbench rate limiter provides a sliding scale between strict rate -limiting and average rate limiting. The difference between them is -controlled by a _burst ratio_ parameter. When the burst ratio is 1.0 -(burst up to 100% relative rate), the rate limiter acts as a strict rate -limiter, disallowing faster operations from using time that was previously -forfeited by prior slower operations. This is a "use it or lose it" mode -that means things like GC events can steal throughput from a running -client as a necessary effect of losing time in a strict timing sense. - -When the burst ratio is set to higher than 1.0, faster operations may -recover lost time from previously slower operations. For example, a burst -ratio of 1.3 means that the rate limiter will allow bursting up to 130% of -the base rate, but only until the average rate is back to 100% relative -speed. This means that any valleys created in the actual op rate of the -client can be converted into plateaus of throughput above the strict rate, -but only at a speed that fits within (op rate * burst ratio). 
This allows -for workloads to approximate the average target rate over time, with -controllable bursting rates. This ability allows for near-strict behavior -while allowing clients to still track truer to rate limit expectations, so -long as the overall workload is not saturating resources. - -**NOTE:** -The default burst ratio of 1.1 makes testing results slightly more stable -on average, but can also hide some short-term slow-downs in system -throughput. It is set at the default to fit most tester's expectations for -averaging results, but it may not be strict enough for your testing -purposes. However, a strict setting of 1.0 nearly always adds cold/startup -time to the result, so if you are testing for steady state, be sure to -account for this across test runs. - -## striderate - -- `striderate=` -- `striderate=,` -- _default_: unset -- _required_: no -- _dynamic_: yes - -The `striderate` parameter allows you to limit the start of a stride -according to some rate. This works almost exactly like the cyclerate -parameter, except that it blocks a whole group of operations from starting -instead of a single operation. The striderate can use a burst ratio just -as the cyclerate. - -This sets the target rate for strides. In nosqlbench, a stride is a group -of operations that are dispatched and executed together within the same -thread. This is useful, for example, to emulate application behaviors in -which some outside request translates to multiple internal requests. It is -also a way to optimize a client runtime for more efficiency and -throughput. The stride rate limiter applies to the whole activity -irrespective of how many threads it has. - -**WARNING:** -When using the cyclerate an striderate options together, operations are -delayed based on both rate limiters. If the relative rates are not -synchronised with the side of a stride, then one rate limiter will -artificially throttle the other. 
Thus, it usually doesn't make sense to
use both of these settings in the same activity.

## seq

- `seq=`
- _default_: `seq=bucket`
- _required_: no
- _dynamic_: no

The `seq=` parameter determines the type of
sequencing that will be used to plan the op sequence. The op sequence is a
look-up-table that is used for each stride to pick statement forms
according to the cycle offset. It is simply the sequence of statements
from your YAML that will be executed, but in a pre-planned, and highly
efficient form.

An op sequence is planned for every activity. With the default ratio on
every statement as 1, and the default bucket scheme, the basic result is
that each active statement will occur once in the order specified. Once
you start adding ratios to statements, the most obvious thing that you
might expect will happen: those statements will occur multiple times to
meet their ratio in the op mix. You can customize the op mix further by
changing the seq parameter to concat or interval.

**NOTE:**
The op sequence is a look up table of statement templates, *not*
individual statements or operations. Thus, the cycle still determines the
uniqueness of an operation as you would expect. For example, if statement
form ABC occurs 3x per sequence because you set its ratio to 3, then each
of these would manifest as a distinct operation with fields determined by
distinct cycle values.

There are three schemes to pick from:

### bucket

This is a round robin planner which draws operations from buckets in
circular fashion, removing each bucket as it is exhausted. For example,
the ratios A:4, B:2, C:1 would yield the sequence A B C A B A A. The
ratios A:1, B:5 would yield the sequence A B B B B B.

### concat

This simply takes each statement template as it occurs in order and
duplicates it in place to achieve the ratio. The ratios above (A:4, B:2,
C:1) would yield the sequence A A A A B B C for the concat sequencer.
- -

### interval

This is arguably the most complex sequencer. It takes each ratio as a
frequency over a unit interval of time, and apportions the associated
operation to occur evenly over that time. When two operations would be
assigned the same time, then the order of appearance establishes
precedence. In other words, statements appearing first win ties for the
same time slot. The ratios A:4 B:2 C:1 would yield the sequence A B C A A
B A. This occurs because, over the unit interval (0.0,1.0), A is assigned
the positions `A: 0.0, 0.25, 0.5, 0.75`, B is assigned the
positions `B: 0.0, 0.5`, and C is assigned position `C: 0.0`. These
offsets are all sorted with a position-stable sort, and then the
associated ops are taken as the order.

In detail, the rendering appears
as `0.0(A), 0.0(B), 0.0(C), 0.25(A), 0.5(A), 0.5(B), 0.75(A)`, which
yields `A B C A A B A` as the op sequence.

This sequencer is most useful when you want a stable ordering of operations
from a rich mix of statement types, where each operation is spaced as
evenly as possible over time, and where it is not important to control the
cycle-by-cycle sequencing of statements.

## hdr_digits

- `hdr_digits=3`
- _default_: `4`
- _required_: no
- _dynamic_: no

This parameter determines the number of significant digits used in all HDR
histograms for metrics collected from this activity. The default of 4
allows 4 significant digits, which means *up to* 10000 distinct histogram
buckets per named metric, per histogram interval. This does not mean that
there _will be_ 10000 distinct buckets, but it means there could be if
there is significant volume and variety in the measurements.

If you are running a scenario that creates many activities, then you can
set `hdr_digits=1` on some of them to save client resources.
diff --git a/engine-docs/src/main/resources/docs-for-nb/reference/advanced_metrics.md b/engine-docs/src/main/resources/docs-for-nb/reference/advanced_metrics.md deleted file mode 100644 index 83dbf3099..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/reference/advanced_metrics.md +++ /dev/null @@ -1,113 +0,0 @@ ---- -title: Advanced Metrics ---- - -# Advanced Metrics - -## Unit of Measure - -All metrics collected from activities are recorded in nanoseconds and ops per second. All histograms are recorded with 4 -digits of precision using HDR histograms. - -## Metric Outputs - -Metrics from a scenario run can be gathered in multiple ways: - -- In the log output -- In CSV files -- In HDR histogram logs -- In Histogram Stats logs (CSV) -- To a monitoring system via graphite -- via the --docker-metrics option - -With the exception of the `--docker-metrics` approach, these forms may be combined and used in combination. The command -line options for enabling these are documented in the built-in help, although some examples of these may be found below. - -## Metrics via Graphite - -If you like to have all of your testing data in one place, then you may be interested in reporting your measurements to -a monitoring system. For this, nosqlbench includes a -[Metrics Library](https://github.com/dropwizard/metrics). Graphite reporting is baked in as the default reporter. - -In order to enable graphite reporting, use one of these options formats: - - --report-graphite-to - --report-graphite-to : - -## Metric Naming - -## Prefix - -Core metrics use the prefix _nosqlbench_ by default. You can override this with the ``--metrics-prefix` option: - - --metrics-prefix myclient.group5 - -## Identifiers - -Metrics associated with a specific activity will have the activity alias in their name. There is a set of core metrics -which are always present regardless of the activity type. The names and types of additional metrics provided for each -activity type vary. 
- -Sometimes, an activity type will expose metrics on a per statement basis, measuring over all invocations of a given -statement as defined in the YAML. In these cases, you will see `--` separating the name components of the metric. At the -most verbose, a metric name could take on the form like -`.------`, although this is rare when you name your statements, -which is recommended. Just keep in mind that the double dash connects an activity's alias with named statements *within* -that activity. - -## HDR Histograms - -### Recording HDR Histogram Logs - -You can record details of histograms from any compatible metric (histograms and timers) with an option like this: - - --log-histograms hdrdata.log - -If you want to record only certain metrics in this way, then use this form: - - --log-histograms 'hdrdata.log:.*suffix' - - -Notice that the option is enclosed in single quotes. This is because the second part of the option value is a regex. The -'.*suffix' pattern matches any metric name that ends with "suffix". Effectively, leaving out the pattern is the same as -using '.\*', which matches all metrics. Any valid regex is allowed here. - -Metrics may be included in multiple logs, but care should be taken not to overdo this. Keeping higher fidelity histogram -reservoirs does come with a cost, so be sure to be specific in what you record as much as possible. - -If you want to specify the recording interval, use this form: - - --log-histograms 'hdrdata.log:.*suffix:5s' - -If you want to specify the interval, you must use the third form above, although it is valid to leave the pattern empty, -such as 'hdrdata.log::5s'. - -Each interval specified will be tracked in a discrete reservoir in memory, so they will not interfere with each other in -terms of accuracy. - -### Recording HDR Histogram Stats - -You can also record basic snapshots of histogram data on a periodic interval just like above with HDR histogram logs. 
-The option to do this is: - - --log-histostats 'hdrstats.log:.*suffix:10s' - -Everything works the same as for hdr histogram logging, except that the format is in CSV as shown in the example below: - -~~~ -#logging stats for session scenario-1479089852022 -#[Histogram log format version 1.0] -#[StartTime: 1479089852.046 (seconds since epoch), Sun Nov 13 20:17:32 CST 2016] -#Tag,Interval_Start,Interval_Length,count,min,p25,p50,p75,p90,p95,p98,p99,p999,p9999,max -Tag=diag1.delay,0.457,0.044,1,16,31,31,31,31,31,31,31,31,31,31 -Tag=diag1.cycles,0.48,0.021,31,4096,8191,8191,8191,8191,8191,8191,8191,8191,8191,2097151 -Tag=diag1.delay,0.501,0.499,1,1,1,1,1,1,1,1,1,1,1,1 -Tag=diag1.cycles,0.501,0.499,498,1024,2047,2047,4095,4095,4095,4095,4095,4095,4095,4194303 -... -~~~ - -This includes the metric name (Tag), the interval start time and length (from the beginning of collection time), number -of metrics recorded (count), minimum magnitude, a number of percentile measurements, and the maximum value. Notice that -the format used is similar to that of the HDR logging, although instead of including the raw histogram data, common -percentiles are recorded directly. - diff --git a/engine-docs/src/main/resources/docs-for-nb/reference/cli_scripting.md b/engine-docs/src/main/resources/docs-for-nb/reference/cli_scripting.md deleted file mode 100644 index 7448e3a95..000000000 --- a/engine-docs/src/main/resources/docs-for-nb/reference/cli_scripting.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -title: CLI Scripting ---- - -# Command-Line Scripting - -Sometimes you want to to run a set of workloads in a particular order, or call other specific test setup logic in -between phases or workloads. While the full scripting environment allows you to do this and more, it is not necessary to -write javascript for every scenario. - -For more basic setup and sequencing needs, you can achive a fair degree of flexibility on the command line. 
A few key
API calls are supported directly on the command line. This guide explains each of them, what they do, and how to use them
together.

## Script Construction

As the command line is parsed, from left to right, the scenario script is built in an internal scripting buffer. Once
the command line is fully parsed, this script is executed. Each of the commands below is effectively a macro for a
snippet of script. It is important to remember that order matters.

## Command line format

Newlines are not allowed when building scripts from the command line. As long as you follow the allowed forms below, you
can simply string multiple commands together with spaces between. As usual, single word options without double dashes
are commands, key=value style parameters apply to the previous command, and all other commands with

    --this-style

are non-scripting options.

## Concurrency & Control

All activities that run during a scenario run under the control of, but independently from the scenario script. This
means that you can have a number of activities running while the scenario script is doing its own thing. The scenario
only completes when both the scenario script and the activities are finished.

### start an activity

`start driver= alias= ...`

You can start an activity with this command. At the time this command is evaluated, the activity is started, and the
script continues without blocking. This is an asynchronous start of an activity. If you start multiple activities in
this way, they will run concurrently.

The type argument is required to identify the activity type to run. The alias parameter is not strictly required, unless
you want to be able to interact with the started activity later. In any case, it is a good idea to name all your
activities with a meaningful alias.

### stop an activity

`stop `

Stop an activity with the given alias.
This is synchronous, and causes the scenario to pause until the activity is -stopped. This means that all threads for the activity have completed and signalled that they're in a stopped state. - -You can stop a single activity alias, or any which match a regex pattern. If the value provided -contains spaces, semicolons or commas, then it is split into words and each word is used as if -the stop command had been called on it in turn. - -If the alias name you supply follows matches the regular expression `[a-zA-Z_][a-zA-Z0-9_.]*`, i.e. an alphanumeric -name with dots or underscores and no leading digit, then it is taken as a literal name and only -matches the same literal alias. However, if there are any other characters, it is presumed to be -a regular expression and matched as such against all currently running activities. - -### await an activity - -`await ` - -Await the normal completion of an activity with the given alias. This causes the scenario script to pause while it waits -for the named activity to finish. This does not tell the activity to stop. It simply puts the scenario script into a -paused state until the named activity is complete. - -### run an activity - -`run driver= alias= ...` - -Run an activity to completion, waiting until it is complete before continuing with the scenario script. It is -effectively the same as - - start driver= ... alias= - await - -### wait millis - -`waitmillis ` - -Pause the scenario script for this many milliseconds. This is useful for controlling workload run duration, etc. - -### add a script - -`script