checkpoint: paths and packages cleanup, tri-state filtering on keyspaces, more efficient init, LOCAL_QUORUM

This commit is contained in:
Jonathan Shook 2022-07-19 01:41:23 -05:00
parent e79b09ace7
commit 1d1c96f9bf
66 changed files with 1410 additions and 845 deletions

View File

@ -104,13 +104,13 @@
<artifactId>antlr4-maven-plugin</artifactId>
<version>4.10.1</version>
<configuration>
<sourceDirectory>src/main/java/io/nosqlbench/converters/cql/grammars
<sourceDirectory>src/main/java/io/nosqlbench/cqlgen/grammars
</sourceDirectory>
<arguments>
<argument>-package</argument>
<argument>io.nosqlbench.converters.cql.generated</argument>
<argument>io.nosqlbench.cqlgen.generated</argument>
</arguments>
<outputDirectory>src/main/java/io/nosqlbench/converters/cql/generated
<outputDirectory>src/main/java/io/nosqlbench/cqlgen/generated
</outputDirectory>
</configuration>
<executions>
@ -130,7 +130,7 @@
<configuration>
<filesets>
<fileset>
<directory>src/main/java/io/nosqlbench/converters/cql/generated
<directory>src/main/java/io/nosqlbench/cqlgen/generated
</directory>
<includes>
<include>**/*.java</include>

View File

@ -290,6 +290,7 @@ public class Cqld4Space {
.add(Param.optional("password", String.class, "password (see also passfile)"))
.add(Param.optional("passfile", String.class, "file to load the password from"))
.add(Param.optional("whitelist", String.class, "list of whitelist hosts addresses"))
.add(Param.optional("showstmt", Boolean.class, "show the contents of the statement in the log"))
.add(Param.optional("cloud_proxy_address", String.class, "Cloud Proxy Address"))
.add(SSLKsFactory.get().getConfigModel())
.add(getDriverOptionsModel())

View File

@ -16,10 +16,11 @@
package io.nosqlbench.adapter.cqld4.opdispensers;
import com.datastax.dse.driver.api.core.graph.FluentGraphStatement;
import com.datastax.oss.driver.api.core.CqlSession;
import com.datastax.oss.driver.api.core.DefaultConsistencyLevel;
import com.datastax.oss.driver.api.core.config.DriverExecutionProfile;
import com.datastax.oss.driver.api.core.cql.Statement;
import com.datastax.oss.driver.api.core.cql.*;
import com.datastax.oss.driver.api.core.metadata.Node;
import com.datastax.oss.driver.api.core.metadata.token.Token;
import io.nosqlbench.adapter.cqld4.Cqld4OpMetrics;
@ -27,24 +28,28 @@ import io.nosqlbench.adapter.cqld4.optypes.Cqld4CqlOp;
import io.nosqlbench.engine.api.activityimpl.BaseOpDispenser;
import io.nosqlbench.engine.api.activityimpl.uniform.DriverAdapter;
import io.nosqlbench.engine.api.templating.ParsedOp;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.nio.ByteBuffer;
import java.time.Duration;
import java.util.Map;
import java.util.function.LongFunction;
public abstract class BaseCqlStmtDispenser extends BaseOpDispenser<Cqld4CqlOp> {
public abstract class Cqld4BaseOpDispenser extends BaseOpDispenser<Cqld4CqlOp> {
private final static Logger logger = LogManager.getLogger("CQLD4");
private final int maxpages;
private final Cqld4OpMetrics metrics = new Cqld4OpMetrics();
private final LongFunction<CqlSession> sessionFunc;
private final boolean isRetryReplace;
public BaseCqlStmtDispenser(DriverAdapter adapter, LongFunction<CqlSession> sessionFunc, ParsedOp op) {
public Cqld4BaseOpDispenser(DriverAdapter adapter, LongFunction<CqlSession> sessionFunc, ParsedOp op) {
super(adapter, op);
this.sessionFunc = sessionFunc;
this.maxpages = op.getStaticConfigOr("maxpages",1);
this.isRetryReplace = op.getStaticConfigOr("retryreplace",false);
this.maxpages = op.getStaticConfigOr("maxpages", 1);
this.isRetryReplace = op.getStaticConfigOr("retryreplace", false);
}
public int getMaxPages() {
@ -71,13 +76,14 @@ public abstract class BaseCqlStmtDispenser extends BaseOpDispenser<Cqld4CqlOp> {
* initialized within each dispenser, not for each time dispensing occurs.
*/
protected LongFunction<Statement> getEnhancedStmtFunc(LongFunction<Statement> basefunc, ParsedOp op) {
LongFunction<Statement> partial = basefunc;
partial = op.enhanceEnum(partial, "cl", DefaultConsistencyLevel.class, Statement::setConsistencyLevel);
partial = op.enhanceEnum(partial, "consistency_level", DefaultConsistencyLevel.class, Statement::setConsistencyLevel);
partial = op.enhanceEnum(partial, "scl", DefaultConsistencyLevel.class, Statement::setSerialConsistencyLevel);
partial = op.enhanceEnum(partial, "serial_consistency_level", DefaultConsistencyLevel.class, Statement::setSerialConsistencyLevel);
partial = op.enhanceFuncOptionally(partial, "idempotent", Boolean.class, Statement::setIdempotent);
partial = op.enhanceFuncOptionally(partial, "timeout", double.class, (statement, l) -> statement.setTimeout(Duration.ofMillis((long)(l*1000L))));
partial = op.enhanceFuncOptionally(partial, "timeout", double.class, (statement, l) -> statement.setTimeout(Duration.ofMillis((long) (l * 1000L))));
partial = op.enhanceFuncOptionally(partial, "custom_payload", Map.class, Statement::setCustomPayload);
partial = op.enhanceFuncOptionally(partial, "execution_profile", DriverExecutionProfile.class, Statement::setExecutionProfile);
partial = op.enhanceFuncOptionally(partial, "execution_profile_name", String.class, Statement::setExecutionProfileName);
@ -89,9 +95,38 @@ public abstract class BaseCqlStmtDispenser extends BaseOpDispenser<Cqld4CqlOp> {
partial = op.enhanceFuncOptionally(partial, "routing_keys", ByteBuffer[].class, Statement::setRoutingKey);
partial = op.enhanceFuncOptionally(partial, "routing_token", Token.class, Statement::setRoutingToken);
partial = op.enhanceFuncOptionally(partial, "tracing", boolean.class, Statement::setTracing);
partial = op.enhanceFuncOptionally(partial, "showstmt", boolean.class, this::showstmt);
return partial;
}
private Statement showstmt(Statement stmt, boolean input) {
String query = cqlFor(stmt, new StringBuilder());
logger.info("CQL(SIMPLE): " + query);
return stmt;
}
private String cqlFor(Statement stmt, StringBuilder sb) {
if (stmt instanceof SimpleStatement ss) {
sb.append("(SIMPLE):" + ss.getQuery());
} else if (stmt instanceof BoundStatement bs) {
sb.append("(BOUND+" + bs.getValues().size() + " values): " + bs.getPreparedStatement().getQuery());
} else if (stmt instanceof FluentGraphStatement fgs) {
sb.append("(FLUENT): non-printable");
} else if (stmt instanceof BatchStatement bs) {
for (BatchableStatement<?> batchable : bs) {
if (sb.length() < 1024) {
cqlFor(bs, sb);
} else {
sb.append(("(statement too large to show)"));
break;
}
}
} else {
sb.append("Unknown statement type for extraction (showstmt):" + stmt.getClass().getSimpleName());
}
return sb.toString();
}
}

View File

@ -32,7 +32,7 @@ import org.apache.logging.log4j.Logger;
import java.util.function.LongFunction;
public class Cqld4PreparedStmtDispenser extends BaseCqlStmtDispenser {
public class Cqld4PreparedStmtDispenser extends Cqld4BaseOpDispenser {
private final static Logger logger = LogManager.getLogger(Cqld4PreparedStmtDispenser.class);
private final RSProcessors processors;

View File

@ -27,7 +27,7 @@ import io.nosqlbench.engine.api.templating.ParsedOp;
import java.util.function.LongFunction;
public class Cqld4RawStmtDispenser extends BaseCqlStmtDispenser {
public class Cqld4RawStmtDispenser extends Cqld4BaseOpDispenser {
private final LongFunction<Statement> stmtFunc;
private final LongFunction<String> targetFunction;

View File

@ -25,7 +25,7 @@ import io.nosqlbench.engine.api.templating.ParsedOp;
import java.util.function.LongFunction;
public class Cqld4SimpleCqlStmtDispenser extends BaseCqlStmtDispenser {
public class Cqld4SimpleCqlStmtDispenser extends Cqld4BaseOpDispenser {
private final LongFunction<Statement> stmtFunc;
private final LongFunction<String> targetFunction;

View File

@ -32,11 +32,8 @@ import java.util.Map;
// TODO: add statement filtering
// TODO: add statement pre and post processing for trace capture and start timer op
// TODO: add statement post processing for trace capture
// TODO: add trace capture
// TODO: add start timer op
// TODO: add stop timer op
// TODO: add showcql equivalent
// TODO: add/document max tries exhausted exception
// TODO: add/document UnexpectedPagingException
// TODO: add/document change unapplied exception

View File

@ -1,63 +0,0 @@
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.transformers;
import io.nosqlbench.converters.cql.cqlast.CqlKeyspace;
import io.nosqlbench.converters.cql.cqlast.CqlModel;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
/**
 * Model transformer which prunes a {@link CqlModel} down to only the keyspaces
 * whose names match at least one configured include pattern. Tables and types
 * belonging to a removed keyspace are removed along with it.
 *
 * <p>Configured via {@link #accept(Map)} with an {@code include} list of regex
 * patterns. Until configured, everything is included (default pattern {@code .*}).
 */
public class CGKeyspaceFilter implements CGModelTransformer, CGTransformerConfigurable {

    // Default: include every keyspace until a configuration narrows it down.
    private List<Pattern> patterns = List.of(Pattern.compile(".*"));

    private final static Logger logger = LogManager.getLogger(CGKeyspaceFilter.class);

    /**
     * Apply the filter, mutating and returning the same model instance.
     *
     * @param model the parsed CQL model to prune
     * @return the same model, with non-included keyspaces (and their tables and types) removed
     */
    @Override
    public CqlModel apply(CqlModel model) {
        for (CqlKeyspace keyspace : model.getKeyspaceDefs()) {
            boolean included = false;
            for (Pattern pattern : patterns) {
                if (pattern.matcher(keyspace.getName()).matches()) {
                    included = true;
                    break;
                }
            }
            if (!included) {
                logger.info("removing keyspaces, tables and types for non-included keyspace '" + keyspace.getName() + "'");
                model.removeKeyspaceDef(keyspace.getName());
                model.removeTablesForKeyspace(keyspace.getName());
                model.removeTypesForKeyspace(keyspace.getName());
            } else {
                logger.info("including keyspace '" + keyspace.getName() + "'");
            }
        }
        return model;
    }

    /**
     * Configure the include patterns from a config map.
     *
     * @param cfgmap config map which must contain an {@code include} entry holding
     *               a list of regex strings
     * @throws RuntimeException if the {@code include} entry is missing (previously
     *                          this caused an uninformative NullPointerException)
     */
    @Override
    public void accept(Map<String, ?> cfgmap) {
        Object include = cfgmap.get("include");
        if (include == null) {
            throw new RuntimeException("keyspace filter requires an 'include' list of regex patterns");
        }
        @SuppressWarnings("unchecked")
        List<String> includes = (List<String>) include;
        this.patterns = includes.stream()
            .map(Pattern::compile)
            .toList();
    }
}

View File

@ -1,61 +0,0 @@
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.transformers;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A configurable chain of regex find/replace rules applied to a text payload.
 *
 * <p>Configured via {@link #accept(Map)} with a {@code replacers} entry holding
 * a list of {@code [from, to]} pairs. Each pair becomes a {@link Replacer}
 * applied in order.
 */
public class CGRegexReplacer implements Function<String,String>, CGTransformerConfigurable {

    // Replacement rules, applied in configuration order. Empty until configured.
    private List<Replacer> replacers = List.of();

    /**
     * Apply every configured replacer to the input in order.
     * (Previously this method unconditionally returned null, discarding both
     * the input and the configured rules.)
     *
     * @param s the text to transform
     * @return the text after all replacements have been applied
     */
    @Override
    public String apply(String s) {
        String result = s;
        for (Replacer replacer : replacers) {
            result = replacer.apply(result);
        }
        return result;
    }

    /**
     * Configure the replacement rules.
     *
     * @param stringMap config map which must contain a {@code replacers} entry
     *                  holding a list of two-element {@code [from, to]} lists
     * @throws RuntimeException if the {@code replacers} entry is missing
     */
    @Override
    public void accept(Map<String, ?> stringMap) {
        @SuppressWarnings("unchecked")
        List<List<String>> specs = (List<List<String>>) stringMap.get("replacers");
        if (specs == null) {
            throw new RuntimeException("regex replacer requires a 'replacers' list of [from,to] pairs");
        }
        this.replacers = specs.stream()
            .map(pair -> new Replacer(pair.get(0), pair.get(1)))
            .toList();
    }

    /** A single compiled find/replace rule. */
    private final static class Replacer implements Function<String,String>{
        private final Pattern pattern;
        private final String replacement;

        Replacer(String from, String to) {
            this.pattern = Pattern.compile(from);
            this.replacement = to;
        }

        @Override
        public String apply(String s) {
            Matcher matcher = pattern.matcher(s);
            StringBuilder sb = new StringBuilder();
            while (matcher.find()) {
                matcher.appendReplacement(sb, replacement);
            }
            matcher.appendTail(sb);
            return sb.toString();
        }
    }
}

View File

@ -0,0 +1,81 @@
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.cqlgen.exporter;
import io.nosqlbench.cqlgen.model.CqlColumnDef;
import io.nosqlbench.cqlgen.exporter.binders.Binding;
import io.nosqlbench.cqlgen.exporter.binders.BindingsAccumulator;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
 * Produces bindings for columns, specializing the last partition-key column of
 * a table so that its binding is taken modulo an estimated partition count
 * (scaled by a multiplier and rounded to a configured number of significant
 * digits). All other columns delegate straight to the accumulator.
 */
public class CGColumnRebinder {
    private final static Logger logger = LogManager.getLogger(CGColumnRebinder.class);
    private final BindingsAccumulator accumulator;
    private final double partitionMultiplier;
    private final int quantizerDigits;

    /**
     * @param accumulator         sink which names and records generated bindings
     * @param partitionMultiplier scale factor applied to the estimated partition count
     * @param quantizerDigits     number of significant digits to keep when rounding the modulo
     */
    CGColumnRebinder(BindingsAccumulator accumulator, double partitionMultiplier, int quantizerDigits) {
        this.accumulator = accumulator;
        this.partitionMultiplier = partitionMultiplier;
        this.quantizerDigits = quantizerDigits;
    }

    /** Return a binding for the column, dividing the last partition key by estimated cardinality. */
    public Binding forColumn(CqlColumnDef cdef) {
        if (cdef.isLastPartitionKey()) {
            return dividedBinding(cdef);
        } else {
            return accumulator.forColumn(cdef);
        }
    }

    /**
     * Build a modulo-limited binding for the last partition key, falling back to
     * a plain binding when table stats or the partition estimate are unavailable.
     */
    private Binding dividedBinding(CqlColumnDef column) {
        CGTableStats stats = column.getTable().getTableAttributes();
        if (stats == null) {
            return accumulator.forColumn(column);
        }
        String partitionsSpec = stats.getAttribute("Number of partitions (estimate)");
        if (partitionsSpec == null) {
            // Fix: previously this branch was empty and execution fell through to
            // Double.parseDouble(null), throwing a NullPointerException.
            return accumulator.forColumn(column);
        }
        double estimatedPartitions = Double.parseDouble(partitionsSpec);
        long modulo = (long) (estimatedPartitions * partitionMultiplier);
        if (modulo == 0) {
            return accumulator.forColumn(column);
        }
        // Fix: honor the configured quantizerDigits instead of a hard-coded 1.
        modulo = quantizeModuloByMagnitude(modulo, quantizerDigits);
        logger.debug("Set partition modulo for " + column.getFullName() + " to " + modulo);
        Binding binding = accumulator.forColumn(column, "Mod(" + modulo + "L); ");
        return binding;
    }

    /**
     * Round a modulo to the nearest value having only {@code significand}
     * leading significant digits, e.g. {@code (1234, 1) -> 1000} and
     * {@code (1734, 1) -> 2000}. At least one trailing zero digit is kept.
     *
     * @param modulo      the raw modulo value (must be positive)
     * @param significand how many leading digits to preserve
     * @return the rounded modulo
     */
    public static long quantizeModuloByMagnitude(long modulo, int significand) {
        double initial = modulo;
        double log10 = Math.log10(initial);
        int zeroes = (int) log10;
        zeroes = Math.max(1, zeroes - (significand - 1));
        long fractional = (long) Math.pow(10, zeroes);
        long partial = ((long) initial / fractional) * fractional;
        long nextPartial = partial + fractional;
        // Pick whichever quantized neighbor is closer, preferring the lower on ties.
        if (Math.abs(initial - partial) <= Math.abs(initial - nextPartial)) {
            return partial;
        } else {
            return nextPartial;
        }
    }
}

View File

@ -14,11 +14,11 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters;
package io.nosqlbench.cqlgen.exporter;
import io.nosqlbench.converters.cql.cqlast.CqlColumnDef;
import io.nosqlbench.converters.cql.exporters.binders.Binding;
import io.nosqlbench.converters.cql.exporters.binders.BindingsLibrary;
import io.nosqlbench.cqlgen.model.CqlColumnDef;
import io.nosqlbench.cqlgen.exporter.binders.Binding;
import io.nosqlbench.cqlgen.exporter.binders.BindingsLibrary;
import io.nosqlbench.engine.api.activityconfig.StatementsLoader;
import io.nosqlbench.engine.api.activityconfig.yaml.StmtsDocList;
import io.nosqlbench.api.content.Content;
@ -32,22 +32,23 @@ import java.util.Optional;
public class CGDefaultCqlBindings implements BindingsLibrary {
private final Map<String, String> bindings;
private final static String exporterCfgDir = "cqlgen";
private final static String bindingsFileName = "bindings-cqlgen.yaml";
public CGDefaultCqlBindings() {
String yamlContent = NBIO.all()
.name("bindings")
.extension("yaml", "yml")
.name(bindingsFileName)
.first()
.map(Content::asString)
.or(() -> loadLocal("bindings.yaml"))
.orElseThrow(() -> new RuntimeException("Unable to load bindings.yaml"));
.or(() -> loadLocal(bindingsFileName))
.orElseThrow(() -> new RuntimeException("Unable to load " + bindingsFileName + ", from local dir or internally as cqlgen/" + bindingsFileName));
StmtsDocList stmtsDocs = StatementsLoader.loadString(yamlContent, Map.of());
this.bindings = stmtsDocs.getDocBindings();
}
private Optional<String> loadLocal(String path) {
try {
String resourceName = getClass().getPackageName().replaceAll("\\.", File.separator)+File.separator+path;
String resourceName = exporterCfgDir + File.separator + path;
InputStream stream = getClass().getClassLoader().getResourceAsStream(resourceName);
byte[] bytes = stream.readAllBytes();
return Optional.of(new String(bytes));

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters;
package io.nosqlbench.cqlgen.exporter;
import io.nosqlbench.api.labels.Labeled;

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters;
package io.nosqlbench.cqlgen.exporter;
import java.util.HashMap;
import java.util.Map;

View File

@ -14,9 +14,9 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters;
package io.nosqlbench.cqlgen.exporter;
import io.nosqlbench.converters.cql.cqlast.CqlColumnDef;
import io.nosqlbench.cqlgen.model.CqlColumnDef;
import java.util.Locale;
import java.util.Optional;

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters;
package io.nosqlbench.cqlgen.exporter;
import java.util.HashMap;
import java.util.Map;

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters;
package io.nosqlbench.cqlgen.exporter;
import java.util.HashMap;
import java.util.Map;

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters;
package io.nosqlbench.cqlgen.exporter;
import java.util.function.Function;

View File

@ -14,10 +14,10 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters;
package io.nosqlbench.cqlgen.exporter;
import io.nosqlbench.converters.cql.exporters.transformers.CGNameObfuscator;
import io.nosqlbench.converters.cql.exporters.transformers.CGTransformerConfigurable;
import io.nosqlbench.cqlgen.exporter.transformers.CGNameObfuscator;
import io.nosqlbench.cqlgen.exporter.transformers.CGTransformerConfigurable;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -71,9 +71,9 @@ public class CGTextTransformers implements Consumer<List<Map<String, ?>>>, Suppl
// Configure Transformer IFF ...
if (transformer instanceof CGTransformerConfigurable configurable) {
Object cfgvalues = cfgmap.get("config");
if (cfgvalues instanceof Map txconfigmap) {
configurable.accept((txconfigmap));
logger.info("configured transformer with " + txconfigmap);
if (cfgvalues != null) {
configurable.accept((cfgvalues));
logger.info("configured transformer with " + cfgvalues);
}
}

View File

@ -14,14 +14,17 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters;
package io.nosqlbench.cqlgen.exporter;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import io.nosqlbench.converters.cql.cqlast.*;
import io.nosqlbench.converters.cql.exporters.binders.*;
import io.nosqlbench.converters.cql.exporters.transformers.CGModelTransformers;
import io.nosqlbench.converters.cql.parser.CqlModelParser;
import io.nosqlbench.cqlgen.exporter.binders.BindingsAccumulator;
import io.nosqlbench.cqlgen.exporter.binders.BindingsLibrary;
import io.nosqlbench.cqlgen.exporter.transformers.CGModelTransformers;
import io.nosqlbench.cqlgen.parser.CqlModelParser;
import io.nosqlbench.cqlgen.exporter.binders.Binding;
import io.nosqlbench.cqlgen.exporter.binders.NamingFolio;
import io.nosqlbench.cqlgen.model.*;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.snakeyaml.engine.v2.api.Dump;
@ -50,18 +53,21 @@ import java.util.stream.Collectors;
*/
public class CGWorkloadExporter {
private final static Logger logger = LogManager.getLogger(CGWorkloadExporter.class);
private final BindingsLibrary defaultBindings = new CGDefaultCqlBindings();
private CGColumnRebinder binder;
private NamingFolio namer;
private BindingsAccumulator bindings = new BindingsAccumulator(namer, List.of(defaultBindings));
private final CqlModel model;
private final Map<String, String> bindingsMap = new LinkedHashMap<>();
private final int DEFAULT_RESOLUTION = 10000;
String replication;
String namingTemplate;
private List<String> includedKeyspaces;
// TODO: move this to a transformer
private String replication;
// TODO: Move these to a config object
private String namingTemplate;
private double partitionMultiplier;
private int quantizerDigits;
private Map<String, List<String>> blockplan = Map.of();
private final Map<String, Double> timeouts = new HashMap<String, Double>(Map.of(
"create", 60.0,
"truncate", 900.0,
@ -72,8 +78,6 @@ public class CGWorkloadExporter {
"delete", 10.0,
"update", 10.0
));
private boolean elideUnusedTables;
private Map<String, List<String>> blockplan = Map.of();
public CGWorkloadExporter(CqlModel model, CGModelTransformers transformers) {
this.model = model;
@ -138,7 +142,7 @@ public class CGWorkloadExporter {
}
Yaml yaml = new Yaml();
Path cfgpath = Path.of("exporter.yaml");
Path cfgpath = Path.of("cqlgen.conf");
CGWorkloadExporter exporter;
if (Files.exists(cfgpath)) {
@ -177,9 +181,9 @@ public class CGWorkloadExporter {
exporter.setPartitionMultiplier(Double.parseDouble(partition_multipler));
exporter.configureTimeouts(cfgmap.get("timeouts"));
exporter.configureElideUnusedTables(cfgmap.get("elide_unused_tables"));
exporter.configureBlocks(cfgmap.get("blockplan"));
exporter.configureQuantizerDigits(cfgmap.get("quantizer_digits"));
String workload = exporter.getWorkloadAsYaml();
try {
@ -199,15 +203,23 @@ public class CGWorkloadExporter {
}
}
private void configureQuantizerDigits(Object quantizer_digits) {
if (quantizer_digits != null) {
this.quantizerDigits = Integer.parseInt(quantizer_digits.toString());
}
}
public Map<String, Object> generateBlocks() {
namer.informNamerOfAllKnownNames(model);
Map<String, Object> workload = new LinkedHashMap<>();
workload.put("description", "Auto-generated workload from source schema.");
workload.put("scenarios", genScenarios(model));
workload.put("bindings", bindingsMap);
workload.put("bindings", new LinkedHashMap<String, String>());
Map<String, Object> blocks = new LinkedHashMap<>();
workload.put("blocks", blocks);
workload.put("params", new LinkedHashMap<>(
Map.of("cl", "LOCAL_QUORUM")
));
for (Map.Entry<String, List<String>> blocknameAndComponents : blockplan.entrySet()) {
String blockname = blocknameAndComponents.getKey();
@ -236,7 +248,6 @@ public class CGWorkloadExporter {
simplifyTimeouts(block);
blocks.put(blockname, block);
}
bindingsMap.putAll(bindings.getAccumulatedBindings());
return workload;
}
@ -276,14 +287,6 @@ public class CGWorkloadExporter {
}
}
private void configureElideUnusedTables(Object elide_unused_tables) {
if (elide_unused_tables == null) {
this.elideUnusedTables = false;
} else {
this.elideUnusedTables = Boolean.parseBoolean(elide_unused_tables.toString());
}
}
public void configureTimeouts(Object spec) {
if (spec instanceof Map specmap) {
for (Object key : specmap.keySet()) {
@ -309,8 +312,6 @@ public class CGWorkloadExporter {
public void setNamingTemplate(String namingTemplate) {
this.namingTemplate = namingTemplate;
this.namer = new NamingFolio(namingTemplate);
this.bindings = new BindingsAccumulator(namer, List.of(defaultBindings));
}
private LinkedHashMap<String, Object> genScenarios(CqlModel model) {
@ -340,6 +341,9 @@ public class CGWorkloadExporter {
Map<String, Object> ops = new LinkedHashMap<>();
blockdata.put("ops", ops);
for (CqlTable table : model.getTableDefs()) {
if (table.getClusteringColumns().size() == 0) {
logger.debug("skipping table " + table.getFullName() + " for scan since there are no clustering columns");
}
ops.put(
namer.nameFor(table, "optype", "scan", "blockname", blockname),
Map.of(
@ -437,49 +441,10 @@ public class CGWorkloadExporter {
.replaceAll("BINDINGS",
String.join(", ",
table.getColumnDefinitions().stream()
.map(cdef -> {
if (cdef.isLastPartitionKey()) {
return dividedBinding(cdef, table);
} else {
return bindings.forColumn(cdef);
}
})
.map(binder::forColumn)
.map(c -> "{" + c.getName() + "}").toList()));
}
private Binding dividedBinding(CqlColumnDef columnDef, CqlTable tableDef) {
CGTableStats stats = tableDef.getTableAttributes();
if (stats == null) {
return bindings.forColumn(columnDef);
}
String partitionsSpec = stats.getAttribute("Number of partitions (estimate)");
if (partitionsSpec == null) {
}
double estimatedPartitions = Double.parseDouble(partitionsSpec);
long modulo = (long) (estimatedPartitions *= partitionMultiplier);
if (modulo == 0) {
return bindings.forColumn(columnDef);
}
modulo = quantizeModuloByMagnitude(modulo, 1);
logger.debug("Set partition modulo for " + tableDef.getFullName() + " to " + modulo);
Binding binding = bindings.forColumn(columnDef, "Mod(" + modulo + "L); ");
return binding;
}
public static long quantizeModuloByMagnitude(long modulo, int significand) {
double initial = modulo;
double log10 = Math.log10(initial);
int zeroes = (int) log10;
zeroes = Math.max(1, zeroes - (significand - 1));
long fractional = (long) Math.pow(10, zeroes);
long partial = ((long) initial / fractional) * fractional;
long nextPartial = partial + fractional;
if (Math.abs(initial - partial) <= Math.abs(initial - nextPartial)) {
return partial;
} else {
return nextPartial;
}
}
private Map<String, Object> genUpdateOpTemplates(CqlModel model, String blockname) {
Map<String, Object> blockdata = new LinkedHashMap<>();
@ -579,8 +544,7 @@ public class CGWorkloadExporter {
private String genPredicatePart(CqlColumnDef def) {
String typeName = def.getTrimmedTypedef();
Binding binding = bindings.forColumn(def);
Binding binding = binder.forColumn(def);
return def.getName() + "={" + binding.getName() + "}";
}
@ -601,11 +565,11 @@ public class CGWorkloadExporter {
for (CqlColumnDef coldef : table.getNonKeyColumnDefinitions()) {
if (coldef.isCounter()) {
sb.append(coldef.getName()).append("=")
.append(coldef.getName()).append("+").append("{").append(bindings.forColumn(coldef).getName()).append("}")
.append(coldef.getName()).append("+").append("{").append(binder.forColumn(coldef).getName()).append("}")
.append(", ");
} else {
sb.append(coldef.getName()).append("=")
.append("{").append(bindings.forColumn(coldef).getName()).append("}")
.append("{").append(binder.forColumn(coldef).getName()).append("}")
.append(", ");
}
}
@ -617,6 +581,19 @@ public class CGWorkloadExporter {
public String getWorkloadAsYaml() {
if (model.isEmpty()) {
throw new RuntimeException("Can't build a workload yaml with no elements to process. The parsed model is empty. Did you filter everything out?");
}
this.namer = new NamingFolio(this.namingTemplate);
BindingsLibrary defaultBindings = new CGDefaultCqlBindings();
BindingsAccumulator bindingslib = new BindingsAccumulator(namer, List.of(defaultBindings));
this.binder = new CGColumnRebinder(bindingslib, 10, 1);
namer.informNamerOfAllKnownNames(model);
Map<String, Object> workload = generateBlocks();
((Map<String, String>) workload.get("bindings")).putAll(bindingslib.getAccumulatedBindings());
DumpSettings dumpSettings = DumpSettings.builder()
.setDefaultFlowStyle(FlowStyle.BLOCK)
.setIndent(2)
@ -631,7 +608,7 @@ public class CGWorkloadExporter {
BaseRepresenter r;
Dump dump = new Dump(dumpSettings);
Map<String, Object> workload = generateBlocks();
return dump.dumpToString(workload);
}
@ -667,7 +644,7 @@ public class CGWorkloadExporter {
Map<String, Object> dropTypesBlock = new LinkedHashMap<>();
Map<String, Object> ops = new LinkedHashMap<>();
dropTypesBlock.put("ops", ops);
for (CqlType type : model.getTypes()) {
for (CqlType type : model.getTypeDefs()) {
ops.put(
namer.nameFor(type, "optype", "drop-type", "blockname", blockname),
Map.of(
@ -683,7 +660,7 @@ public class CGWorkloadExporter {
Map<String, Object> dropTypesBlock = new LinkedHashMap<>();
Map<String, Object> ops = new LinkedHashMap<>();
dropTypesBlock.put("ops", ops);
for (CqlType type : model.getTypes()) {
for (CqlType type : model.getTypeDefs()) {
ops.put(
namer.nameFor(type, "optype", "drop-keyspace", "blockname", blockname),
Map.of(
@ -736,8 +713,8 @@ public class CGWorkloadExporter {
Map<String, Object> ops = new LinkedHashMap<>();
blockdata.put("ops", ops);
for (String keyspace : model.getTypesByKeyspaceAndName().keySet()) {
for (CqlType type : model.getTypesByKeyspaceAndName().get(keyspace).values()) {
for (String keyspace : model.getTypesByKeyspaceThenName().keySet()) {
for (CqlType type : model.getTypesByKeyspaceThenName().get(keyspace).values()) {
ops.put(
namer.nameFor(type, "optype", "create", "blockname", blockname),
Map.of(
@ -766,12 +743,8 @@ public class CGWorkloadExporter {
Map<String, Object> schemablock = new LinkedHashMap<>();
Map<String, Object> ops = new LinkedHashMap<>();
for (String ksname : model.getTablesByNameByKeyspace().keySet()) {
for (CqlTable cqltable : model.getTablesByNameByKeyspace().get(ksname).values()) {
if (elideUnusedTables && totalRatioFor(cqltable) == 0.0d) {
logger.info("eliding table " + ksname + "." + cqltable.getName() + " since its total op ratio was " + totalRatioFor(cqltable));
continue;
}
for (String ksname : model.getTableDefsByKeyspaceThenTable().keySet()) {
for (CqlTable cqltable : model.getTableDefsByKeyspaceThenTable().get(ksname).values()) {
ops.put(
namer.nameFor(cqltable, "optype", "create", "blockname", blockname),
Map.of(

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.binders;
package io.nosqlbench.cqlgen.exporter.binders;
public class Binding {
String name;

View File

@ -14,9 +14,9 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.binders;
package io.nosqlbench.cqlgen.exporter.binders;
import io.nosqlbench.converters.cql.cqlast.CqlColumnDef;
import io.nosqlbench.cqlgen.model.CqlColumnDef;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

View File

@ -14,9 +14,9 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.binders;
package io.nosqlbench.cqlgen.exporter.binders;
import io.nosqlbench.converters.cql.cqlast.CqlColumnDef;
import io.nosqlbench.cqlgen.model.CqlColumnDef;
import java.util.Optional;

View File

@ -14,12 +14,12 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.binders;
package io.nosqlbench.cqlgen.exporter.binders;
import io.nosqlbench.converters.cql.cqlast.CqlColumnDef;
import io.nosqlbench.converters.cql.cqlast.CqlModel;
import io.nosqlbench.converters.cql.cqlast.CqlTable;
import io.nosqlbench.converters.cql.exporters.CGElementNamer;
import io.nosqlbench.cqlgen.model.CqlColumnDef;
import io.nosqlbench.cqlgen.model.CqlModel;
import io.nosqlbench.cqlgen.model.CqlTable;
import io.nosqlbench.cqlgen.exporter.CGElementNamer;
import io.nosqlbench.api.labels.Labeled;
import java.util.*;

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.binders;
package io.nosqlbench.cqlgen.exporter.binders;
public enum NamingStyle {
/**

View File

@ -14,9 +14,9 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.binders;
package io.nosqlbench.cqlgen.exporter.binders;
import io.nosqlbench.converters.cql.cqlast.CqlColumnDef;
import io.nosqlbench.cqlgen.model.CqlColumnDef;
public class UnresolvedBindingException extends RuntimeException {
private final CqlColumnDef def;

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.transformers;
package io.nosqlbench.cqlgen.exporter.transformers;
import io.nosqlbench.api.config.NBNamedElement;
import io.nosqlbench.api.labels.Labeled;

View File

@ -14,10 +14,10 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.transformers;
package io.nosqlbench.cqlgen.exporter.transformers;
import io.nosqlbench.converters.cql.cqlast.CqlModel;
import io.nosqlbench.converters.cql.exporters.CGSchemaStats;
import io.nosqlbench.cqlgen.exporter.CGSchemaStats;
import io.nosqlbench.cqlgen.model.CqlModel;
import java.io.IOException;
import java.nio.file.Path;
@ -38,21 +38,25 @@ public class CGGenStatsInjector implements CGModelTransformer, CGTransformerConf
}
@Override
public void accept(Map<String, ?> config) {
String histogramPath = config.get("path").toString();
if (histogramPath != null) {
CGSchemaStats schemaStats = null;
Path statspath = Path.of(histogramPath);
try {
CqlSchemaStatsParser parser = new CqlSchemaStatsParser();
schemaStats = parser.parse(statspath);
this.schemaStats = schemaStats;
} catch (IOException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
public void accept(Object configObject) {
if (configObject instanceof Map config) {
} else schemaStats = null;
String histogramPath = config.get("path").toString();
if (histogramPath != null) {
CGSchemaStats schemaStats = null;
Path statspath = Path.of(histogramPath);
try {
CqlSchemaStatsParser parser = new CqlSchemaStatsParser();
schemaStats = parser.parse(statspath);
this.schemaStats = schemaStats;
} catch (IOException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
} else schemaStats = null;
} else {
throw new RuntimeException("stats injector requires a map for it's config value");
}
}
}

View File

@ -14,9 +14,9 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.transformers;
package io.nosqlbench.cqlgen.exporter.transformers;
import io.nosqlbench.converters.cql.cqlast.CqlModel;
import io.nosqlbench.cqlgen.model.CqlModel;
/**
* @deprecated Superseded by direct rendering from AST in generator

View File

@ -14,10 +14,10 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.transformers;
package io.nosqlbench.cqlgen.exporter.transformers;
import io.nosqlbench.converters.cql.cqlast.CqlKeyspace;
import io.nosqlbench.converters.cql.cqlast.CqlModel;
import io.nosqlbench.cqlgen.model.CqlKeyspace;
import io.nosqlbench.cqlgen.model.CqlModel;
import java.util.List;

View File

@ -0,0 +1,125 @@
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.cqlgen.exporter.transformers;
import io.nosqlbench.cqlgen.model.CqlModel;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Pattern;
/**
 * A model transformer which retains or removes keyspaces (and their tables and
 * types) based on an ordered list of include/exclude regex patterns.
 *
 * Patterns are evaluated in order for each keyspace name; the first pattern
 * that matches decides whether the keyspace is kept or removed. If no pattern
 * matches, the keyspace is left alone and a warning is logged suggesting a
 * catch-all pattern at the end of the list.
 */
public class CGKeyspaceFilter implements CGModelTransformer, CGTransformerConfigurable {
    private final static Logger logger = LogManager.getLogger(CGKeyspaceFilter.class);

    // Ordered filters, as configured via accept(...)
    private List<TriStateFilter> patterns;

    /** Whether a configured pattern includes or excludes matching keyspaces. */
    private enum InclExcl {
        include,
        exclude
    }

    /** The outcome of testing one keyspace name against one filter. */
    private enum Action {
        add,
        remove,
        indeterminate
    }

    /**
     * Apply the configured filters to every known keyspace in the model.
     * The first matching pattern wins: once a keyspace is included or removed,
     * later patterns are not consulted for it. This also prevents a trailing
     * default exclude (e.g. {@code exclude: '.*'}) from deleting a keyspace
     * that an earlier include pattern already claimed.
     *
     * @param model the CQL model to filter in place
     * @return the same model, possibly with keyspaces/tables/types removed
     */
    @Override
    public CqlModel apply(CqlModel model) {
        Set<String> keyspacenames = model.getAllKnownKeyspaceNames();
        for (String keyspace : keyspacenames) {
            Action action = Action.indeterminate;
            for (TriStateFilter pattern : patterns) {
                action = pattern.apply(keyspace);
                if (action == Action.add) {
                    logger.debug("including all definitions in " + keyspace + " with inclusion pattern " + pattern);
                    break;
                }
                if (action == Action.remove) {
                    logger.info("removing all definitions in " + keyspace + " with exclusion pattern " + pattern);
                    model.removeKeyspaceDef(keyspace);
                    model.removeTablesForKeyspace(keyspace);
                    model.removeTypesForKeyspace(keyspace);
                    break;
                }
                // indeterminate: keep testing subsequent patterns
            }
            if (action == Action.indeterminate) {
                logger.warn("Undetermined status of keyspace filter. No includes or excludes matched, and no default pattern was at the end of the list. Consider adding either include: '.*' or exclude: '.*' at the end.");
            }
        }
        return model;
    }

    /**
     * A single include-or-exclude regex. Yields {@link Action#indeterminate}
     * when the pattern does not match, so that the next filter may decide.
     */
    private static class TriStateFilter implements Function<String, Action> {
        private final InclExcl filterType;
        private final Pattern pattern;

        public TriStateFilter(String filterType, String pattern) {
            this(InclExcl.valueOf(filterType), Pattern.compile(pattern));
        }

        public TriStateFilter(InclExcl filterType, Pattern pattern) {
            this.filterType = filterType;
            this.pattern = pattern;
        }

        public TriStateFilter(Map.Entry<String, String> entry) {
            this(entry.getKey(), entry.getValue());
        }

        @Override
        public Action apply(String s) {
            return switch (filterType) {
                case exclude -> pattern.matcher(s).matches() ? Action.remove : Action.indeterminate;
                case include -> pattern.matcher(s).matches() ? Action.add : Action.indeterminate;
            };
        }

        public String toString() {
            return filterType + ": " + pattern.pattern();
        }
    }

    /**
     * Configure this filter from a list of single-keyed maps, each mapping
     * {@code include} or {@code exclude} to a regex, e.g.
     * {@code [{include: 'ks1'}, {exclude: '.*'}]}.
     *
     * @param cfgobj the raw config value; must be a List of single-entry maps
     * @throws RuntimeException if the config is not a list, or any entry has
     *         more than one key
     */
    @Override
    public void accept(Object cfgobj) {
        if (cfgobj instanceof List cfglist) {
            List<Map<String, String>> filters = (List<Map<String, String>>) cfglist;
            this.patterns = filters.stream()
                .map(m -> {
                        if (m.size() != 1) {
                            throw new RuntimeException("Each filter entry must be a single keyed map with include or exclude keys, and a regex value.");
                        }
                        return m.entrySet().iterator().next();
                    }
                ).map(TriStateFilter::new)
                .toList();
        } else {
            // The config value is a List of single-keyed maps, not a Map;
            // the previous message incorrectly demanded a Map here.
            throw new RuntimeException("keyspace filter requires a List for its config value, full of single-keyed maps as (include|exclude): regex");
        }
    }
}

View File

@ -14,9 +14,9 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.transformers;
package io.nosqlbench.cqlgen.exporter.transformers;
import io.nosqlbench.converters.cql.cqlast.CqlModel;
import io.nosqlbench.cqlgen.model.CqlModel;
import java.util.function.Function;

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.transformers;
package io.nosqlbench.cqlgen.exporter.transformers;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -67,9 +67,9 @@ public class CGModelTransformers implements Consumer<List<Map<String, ?>>>, Supp
// Configure Transformer IFF ...
if (transformer instanceof CGTransformerConfigurable configurable) {
Object cfgvalues = cfgmap.get("config");
if (cfgvalues instanceof Map txconfigmap) {
configurable.accept((txconfigmap));
logger.info("configured transformer with " + txconfigmap);
if (cfgvalues !=null ) {
configurable.accept((cfgvalues));
logger.info("configured transformer with " + cfgvalues);
}
}

View File

@ -20,11 +20,11 @@
* once an element is named, use the same name throughout
* prefix each element type with a code for the type
*/
package io.nosqlbench.converters.cql.exporters.transformers;
package io.nosqlbench.cqlgen.exporter.transformers;
import io.nosqlbench.converters.cql.cqlast.CqlModel;
import io.nosqlbench.converters.cql.cqlast.CqlTable;
import io.nosqlbench.converters.cql.cqlast.CqlType;
import io.nosqlbench.cqlgen.model.CqlModel;
import io.nosqlbench.cqlgen.model.CqlTable;
import io.nosqlbench.cqlgen.model.CqlType;
import io.nosqlbench.virtdata.core.bindings.DataMapper;
import io.nosqlbench.virtdata.core.bindings.VirtData;
import org.apache.logging.log4j.LogManager;
@ -43,21 +43,21 @@ public class CGNameObfuscator implements CGModelTransformer, CGTransformerConfig
public CqlModel apply(CqlModel model) {
for (String keyspaceName : model.getAllKnownKeyspaceNames()) {
String newKeyspaceName = remapper.nameForType("keyspace",keyspaceName,"ks_");
model.renamekeyspace(keyspaceName,newKeyspaceName);
String newKeyspaceName = remapper.nameForType("keyspace", keyspaceName, "ks_");
model.renamekeyspace(keyspaceName, newKeyspaceName);
}
for (CqlTable cqlTable : model.getTableDefs()) {
String tablename = cqlTable.getName();
String newTableName = remapper.nameFor(cqlTable,"tbl_");
String newTableName = remapper.nameFor(cqlTable, "tbl_");
model.renameTable(cqlTable, newTableName);
cqlTable.renameColumns(remapper.mapperForType(cqlTable, "col_"));
}
for (CqlType type : model.getTypes()) {
for (CqlType type : model.getTypeDefs()) {
String typeName = type.getName();
String newTypeName = remapper.nameFor(type,"typ_");
model.renameType(type.getKeyspace(),typeName,newTypeName);
String newTypeName = remapper.nameFor(type, "typ_");
model.renameType(type.getKeyspace(), typeName, newTypeName);
type.renameColumns(remapper.mapperForType(type, "typ"));
}
@ -66,12 +66,16 @@ public class CGNameObfuscator implements CGModelTransformer, CGTransformerConfig
}
@Override
public void accept(Map<String, ?> stringMap) {
Object namer = stringMap.get("namer");
Optional<DataMapper<String>> optionalMapper = VirtData.getOptionalMapper(namer.toString());
LongFunction<String> namerFunc = optionalMapper.orElseThrow(
() -> new RuntimeException("Unable to resolve obfuscator namer '" + namer + "'")
);
remapper.setNamingFunction(namerFunc);
public void accept(Object configObject) {
if (configObject instanceof Map cfgmap) {
Object namer = cfgmap.get("namer");
Optional<DataMapper<String>> optionalMapper = VirtData.getOptionalMapper(namer.toString());
LongFunction<String> namerFunc = optionalMapper.orElseThrow(
() -> new RuntimeException("Unable to resolve obfuscator namer '" + namer + "'")
);
remapper.setNamingFunction(namerFunc);
} else {
throw new RuntimeException("name obfuscator requires a map for its configuration value.");
}
}
}

View File

@ -14,11 +14,11 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.transformers;
package io.nosqlbench.cqlgen.exporter.transformers;
import io.nosqlbench.converters.cql.cqlast.CqlModel;
import io.nosqlbench.converters.cql.cqlast.CqlTable;
import io.nosqlbench.converters.cql.exporters.CGTableStats;
import io.nosqlbench.cqlgen.exporter.CGTableStats;
import io.nosqlbench.cqlgen.model.CqlModel;
import io.nosqlbench.cqlgen.model.CqlTable;
public class CGRatioCalculator implements CGModelTransformer {
@ -57,10 +57,13 @@ public class CGRatioCalculator implements CGModelTransformer {
double reads = Double.parseDouble(table.getTableAttributes().getAttribute("Local read count"));
double writes = Double.parseDouble(table.getTableAttributes().getAttribute("Local write count"));
table.getTableAttributes().setAttribute("weighted_reads", String.valueOf(reads / totalOps));
table.getTableAttributes().setAttribute("weighted_writes", String.valueOf(writes / totalOps));
table.getTableAttributes().setAttribute("weighted_space", String.valueOf(Double.parseDouble(table.getTableAttributes().getAttribute("Space used (total)")) / totalReads));
double totalTableReads = reads / totalOps;
double totalTableWrites = writes / totalOps;
table.getTableAttributes().setAttribute("weighted_reads", String.valueOf(totalTableReads));
table.getTableAttributes().setAttribute("weighted_writes", String.valueOf(totalTableWrites));
table.getTableAttributes().setAttribute("weighted_ops", String.valueOf(totalTableReads+totalTableWrites));
double tableSpaceUsed = Double.parseDouble(table.getTableAttributes().getAttribute("Space used (total)"));
table.getTableAttributes().setAttribute("weighted_space", String.valueOf(tableSpaceUsed / totalSpace));
}
return model;

View File

@ -0,0 +1,124 @@
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.cqlgen.exporter.transformers;
import io.nosqlbench.cqlgen.exporter.CGTextTransformer;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A text transformer which applies an ordered list of regex replacements
 * repeatedly until the text reaches a fixed point (no replacer changes it),
 * optionally writing the final result to a file.
 *
 * Replacement templates may contain the tokens PREFIX and SUFFIX, which are
 * substituted from the configured {@code prefix}/{@code suffix} values at
 * configuration time.
 */
public class CGRegexReplacer implements CGTextTransformer, CGTransformerConfigurable {
    private final static Logger logger = LogManager.getLogger(CGRegexReplacer.class);

    private List<Replacer> replacers;
    private String prefix = "";
    private String suffix = "";
    private String outfile;

    /**
     * Apply every configured replacer to the text, looping until a full pass
     * produces no change. If an output file is configured, the converged text
     * is written exactly once, after the loop.
     *
     * @param text the input text
     * @return the text after all replacements have stabilized
     */
    @Override
    public String apply(String text) {
        String previous = "";
        while (!previous.equals(text)) {
            previous = text;
            int steps = 0;
            int replacements = 0;
            for (Replacer replacer : replacers) {
                steps++;
                logger.info("applying regex replacer #" + steps);
                text = replacer.apply(text);
                replacements += replacer.replacements;
            }
            logger.info(steps + " replacers applied. " + replacements + " replacements found total.");
        }
        // Write only the fully-converged text, and only once. Writing inside
        // the loop (as before) would re-open with CREATE_NEW on the second
        // pass and throw FileAlreadyExistsException whenever more than one
        // pass was needed.
        if (outfile != null) {
            try {
                if (outfile.startsWith("_")) {
                    // NOTE(review): a leading '_' appears to mean "overwrite an
                    // existing file"; TRUNCATE_EXISTING fails if it does not
                    // already exist — confirm intended semantics.
                    Files.write(Path.of(outfile), text.getBytes(), StandardOpenOption.TRUNCATE_EXISTING);
                } else {
                    Files.write(Path.of(outfile), text.getBytes(), StandardOpenOption.CREATE_NEW);
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
        return text;
    }

    /**
     * Configure this transformer from a map with optional {@code prefix},
     * {@code suffix} and {@code outfile} fields, and a required
     * {@code replacers} field: a list of two-element lists, each holding a
     * match regex and a replacement template.
     *
     * @param configObject the raw config value; must be a Map as described
     * @throws RuntimeException if the config is not a Map, or the replacers
     *         field is not a list of lists
     */
    @Override
    public void accept(Object configObject) {
        if (configObject instanceof Map stringMap) {
            this.prefix = stringMap.containsKey("prefix") ? stringMap.get("prefix").toString() : "";
            this.suffix = stringMap.containsKey("suffix") ? stringMap.get("suffix").toString() : "";
            this.outfile = stringMap.containsKey("outfile") ? stringMap.get("outfile").toString() : null;
            Object replacersObject = stringMap.get("replacers");
            if (replacersObject instanceof List list) {
                List<List<String>> replacerSpecs = (List<List<String>>) list;
                this.replacers = replacerSpecs.stream()
                    .map(l -> new Replacer(l.get(0), l.get(1).replaceAll("PREFIX", prefix).replaceAll("SUFFIX", suffix)))
                    .toList();
            } else {
                throw new RuntimeException("regex replacer needs a list of lists for its replacers field, with each list consisting" +
                    " of a single regex matcher and a single regex replacer.");
            }
        } else {
            throw new RuntimeException("regex replacer requires a Map for its config value, with a replacer field.");
        }
    }

    /**
     * One compiled regex and its replacement template. Matching is
     * multi-line and case-insensitive. Counts replacements across calls in
     * the public {@code replacements} field.
     */
    private final static class Replacer implements Function<String, String> {
        private final Pattern pattern;
        private final String replacement;
        public int replacements = 0;

        Replacer(String from, String to) {
            this.pattern = Pattern.compile("(?m)(?i)" + from);
            this.replacement = to;
        }

        @Override
        public String apply(String s) {
            Matcher matcher = pattern.matcher(s);
            StringBuilder sb = new StringBuilder();
            while (matcher.find()) {
                matcher.appendReplacement(sb, replacement);
                // The previous debug-only sb.subSequence(matcher.start(), matcher.end())
                // used indices of the *input* string against the builder and could
                // throw StringIndexOutOfBoundsException; removed.
                this.replacements++;
            }
            matcher.appendTail(sb);
            return sb.toString();
        }
    }
}

View File

@ -14,10 +14,10 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.transformers;
package io.nosqlbench.cqlgen.exporter.transformers;
import io.nosqlbench.converters.cql.cqlast.CqlKeyspace;
import io.nosqlbench.converters.cql.cqlast.CqlModel;
import io.nosqlbench.cqlgen.model.CqlKeyspace;
import io.nosqlbench.cqlgen.model.CqlModel;
import java.util.Map;
@ -33,9 +33,13 @@ public class CGReplicationSettingInjector implements CGModelTransformer, CGTrans
}
@Override
public void accept(Map<String, ?> stringMap) {
if (stringMap.containsKey("replication_fields")) {
this.replicationFields = stringMap.get("replication_fields").toString();
public void accept(Object cfgObject) {
if (cfgObject instanceof Map stringMap) {
if (stringMap.containsKey("replication_fields")) {
this.replicationFields = stringMap.get("replication_fields").toString();
}
} else {
throw new RuntimeException("replication settings injector requires a map for its config value.");
}
}
}

View File

@ -14,10 +14,9 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.transformers;
package io.nosqlbench.cqlgen.exporter.transformers;
import java.util.Map;
import java.util.function.Consumer;
public interface CGTransformerConfigurable extends Consumer<Map<String, ?>> {
public interface CGTransformerConfigurable extends Consumer<Object> {
}

View File

@ -14,11 +14,11 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.transformers;
package io.nosqlbench.cqlgen.exporter.transformers;
import io.nosqlbench.converters.cql.cqlast.CqlColumnDef;
import io.nosqlbench.converters.cql.cqlast.CqlModel;
import io.nosqlbench.converters.cql.cqlast.CqlTable;
import io.nosqlbench.cqlgen.model.CqlColumnDef;
import io.nosqlbench.cqlgen.model.CqlModel;
import io.nosqlbench.cqlgen.model.CqlTable;
import java.util.List;
@ -26,7 +26,7 @@ public class CGUdtReplacer implements CGModelTransformer {
@Override
public CqlModel apply(CqlModel model) {
List<String> toReplace = model.getTypes().stream().map(t -> t.getKeyspace() + "." + t.getName()).toList();
List<String> toReplace = model.getTypeDefs().stream().map(t -> t.getKeyspace() + "." + t.getName()).toList();
for (CqlTable table : model.getTableDefs()) {
for (CqlColumnDef coldef : table.getColumnDefinitions()) {
String typedef = coldef.getTrimmedTypedef();

View File

@ -14,11 +14,11 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.exporters.transformers;
package io.nosqlbench.cqlgen.exporter.transformers;
import io.nosqlbench.converters.cql.exporters.CGKeyspaceStats;
import io.nosqlbench.converters.cql.exporters.CGSchemaStats;
import io.nosqlbench.converters.cql.exporters.CGTableStats;
import io.nosqlbench.cqlgen.exporter.CGKeyspaceStats;
import io.nosqlbench.cqlgen.exporter.CGSchemaStats;
import io.nosqlbench.cqlgen.exporter.CGTableStats;
import org.apache.commons.math4.util.Pair;
import java.io.BufferedReader;

View File

@ -0,0 +1,65 @@
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.cqlgen.exporter.transformers;
import io.nosqlbench.cqlgen.model.CqlModel;
import io.nosqlbench.cqlgen.model.CqlTable;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.List;
import java.util.Map;
/**
 * A model transformer which removes tables whose fraction of total operations
 * (the {@code weighted_ops} attribute computed by the ratio calculator) falls
 * below a configurable threshold. Requires schema stats to be present; a
 * no-op otherwise.
 */
public class UnusedTableRemover implements CGModelTransformer, CGTransformerConfigurable {
    private final static Logger logger = LogManager.getLogger(UnusedTableRemover.class);

    // Fraction of total ops below which a table is considered unused.
    private double minimumThreshold = 0.0001;

    /**
     * Remove low-traffic tables from the model in place.
     *
     * @param model the CQL model, possibly carrying per-table stats
     * @return the same model, with under-threshold tables removed
     */
    @Override
    public CqlModel apply(CqlModel model) {
        if (!model.hasStats()) {
            logger.warn("Unused table remover is not active since there are no stats provided.");
            return model;
        }
        List<CqlTable> tableDefs = model.getTableDefs();
        for (CqlTable table : tableDefs) {
            String weightedOpsSpec = table.getTableAttributes().getAttribute("weighted_ops");
            if (weightedOpsSpec == null) {
                // weighted_ops is computed by CGRatioCalculator; without it we
                // cannot judge this table, so skip it instead of NPEing in
                // Double.parseDouble(null).
                logger.warn("table " + table.getKeySpace() + "." + table.getName()
                    + " has no weighted_ops attribute; skipping unused-table check for it.");
                continue;
            }
            double weightedOps = Double.parseDouble(weightedOpsSpec);
            if (weightedOps < minimumThreshold) {
                // Keep the whole message inside the format string; the prior
                // version concatenated the table name into the pattern itself.
                logger.info(String.format(
                    "removing table %s.%s with minimum weighted_ops of %1.5f under %1.5f",
                    table.getKeySpace(), table.getName(), weightedOps, minimumThreshold)
                );
                model.getTableDefsByKeyspaceThenTable().get(table.getKeySpace()).remove(table.getName());
            }
        }
        return model;
    }

    /**
     * Configure the removal threshold from a map with an optional
     * {@code percentage_threshold} field.
     *
     * @param cfgObj the raw config value; must be a Map
     * @throws RuntimeException if the config is not a Map
     */
    @Override
    public void accept(Object cfgObj) {
        if (cfgObj instanceof Map stringMap) {
            Object fractionalThresholdSpec = stringMap.get("percentage_threshold");
            if (fractionalThresholdSpec != null) {
                this.minimumThreshold = Double.parseDouble(fractionalThresholdSpec.toString());
            }
        } else {
            throw new RuntimeException("unused table remover requires a Map for its config value.");
        }
    }
}

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.cqlast;
package io.nosqlbench.cqlgen.model;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.RecognitionException;

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.cqlast;
package io.nosqlbench.cqlgen.model;
public enum ColType {
PartitionComponent,

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.cqlast;
package io.nosqlbench.cqlgen.model;
import io.nosqlbench.api.config.NBNamedElement;
import io.nosqlbench.api.labels.Labeled;
@ -107,4 +107,12 @@ public class CqlColumnDef implements NBNamedElement, Labeled {
public boolean isLastClusteringColumn() {
return table.isLastClusteringColumn(position);
}
public CqlTable getTable() {
return this.table;
}
public String getFullName() {
return getKeyspace()+"."+getTable().getName()+"."+getName()+"(column)";
}
}

View File

@ -14,11 +14,11 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.cqlast;
package io.nosqlbench.cqlgen.model;
import io.nosqlbench.api.config.NBNamedElement;
import io.nosqlbench.api.labels.Labeled;
import io.nosqlbench.converters.cql.exporters.CGKeyspaceStats;
import io.nosqlbench.cqlgen.exporter.CGKeyspaceStats;
import java.util.Map;

View File

@ -14,11 +14,11 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.cqlast;
package io.nosqlbench.cqlgen.model;
import io.nosqlbench.converters.cql.exporters.CGKeyspaceStats;
import io.nosqlbench.converters.cql.exporters.CGSchemaStats;
import io.nosqlbench.converters.cql.exporters.CGTableStats;
import io.nosqlbench.cqlgen.exporter.CGKeyspaceStats;
import io.nosqlbench.cqlgen.exporter.CGSchemaStats;
import io.nosqlbench.cqlgen.exporter.CGTableStats;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -42,7 +42,7 @@ public class CqlModel {
private final Supplier<List<String>> errors;
Map<String, CqlKeyspace> keyspaceDefs = new LinkedHashMap<>();
Map<String, Map<String, CqlTable>> tableDefs = new LinkedHashMap<>();
Map<String, Map<String, CqlType>> types = new LinkedHashMap<>();
Map<String, Map<String, CqlType>> typeDefs = new LinkedHashMap<>();
CGSchemaStats schemaStats = null;
@ -131,7 +131,7 @@ public class CqlModel {
return new ArrayList<>(this.keyspaceDefs.values());
}
public Map<String, Map<String, CqlTable>> getTablesByNameByKeyspace() {
public Map<String, Map<String, CqlTable>> getTableDefsByKeyspaceThenTable() {
return tableDefs;
}
@ -195,14 +195,14 @@ public class CqlModel {
public void saveType(String keyspace, String name) {
udt.setKeyspace(keyspace);
udt.setName(name);
Map<String, CqlType> ksTypes = this.types.computeIfAbsent(keyspace, ks -> new LinkedHashMap<>());
Map<String, CqlType> ksTypes = this.typeDefs.computeIfAbsent(keyspace, ks -> new LinkedHashMap<>());
ksTypes.put(udt.getName(),udt);
udt=null;
}
public List<CqlType> getTypes() {
public List<CqlType> getTypeDefs() {
ArrayList<CqlType> list = new ArrayList<>();
for (Map<String, CqlType> cqlTypesByKeyspace : types.values()) {
for (Map<String, CqlType> cqlTypesByKeyspace : typeDefs.values()) {
for (CqlType cqlType : cqlTypesByKeyspace.values()) {
list.add(cqlType);
}
@ -219,13 +219,13 @@ public class CqlModel {
}
public void removeTypesForKeyspace(String name) {
this.types.remove(name);
this.typeDefs.remove(name);
}
public String getSummaryLine() {
return "keyspaces: " + keyspaceDefs.size() + ", tables: " + getTableDefs().size() +
", columns: " + getTableDefs().stream().mapToInt(t -> t.getColumnDefinitions().size()).sum() +
", types: " + getTypes().size();
", types: " + getTypeDefs().size();
}
public void renamekeyspace(String keyspaceName, String newKeyspaceName) {
@ -243,14 +243,14 @@ public class CqlModel {
}
this.tableDefs.put(newKeyspaceName, tablesForKeyspace);
}
if (this.types.containsKey(keyspaceName)) {
Map<String, CqlType> typesForKeyspace = this.types.remove(keyspaceName);
if (this.typeDefs.containsKey(keyspaceName)) {
Map<String, CqlType> typesForKeyspace = this.typeDefs.remove(keyspaceName);
if (typesForKeyspace!=null) {
for (CqlType cqltype : typesForKeyspace.values()) {
cqltype.setKeyspace(newKeyspaceName);
}
}
this.types.put(newKeyspaceName,typesForKeyspace);
this.typeDefs.put(newKeyspaceName,typesForKeyspace);
}
}
@ -262,7 +262,7 @@ public class CqlModel {
}
public void renameType(String keyspaceName, String typeName, String newTypeName) {
Map<String,CqlType> typesInKeyspace = types.get(keyspaceName);
Map<String,CqlType> typesInKeyspace = typeDefs.get(keyspaceName);
CqlType cqlType = typesInKeyspace.remove(typeName);
cqlType.setName(newTypeName);
typesInKeyspace.put(newTypeName,cqlType);
@ -280,11 +280,15 @@ public class CqlModel {
keyspace.setReplicationData(repldata);
}
public Map<String, Map<String, CqlType>> getTypesByKeyspaceAndName() {
return types;
public Map<String, Map<String, CqlType>> getTypesByKeyspaceThenName() {
return typeDefs;
}
public void addClusteringOrder(String colname, String order) {
table.addTableClusteringOrder(colname, order);
}
public boolean isEmpty() {
return this.keyspaceDefs.size()==0 && this.tableDefs.size()==0 && this.typeDefs.size()==0;
}
}

View File

@ -14,10 +14,11 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.cqlast;
package io.nosqlbench.cqlgen.model;
import io.nosqlbench.converters.cql.generated.CqlParser;
import io.nosqlbench.converters.cql.generated.CqlParserBaseListener;
import io.nosqlbench.cqlgen.generated.CqlParser;
import io.nosqlbench.cqlgen.generated.CqlParserBaseListener;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.tree.ErrorNode;

View File

@ -14,11 +14,11 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.cqlast;
package io.nosqlbench.cqlgen.model;
import io.nosqlbench.api.config.NBNamedElement;
import io.nosqlbench.api.labels.Labeled;
import io.nosqlbench.converters.cql.exporters.CGTableStats;
import io.nosqlbench.cqlgen.exporter.CGTableStats;
import java.util.*;
import java.util.function.Function;

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.cqlast;
package io.nosqlbench.cqlgen.model;
import io.nosqlbench.api.config.NBNamedElement;
import io.nosqlbench.api.labels.Labeled;

View File

@ -14,14 +14,14 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.parser;
package io.nosqlbench.cqlgen.parser;
import io.nosqlbench.converters.cql.cqlast.CGErrorListener;
import io.nosqlbench.converters.cql.cqlast.CqlModel;
import io.nosqlbench.converters.cql.cqlast.CqlModelBuilder;
import io.nosqlbench.converters.cql.cqlast.CqlType;
import io.nosqlbench.converters.cql.generated.CqlLexer;
import io.nosqlbench.converters.cql.generated.CqlParser;
import io.nosqlbench.cqlgen.generated.CqlLexer;
import io.nosqlbench.cqlgen.generated.CqlParser;
import io.nosqlbench.cqlgen.model.CGErrorListener;
import io.nosqlbench.cqlgen.model.CqlModel;
import io.nosqlbench.cqlgen.model.CqlModelBuilder;
import io.nosqlbench.cqlgen.model.CqlType;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CodePointCharStream;
import org.antlr.v4.runtime.CommonTokenStream;
@ -53,7 +53,7 @@ public class CqlModelParser {
public static CqlType parseCqlType(String input) {
CqlModel parsed = parse(input, null);
List<CqlType> types = parsed.getTypes();
List<CqlType> types = parsed.getTypeDefs();
if (types.size()!=1) {
throw new RuntimeException("error parsing typedef");
}

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package io.nosqlbench.converters.cql.traverser;
package io.nosqlbench.cqlgen.traverser;
import java.nio.file.Path;

View File

@ -22,8 +22,8 @@ import com.datastax.oss.driver.api.core.type.DataTypes;
import com.datastax.oss.driver.api.core.type.MapType;
import com.datastax.oss.driver.api.core.type.UserDefinedType;
import com.datastax.oss.driver.internal.core.type.UserDefinedTypeBuilder;
import io.nosqlbench.converters.cql.cqlast.CqlType;
import io.nosqlbench.converters.cql.parser.CqlModelParser;
import io.nosqlbench.cqlgen.model.CqlType;
import io.nosqlbench.cqlgen.parser.CqlModelParser;
import java.util.function.LongFunction;

View File

@ -53,6 +53,27 @@ Configure from multiple sources:
driverconfig=myconfig.json
### Basic Cqld4 driver options
- **hosts** & **localdc** - (required unless using scb) - Set the endpoint and local datacenter name
for the driver.
- example: `host=mydsehost localdc=testdc1`
- **driverconfig** - (explained above) - set the configuration source for the driver.
- **username** OR **userfile** - (optional, only one may be used) - If you need to specify a
username but want to put it in a file instead, simply use the `userfile=myfile` option. It is
not uncommon to say `userfile=userfile`.
* **password** OR **passfile** - (optional, only one may be used) - If you need to specify a
password but want to put it in a file instead, simply use the `passfile=mypassfile` option. It
is not uncommon to say `passfile=passfile`.
* **showstmt** - enable per-statement diagnostics which show as much of the statement as possible
for the given statement type. *WARNING* - Do not use this for performance testing, only for
diagnostics.
* **maxpages** - configure the maximum number of pages allowed in a CQL result set. This is
configured to `maxpages=1` by default, so that users will be aware of any paging that occurs
by default. If you expect and want to allow paging in your operation, then set this number
higher. A *synthetic* exception is generated as `UnexpectedPagingException` by default when
the number of pages exceeds maxpages.
### Activity level Driver Config
The activity parameters which are provided by the driver are exposed as `driver.<name>`. Any
@ -69,22 +90,22 @@ well. Where possible, a backwards-compatible option helper was provided so that
not possible as some options were no longer supported, or changed so much that there was no longer a
direct mapping that would work consistently across versions. You can try to use the previous
options, like `pooling` and so on. If the option is not supported as such, it will cause an error
with an explanation. Otherwise, these helper options will simply set the equivalent options
in the driver profile to achieve the same effect. As stated above, it is highly recommended that
driver settings be captured in a configuration file and set with `driverconfig=<file>.json`
with an explanation. Otherwise, these helper options will simply set the equivalent options in the
driver profile to achieve the same effect. As stated above, it is highly recommended that driver
settings be captured in a configuration file and set with `driverconfig=<file>.json`
## Statement Forms
The CQLd4 driver supports idiomatic usage of all the main statement APIs within the native Java
driver. The syntax for specifying these types is simplified as well, using only a single
`type` field which allows values of simple, prepared, raw, gremlin, fluent, and so on.
The previous form of specifing `type: cql` and optional modifiers like `prepared` and
`parameterized` is deprecated now, sinces all the forms are explicitly supported by a
well-defined type name.
`type` field which allows values of simple, prepared, raw, gremlin, fluent, and so on. The previous
form of specifying `type: cql` and optional modifiers like `prepared` and
`parameterized` is deprecated now, since all the forms are explicitly supported by a well-defined
type name.
The previous form will work, but you will get a warning, as these should be deprecated
going forward. It is best to use the forms in the examples below. The defaults and field
names for the classic form have not changed.
The previous form will work, but you will get a warning, as these should be deprecated going
forward. It is best to use the forms in the examples below. The defaults and field names for the
classic form have not changed.
## CQLd4 Op Template Examples
@ -133,11 +154,10 @@ names for the classic form have not changed.
## CQL Op Template - Optional Fields
If any of these are provided as op template fields or as op params, or
as activity params, then they will have the described effect. The calls
to set these parameters on an individual statement are only incurred if
they are provided. Otherwise, defaults are used. These options can be
applied to any of the statement forms above.
If any of these are provided as op template fields or as op params, or as activity params, then they
will have the described effect. The calls to set these parameters on an individual statement are
only incurred if they are provided. Otherwise, defaults are used. These options can be applied to
any of the statement forms above.
```yaml
params:
@ -167,6 +187,17 @@ params:
# fractional seconds, like 0.500
timeout: 2.0
# Set the maximum number of allowed pages for this request before a
# UnexpectedPagingException is thrown.
maxpages: 1
# Set the LWT rebinding behavior for this statement. If set to true, then
# any statement result which was not applied will be retried with the
# conditional fields set to the currently visible values. This makes all LWT
# statements do another round trip of retrying (assuming the data doesn't
# match the preconditions) in order to test LWT performance.
retryreplace: true
## The following options are meant for advanced testing scenarios only,
## and are not generally meant to be used in typical application-level,
## data mode, performance or scale testing. These expose properties
@ -225,14 +256,14 @@ params:
## Driver Cache
Like all driver adapters, the CQLd4 driver has the ability to use multiple low-level
driver instances for the purposes of advanced testing. To take advantage of this,
simply set a `space` parameter in your op templates, with a dynamic value.
__WARNING__: If you use the driver cache feature, be aware that creating a large
number of driver instances will be very expensive. Generally driver instances are meant
to be initialized and then shared throughout the life-cycle of an application process.
Thus, if you are doing multi-instance driver testing, it is best to use bindings
functions for the `space` parameter which have bounded cardinality per host.
Like all driver adapters, the CQLd4 driver has the ability to use multiple low-level driver
instances for the purposes of advanced testing. To take advantage of this, simply set a `space`
parameter in your op templates, with a dynamic value.
__WARNING__: If you use the driver cache feature, be aware that creating a large number of driver
instances will be very expensive. Generally driver instances are meant to be initialized and then
shared throughout the life-cycle of an application process. Thus, if you are doing multi-instance
driver testing, it is best to use bindings functions for the `space` parameter which have bounded
cardinality per host.

View File

@ -0,0 +1,98 @@
# cqlgen - A default CQL workload generator
With NB5, the cqld4 driver comes with a workload generator that can be used to generate a
workload yaml from a CQL schema file.
Development on this workload generator is just starting, but it is already in a useful state.
Eventually, workload observation and monitoring methods will be used to create workloads which
more accurately emulate those in-situ.
## Inputs & Outputs
You need the cql schema for whatever keyspaces, types, and tables you want to include.
Optionally, you can provide a table stats file which is generated as
`nodetool tablestats > tablestats`.
Note: The table stats file provides results only for a single node. As such, you will want to
adjust a config parameter called `partition_multiplier` to improve accuracy of the generated
workload. Further, the file only contains traffic and data details since the last time your node
was restarted, thus it may not be representative of the overall character of your workload and data.
## Usage
A typical cqlgen command looks like this:
```
nb5 cqlgen myschema.cql myworkload.yaml myhistograms
```
1. The first option is simply the name of the .cql file containing your schema.
2. The second is the name of the output yaml. The generator will *not* overwrite this file for you.
3. The third option is an optional table stats file created with `nodetool tablestats >tablestats`.
If provided, the reads, writes, and estimated partition counts will be used to weight the
workload to the tables and data sizes automatically.
For now, it is almost certain that you'll need to extract the configs and tailor them as
described below. Then, when you run `nb5 cqlgen ...` the config files in the current directory will
be used.
## Workload Patterns
The initial version of the cql workload generator provides these defaults:
* All keyspaces, tables, or types which are provided on the input are included in the workload.
* All create syntax has "if not exists" added.
* All drop syntax has "if exists" added.
* All table DDL properties are ignored except for durable writes.
* The default replication settings are as for local testing with SimpleReplicationStrategy. For
testing on a proper cluster with NetworkTopology or in Astra, you'll need to modify this in
the configs explained below.
* All UDTs are converted to blobs. This will be replaced by a layer which understands UDTs very
soon.
* Data bindings are created using the simplest possible binding recipes that work.
* Cardinalities on partition-specific bindings are multiplied by 10. This presumes even data
distribution, replication factor 3, and 30 nodes. This method will be improved in the future.
* For the main phase, reads, writes, updates, and scans are included, 1 each.
* reads select * from a fully qualified predicate.
* writes will write to all named fields.
* updates change all fields with a fully qualified predicate.
* scan-10 will read up to 10 consecutive rows from a partition with a partially qualified
predicate. This means the last clustering column is not included in the predicates. Single
key (1 partition component) tables do not have a scan created for them.
* When partition estimates are provided, all read and writes statements have predicates for
the last partition component modified to modulo by the estimated partition cardinality.
## Fine Tuning
The generator uses two internal files for the purposes of setting defaults:
- cqlgen.conf - a yaml formatted configuration file.
- cqlgen-bindings.yaml
Both of these files will be read from the internal nb5 resources unless you pull them into the
local directory with these commands:
```
nb5 --copy cqlgen/cqlgen-bindings.yaml
nb5 --copy cqlgen/cqlgen.conf
```
The details of how to customize these files are included within them. The cqlgen-bindings.yaml
file contains default bindings by type. If you get UnresolvedBindingsException when trying to
generate a workload, then a binding for the type in question must be added to the
cqlgen-bindings.yaml file.
The cqlgen.conf controls much of the internal wiring of the workload generator. Modifying it
gives you the ability to enable and disable certain stages and behaviors, like:
* obfuscating all keyspace, table, and column names
* keyspaces to include by name
* tables to exclude by traffic
* setting the replication fields
* default timeouts
* block naming and construction (which type of operations are included in each)
These are mostly controlled by a series of processing phases known as transformers.
Some transformers depend on others upstream, but if the data provided is not sufficient, they
will silently pass-through.
This is a new feature of the NoSQLBench driver. If you are an early adopter, please reach out
with ideas, or for requests and support as needed.

View File

@ -1,28 +1,35 @@
text_transformers:
# next step to do on this is to make the grammar support recursive type defs, because even after fixups,
# some dsefs defs like inode_ids set<frozen<tuple<timeuuid, timestamp>>>, are not supported yet
# text_transformers:
# - class: CGRegexReplacer
# config:
# prefix: "r__"
# suffix: ""
# outfile: _replaced.cql
# replacers:
# - - '(\s*)(options|role|roles|permissions|permission|date|key|timestamp|type|keys) ([a-zA-Z][a-zA-Z<>_-]*)(,?)'
# - '$1base$2 $3$4'
# - - '(.*PRIMARY KEY .*?)\b(options|role|roles|permissions|permission|date|key|timestamp|type|keys)\b(.*)'
# - '$1base$2$3'
# - - '(.*CLUSTERING ORDER BY.+?)\b(options|role|roles|permissions|permission|date|key|timestamp|type|keys)\b(.*)'
# - '$1base$2$3'
# - - '(.*CREATE TABLE system_auth\.)(options|role|roles|permissions|permission|date|key|timestamp|type|keys)\b(.*)'
# - '$1base$2$3'
# - - '^(\s*?)\b(options|role|roles|permissions|permission|date|key|timestamp|type|keys)\b(\s+[a-zA-Z][a-zA-Z<>,_ -]*?,?)$'
# - '$1PREFIX$2SUFFIX$3'
# - - '^(.*?PRIMARY KEY.*?)\b(options|role|roles|permissions|permission|date|key|timestamp|type|keys)\b(.*?)$'
# - '$1PREFIX$2SUFFIX$3'
# - - '^(.*?CLUSTERING ORDER BY.+?)\b(options|role|roles|permissions|permission|date|key|timestamp|type|keys)\b(.*?)$'
# - '$1PREFIX$2SUFFIX$3'
# - - '^(\s*?CREATE TABLE.+?)\b(options|role|roles|permissions|permission|date|key|timestamp|type|keys)\b(.*?)$'
# - '$1PREFIX$2SUFFIX$3'
#
model_transformers:
# filters in or out keyspaces
# filters in or out keyspaces
- class: CGKeyspaceFilter
config:
include:
- dba_info
- mcs_or_prod
- prod_parallel_test
- vzw_order
- vzw_common
- vzw_soe
- exclude: system
- exclude: system_.*
- exclude: dse_.*
- exclude: dsefs_.*
- exclude: cfs_.*
- exclude: cfs
- exclude: HiveMetaStore
- exclude: spark_system
- include: .*
# replaces the replication settings with the provided values here,
# specified as a text block to be put inside the curly braces
@ -45,20 +52,35 @@ model_transformers:
- class: CGUdtReplacer
# Reads a configured file path containing nodetool histogram stats output
# If no histostats file is provided, then this is skipped, including
# any downstream usage of this data
- class: CGGenStatsInjector
config:
path: histogram
path: tablestats
# Uses nodetool histogram stats to weight reads and writes over all ops
# This depends on data from the stats injector above. If not provided,
# this skips modifying ratios gracefully and they are all just set to 1
# as usual.
- class: CGRatioCalculator
# if this is set, and the fractional rate of operations against a table
# counting reads and writes is less than the percent threshold, then
# the table will be excluded from all op template generation.
- class: UnusedTableRemover
config:
# this is as a fractional number, so 0.1 is the same as 10%
minimum_threshold: 0.001
# replaces names of keyspaces, tables, and columns with generated values
- class: CGNameObfuscator
config:
namer: Combinations('0-9;0-9;0-9;0-9;0-9');
# This controls how the elements in the schema are named in the yaml.
# This affects block names, op template names and so on, and also how
# op templates will be named in all logs and metric views.
naming_template: "[OPTYPE-][KEYSPACE-][TYPE-][NAME]"
# for more distinction in metric names if needed:
#naming_template: "[BLOCKNAME-][OPTYPE-][KEYSPACE-][TYPE-][NAME]"
@ -66,13 +88,17 @@ naming_template: "[OPTYPE-][KEYSPACE-][TYPE-][NAME]"
# found in the node tool stats. If you have the actual value from complete
# statistics, this can be 1.0, but it is generally not accurate to take
# stats from one node. This allows for sizing up those estimates according
# to anecdotal information.
# to anecdotal information. The general rule of thumb on this is to take
# the number of nodes in your cluster and divide by the replication factor.
partition_multiplier: 30.0
# Timeouts for each operation category. These are specified in terms of seconds.
# fractional values are allowed. If not specified, all operations will default to
# using 10 seconds. Internally, these are configured as milliseconds.
# using 10 seconds. Internally, these are configured as milliseconds. If multiple
# timeouts are found in a block, then timeouts will remain specific to each op
# template. If they are all the same, then they are pulled up to a single param
# at the root of the block.
timeouts:
create: 60.0
truncate: 900.0
@ -83,9 +109,6 @@ timeouts:
update: 10.0
delete: 10.0
# if this is set, and the total ratio for a table is 0.0D, then it will
# not be added to the table DDL
elide_unused_tables: false
# future use, not active right now
blockplan:

View File

@ -1,419 +1,7 @@
# cql driver
This is a driver which allows for the execution of CQL statements. This driver supports both sync and async modes, with
detailed metrics provided for both.
The cql driver functionality is now provided by the cqld4 driver adapter.
Please see the cqld4 help topic with `nb5 help cqld4`
### Example activity definitions
Run a cql activity named 'cql1', with definitions from activities/cqldefs.yaml
... driver=cql alias=cql1 workload=cqldefs
Run a cql activity defined by cqldefs.yaml, but with shortcut naming
... driver=cql workload=cqldefs
Only run statement groups which match a tag regex
... driver=cql workload=cqldefs tags=group:'ddl.*'
Run the matching 'dml' statements, with 100 cycles, from [1000..1100)
... driver=cql workload=cqldefs tags=group:'dml.*' cycles=1000..1100
This last example shows that the cycle range is [inclusive..exclusive),
to allow for stacking test intervals. This is standard across all
activity types.
### CQL ActivityType Parameters
- **cqldriver** - default: dse - The type of driver to use, either dse, or oss. If you need DSE-specific features, use
the dse driver. If you are connecting to an OSS Apache Cassandra cluster, you must use the oss driver. The oss driver
option is only available in nosqlbench.
- **host** - The host or hosts to use for connection points to
the cluster. If you specify multiple values here, use commas
with no spaces.
Examples:
- `host=192.168.1.25`
- `host=192.168.1.25,testhost42`
- **workload** - The workload definition which holds the schema and statement defs.
see workload yaml location for additional details
(no default, required)
- **port** - The port to connect with
- **cl** - An override to consistency levels for the activity. If
this option is used, then all consistency levels will be replaced
by this one for the current activity, and a log line explaining
the difference with respect to the yaml will be emitted.
This is not a dynamic parameter. It will only be applied at
activity start.
- **cbopts** - default: none - this is how you customize the cluster
settings for the client, including policies, compression, etc. This
is a string of *Java*-like method calls just as you would use them
in the Cluster.Builder fluent API. They are evaluated inline
with the default Cluster.Builder options not covered below.
Example: cbopts=".withCompression(ProtocolOptions.Compression.NONE)"
- **whitelist** default: none - Applies a whitelist policy to the load balancing
policy in the driver. If used, a WhitelistPolicy(RoundRobinPolicy())
will be created and added to the cluster builder on startup.
Examples:
- `whitelist=127.0.0.1`
- `whitelist=127.0.0.1:9042,127.0.0.2:1234`
- **retrypolicy** default: none - Applies a retry policy in the driver
The only option supported for this version is `retrypolicy=logging`,
which uses the default retry policy, but with logging added.
- **reconnectpolicy** default: none - Applies a reconnection policy in the
driver Supports
either `reconnectpolicy=exponential(minDelayInMs,maxDelayInMs)`
or `reconnectpolicy=constant(delayInMs)`. The driver reconnects using
this policy when the entire cluster becomes unavailable.
- **protocol_version** default: unset, defaults to driver default behavior
- Set the CQL protocol version. Valid values are V1, V2, V3, V4, V5,
DSE_V1, DSE_V2. Protocol is usually auto-negotiated, however, the
initial connection may use a lower protocol to ensure connectivity to
older server versions. If you know you are running on a newer server
version, you can set this to match.
- **pooling** default: none - Applies the connection pooling options to
the policy. Examples:
- `pooling=4:10`
keep between 4 and 10 connections to LOCAL hosts
- `pooling=4:10,2:5`
keep 4-10 connections to LOCAL hosts and 2-5 to REMOTE
- `pooling=4:10:2000`
keep between 4-10 connections to LOCAL hosts with
up to 2000 requests per connection
- `pooling=5:10:2000,2:4:1000` keep between 5-10 connections to
LOCAL hosts with up to 2000 requests per connection, and 2-4
connection to REMOTE hosts with up to 1000 requests per connection
Additionally, you may provide the following options on pooling. Any
of these that are provided must appear in this order:
`,heartbeat_interval_s:n,idle_timeout_s:n,pool_timeout_ms:n`, so a
full example with all options set would appear as:
`pooling=5:10:2000,2:4:1000,heartbeat_interval_s:30,idle_timeout_s:120,pool_timeout_ms:5`
- **socketoptions** default: none - Applies any of the valid socket
options to the client when the session is built. Each of the options
uses the long form of the name, with either a numeric or boolean
value. Individual sub-parameters should be separated by a comma, and
the parameter names and values can be separated by either equals or a
colon. All of these values may be changed:
- read_timeout_ms
- connect_timeout_ms
- keep_alive
- reuse_address
- so_linger
- tcp_no_delay
- receive_buffer_size
- send_buffer_size
Examples:
- `socketoptions=read_timeout_ms=23423,connect_timeout_ms=4444`
- `socketoptions=tcp_no_delay=true`
- **tokens** default: unset - Only executes statements that fall within
any of the specified token ranges. Others are counted in metrics
as skipped-tokens, with a histogram value of the cycle number.
Examples:
- tokens=1:10000,100000:1000000
- tokens=1:123456
- **maxtries** - default: 10 - how many times an operation may be
attempted before it is disregarded
- **maxpages** - default: 1 - how many pages can be read from a query which
is larger than the fetchsize. If more than this number of pages
is required for such a query, then an UnexpectedPaging exception
is passed to the error handler as explained below.
- **fetchsize** - controls the driver parameter of the same name.
Suffixed units can be used here, such as "50K". If this parameter
is not present, then the driver option is not set.
- **cycles** - standard, however the cql activity type will default
this to however many statements are included in the current
activity, after tag filtering, etc.
- **username** - the user to authenticate as. This option requires
that one of **password** or **passfile** also be defined.
- **password** - the password to authenticate with. This will be
ignored if passfile is also present.
- **passfile** - the file to read the password from. The first
line of this file is used as the password.
- **ssl** - specifies the type of the SSL implementation.
Disabled by default, possible values are `jdk` and `openssl`.
[Additional parameters may need to be provided](ssl.md).
- **jmxreporting** - enable JMX reporting if needed.
Examples:
- `jmxreporting=true`
- `jmxreporting=false` (the default)
- **alias** - this is a standard nosqlbench parameter, however the cql type will use the workload value also as the
alias value when not specified.
- **errors** - error handler configuration.
(default errors=stop,retryable->retry,unverified->stop)
Examples:
- errors=stop,WriteTimeoutException=histogram
- errors=count
- errors=warn,retryable=count
See the separate help on 'cqlerrors' for detailed
configuration options.
- **defaultidempotence** - sets default idempotence on the
driver options, but only if it has a value.
(default unset, valid values: true or false)
- **speculative** - sets the speculative retry policy on the cluster.
(default unset)
This can be in one of the following forms:
- pT:E:L - where :L is optional and
T is a floating point threshold between 0.0 and 100.0 and
E is an allowed number of concurrent speculative executions and
L is the maximum latency tracked in the tracker instance
(L defaults to 15000 when left out)
Examples:
- p99.8:5:15000ms - 99.8 percentile, 5 executions, 15000ms max tracked
- p98:2:10000ms - 98.0 percentile, 2 executions allowed, 10s max tracked
- Tms:E - where :E is optional and
T is a constant threshold latency and
E is the allowed number of concurrent speculative retries
(E default to 5 when left out)
Examples:
- 100ms:5 - constant threshold of 100ms and 5 allowed executions
- **seq** - selects the statement sequencer used with statement ratios.
(default: bucket)
(options: concat | bucket | interval)
The concat sequencer repeats each statement in order until the ratio
is achieved.
The bucket sequencer uses simple round-robin distribution to plan
statement ratios, a simple but unbalanced form of interleaving.
The interval sequencer apportions statements over time and then by
order of appearance for ties. This has the effect of interleaving
statements from an activity more evenly, but is less obvious in how
it works.
All of the sequencers create deterministic schedules which use an internal
lookup table for indexing into a list of possible statements.
- **trace** - enables a trace on a subset of operations. This is disabled
by default.
Examples:
`trace=modulo:100,filename:trace.log`
The above traces every 100th cycle to a file named trace.log.
`trace=modulo:1000,filename:stdout`
The above traces every 1000th cycle to stdout.
If the trace log is not specified, then 'tracelog' is assumed.
If the filename is specified as stdout, then traces are dumped to stdout.
- **sessionid** - names the configuration to be used for this activity. Within a given scenario, any activities that use
the same name for clusterid will share a session and cluster. default: 'default'
- **drivermetrics** - enable reporting of driver metrics.
default: false
- **driverprefix** - set the metrics name that will prefix all CQL driver metrics.
default: 'driver.*clusterid*.'
The clusterid specified is included so that separate cluster and session
contexts can be reported independently for advanced tests.
- **usercodecs** - enable the loading of user codec libraries for more
details see: com.datastax.codecs.framework.UDTCodecInjector in the
nosqlbench code base. This is for dynamic codec loading with
user-provided codecs mapped via the internal UDT APIs. default: false
- **secureconnectbundle** - used to connect to CaaS, accepts a path to the
secure connect bundle that is downloaded from the CaaS UI. Examples:
- `secureconnectbundle=/tmp/secure-connect-my_db.zip`
- `secureconnectbundle="/home/automaton/secure-connect-my_db.zip"`
Check
out [Astra Documentation](https://docs.astra.datastax.com/docs/test-loading-data-with-nosqlbench)
for samples
- **insights** - Set to false to disable the driver from sending insights
monitoring information
- `insights=false`
- **tickduration** - sets the tickDuration (milliseconds) of
HashedWheelTimer of the java driver. This timer is used to schedule
speculative requests. Examples:
- `tickduration=10`
- `tickduration=100` (driver default value)
- **compression** - sets the transport compression to use for this
activity. Valid values are 'LZ4' and 'SNAPPY'. Both types are bundled
with EBDSE.
- **showcql** - logs cql statements as INFO (to see INFO messages in stdout use -v or greater) Note: this is expensive
and should only be done to troubleshoot workloads. Do not use `showcql` for your tests.
- **lbp** - configures the load balancing policies for the Java driver. With this parameter, you can
configure nested load balancing policies in short-hand form.
The policies available are documented in detail under the help topic `cql-loadbalancing`. See that
guide if you need more than the examples below.
Examples:
- `lbp=LAP(retry_period=3,scale=10)` - Latency aware policy with retry period of 3 seconds.
(Seconds is the default time unit, unless _ms parameter is used) and scale 10.
- `lbp=LAP(rp=3,s=10)` - Same as above, using the equivalent but terser form.
- `lbp=LAP(rp_ms=3000,s_ms=10000)` - Same as above, with milliseconds instead of
seconds.
- `loadbalancing=LAP(s=10),TAP()` - Latency aware policy, followed by
token aware policy.
### CQL YAML Parameters
A uniform YAML configuration format was introduced with engineblock 2.0.
As part of this format, statement parameters were added for the CQL Activity Type.
These parameters will be consolidated with the above parameters in time, but for
now **they are limited to a YAML params block**:
params:
ratio: 1
# Sets the statement ratio within the operation sequencer
# scheme. Integers only.
# When preparing the operation order (AKA sequencing),
# frequency of the associated statements.
cl: ONE
# Sets the consistency level, using any of the standard
# identifiers from com.datastax.driver.core.ConsistencyLevel,
# any one of:
# LOCAL_QUORUM, ANY, ONE, TWO, THREE, QUORUM, ALL,
# EACH_QUORUM, SERIAL, LOCAL_SERIAL, LOCAL_ONE
prepared: true
# By default, all statements are prepared. If you are
# creating schema, set this to false.
idempotent: false
# For statements that are known to be idempotent, set this
# to true
instrument: false
# If a statement has instrument set to true, then
# individual Timer metrics will be tracked for
# that statement for both successes and errors,
# using the given statement name.
verify: *
compare: all
# Adds two operators to the operation:
# 1) verify that there is a single row result set in the response.
# 2) verify some or all of the field values by name and/or value.
# If this option is used on any statement, then the activity will
# provide verification metrics and exceptions, including details
# of verification in the log once the activity is completed.
# For full details on this field, see the docs on cqlverify.
/// Cross-verify all fields and field values between the reference data and
/// the actual data.
all(0x1|0x1<<1|0x1<<2);
logresultcsv: true
OR
logresultcsv: myfilename.csv
# If a statement has logresultcsv set to true,
# then individual operations will be logged to a CSV file.
# In this case the CSV file will be named as
# <statement-name>--results.csv.
# If the value is present and not "true", then the value will
# be used as the name of the file.
#
# The format of the file is:
# <cycle>,(SUCCESS|FAILURE),<nanos>,<rows-fetched>,(<error-class,NONE)
# NOTES:
# 1) BE CAREFUL with this setting. A single logged line per
# result is not useful for high-speed testing as it will
# impose IO loads on the client to slow it down.
# 2) BE CAREFUL with the name. It is best to just pick good
# names for your statement defs so that everything remains
# coherent and nothing gets accidentally overwritten.
# 3) If logresultcsv is provided at the activity level, it
# applies to all statements, and the only valid values
# there are true and false.
start-timers: timername1, timername2, ...
#
# If a statement has start-timers value set, then the named
# timers are started in the local thread before the
# statement is executed
#
# Together, with the stop-timers modifier, you can measure
# sequences of statements with specific named boundaries.
#
# The name of the timer is qualified with the activity alias
# just as all other metric names.
#
# This is generally only useful when the async= parameter is
# NOT used, since the scope of the timer is thread-local. When
# async is used, many operations may overlap each other in the
# same thread, breaking linearization guarantees which make
# thread local scoping helpful for tracking linearized operations.
#
# When a timer is started, a timer context is created and stored
# under this name in the thread. You must ensure that an
# associated stop-timers setting is applied to another statement
# in order to trigger the tally of these metrics.
stop-timers: timername1, timername2, ...
#
# If a statement has a stop-timers value set, then after the
# statement is finished, whether by error or by successful
# completion, the named timers are stopped and the resulting
# measurement is added to metrics.
#
# If you add stop-timers with names that do not have a matching
# start-timers name, or vice-versa then an error is thrown.
### Metrics
- alias.result - A timer which tracks the performance of an op result
only. This is the async get on the future, broken out as a separate
step.
- alias.result-success - A timer that records rate and histograms of the
time it takes from submitting a query to completely reading the result
set that it returns, across all pages. This metric is only counted for
non-exceptional results, while the result metric above includes
all operations.
- alias.bind - A timer which tracks the performance of the statement
binding logic, including the generation of data immediately prior
- alias.execute - A timer which tracks the performance of op submission
only. This is the async execution call, broken out as a separate step.
- alias.tries - A histogram of how many tries were required to get a
completed operation
- alias.pages - A timer which tracks the performance of paging, specific
to more than 1-page query results. i.e., if all reads return within 1
page, this metric will not have any data.
- alias.strides - A timer around each stride of operations within a thread
- alias.skipped-tokens - A histogram that records the count and cycle values
of skipped tokens.
## YAML Examples
Please see the bundled activities with nosqlbench for examples.
If you need to see the original cql documentation for some reason, then you can
find it under `nb5 help cql_olddocs`.

View File

@ -0,0 +1,424 @@
# CQL driver (deprecated docs, use cqld4 instead)
The cql driver functionality is now provided by the cqld4 driver adapter.
Please see the cqld4 help topic with `nb5 help cqld4`
---
This is a driver which allows for the execution of CQL statements. This driver supports both sync and async modes, with
detailed metrics provided for both.
### Example activity definitions
Run a cql activity named 'cql1', with definitions from activities/cqldefs.yaml
... driver=cql alias=cql1 workload=cqldefs
Run a cql activity defined by cqldefs.yaml, but with shortcut naming
... driver=cql workload=cqldefs
Only run statement groups which match a tag regex
... driver=cql workload=cqldefs tags=group:'ddl.*'
Run the matching 'dml' statements, with 100 cycles, from [1000..1100)
... driver=cql workload=cqldefs tags=group:'dml.*' cycles=1000..1100
This last example shows that the cycle range is [inclusive..exclusive),
to allow for stacking test intervals. This is standard across all
activity types.
### CQL ActivityType Parameters
- **cqldriver** - default: dse - The type of driver to use, either dse, or oss. If you need DSE-specific features, use
the dse driver. If you are connecting to an OSS Apache Cassandra cluster, you must use the oss driver. The oss driver
option is only available in nosqlbench.
- **host** - The host or hosts to use for connection points to
the cluster. If you specify multiple values here, use commas
with no spaces.
Examples:
- `host=192.168.1.25`
- `host=192.168.1.25,testhost42`
- **workload** - The workload definition which holds the schema and statement defs.
see workload yaml location for additional details
(no default, required)
- **port** - The port to connect with
- **cl** - An override to consistency levels for the activity. If
this option is used, then all consistency levels will be replaced
by this one for the current activity, and a log line explaining
the difference with respect to the yaml will be emitted.
This is not a dynamic parameter. It will only be applied at
activity start.
- **cbopts** - default: none - this is how you customize the cluster
settings for the client, including policies, compression, etc. This
is a string of *Java*-like method calls just as you would use them
in the Cluster.Builder fluent API. They are evaluated inline
with the default Cluster.Builder options not covered below.
Example: cbopts=".withCompression(ProtocolOptions.Compression.NONE)"
- **whitelist** default: none - Applies a whitelist policy to the load balancing
policy in the driver. If used, a WhitelistPolicy(RoundRobinPolicy())
will be created and added to the cluster builder on startup.
Examples:
- `whitelist=127.0.0.1`
- `whitelist=127.0.0.1:9042,127.0.0.2:1234`
- **retrypolicy** default: none - Applies a retry policy in the driver
The only option supported for this version is `retrypolicy=logging`,
which uses the default retry policy, but with logging added.
- **reconnectpolicy** default: none - Applies a reconnection policy in the
driver. Supports
either `reconnectpolicy=exponential(minDelayInMs,maxDelayInMs)`
or `reconnectpolicy=constant(delayInMs)`. The driver reconnects using
this policy when the entire cluster becomes unavailable.
- **protocol_version** default: unset, defaults to driver default behavior
- Set the CQL protocol version. Valid values are V1, V2, V3, V4, V5,
DSE_V1, DSE_V2. Protocol is usually auto-negotiated, however, the
initial connection may use a lower protocol to ensure connectivity to
older server versions. If you know you are running on a newer server
version, you can set this to match.
- **pooling** default: none - Applies the connection pooling options to
the policy. Examples:
- `pooling=4:10`
keep between 4 and 10 connections to LOCAL hosts
- `pooling=4:10,2:5`
keep 4-10 connections to LOCAL hosts and 2-5 to REMOTE
- `pooling=4:10:2000`
keep between 4-10 connections to LOCAL hosts with
up to 2000 requests per connection
- `pooling=5:10:2000,2:4:1000` keep between 5-10 connections to
LOCAL hosts with up to 2000 requests per connection, and 2-4
connection to REMOTE hosts with up to 1000 requests per connection
Additionally, you may provide the following options on pooling. Any
of these that are provided must appear in this order:
`,heartbeat_interval_s:n,idle_timeout_s:n,pool_timeout_ms:n`, so a
full example with all options set would appear as:
`pooling=5:10:2000,2:4:1000,heartbeat_interval_s:30,idle_timeout_s:120,pool_timeout_ms:5`
- **socketoptions** default: none - Applies any of the valid socket
options to the client when the session is built. Each of the options
uses the long form of the name, with either a numeric or boolean
value. Individual sub-parameters should be separated by a comma, and
the parameter names and values can be separated by either equals or a
colon. All of these values may be changed:
- read_timeout_ms
- connect_timeout_ms
- keep_alive
- reuse_address
- so_linger
- tcp_no_delay
- receive_buffer_size
- send_buffer_size
Examples:
- `socketoptions=read_timeout_ms=23423,connect_timeout_ms=4444`
- `socketoptions=tcp_no_delay=true`
- **tokens** default: unset - Only executes statements that fall within
any of the specified token ranges. Others are counted in metrics
as skipped-tokens, with a histogram value of the cycle number.
Examples:
- tokens=1:10000,100000:1000000
- tokens=1:123456
- **maxtries** - default: 10 - how many times an operation may be
attempted before it is disregarded
- **maxpages** - default: 1 - how many pages can be read from a query which
is larger than the fetchsize. If more than this number of pages
is required for such a query, then an UnexpectedPaging exception
is passed to the error handler as explained below.
- **fetchsize** - controls the driver parameter of the same name.
Suffixed units can be used here, such as "50K". If this parameter
is not present, then the driver option is not set.
- **cycles** - standard, however the cql activity type will default
this to however many statements are included in the current
activity, after tag filtering, etc.
- **username** - the user to authenticate as. This option requires
that one of **password** or **passfile** also be defined.
- **password** - the password to authenticate with. This will be
ignored if passfile is also present.
- **passfile** - the file to read the password from. The first
line of this file is used as the password.
- **ssl** - specifies the type of the SSL implementation.
Disabled by default, possible values are `jdk` and `openssl`.
[Additional parameters may need to be provided](ssl.md).
- **jmxreporting** - enable JMX reporting if needed.
Examples:
- `jmxreporting=true`
- `jmxreporting=false` (the default)
- **alias** - this is a standard nosqlbench parameter, however the cql type will use the workload value also as the
alias value when not specified.
- **errors** - error handler configuration.
(default errors=stop,retryable->retry,unverified->stop)
Examples:
- errors=stop,WriteTimeoutException=histogram
- errors=count
- errors=warn,retryable=count
See the separate help on 'cqlerrors' for detailed
configuration options.
- **defaultidempotence** - sets default idempotence on the
driver options, but only if it has a value.
(default unset, valid values: true or false)
- **speculative** - sets the speculative retry policy on the cluster.
(default unset)
This can be in one of the following forms:
- pT:E:L - where :L is optional and
T is a floating point threshold between 0.0 and 100.0 and
E is an allowed number of concurrent speculative executions and
L is the maximum latency tracked in the tracker instance
(L defaults to 15000 when left out)
Examples:
- p99.8:5:15000ms - 99.8 percentile, 5 executions, 15000ms max tracked
- p98:2:10000ms - 98.0 percentile, 2 executions allowed, 10s max tracked
- Tms:E - where :E is optional and
T is a constant threshold latency and
E is the allowed number of concurrent speculative retries
(E defaults to 5 when left out)
Examples:
- 100ms:5 - constant threshold of 100ms and 5 allowed executions
- **seq** - selects the statement sequencer used with statement ratios.
(default: bucket)
(options: concat | bucket | interval)
The concat sequencer repeats each statement in order until the ratio
is achieved.
The bucket sequencer uses simple round-robin distribution to plan
statement ratios, a simple but unbalanced form of interleaving.
The interval sequencer apportions statements over time and then by
order of appearance for ties. This has the effect of interleaving
statements from an activity more evenly, but is less obvious in how
it works.
All of the sequencers create deterministic schedules which use an internal
lookup table for indexing into a list of possible statements.
- **trace** - enables a trace on a subset of operations. This is disabled
by default.
Examples:
`trace=modulo:100,filename:trace.log`
The above traces every 100th cycle to a file named trace.log.
`trace=modulo:1000,filename:stdout`
The above traces every 1000th cycle to stdout.
If the trace log is not specified, then 'tracelog' is assumed.
If the filename is specified as stdout, then traces are dumped to stdout.
- **sessionid** - names the configuration to be used for this activity. Within a given scenario, any activities that use
the same name for clusterid will share a session and cluster. default: 'default'
- **drivermetrics** - enable reporting of driver metrics.
default: false
- **driverprefix** - set the metrics name that will prefix all CQL driver metrics.
default: 'driver.*clusterid*.'
The clusterid specified is included so that separate cluster and session
contexts can be reported independently for advanced tests.
- **usercodecs** - enable the loading of user codec libraries for more
details see: com.datastax.codecs.framework.UDTCodecInjector in the
nosqlbench code base. This is for dynamic codec loading with
user-provided codecs mapped via the internal UDT APIs. default: false
- **secureconnectbundle** - used to connect to CaaS, accepts a path to the
secure connect bundle that is downloaded from the CaaS UI. Examples:
- `secureconnectbundle=/tmp/secure-connect-my_db.zip`
- `secureconnectbundle="/home/automaton/secure-connect-my_db.zip"`
Check
out [Astra Documentation](https://docs.astra.datastax.com/docs/test-loading-data-with-nosqlbench)
for samples
- **insights** - Set to false to disable the driver from sending insights
monitoring information
- `insights=false`
- **tickduration** - sets the tickDuration (milliseconds) of
HashedWheelTimer of the java driver. This timer is used to schedule
speculative requests. Examples:
- `tickduration=10`
- `tickduration=100` (driver default value)
- **compression** - sets the transport compression to use for this
activity. Valid values are 'LZ4' and 'SNAPPY'. Both types are bundled
with EBDSE.
- **showcql** - logs cql statements as INFO (to see INFO messages in stdout use -v or greater) Note: this is expensive
and should only be done to troubleshoot workloads. Do not use `showcql` for your tests.
- **lbp** - configures the load balancing policies for the Java driver. With this parameter, you can
configure nested load balancing policies in short-hand form.
The policies available are documented in detail under the help topic `cql-loadbalancing`. See that
guide if you need more than the examples below.
Examples:
- `lbp=LAP(retry_period=3,scale=10)` - Latency aware policy with retry period of 3 seconds.
(Seconds is the default time unit, unless _ms parameter is used) and scale 10.
- `lbp=LAP(rp=3,s=10)` - Same as above, using the equivalent but terser form.
- `lbp=LAP(rp_ms=3000,s_ms=10000)` - Same as above, with milliseconds instead of
seconds.
- `loadbalancing=LAP(s=10),TAP()` - Latency aware policy, followed by
token aware policy.
### CQL YAML Parameters
A uniform YAML configuration format was introduced with engineblock 2.0.
As part of this format, statement parameters were added for the CQL Activity Type.
These parameters will be consolidated with the above parameters in time, but for
now **they are limited to a YAML params block**:
params:
ratio: 1
# Sets the statement ratio within the operation sequencer
# scheme. Integers only.
# When preparing the operation order (AKA sequencing),
# frequency of the associated statements.
cl: ONE
# Sets the consistency level, using any of the standard
# identifiers from com.datastax.driver.core.ConsistencyLevel,
# any one of:
# LOCAL_QUORUM, ANY, ONE, TWO, THREE, QUORUM, ALL,
# EACH_QUORUM, SERIAL, LOCAL_SERIAL, LOCAL_ONE
prepared: true
# By default, all statements are prepared. If you are
# creating schema, set this to false.
idempotent: false
# For statements that are known to be idempotent, set this
# to true
instrument: false
# If a statement has instrument set to true, then
# individual Timer metrics will be tracked for
# that statement for both successes and errors,
# using the given statement name.
verify: *
compare: all
# Adds two operators to the operation:
# 1) verify that there is a single row result set in the response.
# 2) verify some or all of the field values by name and/or value.
# If this option is used on any statement, then the activity will
# provide verification metrics and exceptions, including details
# of verification in the log once the activity is completed.
# For full details on this field, see the docs on cqlverify.
/// Cross-verify all fields and field values between the reference data and
/// the actual data.
all(0x1|0x1<<1|0x1<<2);
logresultcsv: true
OR
logresultcsv: myfilename.csv
# If a statement has logresultcsv set to true,
# then individual operations will be logged to a CSV file.
# In this case the CSV file will be named as
# <statement-name>--results.csv.
# If the value is present and not "true", then the value will
# be used as the name of the file.
#
# The format of the file is:
# <cycle>,(SUCCESS|FAILURE),<nanos>,<rows-fetched>,(<error-class,NONE)
# NOTES:
# 1) BE CAREFUL with this setting. A single logged line per
# result is not useful for high-speed testing as it will
# impose IO loads on the client to slow it down.
# 2) BE CAREFUL with the name. It is best to just pick good
# names for your statement defs so that everything remains
# coherent and nothing gets accidentally overwritten.
# 3) If logresultcsv is provided at the activity level, it
# applies to all statements, and the only valid values
# there are true and false.
start-timers: timername1, timername2, ...
#
# If a statement has start-timers value set, then the named
# timers are started in the local thread before the
# statement is executed
#
# Together, with the stop-timers modifier, you can measure
# sequences of statements with specific named boundaries.
#
# The name of the timer is qualified with the activity alias
# just as all other metric names.
#
# This is generally only useful when the async= parameter is
# NOT used, since the scope of the timer is thread-local. When
# async is used, many operations may overlap each other in the
# same thread, breaking linearization guarantees which make
# thread local scoping helpful for tracking linearized operations.
#
# When a timer is started, a timer context is created and stored
# under this name in the thread. You must ensure that an
# associated stop-timers setting is applied to another statement
# in order to trigger the tally of these metrics.
stop-timers: timername1, timername2, ...
#
# If a statement has a stop-timers value set, then after the
# statement is finished, whether by error or by successful
# completion, the named timers are stopped and the resulting
# measurement is added to metrics.
#
# If you add stop-timers with names that do not have a matching
# start-timers name, or vice-versa then an error is thrown.
### Metrics
- alias.result - A timer which tracks the performance of an op result
only. This is the async get on the future, broken out as a separate
step.
- alias.result-success - A timer that records rate and histograms of the
time it takes from submitting a query to completely reading the result
set that it returns, across all pages. This metric is only counted for
non-exceptional results, while the result metric above includes
all operations.
- alias.bind - A timer which tracks the performance of the statement
binding logic, including the generation of data immediately prior
- alias.execute - A timer which tracks the performance of op submission
only. This is the async execution call, broken out as a separate step.
- alias.tries - A histogram of how many tries were required to get a
completed operation
- alias.pages - A timer which tracks the performance of paging, specific
to more than 1-page query results. i.e., if all reads return within 1
page, this metric will not have any data.
- alias.strides - A timer around each stride of operations within a thread
- alias.skipped-tokens - A histogram that records the count and cycle values
of skipped tokens.
## YAML Examples
Please see the bundled activities with nosqlbench for examples.

View File

@ -16,9 +16,9 @@
package io.nosqlbench.converters.cql.cql.parser;
import io.nosqlbench.converters.cql.exporters.CGWorkloadExporter;
import io.nosqlbench.converters.cql.exporters.transformers.CGModelTransformers;
import io.nosqlbench.converters.cql.parser.CqlModelParser;
import io.nosqlbench.cqlgen.exporter.CGWorkloadExporter;
import io.nosqlbench.cqlgen.exporter.transformers.CGModelTransformers;
import io.nosqlbench.cqlgen.parser.CqlModelParser;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

View File

@ -17,6 +17,7 @@
package io.nosqlbench.converters.cql.exporters;
import io.nosqlbench.api.labels.Labeled;
import io.nosqlbench.cqlgen.exporter.CGElementNamer;
import org.junit.jupiter.api.Test;
import java.util.Map;

View File

@ -16,6 +16,7 @@
package io.nosqlbench.converters.cql.exporters;
import io.nosqlbench.cqlgen.exporter.CGColumnRebinder;
import org.junit.jupiter.api.Test;
import static org.assertj.core.api.Assertions.assertThat;
@ -24,10 +25,10 @@ public class CGWorkloadExporterTest {
@Test
public void testQuantizer() {
assertThat(CGWorkloadExporter.quantizeModuloByMagnitude(23L,1)).isEqualTo(20L);
assertThat(CGWorkloadExporter.quantizeModuloByMagnitude(234234343L,2)).isEqualTo(230000000L);
assertThat(CGWorkloadExporter.quantizeModuloByMagnitude(275234343L,3)).isEqualTo(275000000L);
assertThat(CGWorkloadExporter.quantizeModuloByMagnitude(275734343L,3)).isEqualTo(276000000L);
assertThat(CGColumnRebinder.quantizeModuloByMagnitude(23L,1)).isEqualTo(20L);
assertThat(CGColumnRebinder.quantizeModuloByMagnitude(234234343L,2)).isEqualTo(230000000L);
assertThat(CGColumnRebinder.quantizeModuloByMagnitude(275234343L,3)).isEqualTo(275000000L);
assertThat(CGColumnRebinder.quantizeModuloByMagnitude(275734343L,3)).isEqualTo(276000000L);
}
}

View File

@ -16,7 +16,7 @@
package io.nosqlbench.converters.cql.exporters;
import io.nosqlbench.converters.cql.exporters.binders.NamingFolio;
import io.nosqlbench.cqlgen.exporter.binders.NamingFolio;
import org.junit.jupiter.api.Test;
import java.util.Map;

View File

@ -0,0 +1,90 @@
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.cqlgen.exporter.transformers;
import org.junit.jupiter.api.Test;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.assertj.core.api.Assertions.assertThat;
/**
 * Exercises the two-phase regex rewrite used by cqlgen to escape CQL
 * reserved words in DDL text: phase one brackets a reserved word that
 * appears inside a PRIMARY KEY clause, phase two braces a reserved word
 * used as a column name.
 */
public class CGRegexReplacerTest {

    @Test
    public void testErroredMatcher() {
        // Phase 1: on any PRIMARY KEY line, wrap a reserved identifier in <...>.
        // Fixed: the leading-whitespace matcher must be the regex class \s,
        // written "\\s" in a Java string literal. The previous bare "\s" is the
        // Java 15+ *string* escape for a single space character, which silently
        // narrowed the match to space-indented lines only (tab-indented
        // PRIMARY KEY lines would not have matched). pattern2 below already
        // used the correct "\\s" form.
        Pattern pattern1 = Pattern.compile(
            "(?m)(?i)^(\\s*PRIMARY KEY.*?)\\b(options|role|roles|permissions|permission|date|key|timestamp|type|keys)\\b(.*?)$"
        );
        String tomatch = """
            CREATE TABLE dse_insights_local.insights_config (
            r__key int,
            config dse_insights_local.insights_config_type,
            PRIMARY KEY (key)
            PRIMARY KEY (roles)
            PRIMARY KEY (foo, bar,timestamp, baz)
            options map<text, text>,
            ) WITH read_repair_chance = 0.0
            """;
        Matcher matcher = pattern1.matcher(tomatch);
        StringBuilder sb = new StringBuilder();
        while (matcher.find()) {
            // $1 = leading text through PRIMARY KEY, $2 = reserved word, $3 = rest of line
            matcher.appendReplacement(sb, "$1<$2>$3");
        }
        matcher.appendTail(sb);
        String phase1 = sb.toString();
        assertThat(phase1).isEqualTo("""
            CREATE TABLE dse_insights_local.insights_config (
            r__key int,
            config dse_insights_local.insights_config_type,
            PRIMARY KEY (<key>)
            PRIMARY KEY (<roles>)
            PRIMARY KEY (foo, bar,<timestamp>, baz)
            options map<text, text>,
            ) WITH read_repair_chance = 0.0
            """);

        // Phase 2: wrap a reserved word used as a column name in {...}.
        // Reuse the builder for the second append pass.
        sb.setLength(0);
        Pattern pattern2 = Pattern.compile(
            "(?m)(?i)^(\\s*?)\\b(options)\\b(\\s+[a-zA-Z][a-zA-Z<>_, -]*?,?)$"
        );
        Matcher matcher2 = pattern2.matcher(phase1);
        while (matcher2.find()) {
            matcher2.appendReplacement(sb, "$1{$2}$3");
        }
        matcher2.appendTail(sb);
        String phase2 = sb.toString();
        assertThat(phase2).isEqualTo("""
            CREATE TABLE dse_insights_local.insights_config (
            r__key int,
            config dse_insights_local.insights_config_type,
            PRIMARY KEY (<key>)
            PRIMARY KEY (<roles>)
            PRIMARY KEY (foo, bar,<timestamp>, baz)
            {options} map<text, text>,
            ) WITH read_repair_chance = 0.0
            """);
    }
}

View File

@ -487,6 +487,10 @@ public class SimpleActivity implements Activity, ProgressCapable {
for (int i = 0; i < pops.size(); i++) {
long ratio = ratios.get(i);
ParsedOp pop = pops.get(i);
if (ratio==0) {
logger.info("skipped mapping op '" + pop.getName() + "'");
continue;
}
DriverAdapter adapter = adapters.get(i);
OpMapper opMapper = adapter.getOpMapper();
OpDispenser<? extends Op> dispenser = opMapper.apply(pop);

View File

@ -227,7 +227,7 @@ public class NBCLI implements Function<String[], Integer> {
}
if (args.length > 0 && args[0].toLowerCase().equals("cqlgen")) {
String exporterImpl = "io.nosqlbench.converters.cql.exporters.CGWorkloadExporter";
String exporterImpl = "io.nosqlbench.cqlgen.exporter.CGWorkloadExporter";
String[] exporterArgs = Arrays.copyOfRange(args, 1, args.length);
try {
Class<?> genclass = Class.forName(exporterImpl);