Incremental progress on the Data API workload

This commit is contained in:
Jonathan Shook 2024-05-14 10:39:20 -05:00
parent ff74e8f104
commit 2cce700eb4
10 changed files with 169 additions and 85 deletions

View File

@ -1,12 +1,12 @@
<component name="ProjectRunConfigurationManager"> <component name="ProjectRunConfigurationManager">
<configuration default="false" name="SCENARIO astra DAPI dapi_novector" type="JarApplication" folderName="Astra DAPI"> <configuration default="false" name="SCENARIO astra_dapi_kv_id" type="JarApplication" folderName="Astra DAPI">
<extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension"> <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
<option name="credential" /> <option name="credential" />
<option name="region" /> <option name="region" />
<option name="useCurrentConnection" value="false" /> <option name="useCurrentConnection" value="false" />
</extension> </extension>
<option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" /> <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
<option name="PROGRAM_PARAMETERS" value="astra_kv_dapi dapi_novector collection=baselines astraTokenFile=target/token astraApiEndpointFile=target/endpoint -v" /> <option name="PROGRAM_PARAMETERS" value="astra_kv_dapi astra_dapi_kv_id collection=baselines astraTokenFile=target/token astraApiEndpointFile=target/endpoint -v" />
<option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/dataapi" /> <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/dataapi" />
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="true" /> <option name="ALTERNATIVE_JRE_PATH_ENABLED" value="true" />
<option name="ALTERNATIVE_JRE_PATH" value="21" /> <option name="ALTERNATIVE_JRE_PATH" value="21" />

View File

@ -0,0 +1,15 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="SCENARIO astra_dapi_v1536_id threads=1" type="JarApplication" folderName="Astra DAPI">
<extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
<option name="credential" />
<option name="region" />
<option name="useCurrentConnection" value="false" />
</extension>
<option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
<option name="PROGRAM_PARAMETERS" value="astra_kv_dapi astra_dapi_v1536_id collection=baselines astraTokenFile=target/token astraApiEndpointFile=target/endpoint -v threads=1" />
<option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/dataapi" />
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="true" />
<option name="ALTERNATIVE_JRE_PATH" value="21" />
<method v="2" />
</configuration>
</component>

View File

@ -28,6 +28,8 @@ import io.nosqlbench.nb.api.labels.NBLabels;
import java.util.function.Function; import java.util.function.Function;
// TODO: Add adapter details to dataapi.md in the main resources folder, modeled on cqld4.md
@Service(value = DriverAdapter.class, selector = "dataapi") @Service(value = DriverAdapter.class, selector = "dataapi")
public class DataApiDriverAdapter extends BaseDriverAdapter<DataApiBaseOp, DataApiSpace> { public class DataApiDriverAdapter extends BaseDriverAdapter<DataApiBaseOp, DataApiSpace> {
public DataApiDriverAdapter(NBComponent parent, NBLabels childLabels) { public DataApiDriverAdapter(NBComponent parent, NBLabels childLabels) {

View File

@ -14,14 +14,14 @@
* limitations under the License. * limitations under the License.
*/ */
package io.nosqlbench.adapter.dataapi; package io.nosqlbench.adapter.dataapi.opdispensers;
import com.datastax.astra.client.Database; import com.datastax.astra.client.Database;
import com.datastax.astra.client.model.Filter; import com.datastax.astra.client.model.Filter;
import com.datastax.astra.client.model.FindOptions; import com.datastax.astra.client.model.FindOptions;
import com.datastax.astra.client.model.Projection; import com.datastax.astra.client.model.Projection;
import com.datastax.astra.client.model.Sort; import com.datastax.astra.client.model.Sort;
import io.nosqlbench.adapter.dataapi.opdispensers.DataApiOpDispenser; import io.nosqlbench.adapter.dataapi.DataApiDriverAdapter;
import io.nosqlbench.adapter.dataapi.ops.DataApiBaseOp; import io.nosqlbench.adapter.dataapi.ops.DataApiBaseOp;
import io.nosqlbench.adapter.dataapi.ops.DataApiFindVectorFilterOp; import io.nosqlbench.adapter.dataapi.ops.DataApiFindVectorFilterOp;
import io.nosqlbench.adapters.api.templating.ParsedOp; import io.nosqlbench.adapters.api.templating.ParsedOp;

View File

@ -58,9 +58,9 @@ public abstract class DataApiOpDispenser extends BaseOpDispenser<DataApiBaseOp,
protected Filter getFilterFromOp(ParsedOp op, long l) { protected Filter getFilterFromOp(ParsedOp op, long l) {
// TODO: Clarify 'filter' vs 'filters' or whether to support both uniformly // TODO: Clarify 'filter' vs 'filters' or whether to support both uniformly
Filter filter = null; Filter filter = null;
Optional<LongFunction<List>> filterFunction = op.getAsOptionalFunction("filters", List.class).or( Optional<LongFunction<List>> filterFunction = op.getAsOptionalFunction("filters", List.class)
() -> op.getAsOptionalFunction("filter",List.class) .or(() -> op.getAsOptionalFunction("filter",List.class));
);
if (filterFunction.isPresent()) { if (filterFunction.isPresent()) {
List<Map<String,Object>> filters = filterFunction.get().apply(l); List<Map<String,Object>> filters = filterFunction.get().apply(l);
List<Filter> andFilterList = new ArrayList<>(); List<Filter> andFilterList = new ArrayList<>();

View File

@ -16,8 +16,10 @@
package io.nosqlbench.adapter.dataapi.ops; package io.nosqlbench.adapter.dataapi.ops;
import com.datastax.astra.client.Collection;
import com.datastax.astra.client.Database; import com.datastax.astra.client.Database;
import com.datastax.astra.client.model.Document; import com.datastax.astra.client.model.Document;
import com.datastax.astra.client.model.InsertOneResult;
public class DataApiInsertOneVectorOp extends DataApiBaseOp { public class DataApiInsertOneVectorOp extends DataApiBaseOp {
private final Document doc; private final Document doc;
@ -33,6 +35,8 @@ public class DataApiInsertOneVectorOp extends DataApiBaseOp {
@Override @Override
public Object apply(long value) { public Object apply(long value) {
return db.getCollection(collectionName).insertOne(doc, vector); Collection<Document> collection = db.getCollection(collectionName);
InsertOneResult result = collection.insertOne(doc, vector);
return result;
} }
} }

View File

@ -3,71 +3,118 @@ min_version: "5.21.0"
description: | description: |
A basic workload that uses the DataStax Data API Client in Java, emulating what A basic workload that uses the DataStax Data API Client in Java, emulating what
applications would do in the native stack. applications would do in the native stack.
TEMPLATE(cardinality,1000) variations:
TEMPLATE(collection,keyvalue) without vector:
TEMPLATE(dimensions,1536) with _id:
TEMPLATE(similarity,COSINE) search by id=
TEMPLATE(keycount,TEMPLATE(cardinality)) search by id<
TEMPLATE(valuecount,TEMPLATE(cardinality)) NOT USED: with uuid:
with vector:
with _id:
search by id=
search by id<
vector search with filter by id<
vector search with vector ~
with uuid:
vector search with vector ~
cardinality=TEMPLATE(cardinality,1000)
collection=TEMPLATE(collection,keyvalue)
dimensions=TEMPLATE(dimensions,1536)
similarity=TEMPLATE(similarity,COSINE)
keycount=TEMPLATE(keycount,TEMPLATE(cardinality))
valuecount=TEMPLATE(valuecount,TEMPLATE(cardinality))
rampup-threads=TEMPLATE(rampup-threads,100)
scenarios: scenarios:
dapi_novector:
schema: run driver=dataapi tags==block:schema threads==1 cycles==UNDEF astra_dapi_kv_id:
schema: >-
run driver=dataapi
tags==block:schema_kv
threads==1 cycles==UNDEF
rampup: >- rampup: >-
run driver=dataapi tags==block:rampup run driver=dataapi
tags==block:rampup_kv_id
cycles===TEMPLATE(rampup-cycles,TEMPLATE(cardinality)) cycles===TEMPLATE(rampup-cycles,TEMPLATE(cardinality))
threads=TEMPLATE(rampup-threads)
errors=count
find_kv_by_id: >-
run driver=dataapi
tags==block:find_kv_by_id
cycles===TEMPLATE(main-cycles,1000)
threads=auto errors=count threads=auto errors=count
find_key: >- find_kv_lt_id: >-
run driver=dataapi tags==block:find_key run driver=dataapi
tags==block:find_kv_lt_id
cycles===TEMPLATE(main-cycles,1000) cycles===TEMPLATE(main-cycles,1000)
threads=auto errors=count threads=auto errors=count
dapi_vector_d1536: astra_dapi_v1536_id:
schema_vector: run driver=dataapi tags==block:schema_vector threads==1 cycles==UNDEF schema_vector: >-
run driver=dataapi
tags==block:schema_v1536
threads==1 cycles==UNDEF
rampup_vector: >- rampup_vector: >-
run driver=dataapi tags==block:rampup_vector run driver=dataapi
tags==block:rampup_v1536_id
cycles===TEMPLATE(rampup-cycles,TEMPLATE(cardinality)) cycles===TEMPLATE(rampup-cycles,TEMPLATE(cardinality))
threads=TEMPLATE(rampup-threads)
errors=count
find_kv_by_id: >-
run driver=dataapi
tags==block:find_kv_by_id
cycles===TEMPLATE(main-cycles,1000)
threads=auto errors=count threads=auto errors=count
find_key_vector: >- find_kv_lt_id: >-
run driver=dataapi tags==block:find_key_vector run driver=dataapi
tags==block:find_kv_lt_id
cycles===TEMPLATE(main-cycles,1000)
threads=auto errors=count
find_by_vector: >-
run driver=dataapi
tags==block:find_by_vector
cycles===TEMPLATE(main-cycles,TEMPLATE(cardinality))
threads=auto errors=count
find_by_vector_lt_id: >-
run driver=dataapi
tags==block:find_by_vector_lt_id
cycles===TEMPLATE(main-cycles,TEMPLATE(cardinality)) cycles===TEMPLATE(main-cycles,TEMPLATE(cardinality))
threads=auto errors=count threads=auto errors=count
astra_dapi_v1536_uuid:
schema_vector: >-
# kv_dapi: run driver=dataapi
# kv_dapi_schema: run driver=http tags==block:schema threads==1 cycles==UNDEF tags==block:schema_v1536
# kv_dapi_rampup: run driver=http tags==block:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto threads==1 cycles==UNDEF
# kv_dapi_main: run driver=http tags==block:"main.*" cycles===TEMPLATE(main-cycles,10000000) threads=auto rampup_vector: >-
run driver=dataapi
# basic_check: tags==block:rampup_v1536_uuid
# schema: run driver=http tags==block:schema threads==1 cycles==UNDEF cycles===TEMPLATE(rampup-cycles,TEMPLATE(cardinality))
# rampup: run driver=http tags==block:rampup cycles===TEMPLATE(rampup-cycles,10) threads=auto threads=TEMPLATE(rampup-threads)
# main: run driver=http tags==block:"main.*" cycles===TEMPLATE(main-cycles,10) threads=auto errors=count
find_by_vector: >-
run driver=dataapi
tags==block:find_by_vector
cycles===TEMPLATE(main-cycles,TEMPLATE(cardinality))
threads=auto errors=count
bindings: bindings:
# To enable an optional weighted set of hosts in place of a load balancer
# Examples
# single host: jsonapi_host=host1
# multiple hosts: jsonapi_host=host1,host2,host3
# multiple weighted hosts: jsonapi_host=host1:3,host2:7
weighted_hosts: WeightedStrings('TEMPLATE(jsonapi_host,TEMPLATE(stargate_host,localhost))')
seq_key: Mod(TEMPLATE(keycount)); ToString() -> String seq_key: Mod(TEMPLATE(keycount)); ToString() -> String
# seq_key: Mod(TEMPLATE(keycount,50000000000L));
seq_value: Hash(); Mod(TEMPLATE(valuecount)); ToString() -> String seq_value: Hash(); Mod(TEMPLATE(valuecount)); ToString() -> String
# rw_key: TEMPLATE(keydist,Uniform(0,50000000000L));
rw_key: TEMPLATE(keydist,Uniform(0,TEMPLATE(keycount))); ToString() -> String rw_key: TEMPLATE(keydist,Uniform(0,TEMPLATE(keycount))); ToString() -> String
rw_key_num: TEMPLATE(keydist,Uniform(0,TEMPLATE(keycount))); rw_key_num: TEMPLATE(keydist,Uniform(0,TEMPLATE(keycount)));
rw_value: Hash(); TEMPLATE(valdist,Uniform(0,TEMPLATE(valuecount))); ToString() -> String rw_value: Hash(); TEMPLATE(valdist,Uniform(0,TEMPLATE(valuecount))); ToString() -> String
vector_value: HashedFloatVectors(TEMPLATE(dimensions,1536)); vector_value: HashedFloatVectors(TEMPLATE(dimensions,1536));
request_id: ToHashedUUID(); ToString(); request_id: ToHashedUUID(); ToString();
params: params:
cl: TEMPLATE(cl,LOCAL_QUORUM) cl: TEMPLATE(cl,LOCAL_QUORUM)
blocks: blocks:
reset_schema: reset_schema:
ops: ops:
drop_index: drop_index:
@ -77,15 +124,16 @@ blocks:
raw: |- raw: |-
DROP TABLE IF EXISTS TEMPLATE(keyspace, baselines).TEMPLATE(table,keyvalue); DROP TABLE IF EXISTS TEMPLATE(keyspace, baselines).TEMPLATE(table,keyvalue);
schema: # Schema
schema_kv:
ops: ops:
delete_collection_op: delete_collection_op:
delete_collection: "TEMPLATE(collection)" delete_collection: "TEMPLATE(collection)"
create_collection_op: create_collection_op:
create_collection: "TEMPLATE(collection)" create_collection: "TEMPLATE(collection)"
# separate these cases later, when you can recreate the same collection name with/without vector support schema_v1536:
schema_vector:
ops: ops:
delete_collection_op_v: delete_collection_op_v:
delete_collection: "TEMPLATE(collection)_v" delete_collection: "TEMPLATE(collection)_v"
@ -94,7 +142,9 @@ blocks:
dimensions: TEMPLATE(dimensions) dimensions: TEMPLATE(dimensions)
similarity: TEMPLATE(similarity) similarity: TEMPLATE(similarity)
rampup: # Rampup
rampup_kv_id:
ops: ops:
insert_one_op: insert_one_op:
insert_one: "TEMPLATE(collection)" insert_one: "TEMPLATE(collection)"
@ -102,7 +152,15 @@ blocks:
_id: "{seq_key}" _id: "{seq_key}"
value: "{seq_value}" value: "{seq_value}"
rampup_vector: # rampup_kv_uuid:
# ops:
# insert_one_op:
# insert_one: "TEMPLATE(collection)"
# document:
# value: "{seq_value}"
rampup_v1536_id:
ops: ops:
insert_one_op_v: insert_one_op_v:
insert_one_vector: "TEMPLATE(collection)_v" insert_one_vector: "TEMPLATE(collection)_v"
@ -111,17 +169,41 @@ blocks:
value: "{seq_value}" value: "{seq_value}"
vector: "{vector_value}" vector: "{vector_value}"
# rampup-uuid: rampup_v1536_uuid:
# ops: ops:
# insert_one_op: insert_one_op_v:
# insert-one: "TEMPLATE(collection)" insert_one_vector: "TEMPLATE(collection)_v"
# document: document:
# value: "{seq_value}" value: "{seq_value}"
find_key: vector: "{vector_value}"
find_kv_by_id:
ops:
find_op:
find: "TEMPLATE(collection)"
filters:
- conjunction: "and"
operator: "eq"
field: "_id"
value: "{rw_key}"
find_kv_lt_id:
params: params:
ratio: 5 ratio: 5
ops: ops:
find_op: find_kv_id_lt:
find: "TEMPLATE(collection)"
filters:
- conjunction: "and"
operator: "lt"
field: "_id"
value: "{rw_key_num}"
find_by_vector_lt_id:
params:
ratio: 5
ops:
find_kv_id_lt:
find: "TEMPLATE(collection)" find: "TEMPLATE(collection)"
filters: filters:
- conjunction: "and" - conjunction: "and"
@ -130,36 +212,14 @@ blocks:
value: "{rw_key_num}" value: "{rw_key_num}"
vector: "{vector_value}" vector: "{vector_value}"
find_key_vector: find_by_vector:
params: params:
ratio: 5 ratio: 5
ops: ops:
find_op_v: find_op_filter_v:
find_vector_filter: "TEMPLATE(collection)" find_vector_filter: "TEMPLATE(collection)"
filters:
- conjunction: "and"
operator: "lt"
field: "_id"
value: "{rw_key_num}"
vector: "{vector_value}" vector: "{vector_value}"
#
# rampup_with_vector_uuid:
# ops:
# insert_one_op:
# insert_one: "TEMPLATE(collection)"
# document:
# value: "{seq_value}"
# vector: "{vector_value}"
#
# main_read_with_vector:
# ops:
# find_op:
# find: "TEMPLATE(collection)"
# filter:
# _id: "{rw_key}"
#
# main_ann_with_vector_limit_20: # main_ann_with_vector_limit_20:
# params: # params:
# ratio: 5 # ratio: 5

View File

@ -0,0 +1,3 @@
# Data API
## DataStax Data API Adapter

View File

@ -200,7 +200,7 @@
<profile> <profile>
<id>adapter-s4j-include</id> <id>adapter-s4j-include</id>
<activation> <activation>
<activeByDefault>false</activeByDefault> <activeByDefault>true</activeByDefault>
</activation> </activation>
<dependencies> <dependencies>
<dependency> <dependency>

View File

@ -147,7 +147,7 @@
<profile> <profile>
<id>adapter-s4j-module</id> <id>adapter-s4j-module</id>
<activation> <activation>
<activeByDefault>false</activeByDefault> <activeByDefault>true</activeByDefault>
</activation> </activation>
<modules> <modules>
<module>adapter-s4j</module> <module>adapter-s4j</module>