incremental progress on data api workload

This commit is contained in:
Jonathan Shook 2024-05-14 10:39:20 -05:00
parent ff74e8f104
commit 2cce700eb4
10 changed files with 169 additions and 85 deletions

View File

@ -1,12 +1,12 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="SCENARIO astra DAPI dapi_novector" type="JarApplication" folderName="Astra DAPI">
<configuration default="false" name="SCENARIO astra_dapi_kv_id" type="JarApplication" folderName="Astra DAPI">
<extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
<option name="credential" />
<option name="region" />
<option name="useCurrentConnection" value="false" />
</extension>
<option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
<option name="PROGRAM_PARAMETERS" value="astra_kv_dapi dapi_novector collection=baselines astraTokenFile=target/token astraApiEndpointFile=target/endpoint -v" />
<option name="PROGRAM_PARAMETERS" value="astra_kv_dapi astra_dapi_kv_id collection=baselines astraTokenFile=target/token astraApiEndpointFile=target/endpoint -v" />
<option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/dataapi" />
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="true" />
<option name="ALTERNATIVE_JRE_PATH" value="21" />

View File

@ -0,0 +1,15 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="SCENARIO astra_dapi_v1536_id threads=1" type="JarApplication" folderName="Astra DAPI">
<extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
<option name="credential" />
<option name="region" />
<option name="useCurrentConnection" value="false" />
</extension>
<option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
<option name="PROGRAM_PARAMETERS" value="astra_kv_dapi astra_dapi_v1536_id collection=baselines astraTokenFile=target/token astraApiEndpointFile=target/endpoint -v threads=1" />
<option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/dataapi" />
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="true" />
<option name="ALTERNATIVE_JRE_PATH" value="21" />
<method v="2" />
</configuration>
</component>

View File

@ -28,6 +28,8 @@ import io.nosqlbench.nb.api.labels.NBLabels;
import java.util.function.Function;
// TODO: Add details to dataapi.md in main resources folder, a la cqld4.md
@Service(value = DriverAdapter.class, selector = "dataapi")
public class DataApiDriverAdapter extends BaseDriverAdapter<DataApiBaseOp, DataApiSpace> {
public DataApiDriverAdapter(NBComponent parent, NBLabels childLabels) {

View File

@ -14,14 +14,14 @@
* limitations under the License.
*/
package io.nosqlbench.adapter.dataapi;
package io.nosqlbench.adapter.dataapi.opdispensers;
import com.datastax.astra.client.Database;
import com.datastax.astra.client.model.Filter;
import com.datastax.astra.client.model.FindOptions;
import com.datastax.astra.client.model.Projection;
import com.datastax.astra.client.model.Sort;
import io.nosqlbench.adapter.dataapi.opdispensers.DataApiOpDispenser;
import io.nosqlbench.adapter.dataapi.DataApiDriverAdapter;
import io.nosqlbench.adapter.dataapi.ops.DataApiBaseOp;
import io.nosqlbench.adapter.dataapi.ops.DataApiFindVectorFilterOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;

View File

@ -58,9 +58,9 @@ public abstract class DataApiOpDispenser extends BaseOpDispenser<DataApiBaseOp,
protected Filter getFilterFromOp(ParsedOp op, long l) {
// TODO: Clarify 'filter' vs 'filters' or whether to support both uniformly
Filter filter = null;
Optional<LongFunction<List>> filterFunction = op.getAsOptionalFunction("filters", List.class).or(
() -> op.getAsOptionalFunction("filter",List.class)
);
Optional<LongFunction<List>> filterFunction = op.getAsOptionalFunction("filters", List.class)
.or(() -> op.getAsOptionalFunction("filter",List.class));
if (filterFunction.isPresent()) {
List<Map<String,Object>> filters = filterFunction.get().apply(l);
List<Filter> andFilterList = new ArrayList<>();

View File

@ -16,8 +16,10 @@
package io.nosqlbench.adapter.dataapi.ops;
import com.datastax.astra.client.Collection;
import com.datastax.astra.client.Database;
import com.datastax.astra.client.model.Document;
import com.datastax.astra.client.model.InsertOneResult;
public class DataApiInsertOneVectorOp extends DataApiBaseOp {
private final Document doc;
@ -33,6 +35,8 @@ public class DataApiInsertOneVectorOp extends DataApiBaseOp {
/**
 * Inserts {@code doc} together with {@code vector} into the collection named
 * {@code collectionName}, looked up on {@code db}.
 *
 * @param value the cycle value passed in by the caller; not read by the statements shown here
 * @return the result of the {@code insertOne} call
 */
@Override
public Object apply(long value) {
// NOTE(review): this listing appears to show both the earlier one-line form and its
// replacement below — confirm which lines belong to the current revision.
return db.getCollection(collectionName).insertOne(doc, vector);
// Same insert, with the collection handle and the result held in named locals.
Collection<Document> collection = db.getCollection(collectionName);
InsertOneResult result = collection.insertOne(doc, vector);
return result;
}
}

View File

@ -3,71 +3,118 @@ min_version: "5.21.0"
description: |
A basic workload that uses the DataStax Data API Client in Java, emulating what
applications would do in the native stack.
TEMPLATE(cardinality,1000)
TEMPLATE(collection,keyvalue)
TEMPLATE(dimensions,1536)
TEMPLATE(similarity,COSINE)
TEMPLATE(keycount,TEMPLATE(cardinality))
TEMPLATE(valuecount,TEMPLATE(cardinality))
variations:
without vector:
with _id:
search by id=
search by id<
NOT USED: with uuid:
with vector:
with _id:
search by id=
search by id<
vector search with filter by id<
vector search with vector ~
with uuid:
vector search with vector ~
cardinality=TEMPLATE(cardinality,1000)
collection=TEMPLATE(collection,keyvalue)
dimensions=TEMPLATE(dimensions,1536)
similarity=TEMPLATE(similarity,COSINE)
keycount=TEMPLATE(keycount,TEMPLATE(cardinality))
valuecount=TEMPLATE(valuecount,TEMPLATE(cardinality))
rampup-threads=TEMPLATE(rampup-threads,100)
scenarios:
dapi_novector:
schema: run driver=dataapi tags==block:schema threads==1 cycles==UNDEF
astra_dapi_kv_id:
schema: >-
run driver=dataapi
tags==block:schema_kv
threads==1 cycles==UNDEF
rampup: >-
run driver=dataapi tags==block:rampup
run driver=dataapi
tags==block:rampup_kv_id
cycles===TEMPLATE(rampup-cycles,TEMPLATE(cardinality))
threads=TEMPLATE(rampup-threads)
errors=count
find_kv_by_id: >-
run driver=dataapi
tags==block:find_kv_by_id
cycles===TEMPLATE(main-cycles,1000)
threads=auto errors=count
find_key: >-
run driver=dataapi tags==block:find_key
find_kv_lt_id: >-
run driver=dataapi
tags==block:find_kv_lt_id
cycles===TEMPLATE(main-cycles,1000)
threads=auto errors=count
dapi_vector_d1536:
schema_vector: run driver=dataapi tags==block:schema_vector threads==1 cycles==UNDEF
astra_dapi_v1536_id:
schema_vector: >-
run driver=dataapi
tags==block:schema_v1536
threads==1 cycles==UNDEF
rampup_vector: >-
run driver=dataapi tags==block:rampup_vector
run driver=dataapi
tags==block:rampup_v1536_id
cycles===TEMPLATE(rampup-cycles,TEMPLATE(cardinality))
threads=TEMPLATE(rampup-threads)
errors=count
find_kv_by_id: >-
run driver=dataapi
tags==block:find_kv_by_id
cycles===TEMPLATE(main-cycles,1000)
threads=auto errors=count
find_key_vector: >-
run driver=dataapi tags==block:find_key_vector
find_kv_lt_id: >-
run driver=dataapi
tags==block:find_kv_lt_id
cycles===TEMPLATE(main-cycles,1000)
threads=auto errors=count
find_by_vector: >-
run driver=dataapi
tags==block:find_by_vector
cycles===TEMPLATE(main-cycles,TEMPLATE(cardinality))
threads=auto errors=count
find_by_vector_lt_id: >-
run driver=dataapi
tags==block:find_by_vector_lt_id
cycles===TEMPLATE(main-cycles,TEMPLATE(cardinality))
threads=auto errors=count
# kv_dapi:
# kv_dapi_schema: run driver=http tags==block:schema threads==1 cycles==UNDEF
# kv_dapi_rampup: run driver=http tags==block:rampup cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
# kv_dapi_main: run driver=http tags==block:"main.*" cycles===TEMPLATE(main-cycles,10000000) threads=auto
# basic_check:
# schema: run driver=http tags==block:schema threads==1 cycles==UNDEF
# rampup: run driver=http tags==block:rampup cycles===TEMPLATE(rampup-cycles,10) threads=auto
# main: run driver=http tags==block:"main.*" cycles===TEMPLATE(main-cycles,10) threads=auto
astra_dapi_v1536_uuid:
schema_vector: >-
run driver=dataapi
tags==block:schema_v1536
threads==1 cycles==UNDEF
rampup_vector: >-
run driver=dataapi
tags==block:rampup_v1536_uuid
cycles===TEMPLATE(rampup-cycles,TEMPLATE(cardinality))
threads=TEMPLATE(rampup-threads)
errors=count
find_by_vector: >-
run driver=dataapi
tags==block:find_by_vector
cycles===TEMPLATE(main-cycles,TEMPLATE(cardinality))
threads=auto errors=count
bindings:
# Enables an optional weighted set of hosts to be used in place of a load balancer.
# Examples:
# single host: jsonapi_host=host1
# multiple hosts: jsonapi_host=host1,host2,host3
# multiple weighted hosts: jsonapi_host=host1:3,host2:7
weighted_hosts: WeightedStrings('TEMPLATE(jsonapi_host,TEMPLATE(stargate_host,localhost))')
seq_key: Mod(TEMPLATE(keycount)); ToString() -> String
# seq_key: Mod(TEMPLATE(keycount,50000000000L));
seq_value: Hash(); Mod(TEMPLATE(valuecount)); ToString() -> String
# rw_key: TEMPLATE(keydist,Uniform(0,50000000000L));
rw_key: TEMPLATE(keydist,Uniform(0,TEMPLATE(keycount))); ToString() -> String
rw_key_num: TEMPLATE(keydist,Uniform(0,TEMPLATE(keycount)));
rw_value: Hash(); TEMPLATE(valdist,Uniform(0,TEMPLATE(valuecount))); ToString() -> String
vector_value: HashedFloatVectors(TEMPLATE(dimensions,1536));
request_id: ToHashedUUID(); ToString();
params:
cl: TEMPLATE(cl,LOCAL_QUORUM)
params:
cl: TEMPLATE(cl,LOCAL_QUORUM)
blocks:
reset_schema:
ops:
drop_index:
@ -77,15 +124,16 @@ blocks:
raw: |-
DROP TABLE IF EXISTS TEMPLATE(keyspace, baselines).TEMPLATE(table,keyvalue);
schema:
# Schema
schema_kv:
ops:
delete_collection_op:
delete_collection: "TEMPLATE(collection)"
create_collection_op:
create_collection: "TEMPLATE(collection)"
# TODO: separate these cases once the same collection name can be recreated with or without vector support
schema_vector:
schema_v1536:
ops:
delete_collection_op_v:
delete_collection: "TEMPLATE(collection)_v"
@ -94,7 +142,9 @@ blocks:
dimensions: TEMPLATE(dimensions)
similarity: TEMPLATE(similarity)
rampup:
# Rampup
rampup_kv_id:
ops:
insert_one_op:
insert_one: "TEMPLATE(collection)"
@ -102,7 +152,15 @@ blocks:
_id: "{seq_key}"
value: "{seq_value}"
rampup_vector:
# rampup_kv_uuid:
# ops:
# insert_one_op:
# insert_one: "TEMPLATE(collection)"
# document:
# value: "{seq_value}"
rampup_v1536_id:
ops:
insert_one_op_v:
insert_one_vector: "TEMPLATE(collection)_v"
@ -111,17 +169,41 @@ blocks:
value: "{seq_value}"
vector: "{vector_value}"
# rampup-uuid:
# ops:
# insert_one_op:
# insert-one: "TEMPLATE(collection)"
# document:
# value: "{seq_value}"
find_key:
rampup_v1536_uuid:
ops:
insert_one_op_v:
insert_one_vector: "TEMPLATE(collection)_v"
document:
value: "{seq_value}"
vector: "{vector_value}"
find_kv_by_id:
ops:
find_op:
find: "TEMPLATE(collection)"
filters:
- conjunction: "and"
operator: "eq"
field: "_id"
value: "{rw_key}"
find_kv_lt_id:
params:
ratio: 5
ops:
find_op:
find_kv_id_lt:
find: "TEMPLATE(collection)"
filters:
- conjunction: "and"
operator: "lt"
field: "_id"
value: "{rw_key_num}"
find_by_vector_lt_id:
params:
ratio: 5
ops:
find_kv_id_lt:
find: "TEMPLATE(collection)"
filters:
- conjunction: "and"
@ -130,36 +212,14 @@ blocks:
value: "{rw_key_num}"
vector: "{vector_value}"
find_key_vector:
find_by_vector:
params:
ratio: 5
ops:
find_op_v:
find_op_filter_v:
find_vector_filter: "TEMPLATE(collection)"
filters:
- conjunction: "and"
operator: "lt"
field: "_id"
value: "{rw_key_num}"
vector: "{vector_value}"
#
# rampup_with_vector_uuid:
# ops:
# insert_one_op:
# insert_one: "TEMPLATE(collection)"
# document:
# value: "{seq_value}"
# vector: "{vector_value}"
#
# main_read_with_vector:
# ops:
# find_op:
# find: "TEMPLATE(collection)"
# filter:
# _id: "{rw_key}"
#
# main_ann_with_vector_limit_20:
# params:
# ratio: 5

View File

@ -0,0 +1,3 @@
# Data API
## DataStax Data API Adapter

View File

@ -200,7 +200,7 @@
<profile>
<id>adapter-s4j-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>

View File

@ -147,7 +147,7 @@
<profile>
<id>adapter-s4j-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-s4j</module>