Initial implementation of GCP Spanner adapter

Reverting JDBC changes for PR

Renames and cleanup
Mark Wolters 2024-09-25 16:26:44 -04:00
parent ffd6c22e6a
commit eb33818ce0
23 changed files with 389 additions and 310 deletions


@ -18,10 +18,17 @@ package io.nosqlbench.adapter.gcpspanner;
import com.google.cloud.spanner.ResultSet;
import java.util.ArrayList;
import java.util.List;
public class GCPSpannerAdapterUtils {
public static final String SPANNER = "gcp_spanner";
public static int[] getKeyArrayFromResultSet(ResultSet rs) {
return rs.getLongList(0).stream().mapToInt(Math::toIntExact).toArray();
List<Integer> values = new ArrayList<>();
while(rs.next()) {
values.add(Integer.valueOf(rs.getString(0)));
}
return values.stream().mapToInt(i -> i).toArray();
}
}
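
For orientation, a hypothetical usage sketch (the table and column names are illustrative; it assumes the same package as `GCPSpannerAdapterUtils` and that the key column holds integer-parsable values):

```
import com.google.cloud.spanner.DatabaseClient;
import com.google.cloud.spanner.ResultSet;
import com.google.cloud.spanner.Statement;

public class KeyExtractionSketch {
    public static int[] fetchKeys(DatabaseClient dbClient) {
        // Single-use read context; the ResultSet is closed by try-with-resources.
        try (ResultSet rs = dbClient.singleUse()
                .executeQuery(Statement.of("SELECT keycol FROM vectors LIMIT 100"))) {
            return GCPSpannerAdapterUtils.getKeyArrayFromResultSet(rs);
        }
    }
}
```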


@ -57,8 +57,8 @@ public class GCPSpannerOpMapper implements OpMapper<GCPSpannerBaseOp<?>> {
return switch (typeAndTarget.enumId) {
case update_database_ddl ->
new GCPSpannerUpdateDatabaseDdlOpDispenser(adapter, op, typeAndTarget.targetFunction);
case insert_vector ->
new GCPSpannerInsertVectorOpDispenser(adapter, op, typeAndTarget.targetFunction);
case insert ->
new GCPSpannerInsertOpDispenser(adapter, op, typeAndTarget.targetFunction);
case execute_dml ->
new GCPSpannerExecuteDmlOpDispenser(adapter, op, typeAndTarget.targetFunction);
};


@ -31,8 +31,8 @@ import org.apache.logging.log4j.Logger;
* @see <a href="https://cloud.google.com/docs/authentication#getting_credentials_for_server-centric_flow">Authentication methods at Google</a>
* @see <a href="https://cloud.google.com/java/docs/reference/google-cloud-spanner/latest/overview">Library Reference Doc</a>
* @see <a href="https://cloud.google.com/spanner/docs/reference/standard-sql/dml-syntax">DML Syntax</a>
* @see <a href=""></a>
* @see <a href=""></a>
* @see <a href="https://cloud.google.com/spanner/docs/reference/rpc">spanner rpc api calls</a>
* @see <a href="https://cloud.google.com/spanner/docs/reference/standard-sql/data-definition-language#vector_index_statements">SQL functionality related to vector indices</a>
@ -60,7 +60,7 @@ public class GCPSpannerSpace implements AutoCloseable {
public synchronized Spanner getSpanner() {
if (spanner == null) {
spanner = createSpanner();
createSpanner();
}
return spanner;
}
@ -85,8 +85,8 @@ public class GCPSpannerSpace implements AutoCloseable {
return cfg.get("database_id");
}
private Spanner createSpanner() {
if (/*cfg.getOptional("service_account_file").isEmpty() ||*/
private void createSpanner() {
if (
cfg.getOptional("database_id").isEmpty() ||
cfg.getOptional("project_id").isEmpty() ||
cfg.getOptional("instance_id").isEmpty()) {
@ -95,18 +95,14 @@ public class GCPSpannerSpace implements AutoCloseable {
String projectId = cfg.get("project_id");
String instanceId = cfg.get("instance_id");
String databaseId = cfg.get("database_id");
var spannerClient = SpannerOptions.newBuilder().setProjectId(projectId).build().getService();
dbAdminClient = spannerClient.getDatabaseAdminClient();
spanner = SpannerOptions.newBuilder().setProjectId(projectId).build().getService();
dbAdminClient = spanner.getDatabaseAdminClient();
dbClient = spanner.getDatabaseClient(DatabaseId.of(projectId, instanceId, databaseId));
return spannerClient;
}
public static NBConfigModel getConfigModel() {
return ConfigModel.of(GCPSpannerSpace.class)
.add(Param.optional("service_account_file", String.class, "the file to load the api token/key from. See https://cloud.google.com/docs/authentication/provide-credentials-adc#service-account"))
// .add(Param.defaultTo("token", "my-spanner-admin-key-changeme")
// .setDescription("the Spanner api token/key to use to connect to the database"))
.add(Param.optional("project_id", String.class,"Project ID containing the Spanner database. See https://cloud.google.com/resource-manager/docs/creating-managing-projects"))
.add(Param.optional("instance_id", String.class, "Spanner database's Instance ID containing. See https://cloud.google.com/spanner/docs/getting-started/java#create_an_instance"))
.add(Param.optional("database_id", String.class, "Spanner Database ID. See https://cloud.google.com/spanner/docs/getting-started/java#create_a_database"))


@ -25,10 +25,29 @@ import io.nosqlbench.adapters.api.templating.ParsedOp;
import java.util.function.LongFunction;
/**
* Abstract base class for GCP Spanner operation dispensers.
* This class extends the BaseOpDispenser and provides common functionality
* for creating GCP Spanner operations.
*/
public abstract class GCPSpannerBaseOpDispenser extends BaseOpDispenser<GCPSpannerBaseOp<?>, GCPSpannerSpace> {
/**
* A function that provides the target string based on a long input.
*/
protected final LongFunction<String> targetFunction;
/**
* A function that provides the GCP Spanner space based on a long input.
*/
protected final LongFunction<GCPSpannerSpace> spaceFunction;
/**
* Constructs a new GCPSpannerBaseOpDispenser.
*
* @param adapter the driver adapter for GCP Spanner operations
* @param op the parsed operation
* @param targetFunction a function that provides the target string
*/
protected GCPSpannerBaseOpDispenser(DriverAdapter<? extends GCPSpannerBaseOp<?>, GCPSpannerSpace> adapter, ParsedOp op,
LongFunction<String> targetFunction) {
super(adapter, op);


@ -27,29 +27,58 @@ import org.apache.logging.log4j.Logger;
import java.util.function.LongFunction;
/**
* Dispenser class for creating GCP Spanner Execute DML operations.
* This class extends the GCPSpannerBaseOpDispenser and provides functionality
* to create and configure GCPSpannerExecuteDmlOp instances.
*/
public class GCPSpannerExecuteDmlOpDispenser extends GCPSpannerBaseOpDispenser {
private static final Logger logger = LogManager.getLogger(GCPSpannerExecuteDmlOpDispenser.class);
private final LongFunction<GCPSpannerExecuteDmlOp> opFunction;
/**
* Constructs a new GCPSpannerExecuteDmlOpDispenser.
*
* @param adapter the driver adapter for GCP Spanner operations
* @param op the parsed operation
* @param targetFunction a function that provides the target string
*/
public GCPSpannerExecuteDmlOpDispenser(GCPSpannerDriverAdapter adapter, ParsedOp op, LongFunction<String> targetFunction) {
super(adapter, op, targetFunction);
this.opFunction = createOpFunction(op);
}
/**
* Creates a function that generates GCPSpannerExecuteDmlOp instances.
*
* @param op the parsed operation
* @return a function that generates GCPSpannerExecuteDmlOp instances
*/
private LongFunction<GCPSpannerExecuteDmlOp> createOpFunction(ParsedOp op) {
return (l) -> new GCPSpannerExecuteDmlOp(
spaceFunction.apply(l).getSpanner(),
l,
generateStatement(op.getAsRequiredFunction("DML", String.class).apply(l)),
generateStatement(targetFunction.apply(l)),
spaceFunction.apply(l).getDbClient()
);
}
/**
* Generates a Spanner Statement from a DML string.
*
* @param dml the DML string
* @return the generated Statement
*/
private Statement generateStatement(String dml) {
return Statement.of(dml);
}
/**
* Retrieves the GCP Spanner operation for the given value.
*
* @param value the input value
* @return the GCP Spanner operation
*/
@Override
public GCPSpannerBaseOp<?> getOp(long value) {
return opFunction.apply(value);
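
The dispenser above renders the full statement text per cycle and wraps it with `Statement.of(...)`. For reference, a sketch (not part of this commit; the table and column names are assumed) of the bound-parameter alternative the Spanner client also offers, which lets Spanner reuse the query plan:

```
import com.google.cloud.spanner.Statement;

public class BoundParamSketch {
    // Illustrative only; "vectors" and "keycol" are assumptions.
    public static Statement keyLookup(String key) {
        return Statement.newBuilder("SELECT keycol FROM vectors WHERE keycol = @key")
            .bind("key").to(key)
            .build();
    }
}
```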


@ -0,0 +1,99 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.adapter.gcpspanner.opdispensers;
import com.google.cloud.spanner.Mutation;
import com.google.cloud.spanner.Value;
import io.nosqlbench.adapter.gcpspanner.GCPSpannerDriverAdapter;
import io.nosqlbench.adapter.gcpspanner.ops.GCPSpannerBaseOp;
import io.nosqlbench.adapter.gcpspanner.ops.GCPSpannerInsertOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.Collections;
import java.util.Map;
import java.util.function.LongFunction;
/**
* This class is responsible for dispensing GCP Spanner insert operations.
* It extends the GCPSpannerBaseOpDispenser and provides the necessary implementation
* to create and configure GCPSpannerInsertOp instances.
*/
public class GCPSpannerInsertOpDispenser extends GCPSpannerBaseOpDispenser {
private static final Logger logger = LogManager.getLogger(GCPSpannerInsertOpDispenser.class);
private final LongFunction<Map> queryParamsFunction;
/**
* Constructs a new GCPSpannerInsertOpDispenser.
*
* @param adapter the GCP Spanner driver adapter
* @param op the parsed operation
* @param targetFunction a function that provides the target table name based on a long value
*/
public GCPSpannerInsertOpDispenser(GCPSpannerDriverAdapter adapter, ParsedOp op, LongFunction<String> targetFunction) {
super(adapter, op, targetFunction);
this.queryParamsFunction = createParamsFunction(op);
}
/**
* Creates a function that provides query parameters based on a long value.
*
* @param op the parsed operation
* @return a function that provides query parameters
*/
private LongFunction<Map> createParamsFunction(ParsedOp op) {
return op.getAsOptionalFunction("query_params", Map.class)
.orElse(_ -> Collections.emptyMap());
}
/**
* Returns a GCPSpannerInsertOp instance configured with the provided value.
*
* @param value the value used to configure the operation
* @return a configured GCPSpannerInsertOp instance
*/
@Override
public GCPSpannerBaseOp<?> getOp(long value) {
Mutation.WriteBuilder builder = Mutation.newInsertBuilder(targetFunction.apply(value));
Map<String, Object> params = queryParamsFunction.apply(value);
for (Map.Entry<String, Object> entry : params.entrySet()) {
builder.set(entry.getKey()).to(convertToValue(entry));
}
return new GCPSpannerInsertOp(
spaceFunction.apply(value).getSpanner(),
value,
builder.build(),
spaceFunction.apply(value).getDbClient()
);
}
private Value convertToValue(Map.Entry<String, Object> entry) {
return switch(entry.getValue()) {
case String s -> Value.string(s);
case Integer i -> Value.int64(i);
case Long l -> Value.int64(l);
case Double d -> Value.float64(d);
case Float f -> Value.float32(f);
case long[] larr -> Value.int64Array(larr);
case float[] farr -> Value.float32Array(farr);
case double[] darr -> Value.float64Array(darr);
default -> throw new IllegalArgumentException("Unsupported value type: " + entry.getValue().getClass());
};
}
}
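
To illustrate what `getOp` assembles, a hedged sketch of the resulting Mutation for a `query_params` map like the one in the sample insert workload (the table and column names mirror that workload; the values are placeholders):

```
import com.google.cloud.spanner.Mutation;
import com.google.cloud.spanner.Value;

public class MutationSketch {
    public static Mutation exampleInsert() {
        // Mirrors the dispenser's loop: one set(...).to(Value...) per map entry.
        return Mutation.newInsertBuilder("vectors")
            .set("keycol").to(Value.string("key-42"))
            .set("value").to(Value.float32Array(new float[]{0.1f, 0.2f, 0.3f}))
            .build();
    }
}
```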


@ -1,57 +0,0 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.adapter.gcpspanner.opdispensers;
import com.google.cloud.spanner.Mutation;
import io.nosqlbench.adapter.gcpspanner.GCPSpannerDriverAdapter;
import io.nosqlbench.adapter.gcpspanner.ops.GCPSpannerBaseOp;
import io.nosqlbench.adapter.gcpspanner.ops.GCPSpannerInsertVectorOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.function.LongFunction;
public class GCPSpannerInsertVectorOpDispenser extends GCPSpannerBaseOpDispenser {
private static final Logger logger = LogManager.getLogger(GCPSpannerInsertVectorOpDispenser.class);
private final LongFunction<GCPSpannerInsertVectorOp> opFunction;
public GCPSpannerInsertVectorOpDispenser(GCPSpannerDriverAdapter adapter, ParsedOp op, LongFunction<String> targetFunction) {
super(adapter, op, targetFunction);
this.opFunction = createOpFunction(op);
}
private LongFunction<GCPSpannerInsertVectorOp> createOpFunction(ParsedOp op) {
LongFunction<float[]> vectorF= op.getAsRequiredFunction("vector", float[].class);
return (l) -> new GCPSpannerInsertVectorOp(
spaceFunction.apply(l).getSpanner(),
l,
Mutation.newInsertBuilder(op.getStaticValue("table", java.lang.String.class))
.set(op.getStaticValue("pkey", java.lang.String.class)).to(l)
.set("VectorData").toFloat32Array(vectorF.apply(l))
.build(),
spaceFunction.apply(l).getDbClient()
);
}
@Override
public GCPSpannerBaseOp<?> getOp(long value) {
return opFunction.apply(value);
}
}


@ -26,26 +26,47 @@ import org.apache.logging.log4j.Logger;
import java.util.function.LongFunction;
/**
* Dispenser class for creating instances of GCPSpannerUpdateDatabaseDdlOp.
*/
public class GCPSpannerUpdateDatabaseDdlOpDispenser extends GCPSpannerBaseOpDispenser {
private static final Logger logger = LogManager.getLogger(GCPSpannerUpdateDatabaseDdlOpDispenser.class);
private final LongFunction<GCPSpannerUpdateDatabaseDdlOp> opFunction;
/**
* Constructor for GCPSpannerUpdateDatabaseDdlOpDispenser.
*
* @param adapter the GCPSpannerDriverAdapter instance
* @param op the ParsedOp instance
* @param targetFunction a LongFunction that provides the target string
*/
public GCPSpannerUpdateDatabaseDdlOpDispenser(GCPSpannerDriverAdapter adapter, ParsedOp op, LongFunction<String> targetFunction) {
super(adapter, op, targetFunction);
this.opFunction = createOpFunction(op);
}
/**
* Creates a LongFunction that generates GCPSpannerUpdateDatabaseDdlOp instances.
*
* @param op the ParsedOp instance
* @return a LongFunction that generates GCPSpannerUpdateDatabaseDdlOp instances
*/
private LongFunction<GCPSpannerUpdateDatabaseDdlOp> createOpFunction(ParsedOp op) {
return (l) -> new GCPSpannerUpdateDatabaseDdlOp(
spaceFunction.apply(l).getSpanner(),
l,
op.getAsRequiredFunction("DDL", String.class).apply(l),
targetFunction.apply(l),
spaceFunction.apply(l).getDbAdminClient(),
spaceFunction.apply(l).getDbAdminClient().getDatabase(spaceFunction.apply(l).getInstanceId(), spaceFunction.apply(l).getDatabaseIdString())
);
}
/**
* Retrieves an operation instance based on the provided value.
*
* @param value the long value used to generate the operation
* @return a GCPSpannerBaseOp instance
*/
@Override
public GCPSpannerBaseOp<?> getOp(long value) {
return opFunction.apply(value);


@ -23,6 +23,12 @@ import org.apache.logging.log4j.Logger;
import java.util.function.LongFunction;
/**
* Abstract base class for GCP Spanner operations.
* This class implements the CycleOp interface and provides a template for executing operations with a Spanner client.
*
* @param <T> the type of the request parameter
*/
public abstract class GCPSpannerBaseOp<T> implements CycleOp<Object> {
protected final static Logger logger = LogManager.getLogger(GCPSpannerBaseOp.class);
@ -31,27 +37,50 @@ public abstract class GCPSpannerBaseOp<T> implements CycleOp<Object> {
protected final T request;
protected final LongFunction<Object> apiCall;
public GCPSpannerBaseOp(Spanner searchIndexClient, T requestParam) {
this.spannerClient = searchIndexClient;
/**
* Constructs a new GCPSpannerBaseOp with the specified Spanner client and request parameter.
*
* @param spannerClient the Spanner client to use for operations
* @param requestParam the request parameter for the operation
*/
public GCPSpannerBaseOp(Spanner spannerClient, T requestParam) {
this.spannerClient = spannerClient;
this.request = requestParam;
this.apiCall = this::applyOp;
}
/**
* Applies the operation for the given cycle value.
* This method logs the operation and handles any exceptions by throwing a RuntimeException.
*
* @param value the cycle value
* @return the result of the operation
*/
@Override
public final Object apply(long value) {
logger.trace(() -> "applying op: " + this);
try {
return applyOp(value);
} catch (Exception e) {
// wrap any failure as unchecked so the running cycle surfaces it immediately
throw new RuntimeException(e);
}
}
/**
* Abstract method to be implemented by subclasses to define the specific operation logic.
*
* @param value the cycle value
* @return the result of the operation
*/
public abstract Object applyOp(long value);
/**
* Returns a string representation of the GCPSpannerBaseOp.
*
* @return a string representation of the GCPSpannerBaseOp
*/
@Override
public String toString() {
return "GCPSpannerBaseOp(" + this.request.getClass().getSimpleName() + ")";

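To make the template pattern concrete, a hypothetical minimal subclass (illustration only, not part of this commit): `applyOp` supplies the operation body, while the final `apply(long)` above handles tracing and exception wrapping:

```
import com.google.cloud.spanner.Spanner;

// Hypothetical example op; real subclasses call the Spanner client here.
public class GCPSpannerEchoOp extends GCPSpannerBaseOp<String> {
    public GCPSpannerEchoOp(Spanner spanner, String requestParam) {
        super(spanner, requestParam);
    }

    @Override
    public Object applyOp(long value) {
        return request + "@" + value; // trivial body for illustration
    }
}
```
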

@ -19,10 +19,22 @@ package io.nosqlbench.adapter.gcpspanner.ops;
import com.google.cloud.spanner.*;
/**
* This class represents an operation to execute a DML statement on Google Cloud Spanner.
* It extends the GCPSpannerBaseOp class and overrides the applyOp method to execute the DML statement.
*/
public class GCPSpannerExecuteDmlOp extends GCPSpannerBaseOp<Long> {
private final Statement statement;
private final DatabaseClient dbClient;
/**
* Constructs a new GCPSpannerExecuteDmlOp.
*
* @param spanner the Spanner instance
* @param requestParam the request parameter
* @param statement the DML statement to execute
* @param dbClient the DatabaseClient to use for executing the statement
*/
public GCPSpannerExecuteDmlOp(Spanner spanner, Long requestParam, Statement statement,
DatabaseClient dbClient) {
super(spanner, requestParam);
@ -30,6 +42,12 @@ public class GCPSpannerExecuteDmlOp extends GCPSpannerBaseOp<Long> {
this.dbClient = dbClient;
}
/**
* Executes the DML statement using the provided value.
*
* @param value the value to use for the operation
* @return the result of the DML execution
*/
@Override
public Object applyOp(long value) {
try (ReadContext context = dbClient.singleUse()) {


@ -23,16 +23,34 @@ import com.google.cloud.spanner.Mutation;
import java.util.Collections;
public class GCPSpannerInsertVectorOp extends GCPSpannerBaseOp<Long> {
/**
* This class represents an operation to insert a record into a Google Cloud Spanner database.
* It extends the GCPSpannerBaseOp class and provides the implementation for the applyOp method.
*/
public class GCPSpannerInsertOp extends GCPSpannerBaseOp<Long> {
private final Mutation mutation;
private final DatabaseClient dbClient;
public GCPSpannerInsertVectorOp(Spanner searchIndexClient, Long requestParam, Mutation mutation, DatabaseClient dbClient) {
/**
* Constructs a new GCPSpannerInsertOp.
*
* @param searchIndexClient the Spanner client used to interact with the database
* @param requestParam the request parameter
* @param mutation the Mutation object representing the data to be inserted
* @param dbClient the DatabaseClient used to execute the mutation
*/
public GCPSpannerInsertOp(Spanner searchIndexClient, Long requestParam, Mutation mutation, DatabaseClient dbClient) {
super(searchIndexClient, requestParam);
this.mutation = mutation;
this.dbClient = dbClient;
}
/**
* Applies the insert operation using the provided mutation.
*
* @param value the value to be used in the operation
* @return the result of the write operation
*/
@Override
public Object applyOp(long value) {
return dbClient.write(Collections.singletonList(mutation));


@ -22,11 +22,24 @@ import com.google.cloud.spanner.*;
import com.google.common.collect.ImmutableList;
import com.google.spanner.admin.database.v1.UpdateDatabaseDdlMetadata;
/**
* This class represents an operation to update the database DDL (Data Definition Language) in Google Cloud Spanner.
* It extends the GCPSpannerBaseOp class and provides the implementation for applying the DDL update operation.
*/
public class GCPSpannerUpdateDatabaseDdlOp extends GCPSpannerBaseOp<Long> {
private final String createTableStatement;
private final DatabaseAdminClient dbAdminClient;
private final Database db;
/**
* Constructs a new GCPSpannerUpdateDatabaseDdlOp.
*
* @param searchIndexClient the Spanner client
* @param requestParam the request parameter
* @param createTableStatement the SQL statement to create the table
* @param dbAdminClient the DatabaseAdminClient to execute the DDL update
* @param db the Database object representing the target database
*/
public GCPSpannerUpdateDatabaseDdlOp(Spanner searchIndexClient, Long requestParam, String createTableStatement,
DatabaseAdminClient dbAdminClient, Database db) {
super(searchIndexClient, requestParam);
@ -35,6 +48,13 @@ public class GCPSpannerUpdateDatabaseDdlOp extends GCPSpannerBaseOp<Long> {
this.db = db;
}
/**
* Applies the DDL update operation.
*
* @param value the value to be used in the operation
* @return the result of the operation
* @throws RuntimeException if an error occurs during the operation
*/
@Override
public Object applyOp(long value) {
OperationFuture<Void, UpdateDatabaseDdlMetadata> operation = dbAdminClient.updateDatabaseDdl(

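The hunk is truncated here. For context, a minimal sketch of issuing and awaiting a DDL update with the Spanner admin client; it mirrors the call shown above but is not necessarily the committed code (passing `null` lets Spanner generate the operation ID):

```
import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.spanner.DatabaseAdminClient;
import com.google.common.collect.ImmutableList;
import com.google.spanner.admin.database.v1.UpdateDatabaseDdlMetadata;

public class DdlUpdateSketch {
    public static void applyDdl(DatabaseAdminClient dbAdminClient, String instanceId,
                                String databaseId, String ddl) throws Exception {
        OperationFuture<Void, UpdateDatabaseDdlMetadata> operation =
            dbAdminClient.updateDatabaseDdl(instanceId, databaseId, ImmutableList.of(ddl), null);
        operation.get(); // block until the schema change completes
    }
}
```
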

@ -31,6 +31,6 @@ package io.nosqlbench.adapter.gcpspanner.types;
*/
public enum GCPSpannerOpType {
update_database_ddl,
insert_vector,
insert,
execute_dml,
}


@ -0,0 +1,13 @@
scenarios:
default:
execute_ddl: run driver=gcp_spanner tags==blocks:execute_ddl service_account_file=TEMPLATE(service_account_file)
project_id=TEMPLATE(project_id) instance_id=TEMPLATE(instance_id) database_id=TEMPLATE(database_id) cycles=1
# https://cloud.google.com/spanner/docs/reference/standard-sql/data-definition-language#vector_index_option_list
blocks:
execute_ddl:
ops:
op1:
update_database_ddl: |
CREATE VECTOR INDEX VectorsIndex ON vectors(value)
OPTIONS (distance_type = 'COSINE', tree_depth = 3, num_branches=1000, num_leaves = 1000000);


@ -0,0 +1,18 @@
scenarios:
default:
execute_dml: run driver=gcp_spanner tags==blocks:execute_dml service_account_file=TEMPLATE(service_account_file)
project_id=TEMPLATE(project_id) instance_id=TEMPLATE(instance_id) database_id=TEMPLATE(database_id) cycles=TEMPLATE(cycles)
bindings:
rw_key: ToString();
test_vector_hdf5: HdfFileToFloatList("testdata/TEMPLATE(dataset).hdf5", "/test"); ToCqlVector()
validation_set_hdf5: HdfFileToIntArray("testdata/TEMPLATE(dataset).hdf5", "/neighbors")
blocks:
execute_dml:
ops:
op1:
execute_dml: |
SELECT * FROM vectors@{FORCE_INDEX=VectorsIndex} ORDER BY APPROX_COSINE_DISTANCE(ARRAY<FLOAT32>{test_vector_hdf5},
value, options => JSON '{"num_leaves_to_search": 10}') LIMIT 100


@ -0,0 +1,11 @@
scenarios:
default:
execute_ddl: run driver=gcp_spanner tags==blocks:execute_ddl service_account_file=TEMPLATE(service_account_file)
project_id=TEMPLATE(project_id) instance_id=TEMPLATE(instance_id) database_id=TEMPLATE(database_id) cycles=1
blocks:
execute_ddl:
ops:
op1:
update_database_ddl: |
CREATE TABLE vectors (keycol STRING(100), value ARRAY<FLOAT32>(vector_length=>25) NOT NULL) PRIMARY KEY(keycol)


@ -0,0 +1,18 @@
scenarios:
default:
insert_vector: >-
run driver=gcp_spanner tags==blocks:insert_vector service_account_file=TEMPLATE(service_account_file)
project_id=TEMPLATE(project_id) instance_id=TEMPLATE(instance_id) database_id=TEMPLATE(database_id) cycles=TEMPLATE(cycles)
bindings:
rw_key: ToString();
train_floatlist: HdfFileToFloatArray("glove-25-angular.hdf5", "/train");
blocks:
insert_vector:
ops:
op1:
insert_vector: "vectors"
query_params:
keycol: "{rw_key}"
value: "{train_floatlist}"


@ -1,67 +1,30 @@
# Google Spanner driver adapter
The Azure AI Search driver adapter is a NoSQLBench adapter for the `azure-aisearch` driver, a Java driver
for connecting to and performing operations on an instance of an Azure AI Search vector database. The driver is
available from GitHub at https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/search/azure-search-documents/.
## Run Commands (Remove prior to merge)
### Create Collection Schema
```
java -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar weaviate_vector_live weaviate_vectors.rampup dimensions=25 testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=Glove_25 weaviatehost=letsweave-czgwdrw9.weaviate.network token_file=${workspace_loc:/nosqlbench}/local/weaviate/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:weaviate_1255,instance:vectors,vendor:weaviate_wcd" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
### Delete Collection
```
java -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.delete_index dimensions=25 testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 similarity_function=cosine azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azure_aisearch,instance:vectors,vendor:azure_aisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
### List Indexes
```
java --enable-preview -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.list_indexes dimensions=25 similarity_function=cosine testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azureaisearch,instance:vectors,vendor:azureaisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
### Upload Documents
```
java --enable-preview -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.upload_documents dimensions=25 similarity_function=cosine testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azureaisearch,instance:vectors,vendor:azureaisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
### Search Documents
```
java --enable-preview -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.search_documents dimensions=25 similarity_function=cosine testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azureaisearch,instance:vectors,vendor:azureaisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
The Google Cloud Spanner driver adapter is a NoSQLBench adapter for the `gcp_spanner` driver, a Java driver
for connecting to and performing operations on an instance of a Google Cloud Spanner database.
## Activity Parameters
The following parameters must be supplied to the adapter at runtime in order to successfully connect to an
instance of the [Azure AI Search database](https://learn.microsoft.com/en-us/rest/api/searchservice/?view=rest-searchservice-2024-07-01):
instance of the [Google Cloud Spanner database](https://cloud.google.com/java/docs/reference/google-cloud-spanner/latest/overview):
* `token` - In order to use the Weaviate database you must have an account. Once the account is created you can [request
an api key/token](https://weaviate.io/developers/wcs/quickstart#explore-the-details-panel). This key will need to be
provided any time a database connection is desired. Alternatively, the api key can be stored in a file securely and
referenced via the `token_file` config option pointing to the path of the file.
* `endpoint` - When a collection/index is created in the database the URI (aka endpoint) must be specified as well. The adapter will
use the default value of `localhost:8080` if none is provided at runtime.
* `api_version` - the api version to be used by the search client. Defaults to the latest service/api version supported
by the version of client SDK.
* `service_account_file` - In order to connect to a Spanner database you must have an [IAM service account](https://cloud.google.com/docs/authentication/provide-credentials-adc#service-account)
defined with the appropriate permissions for the adapter. Once the service account is created you can download
a JSON file from the GCP console that contains the credentials for the service account. This file must be provided
to the adapter at runtime.
* `project_id` - Project ID containing the Spanner database. See [Creating a project](https://cloud.google.com/resource-manager/docs/creating-managing-projects).
* `instance_id` - Spanner database's Instance ID. See [Creating an instance](https://cloud.google.com/spanner/docs/getting-started/java#create_an_instance).
* `database_id` - Spanner database's Database ID. See [Creating a database](https://cloud.google.com/spanner/docs/getting-started/java#create_a_database).
* In addition, the environment variable `GOOGLE_APPLICATION_CREDENTIALS` must be set to the path of the service account file. A minimal client-wiring sketch using these IDs follows this list.
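For reference, a minimal client-wiring sketch (placeholder ID values; credentials are resolved from `GOOGLE_APPLICATION_CREDENTIALS`) mirroring how the adapter builds its Spanner clients:

```
import com.google.cloud.spanner.DatabaseClient;
import com.google.cloud.spanner.DatabaseId;
import com.google.cloud.spanner.Spanner;
import com.google.cloud.spanner.SpannerOptions;

public class SpannerWiringSketch {
    public static void main(String[] args) {
        String projectId = "my-project";     // project_id
        String instanceId = "my-instance";   // instance_id
        String databaseId = "my-database";   // database_id
        Spanner spanner = SpannerOptions.newBuilder().setProjectId(projectId).build().getService();
        DatabaseClient dbClient =
            spanner.getDatabaseClient(DatabaseId.of(projectId, instanceId, databaseId));
    }
}
```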
## Op Templates
The Azure AI Search adapter supports [**all basic operations**](../java/io/nosqlbench/adapter/azure-aisearch/ops) supported by the [Java
client SDK published by Azure AI Search](https://github.com/weaviate/java-client). The official Azure AI Search API reference can be
found at https://learn.microsoft.com/en-us/rest/api/searchservice/operation-groups?view=rest-searchservice-2024-07-01.
The operations include a full-fledged support for key APIs available in the Java SDK client.
The following are a couple high level API operations.
* Create or Update Index
* Delete Index
* List Indexes
* Upload Documents (vectors)
* (Vector) Search Documents (vectors)
The Google Cloud Spanner adapter supports the following operations:
* `update_database_ddl` - Data Definition Language operations such as creating and dropping tables, indexes, etc.
* `execute_dml` - Data Manipulation Language operations. Only read operations, including queries
and vector queries, are supported at this time.
* `insert` - Inserts a single record, vector or non-vector, into the database.
## Examples
Check out the [full example workload available here](./activities/azure_aisearch_vectors_live.yaml).
---


@ -33,18 +33,6 @@
into a PostgreSQL® compatible database leveraging HikariCP.
</description>
<!-- <dependencyManagement>-->
<!-- <dependencies>-->
<!-- <dependency>-->
<!-- <groupId>com.google.cloud</groupId>-->
<!-- <artifactId>libraries-bom</artifactId>-->
<!-- <version>26.45.0</version>-->
<!-- <type>pom</type>-->
<!-- <scope>import</scope>-->
<!-- </dependency>-->
<!-- </dependencies>-->
<!-- </dependencyManagement>-->
<dependencies>
<!-- core dependencies -->
<dependency>
@ -73,38 +61,6 @@
<version>5.0.1</version>
</dependency>
<!-- &lt;!&ndash; https://mvnrepository.com/artifact/org.apache.commons/commons-collections4 &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>org.apache.commons</groupId>-->
<!-- <artifactId>commons-collections4</artifactId>-->
<!-- <version>4.4</version>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>com.google.cloud</groupId>-->
<!-- <artifactId>google-cloud-spanner-jdbc</artifactId>-->
<!-- <exclusions>-->
<!-- <exclusion>-->
<!-- <groupId>com.google.api.grpc</groupId>-->
<!-- <artifactId>proto-google-cloud-spanner-executor-v1</artifactId>-->
<!-- </exclusion>-->
<!-- </exclusions>-->
<!-- </dependency>-->
<!-- https://mvnrepository.com/artifact/com.google.cloud/google-cloud-spanner-jdbc -->
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-spanner-jdbc</artifactId>
<version>2.22.0</version>
</dependency>
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-spanner</artifactId>
<version>6.71.0</version>
</dependency>
</dependencies>
<build>


@ -57,14 +57,13 @@ public abstract class JDBCOp implements CycleOp {
else {
String url = jdbcSpace.getConnConfig().getJdbcUrl();
Properties props = jdbcSpace.getConnConfig().getDataSourceProperties();
if (jdbcSpace.getConnConfig().getUsername() != null) props.put("user", jdbcSpace.getConnConfig().getUsername());
if (jdbcSpace.getConnConfig().getPassword() != null) props.put("password", jdbcSpace.getConnConfig().getPassword());
props.put("user", jdbcSpace.getConnConfig().getUsername());
props.put("password", jdbcSpace.getConnConfig().getPassword());
connection = DriverManager.getConnection(url, props);
}
if (connection.getMetaData().getDatabaseProductName().equals("PostgreSQL")) {
JDBCPgVector.addVectorType(connection);
}
// Register 'vector' type
JDBCPgVector.addVectorType(connection);
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("A new JDBC connection ({}) is successfully created: {}",


@ -1,98 +0,0 @@
# run driver=jdbc workload="/path/to/postgresql-keyvalue.yaml" tags="block:schema" threads=AUTO cycles=4 url="jdbc:postgresql://host:port/database" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName=insectdb sslrootcert="/path/to/postgresql_certs/root.crt" -vv --show-stacktraces
min_version: "5.17.2"
scenarios:
default:
# drop: run driver=jdbc tags==block:drop threads===1 cycles==UNDEF url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" sslrootcert="/path/to/postgresql_certs/root.crt"
# schema: run driver=jdbc tags==block:schema threads===1 cycles==UNDEF url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" sslrootcert="/path/to/postgresql_certs/root.crt"
train: run driver=jdbc tags==block:train threads=1 cycles===100 url="jdbc:cloudspanner:/projects/gcp-lcm-project/instances/stratperf-sep-24/databases/baselines" dml_batch=1 autoCommit=false databaseName="baselines"
# testann: run driver=jdbc tags==block:testann threads=AUTO cycles===TEMPLATE(main-cycles,1000) url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" sslrootcert="/path/to/postgresql_certs/root.crt"
# "jdbc:cloudspanner:/projects/gcp-lcm-project/instances/stratperf-sep-24/databases/baselines?credentials=/home/cloudspanner-keys/my-key.json;autocommit=false";
bindings:
rw_key: ToString();
train_floatlist: HdfFileToFloatList("/home/mwolters138/datasets/glove-25-angular.hdf5", "/train"); ToCqlVector();
test_floatlist: HdfFileToFloatList("/home/mwolters138/datasets/glove-25-angular.hdf5", "/test"); ToCqlVector();
relevant_indices: HdfFileToIntArray("/home/mwolters138/datasets/glove-25-angular.hdf5", "/neighbors")
blocks:
# drop:
# ops:
# drop_vector_index:
# ddl: |
# DROP INDEX IF EXISTS idx_TEMPLATE(tablename,baseline)_TEMPLATE(indextype)_TEMPLATE(similarity_function);
# drop_table:
# ddl: |
# DROP TABLE IF EXISTS TEMPLATE(schemaname,public).TEMPLATE(tablename,baseline);
# ##
# # NOTE: Do NOT enable this block for 'runall.sh' script
# # --------------------------------------------------
# # drop_schema:
# # ddl: |
# # DROP SCHEMA IF EXISTS TEMPLATE(schemaname,public);
#
# schema:
# ops:
# create_schema:
# ddl: |
# CREATE SCHEMA IF NOT EXISTS TEMPLATE(schemaname,public);
# create_table:
# ddl: |
# CREATE TABLE IF NOT EXISTS TEMPLATE(schemaname,public).TEMPLATE(tablename,baseline)
# (key TEXT PRIMARY KEY, value vector(TEMPLATE(dimensions,5)));
# create_vector_index:
# ddl: |
# CREATE INDEX IF NOT EXISTS idx_TEMPLATE(tablename,baseline)_TEMPLATE(indextype)_TEMPLATE(similarity_function)
# ON TEMPLATE(schemaname,public).TEMPLATE(tablename,baseline)
# USING TEMPLATE(indextype) (value vector_TEMPLATE(similarity_function)_ops)
# WITH (TEMPLATE(indexopt));
train:
params:
prepared: true
ops:
main_insert:
dmlwrite: |
INSERT INTO TEMPLATE(schemaname,public).TEMPLATE(tablename,baseline) VALUES (?,?)
ON CONFLICT DO NOTHING;
prep_stmt_val_arr: |
{rw_key},{train_floatlist}
# testann:
# params:
# prepared: true
# ops:
# # NOTE: right now this is only for cosine similarity.
# # in baselinetor, '<=>' is for cosine similarity
# # '<->' is for euclidean distance
# # '<#>' is for inner product
# main_select:
# dmlread: |
# SELECT *
# FROM TEMPLATE(schemaname,public).TEMPLATE(tablename,baseline)
# ORDER BY value <=> ?
# LIMIT TEMPLATE(top_k,100);
# prep_stmt_val_arr: |
# {test_floatlist}
# #################################
# ## NOTE:
# # 1). The script blocks below are ONLY relevant with Vector relevancy score verification
# # 2). The "verifier-key" must match the Vector data identifier column name (e.g. primary key name)
# # right now the identifier must be a type that can be converted to int.
# verifier-key: "key"
# verifier-init: |
# relevancy=scriptingmetrics.newRelevancyMeasures(_parsed_op);
# k=TEMPLATE(top_k,100)
# relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.recall("recall",k));
# relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.precision("precision",k));
# relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.F1("F1",k));
# relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.reciprocal_rank("RR",k));
# relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.average_precision("AP",k));
# verifier: |
# // driver-specific function
# actual_indices=pgvec_utils.getValueListForVerifierKey(result);
# // driver-agnostic function
# relevancy.accept({relevant_indices},actual_indices);
# // because we are "verifying" although this needs to be reorganized
# return true;


@ -74,7 +74,7 @@
<profile>
<id>adapter-cqld4-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -102,7 +102,7 @@
<profile>
<id>adapter-http-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -130,7 +130,7 @@
<profile>
<id>adapter-tcp-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -144,7 +144,7 @@
<profile>
<id>adapter-dataapi-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -158,7 +158,7 @@
<profile>
<id>adapter-dynamodb-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -172,7 +172,7 @@
<profile>
<id>adapter-mongodb-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -186,7 +186,7 @@
<profile>
<id>adapter-pulsar-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -200,7 +200,7 @@
<profile>
<id>adapter-s4j-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -214,7 +214,7 @@
<profile>
<id>adapter-neo4j-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -228,7 +228,7 @@
<profile>
<id>adapter-kafka-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -242,7 +242,7 @@
<profile>
<id>adapter-amqp-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -256,7 +256,7 @@
<profile>
<id>adapter-qdrant-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -270,7 +270,7 @@
<profile>
<id>adapter-weaviate-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -284,7 +284,7 @@
<profile>
<id>adapter-azure-aisearch-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>


@ -57,7 +57,7 @@
<profile>
<id>adapter-cqld4-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-cqld4</module>
@ -77,7 +77,7 @@
<profile>
<id>adapter-http-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-http</module>
@ -97,7 +97,7 @@
<profile>
<id>adapter-tcp-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-tcp</module>
@ -107,7 +107,7 @@
<profile>
<id>adapter-dynamodb-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-dynamodb</module>
@ -117,7 +117,7 @@
<profile>
<id>adapter-mongodb-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-mongodb</module>
@ -127,7 +127,7 @@
<profile>
<id>adapter-neo4j-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-neo4j</module>
@ -137,7 +137,7 @@
<profile>
<id>adapter-pulsar-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-pulsar</module>
@ -147,7 +147,7 @@
<profile>
<id>adapter-s4j-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-s4j</module>
@ -157,7 +157,7 @@
<profile>
<id>adapter-kafka-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-kafka</module>
@ -167,7 +167,7 @@
<profile>
<id>adapter-amqp-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-amqp</module>
@ -177,7 +177,7 @@
<profile>
<id>adapter-dataapi-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-dataapi</module>
@ -187,7 +187,7 @@
<profile>
<id>adapter-qdrant-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-qdrant</module>
@ -197,7 +197,7 @@
<profile>
<id>adapter-weaviate-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-weaviate</module>
@ -207,7 +207,7 @@
<profile>
<id>adapter-azure-aisearch-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-azure-aisearch</module>