From 0cb84c27c394ba328a0945c4d014f2b0c53fb345 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Fri, 20 Sep 2024 13:44:52 -0400 Subject: [PATCH] changes to allow for spanner --- nb-adapters/adapter-jdbc/pom.xml | 37 +++++++ .../adapter/jdbc/optypes/JDBCOp.java | 9 +- .../activities.baselinesv2/spanner.yaml | 98 +++++++++++++++++++ 3 files changed, 140 insertions(+), 4 deletions(-) create mode 100644 nb-adapters/adapter-jdbc/src/main/resources/activities.baselinesv2/spanner.yaml diff --git a/nb-adapters/adapter-jdbc/pom.xml b/nb-adapters/adapter-jdbc/pom.xml index 987742f5e..23d3e9d28 100644 --- a/nb-adapters/adapter-jdbc/pom.xml +++ b/nb-adapters/adapter-jdbc/pom.xml @@ -33,6 +33,18 @@ into a PostegreSQL® compatible database leveraging HikariCP. + + + + + + + + + + + + @@ -68,6 +80,31 @@ + + + + + + + + + + + + + + com.google.cloud + google-cloud-spanner-jdbc + 2.22.0 + + + + com.google.cloud + google-cloud-spanner + 6.71.0 + + + diff --git a/nb-adapters/adapter-jdbc/src/main/java/io/nosqlbench/adapter/jdbc/optypes/JDBCOp.java b/nb-adapters/adapter-jdbc/src/main/java/io/nosqlbench/adapter/jdbc/optypes/JDBCOp.java index 11d06fe9c..fa9b6eb47 100644 --- a/nb-adapters/adapter-jdbc/src/main/java/io/nosqlbench/adapter/jdbc/optypes/JDBCOp.java +++ b/nb-adapters/adapter-jdbc/src/main/java/io/nosqlbench/adapter/jdbc/optypes/JDBCOp.java @@ -57,13 +57,14 @@ public abstract class JDBCOp implements CycleOp { else { String url = jdbcSpace.getConnConfig().getJdbcUrl(); Properties props = jdbcSpace.getConnConfig().getDataSourceProperties(); - props.put("user", jdbcSpace.getConnConfig().getUsername()); - props.put("password", jdbcSpace.getConnConfig().getPassword()); + if (jdbcSpace.getConnConfig().getUsername() != null) props.put("user", jdbcSpace.getConnConfig().getUsername()); + if (jdbcSpace.getConnConfig().getPassword() != null) props.put("password", jdbcSpace.getConnConfig().getPassword()); connection = DriverManager.getConnection(url, props); } - // Register 'vector' type - 
JDBCPgVector.addVectorType(connection); + if (connection.getMetaData().getDatabaseProductName().equals("PostgreSQL")) { + JDBCPgVector.addVectorType(connection); + } if (LOGGER.isDebugEnabled()) { LOGGER.debug("A new JDBC connection ({}) is successfully created: {}", diff --git a/nb-adapters/adapter-jdbc/src/main/resources/activities.baselinesv2/spanner.yaml b/nb-adapters/adapter-jdbc/src/main/resources/activities.baselinesv2/spanner.yaml new file mode 100644 index 000000000..9e44873d4 --- /dev/null +++ b/nb-adapters/adapter-jdbc/src/main/resources/activities.baselinesv2/spanner.yaml @@ -0,0 +1,98 @@ +# run driver=jdbc workload="/path/to/postgresql-keyvalue.yaml" tags="block:schema" threads=AUTO cycles=4 url="jdbc:postgresql://host:port/database" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName=insectdb sslrootcert="/path/to/postgresql_certs/root.crt" -vv --show-stacktraces +min_version: "5.17.2" + +scenarios: + default: +# drop: run driver=jdbc tags==block:drop threads===1 cycles==UNDEF url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" sslrootcert="/path/to/postgresql_certs/root.crt" +# schema: run driver=jdbc tags==block:schema threads===1 cycles==UNDEF url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" sslrootcert="/path/to/postgresql_certs/root.crt" + train: run driver=jdbc tags==block:train threads=1 cycles===100 url="jdbc:cloudspanner:/projects/gcp-lcm-project/instances/stratperf-sep-24/databases/baselines" dml_batch=1 autoCommit=false databaseName="baselines" +# testann: run driver=jdbc tags==block:testann threads=AUTO cycles===TEMPLATE(main-cycles,1000) url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" 
sslrootcert="/path/to/postgresql_certs/root.crt" + +# "jdbc:cloudspanner:/projects/gcp-lcm-project/instances/stratperf-sep-24/databases/baselines?credentials=/home/cloudspanner-keys/my-key.json;autocommit=false"; + +bindings: + rw_key: ToString(); + train_floatlist: HdfFileToFloatList("/home/mwolters138/datasets/glove-25-angular.hdf5", "/train"); ToCqlVector(); + test_floatlist: HdfFileToFloatList("/home/mwolters138/datasets/glove-25-angular.hdf5", "/test"); ToCqlVector(); + relevant_indices: HdfFileToIntArray("/home/mwolters138/datasets/glove-25-angular.hdf5", "/neighbors") + +blocks: +# drop: +# ops: +# drop_vector_index: +# ddl: | +# DROP INDEX IF EXISTS idx_TEMPLATE(tablename,baseline)_TEMPLATE(indextype)_TEMPLATE(similarity_function); +# drop_table: +# ddl: | +# DROP TABLE IF EXISTS TEMPLATE(schemaname,public).TEMPLATE(tablename,baseline); +# ## +# # NOTE: Do NOT enable this block for 'runall.sh' script +# # -------------------------------------------------- +# # drop_schema: +# # ddl: | +# # DROP SCHEMA IF EXISTS TEMPLATE(schemaname,public); +# +# schema: +# ops: +# create_schema: +# ddl: | +# CREATE SCHEMA IF NOT EXISTS TEMPLATE(schemaname,public); +# create_table: +# ddl: | +# CREATE TABLE IF NOT EXISTS TEMPLATE(schemaname,public).TEMPLATE(tablename,baseline) +# (key TEXT PRIMARY KEY, value vector(TEMPLATE(dimensions,5))); +# create_vector_index: +# ddl: | +# CREATE INDEX IF NOT EXISTS idx_TEMPLATE(tablename,baseline)_TEMPLATE(indextype)_TEMPLATE(similarity_function) +# ON TEMPLATE(schemaname,public).TEMPLATE(tablename,baseline) +# USING TEMPLATE(indextype) (value vector_TEMPLATE(similarity_function)_ops) +# WITH (TEMPLATE(indexopt)); + + train: + params: + prepared: true + ops: + main_insert: + dmlwrite: | + INSERT INTO TEMPLATE(schemaname,public).TEMPLATE(tablename,baseline) VALUES (?,?) 
+ ON CONFLICT DO NOTHING; + prep_stmt_val_arr: | + {rw_key},{train_floatlist} + +# testann: +# params: +# prepared: true +# ops: +# # NOTE: right now this is only for cosine similarity. +# # in pgvector, '<=>' is for cosine similarity +# # '<->' is for euclidean distance +# # '<#>' is for inner product +# main_select: +# dmlread: | +# SELECT * +# FROM TEMPLATE(schemaname,public).TEMPLATE(tablename,baseline) +# ORDER BY value <=> ? +# LIMIT TEMPLATE(top_k,100); +# prep_stmt_val_arr: | +# {test_floatlist} +# ################################# +# ## NOTE: +# # 1). The script blocks below are ONLY relevant with Vector relevancy score verification +# # 2). The "verifier-key" must match the Vector data identifier column name (e.g. primary key name) +# # right now the identifier must be a type that can be converted to int. +# verifier-key: "key" +# verifier-init: | +# relevancy=scriptingmetrics.newRelevancyMeasures(_parsed_op); +# k=TEMPLATE(top_k,100) +# relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.recall("recall",k)); +# relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.precision("precision",k)); +# relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.F1("F1",k)); +# relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.reciprocal_rank("RR",k)); +# relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.average_precision("AP",k)); +# verifier: | +# // driver-specific function +# actual_indices=pgvec_utils.getValueListForVerifierKey(result); +# // driver-agnostic function +# relevancy.accept({relevant_indices},actual_indices); +# // because we are "verifying" although this needs to be reorganized +# return true;