changes to allow for spanner

Mark Wolters 2024-09-20 13:44:52 -04:00
parent 38970a6f23
commit 0cb84c27c3
3 changed files with 140 additions and 4 deletions


@@ -33,6 +33,18 @@
into a PostegreSQL® compatible database leveraging HikariCP.
</description>
<!-- <dependencyManagement>-->
<!-- <dependencies>-->
<!-- <dependency>-->
<!-- <groupId>com.google.cloud</groupId>-->
<!-- <artifactId>libraries-bom</artifactId>-->
<!-- <version>26.45.0</version>-->
<!-- <type>pom</type>-->
<!-- <scope>import</scope>-->
<!-- </dependency>-->
<!-- </dependencies>-->
<!-- </dependencyManagement>-->
<dependencies>
<!-- core dependencies -->
<dependency>
@@ -68,6 +80,31 @@
<!-- <version>4.4</version>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>com.google.cloud</groupId>-->
<!-- <artifactId>google-cloud-spanner-jdbc</artifactId>-->
<!-- <exclusions>-->
<!-- <exclusion>-->
<!-- <groupId>com.google.api.grpc</groupId>-->
<!-- <artifactId>proto-google-cloud-spanner-executor-v1</artifactId>-->
<!-- </exclusion>-->
<!-- </exclusions>-->
<!-- </dependency>-->
<!-- https://mvnrepository.com/artifact/com.google.cloud/google-cloud-spanner-jdbc -->
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-spanner-jdbc</artifactId>
<version>2.22.0</version>
</dependency>
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-spanner</artifactId>
<version>6.71.0</version>
</dependency>
</dependencies>
<build>
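
The two new dependencies bring in Google's Cloud Spanner JDBC driver and client library, which is what lets the adapter reach Spanner through a plain JDBC URL of the jdbc:cloudspanner: form used in the workload below. A minimal smoke-test sketch, assuming a hypothetical project/instance/database and application-default credentials (or a credentials= URL property):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class SpannerJdbcSmokeTest {
    public static void main(String[] args) throws Exception {
        // Hypothetical identifiers; real runs use the project/instance/database
        // from the workload's url parameter. Credentials come from
        // GOOGLE_APPLICATION_CREDENTIALS unless a credentials= property is set on the URL.
        String url = "jdbc:cloudspanner:/projects/my-project/instances/my-instance/databases/my-db";
        try (Connection connection = DriverManager.getConnection(url);
             Statement statement = connection.createStatement();
             ResultSet rs = statement.executeQuery("SELECT 1")) {
            while (rs.next()) {
                System.out.println("Spanner answered: " + rs.getInt(1));
            }
        }
    }
}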


@@ -57,13 +57,14 @@ public abstract class JDBCOp implements CycleOp {
else {
String url = jdbcSpace.getConnConfig().getJdbcUrl();
Properties props = jdbcSpace.getConnConfig().getDataSourceProperties();
- props.put("user", jdbcSpace.getConnConfig().getUsername());
- props.put("password", jdbcSpace.getConnConfig().getPassword());
+ if (jdbcSpace.getConnConfig().getUsername() != null) props.put("user", jdbcSpace.getConnConfig().getUsername());
+ if (jdbcSpace.getConnConfig().getPassword() != null) props.put("password", jdbcSpace.getConnConfig().getPassword());
connection = DriverManager.getConnection(url, props);
}
- // Register 'vector' type
- JDBCPgVector.addVectorType(connection);
+ if (connection.getMetaData().getDatabaseProductName().equals("PostgreSQL")) {
+ JDBCPgVector.addVectorType(connection);
+ }
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("A new JDBC connection ({}) is successfully created: {}",
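
The net effect of this patch: user/password are only set when actually configured (a cloudspanner URL typically authenticates through a credentials= property or application-default credentials instead), and pgvector's 'vector' type is only registered when the backend really is PostgreSQL. A condensed, standalone sketch of that logic, with the connection-config accessors and the pgvector helper from the diff stubbed out as placeholders:

import java.sql.Connection;
import java.sql.DriverManager;
import java.util.Properties;

public class ConnectionSetupSketch {

    // Placeholder stand-in for the values normally read from jdbcSpace.getConnConfig().
    static Connection open(String url, String username, String password, Properties props) throws Exception {
        // Only pass credentials that are actually configured; a cloudspanner URL
        // usually carries a credentials= property rather than user/password.
        if (username != null) props.put("user", username);
        if (password != null) props.put("password", password);

        Connection connection = DriverManager.getConnection(url, props);

        // Vendor-specific setup: the pgvector 'vector' type only exists on
        // PostgreSQL-compatible backends, so skip it for Cloud Spanner and others.
        if (connection.getMetaData().getDatabaseProductName().equals("PostgreSQL")) {
            // JDBCPgVector.addVectorType(connection);   // adapter-specific helper from the diff
        }
        return connection;
    }

    public static void main(String[] args) throws Exception {
        try (Connection c = open(System.getenv("JDBC_URL"), null, null, new Properties())) {
            System.out.println("Connected to: " + c.getMetaData().getDatabaseProductName());
        }
    }
}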


@@ -0,0 +1,98 @@
# run driver=jdbc workload="/path/to/postgresql-keyvalue.yaml" tags="block:schema" threads=AUTO cycles=4 url="jdbc:postgresql://host:port/database" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName=insectdb sslrootcert="/path/to/postgresql_certs/root.crt" -vv --show-stacktraces
min_version: "5.17.2"
scenarios:
default:
# drop: run driver=jdbc tags==block:drop threads===1 cycles==UNDEF url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" sslrootcert="/path/to/postgresql_certs/root.crt"
# schema: run driver=jdbc tags==block:schema threads===1 cycles==UNDEF url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" sslrootcert="/path/to/postgresql_certs/root.crt"
train: run driver=jdbc tags==block:train threads=1 cycles===100 url="jdbc:cloudspanner:/projects/gcp-lcm-project/instances/stratperf-sep-24/databases/baselines" dml_batch=1 autoCommit=false databaseName="baselines"
# testann: run driver=jdbc tags==block:testann threads=AUTO cycles===TEMPLATE(main-cycles,1000) url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" sslrootcert="/path/to/postgresql_certs/root.crt"
# "jdbc:cloudspanner:/projects/gcp-lcm-project/instances/stratperf-sep-24/databases/baselines?credentials=/home/cloudspanner-keys/my-key.json;autocommit=false";
bindings:
rw_key: ToString();
train_floatlist: HdfFileToFloatList("/home/mwolters138/datasets/glove-25-angular.hdf5", "/train"); ToCqlVector();
test_floatlist: HdfFileToFloatList("/home/mwolters138/datasets/glove-25-angular.hdf5", "/test"); ToCqlVector();
relevant_indices: HdfFileToIntArray("/home/mwolters138/datasets/glove-25-angular.hdf5", "/neighbors")
blocks:
# drop:
# ops:
# drop_vector_index:
# ddl: |
# DROP INDEX IF EXISTS idx_TEMPLATE(tablename,baseline)_TEMPLATE(indextype)_TEMPLATE(similarity_function);
# drop_table:
# ddl: |
# DROP TABLE IF EXISTS TEMPLATE(schemaname,public).TEMPLATE(tablename,baseline);
# ##
# # NOTE: Do NOT enable this block for 'runall.sh' script
# # --------------------------------------------------
# # drop_schema:
# # ddl: |
# # DROP SCHEMA IF EXISTS TEMPLATE(schemaname,public);
#
# schema:
# ops:
# create_schema:
# ddl: |
# CREATE SCHEMA IF NOT EXISTS TEMPLATE(schemaname,public);
# create_table:
# ddl: |
# CREATE TABLE IF NOT EXISTS TEMPLATE(schemaname,public).TEMPLATE(tablename,baseline)
# (key TEXT PRIMARY KEY, value vector(TEMPLATE(dimensions,5)));
# create_vector_index:
# ddl: |
# CREATE INDEX IF NOT EXISTS idx_TEMPLATE(tablename,baseline)_TEMPLATE(indextype)_TEMPLATE(similarity_function)
# ON TEMPLATE(schemaname,public).TEMPLATE(tablename,baseline)
# USING TEMPLATE(indextype) (value vector_TEMPLATE(similarity_function)_ops)
# WITH (TEMPLATE(indexopt));
train:
params:
prepared: true
ops:
main_insert:
dmlwrite: |
INSERT INTO TEMPLATE(schemaname,public).TEMPLATE(tablename,baseline) VALUES (?,?)
ON CONFLICT DO NOTHING;
prep_stmt_val_arr: |
{rw_key},{train_floatlist}
# testann:
# params:
# prepared: true
# ops:
# # NOTE: right now this is only for cosine similarity.
#      # in pgvector, '<=>' is for cosine similarity
# # '<->' is for euclidean distance
# # '<#>' is for inner product
# main_select:
# dmlread: |
# SELECT *
# FROM TEMPLATE(schemaname,public).TEMPLATE(tablename,baseline)
# ORDER BY value <=> ?
# LIMIT TEMPLATE(top_k,100);
# prep_stmt_val_arr: |
# {test_floatlist}
# #################################
# ## NOTE:
# # 1). The script blocks below are ONLY relevant with Vector relevancy score verification
# # 2). The "verifier-key" must match the Vector data identifier column name (e.g. primary key name)
# # right now the identifier must be a type that can be converted to int.
# verifier-key: "key"
# verifier-init: |
# relevancy=scriptingmetrics.newRelevancyMeasures(_parsed_op);
# k=TEMPLATE(top_k,100)
# relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.recall("recall",k));
# relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.precision("precision",k));
# relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.F1("F1",k));
# relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.reciprocal_rank("RR",k));
# relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.average_precision("AP",k));
# verifier: |
# // driver-specific function
# actual_indices=pgvec_utils.getValueListForVerifierKey(result);
# // driver-agnostic function
# relevancy.accept({relevant_indices},actual_indices);
# // because we are "verifying" although this needs to be reorganized
# return true;
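
For reference, the active train block above boils down to a prepared INSERT with two bound parameters (the key and the vector literal), driven with autoCommit=false against the Spanner JDBC URL. A rough standalone sketch of that flow, with the table name, vector text format, and URL as placeholder assumptions (the workload's ON CONFLICT clause is left out here since it is PostgreSQL-dialect syntax):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

public class TrainBlockSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder URL mirroring the scenario's jdbc:cloudspanner form.
        String url = "jdbc:cloudspanner:/projects/my-project/instances/my-instance/databases/baselines";
        try (Connection connection = DriverManager.getConnection(url)) {
            connection.setAutoCommit(false); // matches autoCommit=false in the train scenario
            // Table name assumed from TEMPLATE(tablename,baseline).
            String dml = "INSERT INTO baseline (key, value) VALUES (?, ?)";
            try (PreparedStatement insert = connection.prepareStatement(dml)) {
                for (long cycle = 0; cycle < 100; cycle++) { // cycles===100 in the scenario
                    insert.setString(1, Long.toString(cycle));         // rw_key binding: cycle number as text
                    insert.setString(2, "[0.1, 0.2, 0.3, 0.4, 0.5]");  // train_floatlist binding: placeholder vector literal
                    insert.executeUpdate();
                }
            }
            connection.commit();
        }
    }
}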