mirror of
https://github.com/nosqlbench/nosqlbench.git
synced 2024-12-22 23:23:56 -06:00
Fix the issue that causes the vector relevancy score verification test to fail.
This commit is contained in:
parent
439180ca9f
commit
1242ff2dd0
@ -66,8 +66,7 @@ public class JDBCSpace implements AutoCloseable {
|
||||
|
||||
public JDBCSpace(String spaceName, NBConfiguration cfg) {
|
||||
this.spaceName = spaceName;
|
||||
this.totalCycleNum = NumberUtils.toLong(cfg.get("cycles"));
|
||||
|
||||
this.totalCycleNum = NumberUtils.toLong(cfg.getOptional("cycles").orElse("1"));
|
||||
int totalThreads = NumberUtils.toInt(cfg.getOptional("threads").orElse("1"));
|
||||
int numConnInput = NumberUtils.toInt(cfg.getOptional("num_conn").orElse("10"));
|
||||
this.maxNumConn = Math.min(totalThreads, numConnInput);
|
||||
|
@ -36,10 +36,12 @@ public class JDBCDMLOpDispenser extends JDBCBaseOpDispenser {
|
||||
private static final Logger logger = LogManager.getLogger(JDBCDMLOpDispenser.class);
|
||||
|
||||
private final boolean isReadStatement;
|
||||
|
||||
private final LongFunction<String> pStmtSqlStrFunc;
|
||||
private final LongFunction<List<Object>> pStmtValListFunc;
|
||||
|
||||
// Only for Vector relevancy score testing (Vector read statement)
|
||||
private final String verifierKeyName;
|
||||
|
||||
public JDBCDMLOpDispenser(DriverAdapter<JDBCOp, JDBCSpace> adapter,
|
||||
JDBCSpace jdbcSpace,
|
||||
ParsedOp op,
|
||||
@ -66,6 +68,8 @@ public class JDBCDMLOpDispenser extends JDBCBaseOpDispenser {
|
||||
}
|
||||
return pStmtValListObj;
|
||||
};
|
||||
|
||||
this.verifierKeyName = op.getStaticConfigOr("verifier-key", "");
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -77,7 +81,8 @@ public class JDBCDMLOpDispenser extends JDBCBaseOpDispenser {
|
||||
jdbcSpace,
|
||||
true,
|
||||
pStmtSqlStrFunc.apply(cycle),
|
||||
pStmtValListFunc.apply(cycle));
|
||||
pStmtValListFunc.apply(cycle),
|
||||
this.verifierKeyName);
|
||||
}
|
||||
else {
|
||||
int ddlStmtBatchNum = jdbcSpace.getDmlBatchNum();
|
||||
|
@ -17,8 +17,7 @@ package io.nosqlbench.adapter.jdbc.optypes;
|
||||
|
||||
import io.nosqlbench.adapter.jdbc.JDBCSpace;
|
||||
import io.nosqlbench.adapter.jdbc.exceptions.JDBCAdapterUnexpectedException;
|
||||
import io.nosqlbench.adapter.jdbc.utils.JDBCPgVector;
|
||||
import io.nosqlbench.engine.extensions.vectormath.PgvecUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
@ -29,11 +28,15 @@ import java.util.List;
|
||||
public class JDBCDMLReadOp extends JDBCDMLOp {
|
||||
private static final Logger LOGGER = LogManager.getLogger(JDBCDMLReadOp.class);
|
||||
|
||||
private String verifierKeyName;
|
||||
|
||||
public JDBCDMLReadOp(JDBCSpace jdbcSpace,
|
||||
boolean isReadStmt,
|
||||
String pStmtSqlStr,
|
||||
List<Object> pStmtValList) {
|
||||
List<Object> pStmtValList,
|
||||
String verifierKeyName) {
|
||||
super(jdbcSpace, isReadStmt, pStmtSqlStr, pStmtValList);
|
||||
this.verifierKeyName = verifierKeyName;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -44,14 +47,17 @@ public class JDBCDMLReadOp extends JDBCDMLOp {
|
||||
}
|
||||
|
||||
try {
|
||||
int resultFetched = 0;
|
||||
List<ResultSet> resultSetList = new ArrayList<>();
|
||||
// key string list to be used in the "Vector" relevancy score verification
|
||||
List<String> verifierValueList = new ArrayList<>();
|
||||
|
||||
ResultSet rs;
|
||||
if (!isPreparedStmt) {
|
||||
rs = stmt.executeQuery(pStmtSqlStr);
|
||||
do {
|
||||
resultSetList.add(rs);
|
||||
String keyVal = rs.getString(this.verifierKeyName);
|
||||
if (StringUtils.isNotBlank(keyVal)) {
|
||||
verifierValueList.add(keyVal);
|
||||
}
|
||||
} while (rs.next());
|
||||
closeStatement(stmt);
|
||||
}
|
||||
@ -63,8 +69,10 @@ public class JDBCDMLReadOp extends JDBCDMLOp {
|
||||
if(isResultSet) {
|
||||
rs = stmt.getResultSet();
|
||||
while(rs.next()) {
|
||||
resultSetList.add(rs);
|
||||
resultFetched++;
|
||||
String keyVal = rs.getString(this.verifierKeyName);
|
||||
if (StringUtils.isNotBlank(keyVal)) {
|
||||
verifierValueList.add(keyVal);
|
||||
}
|
||||
}
|
||||
rs.close();
|
||||
} else {
|
||||
@ -78,11 +86,7 @@ public class JDBCDMLReadOp extends JDBCDMLOp {
|
||||
closeStatement(stmt);
|
||||
}
|
||||
|
||||
if (LOGGER.isDebugEnabled()) {
|
||||
LOGGER.debug("Total {} of results have been returned.", resultFetched);
|
||||
}
|
||||
|
||||
return resultSetList;
|
||||
return verifierValueList;
|
||||
}
|
||||
catch (SQLException sqlException) {
|
||||
throw new JDBCAdapterUnexpectedException(
|
||||
|
@ -42,13 +42,10 @@ public abstract class JDBCOp implements CycleOp {
|
||||
}
|
||||
|
||||
protected void closeStatement(Statement stmt) throws SQLException {
|
||||
/*
|
||||
* NO-op for now
|
||||
* ------------------
|
||||
if (! (stmt instanceof PreparedStatement)) {
|
||||
stmt.close();
|
||||
} else if (jdbcSpace.isShuttingDown()) {
|
||||
stmt.close();
|
||||
}*/
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -16,75 +16,15 @@
|
||||
|
||||
package io.nosqlbench.engine.extensions.vectormath;
|
||||
|
||||
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
public class PgvecUtils {
|
||||
|
||||
public static long[] sqlResultSetFieldsToLongArray(String fieldName, List<ResultSet> resultSets) {
|
||||
return resultSets.stream().filter(r -> {
|
||||
try {
|
||||
return ((r!=null) && !r.isClosed());
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}).mapToLong(r -> {
|
||||
try {
|
||||
return r.getLong(fieldName);
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}).toArray();
|
||||
}
|
||||
|
||||
public static String[] sqlResultSetFieldsToStringArray(String fieldName, List<ResultSet> resultSets) {
|
||||
return resultSets.stream().filter(r -> {
|
||||
try {
|
||||
return ((r!=null) && !r.isClosed());
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}).map(r -> {
|
||||
try {
|
||||
return r.getString(fieldName);
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}).toArray(String[]::new);
|
||||
}
|
||||
|
||||
public static int[] sqlResultSetListToIntArray(String fieldName, List<ResultSet> resultSets) {
|
||||
return resultSets.stream().filter(r -> {
|
||||
try {
|
||||
return ((r!=null) && !r.isClosed());
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}).mapToInt(r -> {
|
||||
try {
|
||||
return r.getInt(fieldName);
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}).toArray();
|
||||
}
|
||||
|
||||
public static int[] sqlStringColumnToIntArray(String fieldName, List<ResultSet> resultSets) {
|
||||
return resultSets.stream().filter(r -> {
|
||||
try {
|
||||
return ((r!=null) && !r.isClosed());
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}).mapToInt(r -> {
|
||||
try {
|
||||
return Integer.parseInt(Objects.requireNonNull(r.getString(fieldName)));
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
public static int[] getValueListForVerifierKey(List<String> values) {
|
||||
int[] intArr = values.stream().mapToInt(v -> {
|
||||
return Integer.parseInt(Objects.requireNonNull(v));
|
||||
}).toArray();
|
||||
return intArr;
|
||||
}
|
||||
}
|
||||
|
@ -15,23 +15,23 @@ scenarios:
|
||||
###
|
||||
## For DDL workload, turn on 'AutoCommit'. Turning it off will cause errors.
|
||||
###
|
||||
drop-tbl: run driver=jdbc tags==block:drop-tbl threads==1 cycles==UNDEF url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" sslrootcert="/path/to/postgresql_certs/root.crt" autoCommit="true"
|
||||
drop-tbl: run driver=jdbc tags==block:drop-tbl threads==1 cycles==UNDEF url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" sslrootcert="/path/to/postgresql_certs/root.crt" autoCommit="true"
|
||||
# The following CLI parameter is needed for the 'create-tbl' named scenario:
|
||||
# - dimensions: vector dimension size (MUST match the actual ANN benchmark data)
|
||||
create-tbl: run driver=jdbc tags==block:create-tbl threads==1 cycles==UNDEF url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" sslrootcert="/path/to/postgresql_certs/root.crt" autoCommit="true"
|
||||
create-tbl: run driver=jdbc tags==block:create-tbl threads==1 cycles==UNDEF url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" sslrootcert="/path/to/postgresql_certs/root.crt" autoCommit="true"
|
||||
#
|
||||
# Vectors with up to 2,000 dimensions can be indexed.
|
||||
#
|
||||
# The following extra CLI parameter is needed for both 'create-vec-idx' and 'drop-vec-idx' named scenarios:
|
||||
# - indexName: index name
|
||||
drop-vec-idx: run driver=jdbc tags==block:drop-vec-idx threads==1 cycles==UNDEF url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" sslrootcert="/path/to/postgresql_certs/root.crt" autoCommit="true"
|
||||
drop-vec-idx: run driver=jdbc tags==block:drop-vec-idx threads==1 cycles==UNDEF url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" sslrootcert="/path/to/postgresql_certs/root.crt" autoCommit="true"
|
||||
# The following extra CLI parameters are needed for 'create-vec-idx' named scenario:
|
||||
# - indexType: index type; valid values: 'ivfflat' or 'hnsw' (see: https://github.com/pgvector/pgvector#indexing)
|
||||
# - indexOpt: index options
|
||||
# * for 'ivfflat' index type, the option is like: "lists=<number>"
|
||||
# * for 'hnsw' index type, the option is like: "m=<number>,ef_construction=<number>"
|
||||
# - relFunc: relevancy function; valid values: 'l2' (L2 distance), 'ip' (Inner product), or 'cosine' (Cosine distance)
|
||||
create-vec-idx: run driver=jdbc tags==block:create-vec-idx threads==1 cycles==UNDEF url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" sslrootcert="/path/to/postgresql_certs/root.crt" autoCommit="true"
|
||||
create-vec-idx: run driver=jdbc tags==block:create-vec-idx threads==1 cycles==UNDEF url="jdbc:postgresql://host:port/" databaseName="defaultdb" portNumber=5432 user="newuser" password="CHANGE_ME" sslmode="prefer" serverName="pgsql" sslrootcert="/path/to/postgresql_certs/root.crt" autoCommit="true"
|
||||
|
||||
###
|
||||
## For DML workload, 'AutoCommit' can be off or on
|
||||
@ -103,7 +103,7 @@ blocks:
|
||||
# Using PostgreSQL upsert (INSERT ON CONFLICT statement)
|
||||
vec-write:
|
||||
params:
|
||||
# DML statement MUST be prepared
|
||||
# DML write statement MUST be prepared
|
||||
prepared: true
|
||||
ops:
|
||||
main-insert:
|
||||
@ -115,14 +115,25 @@ blocks:
|
||||
|
||||
vec-read:
|
||||
ops:
|
||||
params:
|
||||
# DML READ statement can be prepared or not
|
||||
prepared: true
|
||||
main-select:
|
||||
dmlread: |
|
||||
SELECT key, (value <-> ?) as relevancy, value
|
||||
FROM TEMPLATE(schema,public).TEMPLATE(table,pgvec)
|
||||
ORDER BY value <-> ?
|
||||
LIMIT TEMPLATE(queryLimit,10);
|
||||
LIMIT TEMPLATE(queryLimit,100);
|
||||
prep_stmt_val_arr: |
|
||||
{test_vector},{test_vector}
|
||||
#################################
|
||||
## NOTE:
|
||||
# 1). The script blocks below are ONLY relevant to Vector relevancy score verification
|
||||
# 2). The "verifier-key" must match the Vector data identifier column name (e.g. primary key name)
|
||||
# right now the identifier must be a type that can be converted to int.
|
||||
verifier-key: "key"
|
||||
verifier-imports:
|
||||
- io.nosqlbench.adapter.mongodb.MongoDbUtils
|
||||
verifier-init: |
|
||||
relevancy=scriptingmetrics.newRelevancyMeasures(_parsed_op);
|
||||
for (int k in List.of(100)) {
|
||||
@ -134,7 +145,7 @@ blocks:
|
||||
}
|
||||
verifier: |
|
||||
// driver-specific function
|
||||
actual_indices=pgvec_utils.sqlStringColumnToIntArray("key",result);
|
||||
actual_indices=pgvec_utils.getValueListForVerifierKey(result);
|
||||
// driver-agnostic function
|
||||
relevancy.accept({validation_set},actual_indices);
|
||||
// because we are "verifying" although this needs to be reorganized
|
||||
|
Loading…
Reference in New Issue
Block a user