Merge pull request #1650 from nosqlbench/jshook/ivec_fvec

add support for ivec and fvec formats
2024-12-22 23:23:56 -06:00 · 2023-10-26 20:33:55 -05:00 · 2023-10-26 20:33:55 -05:00 · 14b836dd51
commit 14b836dd51
parent a54d51ee62 651aaecbf0
37 changed files with 805 additions and 4 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,5 @@
 exported_docs.zip
 .nosqlbench/**
-.run/**
 workspaces/**
 workshop/**
 local/**
--- a/.run/cql_vector2dropE5-BASE-V2.run.xml
+++ b/.run/cql_vector2dropE5-BASE-V2.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__drop__E5-BASE-V2" type="JarApplication" folderName="E5-BASE-V2">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.drop userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=768 testsize=10000 trainsize=100000 datafile=intfloat_e5-base-v2 filetype=fvec table=e5_base_v2 similarity_function=cosine --add-labels &quot;dimensions:768,dataset=e5_base_v2&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2dropE5-LARGE-V2.run.xml
+++ b/.run/cql_vector2dropE5-LARGE-V2.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__drop__E5-LARGE-V2" type="JarApplication" folderName="E5-LARGE-V2">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.drop userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=1024 testsize=10000 trainsize=100000 datafile=intfloat_e5-large-v2 filetype=fvec table=e5_large_v2 similarity_function=cosine --add-labels &quot;dimensions:1024,dataset=e5_large_v2&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2dropE5-SMALL-MULI.run.xml
+++ b/.run/cql_vector2dropE5-SMALL-MULI.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__drop__E5-SMALL-MULI" type="JarApplication" folderName="E5-SMALL-MULI">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.drop userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=384 testsize=10000 trainsize=100000 datafile=intfloat_multilingual-e5-small filetype=fvec table=e5_small_muli similarity_function=cosine --add-labels &quot;dimensions:384,dataset=e5_small_muli&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2dropE5-SMALL-Q.run.xml
+++ b/.run/cql_vector2dropE5-SMALL-Q.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__drop__E5-SMALL-Q" type="JarApplication" folderName="E5-SMALL-Q-V2">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.drop userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=384 testsize=10000 trainsize=100000 datafile=intfloat_e5-small-q-v2 filetype=fvec table=e5_small_q_v2 similarity_function=cosine --add-labels &quot;dimensions:384,dataset=e5_small_q_v2&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2dropE5-SMALL-V2.run.xml
+++ b/.run/cql_vector2dropE5-SMALL-V2.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__drop__E5-SMALL-V2" type="JarApplication" folderName="E5-SMALL-V2">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.drop userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=384 testsize=10000 trainsize=100000 datafile=intfloat_e5-small-v2 filetype=fvec table=e5_small similarity_function=cosine --add-labels &quot;dimensions:384,dataset=e5_small&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2schemaE5-BASE-V2.run.xml
+++ b/.run/cql_vector2schemaE5-BASE-V2.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__schema__E5-BASE-V2" type="JarApplication" folderName="E5-BASE-V2">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.schema userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=768 testsize=10000 trainsize=100000 datafile=intfloat_e5-base-v2 filetype=fvec table=e5_base_v2 similarity_function=cosine --add-labels &quot;dimensions:768,dataset=e5_base_v2&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2schemaE5-LARGE-V2.run.xml
+++ b/.run/cql_vector2schemaE5-LARGE-V2.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__schema__E5-LARGE-V2" type="JarApplication" folderName="E5-LARGE-V2">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.schema userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=1024 testsize=10000 trainsize=100000 datafile=intfloat_e5-large-v2 filetype=fvec table=e5_large_v2 similarity_function=cosine --add-labels &quot;dimensions:1024,dataset=e5_large_v2&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2schemaE5-SMALL-MULI.run.xml
+++ b/.run/cql_vector2schemaE5-SMALL-MULI.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__schema__E5-SMALL-MULI" type="JarApplication" folderName="E5-SMALL-MULI">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.schema userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=384 testsize=10000 trainsize=100000 datafile=intfloat_multilingual-e5-small filetype=fvec table=e5_small_muli similarity_function=cosine --add-labels &quot;dimensions:384,dataset=e5_small_muli&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2schemaE5-SMALL-Q.run.xml
+++ b/.run/cql_vector2schemaE5-SMALL-Q.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__schema__E5-SMALL-Q" type="JarApplication" folderName="E5-SMALL-Q-V2">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.schema userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=384 testsize=10000 trainsize=100000 datafile=intfloat_e5-small-q-v2 filetype=fvec table=e5_small_q_v2 similarity_function=cosine --add-labels &quot;dimensions:384,dataset=e5_small_q_v2&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2schemaE5-SMALL-V2.run.xml
+++ b/.run/cql_vector2schemaE5-SMALL-V2.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__schema__E5-SMALL-V2" type="JarApplication" folderName="E5-SMALL-V2">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.schema userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=384 testsize=10000 trainsize=100000 datafile=intfloat_e5-small-v2 filetype=fvec table=e5_small similarity_function=cosine --add-labels &quot;dimensions:384,dataset=e5_small&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2testannE5-BASE-V2.run.xml
+++ b/.run/cql_vector2testannE5-BASE-V2.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__testann__E5-BASE-V2" type="JarApplication" folderName="E5-BASE-V2">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.testann userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=768 testsize=10000 trainsize=100000 datafile=intfloat_e5-base-v2 filetype=fvec table=e5_base_v2 similarity_function=cosine --add-labels &quot;dimensions:768,dataset=e5_base_v2&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2testannE5-LARGE-V2.run.xml
+++ b/.run/cql_vector2testannE5-LARGE-V2.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__testann__E5-LARGE-V2" type="JarApplication" folderName="E5-LARGE-V2">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.testann userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=1024 testsize=10000 trainsize=100000 datafile=intfloat_e5-large-v2 filetype=fvec table=e5_large_v2 similarity_function=cosine --add-labels &quot;dimensions:1024,dataset=e5_large_v2&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2testannE5-SMALL-MULI.run.xml
+++ b/.run/cql_vector2testannE5-SMALL-MULI.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__testann__E5-SMALL-MULI" type="JarApplication" folderName="E5-SMALL-MULI">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.testann userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=384 testsize=10000 trainsize=100000 datafile=intfloat_multilingual-e5-small filetype=fvec table=e5_small_muli similarity_function=cosine --add-labels &quot;dimensions:384,dataset=e5_small_muli&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2testannE5-SMALL-Q.run.xml
+++ b/.run/cql_vector2testannE5-SMALL-Q.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__testann__E5-SMALL-Q" type="JarApplication" folderName="E5-SMALL-Q-V2">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.testann userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=384 testsize=10000 trainsize=100000 datafile=intfloat_e5-small-q-v2 filetype=fvec table=e5_small_q_v2 similarity_function=cosine --add-labels &quot;dimensions:384,dataset=e5_small_q_v2&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2testannE5-SMALL-V2.run.xml
+++ b/.run/cql_vector2testannE5-SMALL-V2.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__testann__E5-SMALL-V2" type="JarApplication" folderName="E5-SMALL-V2">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.testann userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=384 testsize=10000 trainsize=100000 datafile=intfloat_e5-small-v2 filetype=fvec table=e5_small similarity_function=cosine --add-labels &quot;dimensions:384,dataset=e5_small&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2trainE5-BASE-V2.run.xml
+++ b/.run/cql_vector2trainE5-BASE-V2.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__train__E5-BASE-V2" type="JarApplication" folderName="E5-BASE-V2">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.train userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=768 testsize=10000 trainsize=100000 datafile=intfloat_e5-base-v2 filetype=fvec table=e5_base_v2 similarity_function=cosine --add-labels &quot;dimensions:768,dataset=e5_base_v2&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2trainE5-LARGE-V2.run.xml
+++ b/.run/cql_vector2trainE5-LARGE-V2.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__train__E5-LARGE-V2" type="JarApplication" folderName="E5-LARGE-V2">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.train userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=1024 testsize=10000 trainsize=100000 datafile=intfloat_e5-large-v2 filetype=fvec table=e5_large_v2 similarity_function=cosine --add-labels &quot;dimensions:1024,dataset=e5_large_v2&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2trainE5-SMALL-MULI.run.xml
+++ b/.run/cql_vector2trainE5-SMALL-MULI.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__train__E5-SMALL-MULI" type="JarApplication" folderName="E5-SMALL-MULI">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.train userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=384 testsize=10000 trainsize=100000 datafile=intfloat_multilingual-e5-small filetype=fvec table=e5_small_muli similarity_function=cosine --add-labels &quot;dimensions:384,dataset=e5_small_muli&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2trainE5-SMALL-Q.run.xml
+++ b/.run/cql_vector2trainE5-SMALL-Q.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__train__E5-SMALL-Q" type="JarApplication" folderName="E5-SMALL-Q-V2">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.train userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=384 testsize=10000 trainsize=100000 datafile=intfloat_e5-small-q-v2 filetype=fvec table=e5_small_q_v2 similarity_function=cosine --add-labels &quot;dimensions:384,dataset:e5_small_q_v2&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/cql_vector2trainE5-SMALL-V2.run.xml
+++ b/.run/cql_vector2trainE5-SMALL-V2.run.xml
@ -0,0 +1,14 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="cql_vector2__train__E5-SMALL-V2" type="JarApplication" folderName="E5-SMALL-V2">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="cql_vector2_fvec astra_vectors.train userfile=auth/userfile passfile=auth/passfile scb=auth/scb.zip --show-stacktraces dimensions=384 testsize=10000 trainsize=100000 datafile=intfloat_e5-small-v2 filetype=fvec table=e5_small similarity_function=cosine --add-labels &quot;dimensions:384,dataset=e5_small&quot;" />
+    <option name="WORKING_DIRECTORY" value="$ProjectFileDir$/local/jvector" />
+    <option name="ALTERNATIVE_JRE_PATH" value="jdk21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/linkedinput.run.xml
+++ b/.run/linkedinput.run.xml
@ -0,0 +1,15 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="linkedinput" type="JarApplication" folderName="nbr integration tests">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nbr/target/nbr.jar" />
+    <option name="PROGRAM_PARAMETERS" value="script src/test/resources/scripts/examples/linkedinput.js" />
+    <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/nbr-examples" />
+    <option name="ALTERNATIVE_JRE_PATH_ENABLED" value="true" />
+    <option name="ALTERNATIVE_JRE_PATH" value="/usr/java/jdk-21" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/.run/vectorsearch-consistency-levels.run.xml
+++ b/.run/vectorsearch-consistency-levels.run.xml
@ -0,0 +1,15 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="vectorsearch-consistency-levels" type="JarApplication">
+    <extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
+      <option name="credential" />
+      <option name="region" />
+      <option name="useCurrentConnection" value="false" />
+    </extension>
+    <option name="JAR_PATH" value="$PROJECT_DIR$/nb5/target/nb5.jar" />
+    <option name="PROGRAM_PARAMETERS" value="vector-search.yaml reads errors=stop driverconfig=driver-config.json dimensions=128 read_ratio=1 main-cycles=1 keyspace=baselines128 --report-csv-to metrics read_cl=LOCAL_ONE -v --show-stacktraces" />
+    <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/local/vectors-consistency" />
+    <option name="ALTERNATIVE_JRE_PATH_ENABLED" value="true" />
+    <option name="ALTERNATIVE_JRE_PATH" value="17" />
+    <method v="2" />
+  </configuration>
+</component>
--- a/adapter-cqld4/src/main/resources/activities/baselinesv2/cql_vector2.yaml
+++ b/adapter-cqld4/src/main/resources/activities/baselinesv2/cql_vector2.yaml
@ -28,7 +28,7 @@ scenarios:
  astra_vectors:
    drop: run tags='block:drop' tags='block:drop' threads==undef cycles==undef
    schema: run tags='block:schema' tags='op=create_.*(table|index)' threads==undef cycles==undef dimensions==TEMPLATE(dimensions,25)
-    train: run tags='block:rampup' threads=auto cycles=TEMPLATE(trainsize) errors=counter,warn maxtries=2 dimensions==TEMPLATE(dimensions,25)
+    train: run tags='block:rampup' threads=20x cycles=TEMPLATE(trainsize) errors=counter,warn maxtries=2 dimensions==TEMPLATE(dimensions,25)
 #    search_and_index_unthrottled: >-
 #      run tags='block:search_and_index,optype=select' labels='target:astra'
 #      cycles=TEMPLATE(testsize) threads=10 errors=count,retry stride=500 errors=counter
--- a/adapter-cqld4/src/main/resources/activities/baselinesv2/cql_vector2_fvec.yaml
+++ b/adapter-cqld4/src/main/resources/activities/baselinesv2/cql_vector2_fvec.yaml
@ -0,0 +1,155 @@
+min_version: 5.21
+description: |
+  This is a template for live vector search testing.
+
+  schema: Install the schema required to run the test
+  rampup: Measure how long it takes to load a set of embeddings
+  search_and_index: Measure how the system responds to queries while it
+   is indexing recently ingested data.
+  #? await_index: Pause and wait for the system to complete compactions or index processing
+  search: Run vector search with a set of default (or overridden) parameters
+  search_and_rewrite: Run the same search operations as above, but while rewriting the data
+  search_and_invalidate: Run the same search operations as above, but while overwriting the data
+   with different content using the same vector id.
+  In all of these phases, it is important to instance the metrics with distinct names.
+  Also, aggregates of recall should include total aggregate as well as a moving average.
+
+scenarios:
+  cassandra:
+    drop: run tags='block:drop' threads==undef cycles==undef
+    # nb5 cql_vector2_fvec cassandra.schema host=localhost localdc=datacenter1 dimensions=100
+    schema: run tags='op=create_.*' threads==undef cycles==undef
+    # nb5 cql_vector2_fvec cassandra.rampup host=localhost localdc=datacenter1 dimensions=100 trainsize=1000000 datafile=glove-100-angular rate=10000
+    rampup: run tags='block:rampup' threads=auto cycles=TEMPLATE(trainsize,set-the-trainsize) errors=counter,warn
+    # nb5 cql_vector2_fvec cassandra.read_recall testsize=10000 host=localhost localdc=datacenter1 dimensions=100 datafile=glove-100-angular --report-csv-to rmetrics:.*:5s
+    read_recall: >-
+      run alias=search_and_index tags='block:search_and_index,optype=select' labels='target:cassandra'
+      cycles=TEMPLATE(testsize) errors=counter,warn threads=1
+  astra_vectors:
+    drop: run tags='block:drop' threads==undef cycles==undef
+    schema: run tags='block:schema' tags='op=create_.*(table|index)' threads==undef cycles==undef dimensions==TEMPLATE(dimensions,25)
+    train: run tags='block:rampup' threads=20x cycles=TEMPLATE(trainsize) errors=counter,warn maxtries=2 dimensions==TEMPLATE(dimensions,25)
+#    search_and_index_unthrottled: >-
+#      run tags='block:search_and_index,optype=select' labels='target:astra'
+#      cycles=TEMPLATE(testsize) threads=10 errors=count,retry stride=500 errors=counter
+    testann: >-
+      run tags='block:testann' cycles=TEMPLATE(testsize) errors=count,retry maxtries=2 threads=auto
+    # one activity or two? data leap-frog? or concurrency separate for both?
+  #  await_index: run tags='block:await_index' # This would need to exit when a condition is met
+  #  stop_search_and_index: stop search_and_index
+    # only possible if we have a triggering event to indicate that the condition is met
+    # live_search: run tags='block:search' labels='target:astra' threads=1 cycles=TEMPLATE(testsize,10000)
+    search_and_rewrite: run tags='block:search_and_rewrite' labels='target:astra'
+    search_and_invalidate: run tags='block:search_and_invalidate' labels='target:astra'
+
+params:
+  driver: cqld4
+  instrument: true
+
+bindings:
+  id: ToString()
+  # filetype=hdf5 for TEMPLATE(filetype,hdf5)
+  test_floatlist_hdf5: HdfFileToFloatList("testdata/TEMPLATE(datafile).hdf5", "/test"); ToCqlVector();
+  relevant_indices_hdf5: HdfFileToIntArray("testdata/TEMPLATE(datafile).hdf5", "/neighbors")
+  distance_floatlist_hdf5: HdfFileToFloatList("testdata/TEMPLATE(datafile).hdf5", "/distance")
+  train_floatlist_hdf5: HdfFileToFloatList("testdata/TEMPLATE(datafile).hdf5", "/train"); ToCqlVector();
+  # filetype=fvec for TEMPLATE(filetype,fvec)
+  test_floatlist_fvec: FVecReader("testdata/TEMPLATE(datafile)_TEMPLATE(trainsize)_query_vectors.fvec"); ToCqlVector();
+  relevant_indices_fvec: IVecReader("testdata/TEMPLATE(datafile)_TEMPLATE(trainsize)_indices_query.ivec");
+  distance_floatlist_fvec: FVecReader("testdata/TEMPLATE(datafile)_TEMPLATE(testsize)_distances_count.fvec",TEMPLATE(dimensions),0);
+  train_floatlist_fvec: FVecReader("testdata/TEMPLATE(datafile)_TEMPLATE(trainsize)_base_vectors.fvec",TEMPLATE(dimensions),0); ToCqlVector();
+  # synthetic
+  synthetic_vectors: HashedFloatVectors(TEMPLATE(dimensions));
+
+blocks:
+  drop:
+    params:
+      cl: TEMPLATE(cl,LOCAL_QUORUM)
+    ops:
+      drop_index:
+        raw: |
+          DROP INDEX IF EXISTS TEMPLATE(keyspace,baselines).TEMPLATE(table,vectors);
+      drop_table:
+        raw: |
+          DROP TABLE IF EXISTS TEMPLATE(keyspace,baselines).TEMPLATE(table,vectors);
+  schema:
+    params:
+      cl: TEMPLATE(cl,LOCAL_QUORUM)
+    ops:
+      create_keyspace:
+        raw: |
+          CREATE KEYSPACE IF NOT EXISTS TEMPLATE(keyspace,baselines)
+          WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'};
+      create_table:
+        raw: |
+          CREATE TABLE IF NOT EXISTS TEMPLATE(keyspace,baselines).TEMPLATE(table,vectors) (
+            key TEXT,
+            value vector<float,TEMPLATE(dimensions,set-the-dimensions-template-var)>,
+            PRIMARY KEY (key)
+          );
+      create_sai_index:
+        raw: |
+          CREATE CUSTOM INDEX IF NOT EXISTS ON TEMPLATE(keyspace,baselines).TEMPLATE(table,vectors) (value) USING 'StorageAttachedIndex'
+          WITH OPTIONS = {'similarity_function' : 'TEMPLATE(similarity_function,cosine)'};
+#         WITH OPTIONS = {'maximum_node_connections' : TEMPLATE(M,16), 'construction_beam_width' : TEMPLATE(ef,100), 'similarity_function' : 'TEMPLATE(similarity_function,dot_product)'};
+  rampup:
+    params:
+      cl: TEMPLATE(write_cl,LOCAL_QUORUM)
+      prepared: true
+    ops:
+      insert: |
+        INSERT INTO TEMPLATE(keyspace,baselines).TEMPLATE(table,vectors)
+        (key, value) VALUES ({id},{train_floatlist_TEMPLATE(filetype,hdf5)});
+#  await_index:
+#    ops:
+  testann:
+    ops:
+      select_ann_limit_TEMPLATE(k,100):
+        prepared: |
+          SELECT * FROM TEMPLATE(keyspace,baselines).TEMPLATE(table,vectors)
+          ORDER BY value ANN OF {test_floatlist_TEMPLATE(filetype,hdf5)} LIMIT TEMPLATE(select_limit,100);
+        tags:
+          optype: select
+        verifier-init: |
+          k=TEMPLATE(k,100)
+          relevancy= new io.nosqlbench.api.engine.metrics.wrappers.RelevancyMeasures(_parsed_op);
+          relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.recall("recall",k));
+          relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.precision("precision",k));
+          relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.F1("F1",k));
+          relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.reciprocal_rank("RR",k));
+          relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.average_precision("AP",k));
+        verifier: |
+          actual_indices=io.nosqlbench.engine.extensions.vectormath.CqlUtils.cqlStringColumnToIntArray("key",result);
+          relevancy.accept({relevant_indices_TEMPLATE(filetype,hdf5)},actual_indices);
+          return true;
+      insert_rewrite:
+        prepared: |
+          INSERT INTO TEMPLATE(keyspace,baselines).TEMPLATE(table,vectors)
+          (key, value) VALUES ({id},{train_floatlist_TEMPLATE(filetype,hdf5)});
+        tags:
+          optype: insert
+
+  search_and_rewrite:
+    ops:
+      select_ann_limit:
+        stmt: |
+          SELECT * FROM TEMPLATE(keyspace,baselines).TEMPLATE(table,vectors) ORDER BY value ANN OF {test_vector} LIMIT TEMPLATE(select_limit,100);
+        verifier-init: |
+          scriptingmetrics.newSummaryGauge(_parsed_op,"recall")
+#        verifier: |
+      upsert_same:
+        stmt: |
+          INSERT INTO TEMPLATE(keyspace,baselines).TEMPLATE(table,vectors)
+          (key, value) VALUES ({rw_key},{train_vector});
+  search_and_invalidate:
+    ops:
+      select_ann_limit:
+        stmt: |
+          SELECT * FROM TEMPLATE(keyspace,baselines).TEMPLATE(table,vectors) ORDER BY value ANN OF {test_vector} LIMIT TEMPLATE(select_limit,100);
+#        verifier-init: |
+#        verifier: |
+      upsert_random: |
+        INSERT INTO TEMPLATE(keyspace,baselines).TEMPLATE(table,vectors)
+        (key, value) VALUES ({rw_key},{train_vector});
+
+
--- a/adapter-jdbc/src/main/java/io/nosqlbench/adapter/jdbc/JDBCDriverAdapter.java
+++ b/adapter-jdbc/src/main/java/io/nosqlbench/adapter/jdbc/JDBCDriverAdapter.java
@ -31,6 +31,7 @@ import org.apache.logging.log4j.Logger;

 import java.util.function.Function;

+@Service(value = DriverAdapter.class,selector = "jdbc")
 public class JDBCDriverAdapter extends BaseDriverAdapter<JDBCOp, JDBCSpace> {
    private final static Logger logger = LogManager.getLogger(JDBCDriverAdapter.class);

--- a/mvn-defaults/pom.xml
+++ b/mvn-defaults/pom.xml
@ -537,7 +537,7 @@
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <configuration>
-                    <argLine>-ea ${argLine}</argLine>
+                    <argLine>-ea @{argLine}</argLine>
                    <systemPropertyVariables>
                        <Log4jContextSelector>
                            org.apache.logging.log4j.core.async.AsyncLoggerContextSelector
--- a/virtdata-api/src/main/java/io/nosqlbench/virtdata/api/annotations/Category.java
+++ b/virtdata-api/src/main/java/io/nosqlbench/virtdata/api/annotations/Category.java
@ -33,5 +33,6 @@ public enum Category {
    experimental,
    combinitoric,
    vectors,
-    HOF
+    HOF,
+    readers
 }
--- a/virtdata-lib-hdf5/pom.xml
+++ b/virtdata-lib-hdf5/pom.xml
@ -53,6 +53,8 @@
                <directory>src/test/resources</directory>
                <excludes>
                    <exclude>h5ex_t_float.h5</exclude>
+                    <exclude>**/*.ivec</exclude>
+                    <exclude>**/*.fvec</exclude>
                </excludes>
                <filtering>true</filtering>
            </testResource>
--- a/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/ivecfvec/FVecReader.java
+++ b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/ivecfvec/FVecReader.java
@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.ivecfvec;
+
+import io.nosqlbench.api.content.Content;
+import io.nosqlbench.api.content.NBIO;
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.FloatBuffer;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.function.LongFunction;
+
+/**
+ * Reads fvec files with random access, using the input to specify the record number.
+ */
+@ThreadSafeMapper
+@Categories(Category.readers)
+public class FVecReader implements LongFunction<float[]> {
+
+    private final MappedByteBuffer bb;
+    private final int dimensions;
+    private final int reclen;
+    private final long filesize;
+    private final Path path;
+    private final int reclim;
+
+    /**
+     * Read the fvec file, determining the record size from the first record and
+     * using every record found in the file.
+     * @param pathname The location of the fvec file
+     */
+    public FVecReader(String pathname) {
+        this(pathname,0,0);
+    }
+
+    /**
+     * Read the fvec file, optionally asserting the dimensions and limiting the number of records used.
+     * @param pathname The location of the fvec file
+     * @param expectedDimensions The dimensions the file must have, or 0 (or less) to accept whatever the first record declares
+     * @param recordLimit The number of leading records to cycle over, or 0 to use all records in the file
+     * @throws RuntimeException if the file can not be read, is structurally invalid, or does not match the expected values
+     */
+    public FVecReader(String pathname, int expectedDimensions, int recordLimit) {
+        if (recordLimit < 0) {
+            throw new RuntimeException("Invalid record limit " + recordLimit + " specified for '" + pathname + "', it must not be negative");
+        }
+        Content<?> src = NBIO.fs().search(pathname).one();
+        this.path = src.asPath();
+        long size;
+        MappedByteBuffer mapped;
+        // A mapping does not depend on the channel which created it, so the channel
+        // is closed as soon as the mapping exists instead of being leaked.
+        try (FileChannel channel = FileChannel.open(this.path, StandardOpenOption.READ, StandardOpenOption.SPARSE)) {
+            size = channel.size();
+            mapped = channel.map(FileChannel.MapMode.READ_ONLY, 0, size);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+        this.filesize = size;
+        this.bb = mapped;
+        if (filesize < Integer.BYTES) {
+            throw new RuntimeException("The filesize (" + filesize + ") for '" + pathname + "' is too small to contain a record header");
+        }
+        // Record headers are stored little-endian, while the buffer reads big-endian by default.
+        this.dimensions = Integer.reverseBytes(bb.getInt(0));
+        if(expectedDimensions>0 && expectedDimensions!=dimensions) {
+            throw new RuntimeException("Invalid dimensions specified for '" +pathname + "', found " + dimensions + ", but expected " + expectedDimensions);
+        }
+        if (dimensions < 0) {
+            throw new RuntimeException("Invalid dimensions (" + dimensions + ") found in the first record of '" + pathname + "'");
+        }
+        // Computed as a long so that a corrupt header can not overflow the record length.
+        long recordBytes = Integer.BYTES + ((long) dimensions * Float.BYTES);
+        if ((filesize % recordBytes)!=0) {
+            throw new RuntimeException("The filesize (" + filesize + ") for '" + pathname + "' must be a multiple of the reclen (" + recordBytes + ")");
+        }
+        // recordBytes divides a non-zero mapped size here, so it fits in an int.
+        this.reclen = (int) recordBytes;
+        int totalRecords = (int) (filesize/reclen);
+        if (recordLimit > totalRecords) {
+            throw new RuntimeException("Specified record range of " + recordLimit + ", but file only contained " + totalRecords + " total");
+        }
+        this.reclim = recordLimit==0? totalRecords : recordLimit;
+    }
+
+    /**
+     * Read the vector stored in the record selected by the input, wrapping around the record limit.
+     * @param value The input value, mapped onto a record index in the range [0, record limit)
+     * @return a newly allocated array holding the float components of the selected record
+     * @throws RuntimeException if the selected record declares different dimensions than the first record
+     */
+    @Override
+    public float[] apply(long value) {
+        // floorMod keeps the index non-negative even when the input is negative
+        int recordIdx = (int) Math.floorMod(value, (long) reclim);
+        int recpos = recordIdx*reclen;
+        int recdim = Integer.reverseBytes(bb.getInt(recpos));
+        if(recdim!=dimensions) {
+            throw new RuntimeException("dimensions are not uniform for fvec file '" + this.path.toString() + "', found dim " + recdim + " at record " + value);
+        }
+        var vbuf = new byte[dimensions*Float.BYTES];
+        // Absolute bulk read: does not touch the position of the shared mapped buffer.
+        bb.get(recpos + Integer.BYTES, vbuf);
+
+        FloatBuffer fbuf=ByteBuffer.wrap(vbuf).order(ByteOrder.LITTLE_ENDIAN).asFloatBuffer();
+        var vectors = new float[dimensions];
+        fbuf.get(vectors);
+        return vectors;
+    }
+}
--- a/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/ivecfvec/IVecReader.java
+++ b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/ivecfvec/IVecReader.java
@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.ivecfvec;
+
+import io.nosqlbench.api.content.Content;
+import io.nosqlbench.api.content.NBIO;
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.Example;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.function.LongFunction;
+
+/**
+ * Reads ivec files with random access, using the input to specify the record number.
+ * This is used for testing with generated KNN test data which is uniform in dimensions and neighborhood size.
+ * While it is possible to specify different dimensioned vectors per record, this is not supported, since this
+ * function honors the pure-function behavior of other NB binding functions. This requires uniform record structure for random access.
+ */
+@ThreadSafeMapper
+@Categories(Category.readers)
+public class IVecReader implements LongFunction<int[]> {
+
+    private final MappedByteBuffer bb;
+    private final int dimensions;
+    private final int reclen;
+    private final long filesize;
+    private final Path path;
+    private final int reclim;
+
+    /**
+     * Read the ivec file, determining the record size from the first record.
+     * @param pathname The location of the ivec file
+     */
+    @Example({"IVecReader('testfile.ivec')","Create a reader for int vectors, detecting the dimensions and dataset size automatically."})
+    public IVecReader(String pathname) {
+        this(pathname,0,0);
+    }
+
+    /**
+     * Read the ivec file, optionally asserting the dimensions and limiting the number of records used.
+     * @param pathname The location of the ivec file
+     * @param expectedDimensions The dimensions the file must have, or 0 (or less) to accept whatever the first record declares
+     * @param recordLimit The number of leading records to cycle over, or 0 to use all records in the file
+     * @throws RuntimeException if the file can not be read, is structurally invalid, or does not match the expected values
+     */
+    @Example({"IVecReader('testfile.ivec', 46, 12)","Create a reader for int vectors, asserting 46 dimensions and limit total records to 12."})
+    public IVecReader(String pathname, int expectedDimensions, int recordLimit) {
+        if (recordLimit < 0) {
+            throw new RuntimeException("Invalid record limit " + recordLimit + " specified for '" + pathname + "', it must not be negative");
+        }
+        Content<?> src = NBIO.fs().search(pathname).one();
+        this.path = src.asPath();
+        long size;
+        MappedByteBuffer mapped;
+        // A mapping does not depend on the channel which created it, so the channel
+        // is closed as soon as the mapping exists instead of being leaked.
+        try (FileChannel channel = FileChannel.open(this.path, StandardOpenOption.READ, StandardOpenOption.SPARSE)) {
+            size = channel.size();
+            mapped = channel.map(FileChannel.MapMode.READ_ONLY, 0, size);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+        this.filesize = size;
+        this.bb = mapped;
+        if (filesize < Integer.BYTES) {
+            throw new RuntimeException("The filesize (" + filesize + ") for '" + pathname + "' is too small to contain a record header");
+        }
+        // Record headers are stored little-endian, while the buffer reads big-endian by default.
+        this.dimensions = Integer.reverseBytes(bb.getInt(0));
+        if(expectedDimensions>0 && expectedDimensions!=dimensions) {
+            throw new RuntimeException("Invalid dimensions specified for '" +pathname + "', found " + dimensions + ", but expected " + expectedDimensions);
+        }
+        if (dimensions < 0) {
+            throw new RuntimeException("Invalid dimensions (" + dimensions + ") found in the first record of '" + pathname + "'");
+        }
+        // Computed as a long so that a corrupt header can not overflow the record length.
+        long recordBytes = Integer.BYTES + ((long) dimensions * Integer.BYTES);
+        if ((filesize % recordBytes)!=0) {
+            throw new RuntimeException("The filesize (" + filesize + ") for '" + pathname + "' must be a multiple of the reclen (" + recordBytes + ")");
+        }
+        // recordBytes divides a non-zero mapped size here, so it fits in an int.
+        this.reclen = (int) recordBytes;
+        int totalRecords = (int) (filesize/reclen);
+        if (recordLimit > totalRecords) {
+            throw new RuntimeException("Specified record range of " + recordLimit + ", but file only contained " + totalRecords + " total");
+        }
+        this.reclim = recordLimit==0? totalRecords : recordLimit;
+    }
+
+    /**
+     * Read the int vector stored in the record selected by the input, wrapping around the record limit.
+     * @param value The input value, mapped onto a record index in the range [0, record limit)
+     * @return a newly allocated array holding the int components of the selected record
+     * @throws RuntimeException if the selected record declares different dimensions than the first record
+     */
+    @Override
+    public int[] apply(long value) {
+        // floorMod keeps the index non-negative even when the input is negative
+        int recordIdx = (int) Math.floorMod(value, (long) reclim);
+        int recpos = recordIdx*reclen;
+        byte[] buf = new byte[reclen];
+        // Absolute bulk read: does not touch the position of the shared mapped buffer.
+        this.bb.get(recpos,buf);
+        ByteBuffer record = ByteBuffer.wrap(buf);
+        int recdim = Integer.reverseBytes(record.getInt());
+        if(recdim!=dimensions) {
+            throw new RuntimeException("dimensions are not uniform for ivec file '" + this.path.toString() + "', found dim " + recdim + " at record " + value);
+        }
+        int[] data = new int[recdim];
+        for (int i = 0; i < dimensions; i++) {
+            // values are stored little-endian, the wrapped buffer reads big-endian
+            data[i]=Integer.reverseBytes(record.getInt());
+        }
+        return data;
+    }
+}
--- a/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/ivecfvec/IvecFvecMethods.java
+++ b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/ivecfvec/IvecFvecMethods.java
@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.ivecfvec;
+
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.ArrayList;
+import java.util.HashSet;
+
+public class IvecFvecMethods {
+
+    /**
+     * Read every record of an fvec file sequentially into memory.
+     * Each record is a little-endian int dimension followed by that many little-endian floats.
+     * @param filePath The location of the fvec file
+     * @return the vectors in file order
+     * @throws IOException if the file can not be read or a record declares a non-positive dimension
+     */
+    public static ArrayList<float[]> readFvecs(String filePath) throws IOException {
+        var vectors = new ArrayList<float[]>();
+        try (var dis = new DataInputStream(new BufferedInputStream(new FileInputStream(filePath)))) {
+            while (dis.available() > 0) {
+                var dimension = Integer.reverseBytes(dis.readInt());
+                // Checked explicitly, since an assert is skipped when assertions are disabled
+                if (dimension <= 0) {
+                    throw new IOException("Invalid dimension " + dimension + " found in fvec file '" + filePath + "'");
+                }
+                var buffer = new byte[dimension * Float.BYTES];
+                dis.readFully(buffer);
+                var byteBuffer = ByteBuffer.wrap(buffer).order(ByteOrder.LITTLE_ENDIAN);
+
+                var vector = new float[dimension];
+                var floatBuffer = byteBuffer.asFloatBuffer();
+                floatBuffer.get(vector);
+                vectors.add(vector);
+            }
+        }
+        return vectors;
+    }
+
+    /**
+     * Read every record of an ivec file sequentially into memory.
+     * Each record is a little-endian int count followed by that many little-endian ints.
+     * @param filename The location of the ivec file
+     * @return one set of values per record, in file order
+     * @throws RuntimeException if the file can not be read completely or a record declares a negative count
+     */
+    public static ArrayList<HashSet<Integer>> readIvecs(String filename) {
+        var groundTruthTopK = new ArrayList<HashSet<Integer>>();
+
+        try (var dis = new DataInputStream(new BufferedInputStream(new FileInputStream(filename)))) {
+            while (dis.available() > 0) {
+                var numNeighbors = Integer.reverseBytes(dis.readInt());
+                if (numNeighbors < 0) {
+                    throw new IOException("Invalid neighbor count " + numNeighbors + " found in ivec file '" + filename + "'");
+                }
+                var neighbors = new HashSet<Integer>(numNeighbors);
+
+                for (var i = 0; i < numNeighbors; i++) {
+                    var neighbor = Integer.reverseBytes(dis.readInt());
+                    neighbors.add(neighbor);
+                }
+
+                groundTruthTopK.add(neighbors);
+            }
+        } catch (IOException e) {
+            // Fail loudly: a partially read result would silently skew whatever is compared against it.
+            throw new RuntimeException("Unable to read ivec file '" + filename + "'", e);
+        }
+
+        return groundTruthTopK;
+    }
+
+}
--- a/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/ivecfvec/IVecReaderTest.java
+++ b/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/ivecfvec/IVecReaderTest.java
@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.ivecfvec;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.*;
+
+class IVecReaderTest {
+
+    /**
+     * Reads the first ten records through the random access reader and checks them
+     * against the same records read sequentially by {@link IvecFvecMethods#readIvecs(String)}.
+     */
+    @Test
+    public void testReadIvec() {
+
+        ArrayList<HashSet<Integer>> idx_ref = IvecFvecMethods.readIvecs("src/test/resources/ivecfvec/test_ada_002_10000_indices_query_10000.ivec");
+
+        IVecReader ir = new IVecReader("src/test/resources/ivecfvec/test_ada_002_10000_indices_query_10000.ivec");
+        for (int i = 0; i < 10; i++) {
+            // read record i, rather than re-reading record 0 on every pass
+            int[] indices = ir.apply(i);
+            HashSet<Integer> ref = idx_ref.get(i);
+            for (int j = 0; j < indices.length; j++) {
+                assertThat(indices[j]).isGreaterThanOrEqualTo(0);
+                assertThat(indices[j]).isLessThanOrEqualTo(10000);
+                assertThat(ref).describedAs("record " + i + " index[" + j + "]=(" + indices[j] + ")").contains(indices[j]);
+            }
+        }
+    }
+
+    /**
+     * Checks that the values in each of the first ten distance records are in non-decreasing order.
+     */
+    @Test
+    public void testReadFvec() {
+        FVecReader ir = new FVecReader("src/test/resources/ivecfvec/test_ada_002_10000_distances_count.fvec");
+        for (int i = 0; i < 10; i++) {
+            float[] dist = ir.apply(i);
+            for (int j = 1; j < dist.length; j++) {
+                // the description has to be set before the assertion, otherwise it is ignored
+                assertThat(dist[j]).describedAs("dist[" + j +"]=(" +dist[j]+") dist[j-1]=(" + dist[j-1] + ")").isGreaterThanOrEqualTo(dist[j-1]);
+            }
+        }
+    }
+
+    /**
+     * Checks that a reader created with an explicit dimension assertion yields vectors of that size.
+     */
+    @Test
+    public void testReadFvecSpecificDims() {
+        FVecReader ir = new FVecReader(
+            "src/test/resources/ivecfvec/test_ada_002_10000_base_vectors.fvec",
+            1536,0);
+        float[] vec0 = ir.apply(0);
+        assertThat(vec0.length).isEqualTo(1536);
+    }
+
+}
--- a/virtdata-lib-hdf5/src/test/resources/ivecfvec/test_ada_002_10000_base_vectors.fvec
+++ b/virtdata-lib-hdf5/src/test/resources/ivecfvec/test_ada_002_10000_base_vectors.fvec
--- a/virtdata-lib-hdf5/src/test/resources/ivecfvec/test_ada_002_10000_distances_count.fvec
+++ b/virtdata-lib-hdf5/src/test/resources/ivecfvec/test_ada_002_10000_distances_count.fvec
--- a/virtdata-lib-hdf5/src/test/resources/ivecfvec/test_ada_002_10000_indices_query_10000.ivec
+++ b/virtdata-lib-hdf5/src/test/resources/ivecfvec/test_ada_002_10000_indices_query_10000.ivec
--- a/virtdata-lib-hdf5/src/test/resources/ivecfvec/test_ada_002_10000_query_vectors_10000.fvec
+++ b/virtdata-lib-hdf5/src/test/resources/ivecfvec/test_ada_002_10000_query_vectors_10000.fvec