fix readers

This commit is contained in:
Jonathan Shook 2023-10-26 20:20:37 -05:00
parent 92d62ae88c
commit e99196c36e
4 changed files with 27 additions and 22 deletions

View File

@ -53,11 +53,12 @@ bindings:
relevant_indices_hdf5: HdfFileToIntArray("testdata/TEMPLATE(datafile).hdf5", "/neighbors")
distance_floatlist_hdf5: HdfFileToFloatList("testdata/TEMPLATE(datafile).hdf5", "/distance")
train_floatlist_hdf5: HdfFileToFloatList("testdata/TEMPLATE(datafile).hdf5", "/train"); ToCqlVector();
# filetype=vecs for TEMPLATE(filetype,vecs)
test_floatlist_vecs: FVecReader("testdata/TEMPLATE(datafile).fvec"); ToCqlVector();
relevant_indices_vecs: IVecReader("testdata/TEMPLATE(datafile).ivec");
distance_floatlist_vecs: FVecReader("testdata/TEMPLATE(datafile).fvec");
train_floatlist_vecs: FVecReader("testdata/TEMPLATE(datafile).fvec"); ToCqlVector();
# filetype=fvec for TEMPLATE(filetype,fvec)
test_floatlist_fvec: FVecReader("testdata/TEMPLATE(datafile)_TEMPLATE(trainsize)_query_vectors.fvec"); ToCqlVector();
relevant_indices_fvec: IVecReader("testdata/TEMPLATE(datafile)_TEMPLATE(trainsize)_indices_query.ivec");
distance_floatlist_fvec: FVecReader("testdata/TEMPLATE(datafile)_TEMPLATE(testsize)_distances_count.fvec",TEMPLATE(dimensions),0);
train_floatlist_fvec: FVecReader("testdata/TEMPLATE(datafile)_TEMPLATE(trainsize)_base_vectors.fvec",TEMPLATE(dimensions),0); ToCqlVector();
# synthetic
synthetic_vectors: HashedFloatVectors(TEMPLATE(dimensions));
blocks:

View File

@ -25,6 +25,7 @@ import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
@ -77,18 +78,17 @@ public class FVecReader implements LongFunction<float[]> {
/**
 * Maps an input value to one record of the fvec data and returns that record's
 * components as a float array.
 *
 * NOTE(review): this span declares {@code recpos} and {@code recdim} twice each and
 * contains two return statements, so it does not read as a single compilable method
 * body. It looks like the pre-change and post-change statements of a diff hunk shown
 * together without +/- markers -- confirm against the actual file before relying on
 * any individual line.
 *
 * @param value the input value; it is reduced modulo {@code reclim} to choose a record
 * @return a float array holding the components read for the chosen record
 * @throws RuntimeException if the dimension stored in the record differs from
 *                          {@code dimensions}
 */
@Override
public float[] apply(long value) {
// Wrap the input into the range [0, reclim) for non-negative inputs.
// presumably reclim is the number of records in the file -- TODO confirm
int recordIdx = (int) (value % reclim);
// First position calculation: input multiplied by the record index, then wrapped by filesize.
long offset = value * recordIdx;
int recpos = (int) (offset %filesize) ;
// First read path: fetch reclen bytes at recpos into buf, then read the leading int
// from whatever buffer the get(...) call returns, with little-endian order applied.
// NOTE(review): if bb is a java.nio.ByteBuffer, get(int, byte[]) returns bb itself
// rather than a view over buf -- confirm.
byte[] buf = new byte[reclen];
ByteBuffer record = this.bb.get(recpos,buf).order(ByteOrder.LITTLE_ENDIAN);
int recdim = record.getInt();
// Second position calculation: record index multiplied by the record length.
int recpos = recordIdx*reclen;
// Second read path for the dimension: absolute int read at recpos, then byte-swapped.
int recdim = Integer.reverseBytes(bb.getInt(recpos));
// Every record must declare the same dimension as this reader expects.
if(recdim!=dimensions) {
throw new RuntimeException("dimensions are not uniform for fvec file '" + this.path.toString() + "', found dim " + recdim + " at record " + value);
}
float[] data = new float[recdim];
// First copy path: pull the components one float at a time from record.
for (int i = 0; i < dimensions; i++) {
data[i]=record.getFloat();
}
return data;
// Second copy path: bulk-read dimensions*Float.BYTES bytes starting just past the
// leading int, view them as little-endian floats, and copy them out in one call.
var vbuf = new byte[dimensions*Float.BYTES];
bb.get(recpos + Integer.BYTES, vbuf);
FloatBuffer fbuf=ByteBuffer.wrap(vbuf).order(ByteOrder.LITTLE_ENDIAN).asFloatBuffer();
var vectors = new float[dimensions];
fbuf.get(vectors);
return vectors;
}
}

View File

@ -16,7 +16,6 @@
package io.nosqlbench.virtdata.library.ivecfvec;
import io.nosqlbench.api.config.standard.ConfigModel;
import io.nosqlbench.api.content.Content;
import io.nosqlbench.api.content.NBIO;
import io.nosqlbench.virtdata.api.annotations.Categories;
@ -25,14 +24,12 @@ import io.nosqlbench.virtdata.api.annotations.Example;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.function.LongFunction;
import java.util.function.LongToIntFunction;
/**
* Reads ivec files with random access, using the input to specify the record number.
@ -89,10 +86,10 @@ public class IVecReader implements LongFunction<int[]> {
@Override
public int[] apply(long value) {
int recordIdx = (int) (value % reclim);
long offset = value * recordIdx;
int recpos = (int) (offset %filesize) ;
int recpos = recordIdx*reclen;
byte[] buf = new byte[reclen];
ByteBuffer record = this.bb.get(recpos,buf);
this.bb.get(recpos,buf);
ByteBuffer record = ByteBuffer.wrap(buf);
int recdim = Integer.reverseBytes(record.getInt());
if(recdim!=dimensions) {
throw new RuntimeException("dimensions are not uniform for ivec file '" + this.path.toString() + "', found dim " + recdim + " at record " + value);

View File

@ -18,6 +18,9 @@ package io.nosqlbench.virtdata.library.ivecfvec;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.HashSet;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.*;
@ -25,9 +28,13 @@ class IVecReaderTest {
@Test
public void testReadIvec() {
ArrayList<HashSet<Integer>> idx_ref = IvecFvecMethods.readIvecs("src/test/resources/ivecfvec/test_ada_002_10000_indices_query_10000.ivec");
IVecReader ir = new IVecReader("src/test/resources/ivecfvec/test_ada_002_10000_indices_query_10000.ivec");
for (int i = 0; i < 10; i++) {
int[] indices = ir.apply(0);
HashSet<Integer> ref = idx_ref.get(0);
for (int j = 0; j < indices.length; j++) {
assertThat(indices[j]).isGreaterThanOrEqualTo(0);
assertThat(indices[j]).isLessThanOrEqualTo(10000);
@ -41,7 +48,7 @@ class IVecReaderTest {
for (int i = 0; i < 10; i++) {
float[] dist = ir.apply(i);
for (int j = 1; j < dist.length; j++) {
assertThat(dist[j]).isGreaterThanOrEqualTo(dist[j-1]);
assertThat(dist[j]).isGreaterThanOrEqualTo(dist[j-1]).describedAs("dist[" + j +"]=(" +dist[j]+") dist[j-1]=(" + dist[j-1] + ")");
}
}
}