mirror of
https://github.com/nosqlbench/nosqlbench.git
synced 2024-12-25 08:11:06 -06:00
fix readers
This commit is contained in:
parent
92d62ae88c
commit
e99196c36e
@ -53,11 +53,12 @@ bindings:
|
||||
relevant_indices_hdf5: HdfFileToIntArray("testdata/TEMPLATE(datafile).hdf5", "/neighbors")
|
||||
distance_floatlist_hdf5: HdfFileToFloatList("testdata/TEMPLATE(datafile).hdf5", "/distance")
|
||||
train_floatlist_hdf5: HdfFileToFloatList("testdata/TEMPLATE(datafile).hdf5", "/train"); ToCqlVector();
|
||||
# filetype=vecs for TEMPLATE(filetype,vecs)
|
||||
test_floatlist_vecs: FVecReader("testdata/TEMPLATE(datafile).fvec"); ToCqlVector();
|
||||
relevant_indices_vecs: IVecReader("testdata/TEMPLATE(datafile).ivec");
|
||||
distance_floatlist_vecs: FVecReader("testdata/TEMPLATE(datafile).fvec");
|
||||
train_floatlist_vecs: FVecReader("testdata/TEMPLATE(datafile).fvec"); ToCqlVector();
|
||||
# filetype=fvec for TEMPLATE(filetype,fvec)
|
||||
test_floatlist_fvec: FVecReader("testdata/TEMPLATE(datafile)_TEMPLATE(trainsize)_query_vectors.fvec"); ToCqlVector();
|
||||
relevant_indices_fvec: IVecReader("testdata/TEMPLATE(datafile)_TEMPLATE(trainsize)_indices_query.ivec");
|
||||
distance_floatlist_fvec: FVecReader("testdata/TEMPLATE(datafile)_TEMPLATE(testsize)_distances_count.fvec",TEMPLATE(dimensions),0);
|
||||
train_floatlist_fvec: FVecReader("testdata/TEMPLATE(datafile)_TEMPLATE(trainsize)_base_vectors.fvec",TEMPLATE(dimensions),0); ToCqlVector();
|
||||
# synthetic
|
||||
synthetic_vectors: HashedFloatVectors(TEMPLATE(dimensions));
|
||||
|
||||
blocks:
|
||||
|
@ -25,6 +25,7 @@ import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.FloatBuffer;
|
||||
import java.nio.MappedByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.file.Path;
|
||||
@ -77,18 +78,17 @@ public class FVecReader implements LongFunction<float[]> {
|
||||
@Override
|
||||
public float[] apply(long value) {
|
||||
int recordIdx = (int) (value % reclim);
|
||||
long offset = value * recordIdx;
|
||||
int recpos = (int) (offset %filesize) ;
|
||||
byte[] buf = new byte[reclen];
|
||||
ByteBuffer record = this.bb.get(recpos,buf).order(ByteOrder.LITTLE_ENDIAN);
|
||||
int recdim = record.getInt();
|
||||
int recpos = recordIdx*reclen;
|
||||
int recdim = Integer.reverseBytes(bb.getInt(recpos));
|
||||
if(recdim!=dimensions) {
|
||||
throw new RuntimeException("dimensions are not uniform for fvec file '" + this.path.toString() + "', found dim " + recdim + " at record " + value);
|
||||
}
|
||||
float[] data = new float[recdim];
|
||||
for (int i = 0; i < dimensions; i++) {
|
||||
data[i]=record.getFloat();
|
||||
}
|
||||
return data;
|
||||
var vbuf = new byte[dimensions*Float.BYTES];
|
||||
bb.get(recpos + Integer.BYTES, vbuf);
|
||||
|
||||
FloatBuffer fbuf=ByteBuffer.wrap(vbuf).order(ByteOrder.LITTLE_ENDIAN).asFloatBuffer();
|
||||
var vectors = new float[dimensions];
|
||||
fbuf.get(vectors);
|
||||
return vectors;
|
||||
}
|
||||
}
|
||||
|
@ -16,7 +16,6 @@
|
||||
|
||||
package io.nosqlbench.virtdata.library.ivecfvec;
|
||||
|
||||
import io.nosqlbench.api.config.standard.ConfigModel;
|
||||
import io.nosqlbench.api.content.Content;
|
||||
import io.nosqlbench.api.content.NBIO;
|
||||
import io.nosqlbench.virtdata.api.annotations.Categories;
|
||||
@ -25,14 +24,12 @@ import io.nosqlbench.virtdata.api.annotations.Example;
|
||||
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.MappedByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.function.LongFunction;
|
||||
import java.util.function.LongToIntFunction;
|
||||
|
||||
/**
|
||||
* Reads ivec files with random access, using the input to specify the record number.
|
||||
@ -89,10 +86,10 @@ public class IVecReader implements LongFunction<int[]> {
|
||||
@Override
|
||||
public int[] apply(long value) {
|
||||
int recordIdx = (int) (value % reclim);
|
||||
long offset = value * recordIdx;
|
||||
int recpos = (int) (offset %filesize) ;
|
||||
int recpos = recordIdx*reclen;
|
||||
byte[] buf = new byte[reclen];
|
||||
ByteBuffer record = this.bb.get(recpos,buf);
|
||||
this.bb.get(recpos,buf);
|
||||
ByteBuffer record = ByteBuffer.wrap(buf);
|
||||
int recdim = Integer.reverseBytes(record.getInt());
|
||||
if(recdim!=dimensions) {
|
||||
throw new RuntimeException("dimensions are not uniform for ivec file '" + this.path.toString() + "', found dim " + recdim + " at record " + value);
|
||||
|
@ -18,6 +18,9 @@ package io.nosqlbench.virtdata.library.ivecfvec;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
@ -25,9 +28,13 @@ class IVecReaderTest {
|
||||
|
||||
@Test
|
||||
public void testReadIvec() {
|
||||
|
||||
ArrayList<HashSet<Integer>> idx_ref = IvecFvecMethods.readIvecs("src/test/resources/ivecfvec/test_ada_002_10000_indices_query_10000.ivec");
|
||||
|
||||
IVecReader ir = new IVecReader("src/test/resources/ivecfvec/test_ada_002_10000_indices_query_10000.ivec");
|
||||
for (int i = 0; i < 10; i++) {
|
||||
int[] indices = ir.apply(0);
|
||||
HashSet<Integer> ref = idx_ref.get(0);
|
||||
for (int j = 0; j < indices.length; j++) {
|
||||
assertThat(indices[j]).isGreaterThanOrEqualTo(0);
|
||||
assertThat(indices[j]).isLessThanOrEqualTo(10000);
|
||||
@ -41,7 +48,7 @@ class IVecReaderTest {
|
||||
for (int i = 0; i < 10; i++) {
|
||||
float[] dist = ir.apply(i);
|
||||
for (int j = 1; j < dist.length; j++) {
|
||||
assertThat(dist[j]).isGreaterThanOrEqualTo(dist[j-1]);
|
||||
assertThat(dist[j]).isGreaterThanOrEqualTo(dist[j-1]).describedAs("dist[" + j +"]=(" +dist[j]+") dist[j-1]=(" + dist[j-1] + ")");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user