adding new binding for single array int types

This commit is contained in:
Mark Wolters 2025-01-24 17:24:05 -05:00
parent c0093efdd3
commit 812c53283d
7 changed files with 86 additions and 4 deletions

View File

@ -36,10 +36,13 @@ public abstract class AbstractHdfFileToVectorType {
long[] sliceOffset = new long[dims.length];
sliceOffset[0] = (l % dims[0]);
int[] sliceDimensions = new int[dims.length];
// We always want to read a single vector
// We always want to read a single value
sliceDimensions[0] = 1;
// Number of elements in the vector
sliceDimensions[1] = dims[1];
return dataset.getData(sliceOffset, sliceDimensions);
if (dims.length > 1) {
sliceDimensions[1] = dims[1];
return dataset.getData(sliceOffset, sliceDimensions);
} else {
return dataset.getData();
}
}
}

View File

@ -0,0 +1,53 @@
/*
* Copyright (c) nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_int;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.library.hdf5.from_long.AbstractHdfFileToVectorType;
import io.nosqlbench.virtdata.library.hdf5.helpers.EmbeddingGenerator;
import io.nosqlbench.virtdata.library.hdf5.helpers.EmbeddingGeneratorFactory;
import java.util.function.LongFunction;
/**
 * Maps a long input to a single {@code int} taken from an HDF5 dataset.
 * <p>
 * The constructor picks an {@link EmbeddingGenerator} from the lower-cased simple name of the
 * dataset's Java type. Each call to {@link #apply(long)} fetches data for the input through
 * {@code getDataFrom} and hands it, together with the same input, to the generator's
 * {@code generateIntFrom}, which chooses the element to return.
 * <p>
 * NOTE(review): the generators' {@code generateIntFrom} implementations cast the data to a
 * one-dimensional primitive array, so this binding is presumably intended for single-dimension
 * datasets - confirm against the shape returned by {@code getDataFrom}.
 */
@ThreadSafeMapper
@Categories(Category.experimental)
public class HdfFileToInt extends AbstractHdfFileToVectorType implements LongFunction<Integer> {

    /** Converts the raw dataset contents into the int handed back to the caller. */
    private final EmbeddingGenerator generator;

    /**
     * @param filename    passed through to the superclass constructor
     * @param datasetName passed through to the superclass constructor
     */
    public HdfFileToInt(String filename, String datasetName) {
        super(filename, datasetName);
        // The factory is keyed by the lower-cased simple name of the dataset's Java type
        String javaTypeName = dataset.getJavaType().getSimpleName().toLowerCase();
        this.generator = EmbeddingGeneratorFactory.getGenerator(javaTypeName);
    }

    /**
     * @param l the input value; used both to fetch the data and to select the element
     * @return the int the generator extracts for {@code l}
     */
    @Override
    public Integer apply(long l) {
        return generator.generateIntFrom(getDataFrom(l), l);
    }
}

View File

@ -81,4 +81,10 @@ public class DoubleEmbeddingGenerator implements EmbeddingGenerator {
return vector2;
}
@Override
public int generateIntFrom(Object o, long l) {
    /*
     * Selects one element of a double[] and narrows it to an int.
     *
     * o - the source data; must be a double[] (anything else raises ClassCastException)
     * l - selector; reduced modulo the array length to obtain the element index
     *
     * An empty array raises ArithmeticException because the modulus would be zero.
     */
    double[] source = (double[]) o;
    // floorMod keeps the index inside [0, length) for negative l, where % would yield a negative index
    int index = (int) Math.floorMod(l, (long) source.length);
    // Narrowing cast: the fraction is dropped, NaN becomes 0 and out-of-range values saturate
    return (int) source[index];
}
}

View File

@ -30,4 +30,6 @@ public interface EmbeddingGenerator {
/**
 * Builds a list-of-Integer embedding from raw dataset contents.
 * NOTE(review): the expected runtime type of {@code data} and the meaning of {@code dims}
 * (presumably the dataset dimensions) are not visible here - confirm against the implementations.
 *
 * @param data the raw data to convert
 * @param dims presumably the dimensions describing {@code data} - TODO confirm
 * @return the embedding as a list of Integer values
 */
List<Integer> generateIntListEmbeddingFrom(Object data, int[] dims);
/**
 * Builds an int-array embedding from raw dataset contents.
 * NOTE(review): the int-typed implementation returns the first row of {@code data} cast to
 * {@code int[][]}; other implementations and the role of {@code dims} (presumably the dataset
 * dimensions) should be confirmed.
 *
 * @param data the raw data to convert
 * @param dims presumably the dimensions describing {@code data} - TODO confirm
 * @return the embedding as an int array
 */
int[] generateIntArrayEmbeddingFrom(Object data, int[] dims);
/**
 * Extracts a single int from raw dataset contents.
 * NOTE(review): the implementations cast {@code data} to a one-dimensional primitive array of
 * their own element type and pick the element whose index is derived from {@code l} modulo the
 * array length; behaviour for other shapes of {@code data} is presumably a ClassCastException -
 * confirm per implementation.
 *
 * @param data the raw data to read from
 * @param l    the selector used to choose which element is returned
 * @return the selected element converted to an int
 */
int generateIntFrom(Object data, long l);
}

View File

@ -76,4 +76,10 @@ public class FloatEmbeddingGenerator implements EmbeddingGenerator {
return vector2;
}
@Override
public int generateIntFrom(Object o, long l) {
    /*
     * Selects one element of a float[] and narrows it to an int.
     *
     * o - the source data; must be a float[] (anything else raises ClassCastException)
     * l - selector; reduced modulo the array length to obtain the element index
     *
     * An empty array raises ArithmeticException because the modulus would be zero.
     */
    float[] source = (float[]) o;
    // floorMod keeps the index inside [0, length) for negative l, where % would yield a negative index
    int index = (int) Math.floorMod(l, (long) source.length);
    // Narrowing cast: the fraction is dropped, NaN becomes 0 and out-of-range values saturate
    return (int) source[index];
}
}

View File

@ -74,4 +74,10 @@ public class IntEmbeddingGenerator implements EmbeddingGenerator {
public int[] generateIntArrayEmbeddingFrom(Object o, int[] dims) {
    // The data is treated as a two-dimensional int array and its first row is the embedding;
    // dims is not consulted here. The row is returned as-is, not copied.
    int[][] rows = (int[][]) o;
    return rows[0];
}
@Override
public int generateIntFrom(Object o, long l) {
    /*
     * Selects one element of an int[].
     *
     * o - the source data; must be an int[] (anything else raises ClassCastException)
     * l - selector; reduced modulo the array length to obtain the element index
     *
     * An empty array raises ArithmeticException because the modulus would be zero.
     */
    int[] source = (int[]) o;
    // floorMod keeps the index inside [0, length) for negative l, where % would yield a negative index
    int index = (int) Math.floorMod(l, (long) source.length);
    return source[index];
}
}

View File

@ -75,4 +75,10 @@ public class LongEmbeddingGenerator implements EmbeddingGenerator {
}
return vector2;
}
@Override
public int generateIntFrom(Object o, long l) {
    /*
     * Selects one element of a long[] and converts it to an int without silent truncation.
     *
     * o - the source data; must be a long[] (anything else raises ClassCastException)
     * l - selector; reduced modulo the array length to obtain the element index
     *
     * Raises ArithmeticException when the array is empty (zero modulus) or when the selected
     * value does not fit in an int.
     */
    long[] source = (long[]) o;
    // floorMod keeps the index inside [0, length) for negative l, where % would yield a negative index
    int index = (int) Math.floorMod(l, (long) source.length);
    // toIntExact fails loudly on overflow rather than wrapping
    return Math.toIntExact(source[index]);
}
}