diff --git a/virtdata-lib-hdf5/pom.xml b/virtdata-lib-hdf5/pom.xml index b15119c65..689978460 100644 --- a/virtdata-lib-hdf5/pom.xml +++ b/virtdata-lib-hdf5/pom.xml @@ -53,6 +53,7 @@ src/test/resources h5ex_t_float.h5 + hdf5_test_strings.h5 **/*.ivec **/*.fvec diff --git a/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/AbstractHdfFileToVectorType.java b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/AbstractHdfFileToVectorType.java index cab669196..c0533a140 100644 --- a/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/AbstractHdfFileToVectorType.java +++ b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/AbstractHdfFileToVectorType.java @@ -36,8 +36,9 @@ public abstract class AbstractHdfFileToVectorType { long[] sliceOffset = new long[dims.length]; sliceOffset[0] = (l % dims[0]); int[] sliceDimensions = new int[dims.length]; + // We always want to read a single vector sliceDimensions[0] = 1; - // Do we want to give the option of reducing vector dimensions here? + // Number of elements in the vector sliceDimensions[1] = dims[1]; return dataset.getData(sliceOffset, sliceDimensions); } diff --git a/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_string/HdfDatasetToStrings.java b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_string/HdfDatasetToStrings.java new file mode 100644 index 000000000..8a37d949f --- /dev/null +++ b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_string/HdfDatasetToStrings.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.hdf5.from_long.to_string;
+
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+import io.nosqlbench.virtdata.library.hdf5.from_long.AbstractHdfFileToVectorType;
+
+import java.util.function.LongFunction;
+
+/**
+ * Reads one string per call from a dataset in an HDF5 file. The long input value,
+ * reduced modulo the dataset's first dimension, selects which entry is returned.
+ * Only a one-row slice is requested from the dataset on each call.
+ */
+@ThreadSafeMapper
+@Categories(Category.experimental)
+public class HdfDatasetToStrings extends AbstractHdfFileToVectorType implements LongFunction<String> {
+
+    /** Passes {@code filename} and {@code datasetName} straight through to the superclass constructor. */
+    public HdfDatasetToStrings(String filename, String datasetName) {
+        super(filename, datasetName);
+    }
+
+    /** Returns the string at row {@code floorMod(l, dims[0])}, so negative inputs wrap around. */
+    @Override
+    public String apply(long l) {
+        long[] sliceOffset = new long[dims.length];
+        // floorMod (unlike %) never yields a negative row offset
+        sliceOffset[0] = Math.floorMod(l, (long) dims[0]);
+        int[] sliceDimensions = new int[dims.length];
+        // one row along the first axis, full extent of every remaining dimension
+        sliceDimensions[0] = 1;
+        System.arraycopy(dims, 1, sliceDimensions, 1, dims.length - 1);
+        return ((String[]) dataset.getData(sliceOffset, sliceDimensions))[0];
+    }
+}
diff --git a/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_string/HdfDatasetToStringsTest.java b/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_string/HdfDatasetToStringsTest.java
new file mode 100644
index 000000000..a520157fe
--- /dev/null
+++ b/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_string/HdfDatasetToStringsTest.java
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.hdf5.from_long.to_string;
+
+import io.nosqlbench.virtdata.library.hdf5.from_long.to_list.HdfFileToFloatList;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+
+/**
+ * Checks that {@link HdfDatasetToStrings} yields the expected strings from the
+ * {@code /strings} dataset of the test resource file, one per input index.
+ */
+public class HdfDatasetToStringsTest {
+
+    @Test
+    public void testHdfFileToVector() {
+        final String[] expected = {"String 1", "String 2", "String 3", "String 4"};
+
+        HdfDatasetToStrings reader = new HdfDatasetToStrings(
+            "src/test/resources/hdf5_test_strings.h5",
+            "/strings");
+
+        for (int i = 0; i < expected.length; i++) {
+            // Use a JUnit assertion: the bare `assert` keyword is skipped when the
+            // JVM runs without -ea, which would let this test pass vacuously.
+            org.junit.jupiter.api.Assertions.assertEquals(expected[i], reader.apply(i),
+                "unexpected string at index " + i);
+        }
+    }
+}
diff --git a/virtdata-lib-hdf5/src/test/resources/hdf5_test_strings.h5 b/virtdata-lib-hdf5/src/test/resources/hdf5_test_strings.h5
new file mode 100644
index 000000000..c14af7336
Binary files /dev/null and b/virtdata-lib-hdf5/src/test/resources/hdf5_test_strings.h5 differ