diff --git a/virtdata-lib-hdf5/pom.xml b/virtdata-lib-hdf5/pom.xml
index b15119c65..689978460 100644
--- a/virtdata-lib-hdf5/pom.xml
+++ b/virtdata-lib-hdf5/pom.xml
@@ -53,6 +53,7 @@
src/test/resources
h5ex_t_float.h5
+ hdf5_test_strings.h5
**/*.ivec
**/*.fvec
diff --git a/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/AbstractHdfFileToVectorType.java b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/AbstractHdfFileToVectorType.java
index cab669196..c0533a140 100644
--- a/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/AbstractHdfFileToVectorType.java
+++ b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/AbstractHdfFileToVectorType.java
@@ -36,8 +36,9 @@ public abstract class AbstractHdfFileToVectorType {
long[] sliceOffset = new long[dims.length];
sliceOffset[0] = (l % dims[0]);
int[] sliceDimensions = new int[dims.length];
+ // We always want to read a single vector
sliceDimensions[0] = 1;
- // Do we want to give the option of reducing vector dimensions here?
+ // Number of elements in the vector
sliceDimensions[1] = dims[1];
return dataset.getData(sliceOffset, sliceDimensions);
}
diff --git a/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_string/HdfDatasetToStrings.java b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_string/HdfDatasetToStrings.java
new file mode 100644
index 000000000..8a37d949f
--- /dev/null
+++ b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_string/HdfDatasetToStrings.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.hdf5.from_long.to_string;
+
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+import io.nosqlbench.virtdata.library.hdf5.from_long.AbstractHdfFileToVectorType;
+
+import java.util.function.LongFunction;
+
+/**
+ * Reads individual strings from a dataset in an HDF5 file. The dataset itself is not
+ * read into memory up front, only the metadata (the "dataset" Java Object). Each call
+ * to {@link #apply(long)} reads a single entry from the dataset, selected by the long
+ * input value.
+ */
+@ThreadSafeMapper
+@Categories(Category.experimental)
+public class HdfDatasetToStrings extends AbstractHdfFileToVectorType implements LongFunction<String> {
+
+    /**
+     * Creates a string reader for one dataset of an HDF5 file.
+     *
+     * @param filename    the HDF5 file to read from, passed through to the superclass
+     * @param datasetName the name of the dataset inside the file, passed through to the superclass
+     */
+    public HdfDatasetToStrings(String filename, String datasetName) {
+        super(filename, datasetName);
+    }
+
+    /**
+     * Reads one string from the dataset.
+     *
+     * @param l the input value; it is reduced modulo the size of the dataset's first
+     *          dimension to select the entry to read
+     * @return the first string of the slice read at the selected position
+     */
+    @Override
+    public String apply(long l) {
+        // Start of the slice: the selected entry along the first dimension, 0 elsewhere.
+        long[] sliceOffset = new long[dims.length];
+        sliceOffset[0] = (l % dims[0]);
+
+        // Extent of the slice: one entry along the first dimension, full extent elsewhere.
+        int[] sliceDimensions = new int[dims.length];
+        sliceDimensions[0] = 1;
+        for (int i = 1; i < dims.length; i++) {
+            sliceDimensions[i] = dims[i];
+        }
+
+        // NOTE(review): the cast assumes the slice comes back as a flat String[];
+        // confirm this holds for datasets with more than one dimension.
+        return ((String[]) dataset.getData(sliceOffset, sliceDimensions))[0];
+    }
+
+}
diff --git a/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_string/HdfDatasetToStringsTest.java b/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_string/HdfDatasetToStringsTest.java
new file mode 100644
index 000000000..a520157fe
--- /dev/null
+++ b/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_string/HdfDatasetToStringsTest.java
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.hdf5.from_long.to_string;
+
+import io.nosqlbench.virtdata.library.hdf5.from_long.to_list.HdfFileToFloatList;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Tests for {@link HdfDatasetToStrings} against the bundled string test file.
+ */
+public class HdfDatasetToStringsTest {
+
+    /**
+     * Reads the leading entries of the "/strings" dataset and checks each one
+     * against its expected value.
+     */
+    @Test
+    public void testHdfFileToVector() {
+        final String[] expected = {
+            "String 1",
+            "String 2",
+            "String 3",
+            "String 4"
+        };
+
+        HdfDatasetToStrings stringReader = new HdfDatasetToStrings(
+            "src/test/resources/hdf5_test_strings.h5",
+            "/strings");
+
+        // Use a JUnit assertion rather than the `assert` keyword so the check still
+        // runs when the JVM is started without -ea, and failures report both values.
+        for (int i = 0; i < expected.length; i++) {
+            assertEquals(expected[i], stringReader.apply(i), "unexpected string at index " + i);
+        }
+    }
+}
diff --git a/virtdata-lib-hdf5/src/test/resources/hdf5_test_strings.h5 b/virtdata-lib-hdf5/src/test/resources/hdf5_test_strings.h5
new file mode 100644
index 000000000..c14af7336
Binary files /dev/null and b/virtdata-lib-hdf5/src/test/resources/hdf5_test_strings.h5 differ