Merge pull request #1682 from nosqlbench/mwolters/hdf5_enhancements

Mwolters/hdf5 enhancements
This commit is contained in:
Jonathan Shook 2023-11-20 10:35:39 -06:00 committed by GitHub
commit 1138b3b18e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 100 additions and 1 deletions

View File

@@ -53,6 +53,7 @@
<directory>src/test/resources</directory>
<excludes>
<exclude>h5ex_t_float.h5</exclude>
<exclude>hdf5_test_strings.h5</exclude>
<exclude>**/*.ivec</exclude>
<exclude>**/*.fvec</exclude>
</excludes>

View File

@@ -36,8 +36,9 @@ public abstract class AbstractHdfFileToVectorType {
long[] sliceOffset = new long[dims.length];
sliceOffset[0] = (l % dims[0]);
int[] sliceDimensions = new int[dims.length];
// We always want to read a single vector
sliceDimensions[0] = 1;
// Do we want to give the option of reducing vector dimensions here?
// Number of elements in the vector
sliceDimensions[1] = dims[1];
return dataset.getData(sliceOffset, sliceDimensions);
}

View File

@@ -0,0 +1,52 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.library.hdf5.from_long.AbstractHdfFileToVectorType;
import java.util.function.LongFunction;
/**
 * Maps a long input to a single String read from an HDF5 dataset.
 * Construction loads only the dataset metadata (the "dataset" Java object);
 * each call to {@link #apply(long)} reads exactly one entry from the file,
 * selected by the input value modulo the dataset's first dimension.
 */
@ThreadSafeMapper
@Categories(Category.experimental)
public class HdfDatasetToStrings extends AbstractHdfFileToVectorType implements LongFunction<String> {

    public HdfDatasetToStrings(String filename, String datasetName) {
        super(filename, datasetName);
    }

    @Override
    public String apply(long value) {
        // Row selection: wrap the input around the first dimension so any
        // long maps onto a valid index.
        long[] offset = new long[dims.length];
        offset[0] = value % dims[0];

        // Read a single entry along the first axis; take the full extent of
        // every remaining axis (loop body never runs for a 1-D dataset).
        int[] extent = new int[dims.length];
        extent[0] = 1;
        for (int axis = 1; axis < dims.length; axis++) {
            extent[axis] = dims[axis];
        }

        // NOTE(review): assumes getData returns a String[] for this slice
        // shape — confirm against the HDF5 library's slab-read contract.
        return ((String[]) dataset.getData(offset, extent))[0];
    }
}

View File

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_list.HdfFileToFloatList;
import org.junit.jupiter.api.Test;

import java.util.List;

import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Verifies that {@code HdfDatasetToStrings} reads each string entry of the
 * {@code /strings} dataset in the bundled test HDF5 file, indexed by the
 * long input value.
 */
public class HdfDatasetToStringsTest {

    @Test
    public void testHdfFileToVector() {
        final String[] expected = new String[]{
            "String 1",
            "String 2",
            "String 3",
            "String 4"
        };
        HdfDatasetToStrings hdfFileToVector = new HdfDatasetToStrings(
            "src/test/resources/hdf5_test_strings.h5",
            "/strings");
        // Use JUnit's assertEquals rather than the bare `assert` keyword:
        // `assert` is a no-op unless the JVM runs with -ea, so the original
        // test could pass without actually checking anything.
        for (int i = 0; i < expected.length; i++) {
            assertEquals(expected[i], hdfFileToVector.apply(i));
        }
    }
}