Merge pull request #1682 from nosqlbench/mwolters/hdf5_enhancements

Mwolters/hdf5 enhancements
This commit is contained in:
Jonathan Shook 2023-11-20 10:35:39 -06:00 committed by GitHub
commit 1138b3b18e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 100 additions and 1 deletions

View File

@@ -53,6 +53,7 @@
<directory>src/test/resources</directory>
<excludes>
<exclude>h5ex_t_float.h5</exclude>
<exclude>hdf5_test_strings.h5</exclude>
<exclude>**/*.ivec</exclude>
<exclude>**/*.fvec</exclude>
</excludes>

View File

@@ -36,8 +36,9 @@ public abstract class AbstractHdfFileToVectorType {
long[] sliceOffset = new long[dims.length];
sliceOffset[0] = (l % dims[0]);
int[] sliceDimensions = new int[dims.length];
// We always want to read a single vector
sliceDimensions[0] = 1;
// Do we want to give the option of reducing vector dimensions here?
// Number of elements in the vector
sliceDimensions[1] = dims[1];
return dataset.getData(sliceOffset, sliceDimensions);
}

View File

@@ -0,0 +1,52 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.library.hdf5.from_long.AbstractHdfFileToVectorType;
import java.util.function.LongFunction;
/**
 * Maps a long input to a single String read from an HDF5 dataset.
 * Construction loads only the dataset metadata (the "dataset" Java object);
 * each call to {@link #apply(long)} reads exactly one entry from the file,
 * selected by the input value modulo the dataset's first dimension.
 */
@ThreadSafeMapper
@Categories(Category.experimental)
public class HdfDatasetToStrings extends AbstractHdfFileToVectorType implements LongFunction<String> {

    public HdfDatasetToStrings(String filename, String datasetName) {
        super(filename, datasetName);
    }

    @Override
    public String apply(long value) {
        // Row selection: wrap the input around the first dimension so any
        // long maps onto a valid index.
        long[] offset = new long[dims.length];
        offset[0] = value % dims[0];

        // Read a single entry along the first axis; take the full extent of
        // every remaining axis (loop body never runs for a 1-D dataset).
        int[] extent = new int[dims.length];
        extent[0] = 1;
        for (int axis = 1; axis < dims.length; axis++) {
            extent[axis] = dims[axis];
        }

        // NOTE(review): assumes getData returns a String[] for this slice
        // shape — confirm against the HDF5 library's slab-read contract.
        return ((String[]) dataset.getData(offset, extent))[0];
    }
}

View File

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_list.HdfFileToFloatList;
import org.junit.jupiter.api.Test;

import java.util.List;

import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Verifies that {@code HdfDatasetToStrings} reads each string entry of the
 * {@code /strings} dataset in the bundled test HDF5 file, indexed by the
 * long input value.
 */
public class HdfDatasetToStringsTest {

    @Test
    public void testHdfFileToVector() {
        final String[] expected = new String[]{
            "String 1",
            "String 2",
            "String 3",
            "String 4"
        };
        HdfDatasetToStrings hdfFileToVector = new HdfDatasetToStrings(
            "src/test/resources/hdf5_test_strings.h5",
            "/strings");
        // Use JUnit's assertEquals rather than the bare `assert` keyword:
        // `assert` is a no-op unless the JVM runs with -ea, so the original
        // test could pass without actually checking anything.
        for (int i = 0; i < expected.length; i++) {
            assertEquals(expected[i], hdfFileToVector.apply(i));
        }
    }
}