binding for hdf5 dataset to string

This commit is contained in:
MarkWolters 2023-11-20 09:13:04 -06:00
parent 8a53863b96
commit 5017f40927
5 changed files with 97 additions and 1 deletions

View File

@ -53,6 +53,7 @@
<directory>src/test/resources</directory>
<excludes>
<exclude>h5ex_t_float.h5</exclude>
<exclude>hdf5_test_strings.h5</exclude>
<exclude>**/*.ivec</exclude>
<exclude>**/*.fvec</exclude>
</excludes>

View File

@ -38,7 +38,7 @@ public abstract class AbstractHdfFileToVectorType {
int[] sliceDimensions = new int[dims.length];
sliceDimensions[0] = 1;
// Do we want to give the option of reducing vector dimensions here?
sliceDimensions[1] = dims[1];
sliceDimensions[1] = dims.length > 1 ? dims[1] : 1;
return dataset.getData(sliceOffset, sliceDimensions);
}
}

View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.library.hdf5.from_long.AbstractHdfFileToVectorType;
import io.nosqlbench.virtdata.library.hdf5.helpers.EmbeddingGenerator;
import io.nosqlbench.virtdata.library.hdf5.helpers.EmbeddingGeneratorFactory;
import java.util.function.LongFunction;
/**
 * Maps a long input value to a single string read from an HDF5 dataset.
 *
 * <p>The file name and dataset name are handed to the superclass constructor; this
 * class relies on the inherited {@code dataset} handle and {@code dims} array that
 * the superclass provides. Each call to {@link #apply(long)} requests a slice that
 * is one entry long along the first dimension, so the dataset is read entry by
 * entry rather than as a whole.
 */
@ThreadSafeMapper
@Categories(Category.experimental)
public class HdfDatasetToStrings extends AbstractHdfFileToVectorType implements LongFunction<String> {

    /**
     * @param filename    path of the HDF5 file to read from
     * @param datasetName name (path inside the file) of the dataset holding the strings
     */
    public HdfDatasetToStrings(String filename, String datasetName) {
        super(filename, datasetName);
    }

    /**
     * Reads the string stored at position {@code l % dims[0]} of the dataset.
     *
     * @param l the input value; it is wrapped onto the first dimension of the dataset
     * @return the string stored at the selected position
     */
    @Override
    public String apply(long l) {
        long[] sliceOffset = new long[dims.length];
        sliceOffset[0] = (l % dims[0]);

        int[] sliceDimensions = new int[dims.length];
        sliceDimensions[0] = 1;
        // Only touch higher dimensions when they exist. Writing index 1 unconditionally
        // overflowed the array for one-dimensional datasets.
        for (int i = 1; i < dims.length; i++) {
            sliceDimensions[i] = dims[i];
        }

        Object slice = dataset.getData(sliceOffset, sliceDimensions);
        // NOTE(review): a sliced read is expected to come back as an array shaped like
        // sliceDimensions (a one-element String[] for a 1-D dataset) - confirm against
        // the HDF5 library documentation. Unwrap it instead of casting the array itself.
        if (slice instanceof String[]) {
            return ((String[]) slice)[0];
        }
        // Fall back to the direct cast for a scalar result.
        return (String) slice;
    }
}

View File

@ -0,0 +1,45 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_list.HdfFileToFloatList;
import org.junit.jupiter.api.Test;
import java.util.List;
/**
 * Verifies that {@link HdfDatasetToStrings} returns the expected string for each
 * index of the {@code /strings} dataset in the bundled test file.
 */
public class HdfDatasetToStringsTest {

    /**
     * Reads every entry of the test dataset and compares it with the expected value.
     */
    @Test
    public void testHdfFileToVector() {
        final String[] results = new String[]{
            "String 1",
            "String 2",
            "String 3",
            "String 4"
        };

        // Use the fixture from the module's test resources rather than a path that
        // only exists on one developer's machine.
        HdfDatasetToStrings hdfFileToVector = new HdfDatasetToStrings(
            "src/test/resources/hdf5_test_strings.h5",
            "/strings");

        // Iterate over the expected values so the loop bound follows the fixture.
        for (int i = 0; i < results.length; i++) {
            String read = hdfFileToVector.apply(i);
            // Compare from the expected side so a null result fails the assertion
            // instead of raising a NullPointerException.
            assert results[i].equals(read)
                : "index " + i + ": expected '" + results[i] + "' but read '" + read + "'";
        }
    }
}