mirror of
https://github.com/nosqlbench/nosqlbench.git
synced 2024-11-26 18:50:41 -06:00
binding for hdf5 dataset to string
This commit is contained in:
parent
8a53863b96
commit
5017f40927
@ -53,6 +53,7 @@
|
|||||||
<directory>src/test/resources</directory>
|
<directory>src/test/resources</directory>
|
||||||
<excludes>
|
<excludes>
|
||||||
<exclude>h5ex_t_float.h5</exclude>
|
<exclude>h5ex_t_float.h5</exclude>
|
||||||
|
<exclude>hdf5_test_strings.h5</exclude>
|
||||||
<exclude>**/*.ivec</exclude>
|
<exclude>**/*.ivec</exclude>
|
||||||
<exclude>**/*.fvec</exclude>
|
<exclude>**/*.fvec</exclude>
|
||||||
</excludes>
|
</excludes>
|
||||||
|
@ -38,7 +38,7 @@ public abstract class AbstractHdfFileToVectorType {
|
|||||||
int[] sliceDimensions = new int[dims.length];
|
int[] sliceDimensions = new int[dims.length];
|
||||||
sliceDimensions[0] = 1;
|
sliceDimensions[0] = 1;
|
||||||
// Do we want to give the option of reducing vector dimensions here?
|
// Do we want to give the option of reducing vector dimensions here?
|
||||||
sliceDimensions[1] = dims[1];
|
sliceDimensions[1] = dims.length > 1 ? dims[1] : 1;
|
||||||
return dataset.getData(sliceOffset, sliceDimensions);
|
return dataset.getData(sliceOffset, sliceDimensions);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2023 nosqlbench
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string;
|
||||||
|
|
||||||
|
import io.nosqlbench.virtdata.api.annotations.Categories;
|
||||||
|
import io.nosqlbench.virtdata.api.annotations.Category;
|
||||||
|
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
|
||||||
|
import io.nosqlbench.virtdata.library.hdf5.from_long.AbstractHdfFileToVectorType;
|
||||||
|
import io.nosqlbench.virtdata.library.hdf5.helpers.EmbeddingGenerator;
|
||||||
|
import io.nosqlbench.virtdata.library.hdf5.helpers.EmbeddingGeneratorFactory;
|
||||||
|
|
||||||
|
import java.util.function.LongFunction;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function reads a dataset from an HDF5 file. The dataset itself is not
|
||||||
|
* read into memory, only the metadata (the "dataset" Java Object). The lambda function
|
||||||
|
* reads a single vector from the dataset, based on the long input value.
|
||||||
|
*/
|
||||||
|
@ThreadSafeMapper
|
||||||
|
@Categories(Category.experimental)
|
||||||
|
public class HdfDatasetToStrings extends AbstractHdfFileToVectorType implements LongFunction<String> {
|
||||||
|
|
||||||
|
public HdfDatasetToStrings(String filename, String datasetName) {
|
||||||
|
super(filename, datasetName);
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public String apply(long l) {
|
||||||
|
long[] sliceOffset = new long[dims.length];
|
||||||
|
sliceOffset[0] = (l % dims[0]);
|
||||||
|
int[] sliceDimensions = new int[dims.length];
|
||||||
|
sliceDimensions[0] = 1;
|
||||||
|
sliceDimensions[1] = dims.length > 1 ? dims[1] : 1;
|
||||||
|
return (String) dataset.getData(sliceOffset, sliceDimensions);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,45 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2023 nosqlbench
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string;
|
||||||
|
|
||||||
|
import io.nosqlbench.virtdata.library.hdf5.from_long.to_list.HdfFileToFloatList;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class HdfDatasetToStringsTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHdfFileToVector() {
|
||||||
|
final String[] results = new String[]{
|
||||||
|
"String 1",
|
||||||
|
"String 2",
|
||||||
|
"String 3",
|
||||||
|
"String 4"
|
||||||
|
};
|
||||||
|
|
||||||
|
HdfDatasetToStrings hdfFileToVector = new HdfDatasetToStrings(
|
||||||
|
"/Users/mark.wolters/dev/vec/hdf5_test_strings.h5",
|
||||||
|
"/strings");
|
||||||
|
|
||||||
|
String read;
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
read = hdfFileToVector.apply(i);
|
||||||
|
assert (read.equals(results[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
BIN
virtdata-lib-hdf5/src/test/resources/hdf5_test_strings.h5
Normal file
BIN
virtdata-lib-hdf5/src/test/resources/hdf5_test_strings.h5
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user