mirror of
https://github.com/nosqlbench/nosqlbench.git
synced 2025-02-25 18:55:28 -06:00
adding in functionality for all datasets in a file
This commit is contained in:
@@ -47,8 +47,8 @@ public class LoaderConfig {
|
||||
return configMap.get(key).toString();
|
||||
}
|
||||
|
||||
public List<Map<String,String>> getDatasets() {
|
||||
return (List<Map<String,String>>) configMap.get("datasets");
|
||||
/**
 * Returns the value stored under the {@code "datasets"} key of {@code configMap},
 * cast to a list of strings.
 *
 * <p>The list is returned as-is, not copied, so any element a caller adds to or
 * removes from the returned list is also visible through {@code configMap}.
 *
 * @return the object mapped to {@code "datasets"}, viewed as {@code List<String>};
 *         {@code null} if {@code configMap.get} returns {@code null} for that key
 */
public List<String> getDatasets() {
|
||||
// Unchecked cast: the element type is not verified here, so a non-String entry
// would only fail later, at the point where an element is used as a String.
return (List<String>) configMap.get("datasets");
|
||||
}
|
||||
|
||||
public String getFormat() {
|
||||
|
||||
@@ -27,6 +27,7 @@ import ncsa.hdf.hdf5lib.exceptions.HDF5LibraryException;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
@@ -34,6 +35,7 @@ import java.util.concurrent.LinkedBlockingQueue;
|
||||
|
||||
public class Hdf5Reader implements HdfReader {
|
||||
private static final Logger logger = LogManager.getLogger(Hdf5Reader.class);
|
||||
public static final String ALL = "all";
|
||||
private VectorWriter writer;
|
||||
private final LoaderConfig config;
|
||||
private final ExecutorService executorService;
|
||||
@@ -53,12 +55,36 @@ public class Hdf5Reader implements HdfReader {
|
||||
public void read() throws HDF5LibraryException {
|
||||
String sourceFile = config.getSourceFile();
|
||||
int fileId = H5.H5Fopen(sourceFile, HDF5Constants.H5F_ACC_RDONLY, HDF5Constants.H5P_DEFAULT);
|
||||
for (Map<String,String> dataset : config.getDatasets()) {
|
||||
List<String> datasets = config.getDatasets();
|
||||
if (datasets.get(0).equalsIgnoreCase(ALL)) {
|
||||
try {
|
||||
int numObjects = H5.H5Fget_obj_count(fileId, HDF5Constants.H5F_OBJ_ALL);
|
||||
String[] objNames = new String[numObjects];
|
||||
int[] objTypes = new int[numObjects];
|
||||
long[] refArray = new long[numObjects];
|
||||
//H5.H5Fget_obj_ids(fileId, HDF5Constants.H5F_OBJ_ALL, numObjects, objNames, objTypes);
|
||||
H5.H5Gget_obj_info_all(fileId, null, objNames, objTypes, refArray);
|
||||
|
||||
for (int i = 0; i < numObjects; i++) {
|
||||
String objName = objNames[i];
|
||||
int objType = objTypes[i];
|
||||
if (objType == HDF5Constants.H5G_DATASET) {
|
||||
datasets.add(objName);
|
||||
}
|
||||
}
|
||||
} catch (HDF5Exception e) {
|
||||
logger.error("Error getting all datasets from file: " + sourceFile, e);
|
||||
}
|
||||
}
|
||||
for (String dataset : config.getDatasets()) {
|
||||
if (dataset.equalsIgnoreCase(ALL)) {
|
||||
continue;
|
||||
}
|
||||
executorService.submit(() -> {
|
||||
// Your lambda code that runs in a separate thread for each object
|
||||
logger.info("Processing dataset: " + dataset.get("name"));
|
||||
logger.info("Processing dataset: " + dataset);
|
||||
try {
|
||||
int datasetId = H5.H5Dopen(fileId, dataset.get("name"));
|
||||
int datasetId = H5.H5Dopen(fileId, dataset);
|
||||
// Get the dataspace of the dataset
|
||||
int dataspaceId = H5.H5Dget_space(datasetId);
|
||||
// Get the number of dimensions in the dataspace
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
format: HDF5
|
||||
sourceFile: /home/mwolters138/Downloads/embeddings.h5
|
||||
sourceFile: /home/mwolters138/Downloads/NEONDSTowerTemperatureData.hdf5
|
||||
datasets:
|
||||
- name: dataset1
|
||||
type: string
|
||||
- name: dataset2
|
||||
type: int
|
||||
- all
|
||||
embedding: word2vec
|
||||
writer: filewriter
|
||||
astra:
|
||||
|
||||
Reference in New Issue
Block a user