diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java
index 3b706faff..ac1c05a76 100644
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java
+++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java
@@ -47,8 +47,8 @@ public class LoaderConfig {
         return configMap.get(key).toString();
     }
 
-    public List<Map<String, String>> getDatasets() {
-        return (List<Map<String, String>>) configMap.get("datasets");
+    public List<String> getDatasets() {
+        return (List<String>) configMap.get("datasets");
     }
 
     public String getFormat() {
diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java
index 657fd7b0b..a02575a79 100644
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java
+++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java
@@ -27,6 +27,7 @@ import ncsa.hdf.hdf5lib.exceptions.HDF5LibraryException;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
+import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
@@ -34,6 +35,7 @@ import java.util.concurrent.LinkedBlockingQueue;
 
 public class Hdf5Reader implements HdfReader {
     private static final Logger logger = LogManager.getLogger(Hdf5Reader.class);
+    public static final String ALL = "all";
     private VectorWriter writer;
     private final LoaderConfig config;
     private final ExecutorService executorService;
@@ -53,12 +55,36 @@ public class Hdf5Reader implements HdfReader {
     public void read() throws HDF5LibraryException {
         String sourceFile = config.getSourceFile();
         int fileId = H5.H5Fopen(sourceFile, HDF5Constants.H5F_ACC_RDONLY, HDF5Constants.H5P_DEFAULT);
-        for (Map<String, String> dataset : config.getDatasets()) {
+        List<String> datasets = config.getDatasets();
+        if (datasets.get(0).equalsIgnoreCase(ALL)) {
+            try {
+                int numObjects = H5.H5Fget_obj_count(fileId, HDF5Constants.H5F_OBJ_ALL);
+                String[] objNames = new String[numObjects];
+                int[] objTypes = new int[numObjects];
+                long[] refArray = new long[numObjects];
+                //H5.H5Fget_obj_ids(fileId, HDF5Constants.H5F_OBJ_ALL, numObjects, objNames, objTypes);
+                H5.H5Gget_obj_info_all(fileId, null, objNames, objTypes, refArray);
+
+                for (int i = 0; i < numObjects; i++) {
+                    String objName = objNames[i];
+                    int objType = objTypes[i];
+                    if (objType == HDF5Constants.H5G_DATASET) {
+                        datasets.add(objName);
+                    }
+                }
+            } catch (HDF5Exception e) {
+                logger.error("Error getting all datasets from file: " + sourceFile, e);
+            }
+        }
+        for (String dataset : config.getDatasets()) {
+            if (dataset.equalsIgnoreCase(ALL)) {
+                continue;
+            }
             executorService.submit(() -> {
                 // Your lambda code that runs in a separate thread for each object
-                logger.info("Processing dataset: " + dataset.get("name"));
+                logger.info("Processing dataset: " + dataset);
                 try {
-                    int datasetId = H5.H5Dopen(fileId, dataset.get("name"));
+                    int datasetId = H5.H5Dopen(fileId, dataset);
                     // Get the dataspace of the dataset
                     int dataspaceId = H5.H5Dget_space(datasetId);
                     // Get the number of dimensions in the dataspace
diff --git a/hdf-loader/src/main/resources/config.yaml b/hdf-loader/src/main/resources/config.yaml
index 11f1be3e5..16869b8c6 100644
--- a/hdf-loader/src/main/resources/config.yaml
+++ b/hdf-loader/src/main/resources/config.yaml
@@ -1,10 +1,7 @@
 format: HDF5
-sourceFile: /home/mwolters138/Downloads/embeddings.h5
+sourceFile: /home/mwolters138/Downloads/NEONDSTowerTemperatureData.hdf5
 datasets:
-  - name: dataset1
-    type: string
-  - name: dataset2
-    type: int
+  - all
 embedding: word2vec
 writer: filewriter
 astra:
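As a usage illustration (not part of the patch): with the new config, a single "all" entry stands in for every dataset in the file. Below is a minimal, self-contained sketch of the sentinel-expansion pattern the reader now follows; the dataset names are hypothetical and the discovery step is stubbed in place of the real H5.H5Gget_obj_info_all() call.

import java.util.ArrayList;
import java.util.List;

public class AllDatasetsSketch {
    static final String ALL = "all"; // mirrors Hdf5Reader.ALL

    public static void main(String[] args) {
        // Stand-in for LoaderConfig.getDatasets() as parsed from config.yaml
        List<String> datasets = new ArrayList<>(List.of(ALL));

        // When the first entry is the sentinel, expand it to every dataset
        // discovered in the file (stubbed here with hypothetical names).
        if (datasets.get(0).equalsIgnoreCase(ALL)) {
            datasets.add("/temperature/min");
            datasets.add("/temperature/max");
        }

        for (String dataset : datasets) {
            if (dataset.equalsIgnoreCase(ALL)) {
                continue; // skip the sentinel itself; it names no real dataset
            }
            System.out.println("Processing dataset: " + dataset);
        }
    }
}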