added javadoc comments

This commit is contained in:
Mark Wolters 2023-12-05 13:25:45 -04:00
parent 93d9dc5914
commit 796c786d1a
7 changed files with 50 additions and 4 deletions

View File

@ -16,7 +16,16 @@
package io.nosqlbench.datamappers.functions.hdf_to_cql;
/**
* This interface is used to parse the raw JSON from the HDF dataset into a CQL predicate.
*/
public interface DatasetParser {
/**
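* Return a new instance of the parser class identified by the given name, used to parse the raw JSON
* from the HDF dataset into a CQL predicate.
* @param parsername the name of the parser implementation to create, e.g. "default"
* @return A new instance of the specified parser class.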
*/
static DatasetParser parserFactory(String parsername) {
return switch (parsername) {
case "default" -> new DefaultDatasetParser();

View File

@ -22,6 +22,11 @@ import com.google.gson.JsonParser;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
* This class is used to parse the raw JSON from the HDF dataset into a CQL predicate. This is the default
* implementation. It accepts a JSON string of the form found at https://github.com/qdrant/ann-filtering-benchmark-datasets
* and converts it into a CQL predicate in String form.
*/
public class DefaultDatasetParser implements DatasetParser {
private static final String WHERE = "WHERE";
private static final String MATCH = "match";

View File

@ -25,6 +25,10 @@ import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.LongFunction;
/**
* Binding function that accepts a long input value for the cycle and returns a string consisting of the
* CQL predicate parsed from a single record in an HDF5 dataset.
*/
@ThreadSafeMapper
@Categories(Category.experimental)
public class HdfDatasetToCqlPredicates implements LongFunction<String> {
@ -33,7 +37,12 @@ public class HdfDatasetToCqlPredicates implements LongFunction<String> {
private final int recordCount;
private final DatasetParser parser;
/**
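* Create a new binding function that accepts a long input value for the cycle and returns a string
* containing the CQL predicate parsed from a single record of the dataset.
* @param filename the HDF5 file to read, located via the NBIO search
* @param datasetname the path of the dataset within the HDF5 file
* @param parsername the name of the {@link DatasetParser} implementation to use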
*/
public HdfDatasetToCqlPredicates(String filename, String datasetname, String parsername) {
hdfFile = new HdfFile(NBIO.all().search(filename).one().asPath());
dataset = hdfFile.getDatasetByPath(datasetname);

View File

@ -15,7 +15,12 @@
*/
package io.nosqlbench.datamappers.functions.hdf_to_cql;
/**
* This class is used to parse the raw JSON from the HDF dataset into a CQL predicate. This implementation
* accepts a string consisting of the desired CQL predicate as translated from the original jsonl files
* and simply adds the WHERE keyword to the beginning of the string if it is not already present, hence
* the name Just Add Where (JAW) parser.
*/
public class JAWDatasetParser implements DatasetParser {
private static final String WHERE = "WHERE";
@Override

View File

@ -15,7 +15,11 @@
*/
package io.nosqlbench.datamappers.functions.hdf_to_cql;
/**
* This class is used to parse the raw JSON from the HDF dataset into a CQL predicate. This implementation
* accepts a string consisting of the desired CQL predicate as translated from the original jsonl files and
* simply returns the raw string, hence the name NoopDatasetParser.
*/
public class NoopDatasetParser implements DatasetParser {
@Override
public String parse(String raw) {

View File

@ -25,6 +25,13 @@ import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.LongFunction;
/**
* This function reads a vector dataset from an HDF5 file. The entire dataset is parsed into a single
* String Object with the discrete values separated by the user-supplied separator character. It is
* intended for use only with small datasets where the entire dataset can be read into memory and there
* is no need to read individual vectors from the dataset.
* The lambda function simply returns the String representation of the dataset.
*/
@ThreadSafeMapper
@Categories(Category.experimental)
public class HdfDatasetToString implements LongFunction<String> {
@ -33,6 +40,13 @@ public class HdfDatasetToString implements LongFunction<String> {
private final String separator;
private final String datasetAsString;
/**
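* Create a new binding function that accepts a long input value for the cycle and returns a string representation
* of the specified dataset.
* @param filename the HDF5 file to read, located via the NBIO search
* @param dataset the path of the dataset within the HDF5 file
* @param separator the separator placed between the values in the resulting string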
*/
public HdfDatasetToString(String filename, String dataset, String separator) {
hdfFile = new HdfFile(NBIO.all().search(filename).one().asPath());
this.dataset = hdfFile.getDatasetByPath(dataset);

View File

@ -25,7 +25,7 @@ import java.util.Arrays;
import java.util.function.LongFunction;
/**
* This function reads a dataset from an HDF5 file. The dataset itself is not
* This function reads a dataset of any supported type from an HDF5 file. The dataset itself is not
* read into memory, only the metadata (the "dataset" Java Object). The lambda function
* reads a single vector from the dataset, based on the long input value.
*/