more moving around, added doc

This commit is contained in:
Mark Wolters
2024-02-15 11:04:59 -04:00
parent c2e7393549
commit e6726221b1
5 changed files with 52 additions and 4 deletions

View File

@@ -501,7 +501,7 @@ public class CGWorkloadExporter implements BundledApp {
/**
* If keycount is 0, all key fields including partition and clustering fields
* are qualfied with predicates.
* are qualfied with predicates.md.
* If keycount is positive, then only that many will be included.
* If keycount is negative, then that many keyfields will be removed from the
* predicate starting with the rightmost (innermost) fields first.
@@ -538,7 +538,7 @@ public class CGWorkloadExporter implements BundledApp {
logger.debug("minimum keycount for " + table.getFullName() + " adjusted from " + lastcount + " to " + keycount);
}
// TODO; constraints on predicates based on valid constructions
// TODO; constraints on predicates.md based on valid constructions
pkeys.stream().map(this::genPredicatePart)
.forEach(p -> {
sb.append(p).append("\n AND ");

View File

@@ -17,7 +17,9 @@
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.from_json.NoopDatasetParser;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.from_json.to_cql.DefaultDatasetParser;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.from_json.to_cql.JAWDatasetParser;
/**
* This interface is used to parse the raw JSON from the HDF dataset into a CQL predicate.

View File

@@ -15,7 +15,7 @@
*
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser;
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.from_json;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.DatasetParser;

View File

@@ -15,7 +15,7 @@
*
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser;
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.from_json.to_cql;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.DatasetParser;

View File

@@ -0,0 +1,46 @@
# Description
The predicates parser is a way of adding predicates/filters/limiting expressions to the vector queries generated by the bindings used in nb tests of various vector databases. The predicates will be stored as a dataset within an hdf5 file, which supports "file system like" data storage such as this. The possibility for expanding to either other data sources or other storage formats exists for the future, but the initial implementation will be json within hdf5.
# Usage
Usage will be adapter-dependent, as the bindings necessary to support different adapter types will vary.
## CQL
ops:
select_ann_limit:
raw: |
SELECT * FROM TEMPLATE(keyspace,baselines).TEMPLATE(table,vectors) {query_predicates} ORDER BY value ANN OF {test_floatlist} LIMIT TEMPLATE(select_limit,100);
query_predicates: HdfDatasetToCqlPredicates("testdata/TEMPLATE(dataset).hdf5", "/predicates", "jaw")
In this case the parser needs to return a string representation of the predicates that limit the result set returned by the query, using the expected CQL syntax.
## Pinecone filter
op1:
query: "test-index"
vector: "0.8602578079921012,0.12103044768221516,0.7737329191858439,0.4521093269320254,0.29351661477669416,0.4261807015226558,0.14131665592103335,0.882370813029422,0.4412833140430886,0.9916525700115515"
namespace: "example_namespace"
top_k: 10
include_values: true
include_metadata: true
filter:
filterfield: {filterfield_predicate}
operator: {operator_predicate}
comparator: {comparator_predicate}
filterfield_predicate: HdfDatasetToPcFilterPredicates("testdata/TEMPLATE(dataset).hdf5", "/predicates", "pcfilterfield")
operator_predicate: HdfDatasetToPcFilterPredicates("testdata/TEMPLATE(dataset).hdf5", "/predicates", "pcoperator")
comparator_predicate: HdfDatasetToPcFilterPredicates("testdata/TEMPLATE(dataset).hdf5", "/predicates", "pccomparator")