refactored filter handling

This commit is contained in:
Mark Wolters 2023-05-31 21:50:05 +00:00
parent 1bd22b992d
commit 6b63f9cc37
10 changed files with 146 additions and 69 deletions

View File

@ -16,6 +16,7 @@
package io.nosqlbench.adapter.pinecone.opdispensers;
import com.google.protobuf.ListValue;
import com.google.protobuf.Struct;
import com.google.protobuf.Value;
import io.nosqlbench.adapter.pinecone.PineconeDriverAdapter;
@ -27,7 +28,10 @@ import io.pinecone.proto.DeleteRequest;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.*;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.LongFunction;
@ -61,7 +65,6 @@ public class PineconeDeleteOpDispenser extends PineconeOpDispenser {
/**
* @param op The ParsedOp used to build the Request
* @return A function that will take a long (the current cycle) and return a Pinecone DeleteRequest
*
* The pattern used here is to accommodate the way Request types are constructed for Pinecone.
* Requests use a Builder pattern, so at time of instantiation the methods should be chained together.
* For each method in the chain a function is created here and added to the chain of functions
@ -95,16 +98,10 @@ public class PineconeDeleteOpDispenser extends PineconeOpDispenser {
rFunc = l -> finalFunc.apply(l).setDeleteAll(af.apply(l));
}
Optional<LongFunction<String>> filterFunction = op.getAsOptionalFunction("filter", String.class);
Optional<LongFunction<Map>> filterFunction = op.getAsOptionalFunction("filter", Map.class);
if (filterFunction.isPresent()) {
LongFunction<DeleteRequest.Builder> finalFunc = rFunc;
LongFunction<Struct> builtFilter = l -> {
String[] filterFields = filterFunction.get().apply(l).split(" ");
return Struct.newBuilder().putFields(filterFields[0],
Value.newBuilder().setStructValue(Struct.newBuilder().putFields(filterFields[1],
Value.newBuilder().setNumberValue(Integer.parseInt(filterFields[2])).build()))
.build()).build();
};
LongFunction<Struct> builtFilter = buildFilterStruct(filterFunction.get());
rFunc = l -> finalFunc.apply(l).setFilter(builtFilter.apply(l));
}
@ -112,5 +109,4 @@ public class PineconeDeleteOpDispenser extends PineconeOpDispenser {
return l -> finalRFunc.apply(l).build();
}
}

View File

@ -27,6 +27,7 @@ import io.pinecone.proto.DescribeIndexStatsRequest;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.Map;
import java.util.Optional;
import java.util.function.LongFunction;
@ -63,18 +64,14 @@ public class PineconeDescribeIndexStatsOpDispenser extends PineconeOpDispenser {
*/
private LongFunction<DescribeIndexStatsRequest> createDescribeIndexStatsRequestFunction(ParsedOp op) {
LongFunction<DescribeIndexStatsRequest.Builder> rFunc = l -> DescribeIndexStatsRequest.newBuilder();
Optional<LongFunction<String>> filterFunction = op.getAsOptionalFunction("filter", String.class);
Optional<LongFunction<Map>> filterFunction = op.getAsOptionalFunction("filter", Map.class);
if (filterFunction.isPresent()) {
LongFunction<DescribeIndexStatsRequest.Builder> finalFunc = rFunc;
LongFunction<Struct> builtFilter = l -> {
String[] filterFields = filterFunction.get().apply(l).split(" ");
return Struct.newBuilder().putFields(filterFields[0],
Value.newBuilder().setStructValue(Struct.newBuilder().putFields(filterFields[1],
Value.newBuilder().setNumberValue(Integer.parseInt(filterFields[2])).build()))
.build()).build();
};
LongFunction<Struct> builtFilter = buildFilterStruct(filterFunction.get());
rFunc = l -> finalFunc.apply(l).setFilter(builtFilter.apply(l));
}
LongFunction<DescribeIndexStatsRequest.Builder> finalRFunc = rFunc;
return l -> finalRFunc.apply(l).build();
}

View File

@ -16,12 +16,17 @@
package io.nosqlbench.adapter.pinecone.opdispensers;
import com.google.protobuf.ListValue;
import com.google.protobuf.Struct;
import com.google.protobuf.Value;
import io.nosqlbench.adapter.pinecone.PineconeDriverAdapter;
import io.nosqlbench.adapter.pinecone.PineconeSpace;
import io.nosqlbench.adapter.pinecone.ops.PineconeOp;
import io.nosqlbench.engine.api.activityimpl.BaseOpDispenser;
import io.nosqlbench.engine.api.templating.ParsedOp;
import java.util.List;
import java.util.Map;
import java.util.function.LongFunction;
public abstract class PineconeOpDispenser extends BaseOpDispenser<PineconeOp, PineconeSpace> {
@ -37,4 +42,37 @@ public abstract class PineconeOpDispenser extends BaseOpDispenser<PineconeOp, Pi
this.targetFunction = targetFunction;
}
/**
 * Builds a function that converts a filter map (expected keys: {@code filterfield},
 * {@code operator}, {@code comparator}) into the nested protobuf {@code Struct}
 * form the Pinecone API expects: {filterfield: {operator: comparator}}.
 *
 * @param filterFunction a function yielding the filter map for a given cycle
 * @return a function that produces the equivalent protobuf Struct filter for a cycle
 * @throws RuntimeException if the comparator value is not a String, Number, or List
 */
protected LongFunction<Struct> buildFilterStruct(LongFunction<Map> filterFunction) {
    return l -> {
        Map<String,Object> filterFields = filterFunction.apply(l);
        Value comparatorVal;
        Object comparator = filterFields.get("comparator");
        if (comparator instanceof String) {
            comparatorVal = Value.newBuilder().setStringValue((String) comparator).build();
        } else if (comparator instanceof Number) {
            // Use Number.doubleValue() rather than a direct (Double) cast: YAML/JSON
            // parsing typically yields Integer or Long here, and casting those to
            // Double would throw ClassCastException at runtime.
            comparatorVal = Value.newBuilder().setNumberValue(((Number) comparator).doubleValue()).build();
        } else if (comparator instanceof List) {
            // List comparators (e.g. $in-style matches) become a protobuf ListValue.
            comparatorVal = Value.newBuilder().setListValue(generateListValue((List) comparator)).build();
        } else {
            throw new RuntimeException("Invalid type for filter comparator specified");
        }
        // Outer struct keyed by the metadata field name; inner struct keyed by the
        // comparison operator (e.g. $eq, $lt), holding the comparator value.
        return Struct.newBuilder().putFields((String) filterFields.get("filterfield"),
            Value.newBuilder().setStructValue(
                Struct.newBuilder().putFields((String) filterFields.get("operator"),
                    comparatorVal))
                .build()).build();
    };
}
/**
 * Converts a list comparator into a protobuf {@code ListValue}, rendering each
 * element as its string form.
 *
 * @param comparator the list of comparator entries to convert
 * @return a ListValue containing one string Value per entry, in order
 */
protected ListValue generateListValue(List comparator) {
    ListValue.Builder entries = ListValue.newBuilder();
    comparator.forEach(item ->
        entries.addValues(Value.newBuilder().setStringValue(String.valueOf(item)).build()));
    return entries.build();
}
}

View File

@ -17,7 +17,6 @@
package io.nosqlbench.adapter.pinecone.opdispensers;
import com.google.protobuf.Struct;
import com.google.protobuf.Value;
import io.nosqlbench.adapter.pinecone.PineconeDriverAdapter;
import io.nosqlbench.adapter.pinecone.PineconeSpace;
import io.nosqlbench.adapter.pinecone.ops.PineconeOp;
@ -58,14 +57,14 @@ public class PineconeQueryOpDispenser extends PineconeOpDispenser {
/**
* @param op The ParsedOp used to build the Request
* @return A function that will take a long (the current cycle) and return a Pinecone QueryRequest Builder
*
* <p>
* The pattern used here is to accommodate the way Request types are constructed for Pinecone.
* Requests use a Builder pattern, so at time of instantiation the methods should be chained together.
* For each method in the chain a function is created here and added to the chain of functions
* called at time of instantiation.
*
* <p>
* The QueryVector objects used by the QueryRequest are sufficiently sophisticated in their own building process
* that it has been broken out into a separate method. At runtime they are built separately and then added
* that it has been broken out into a separate method. At runtime, they are built separately and then added
* to the build chain by the builder returned by this method.
*/
private LongFunction<QueryRequest.Builder> createQueryRequestFunc(ParsedOp op) {
@ -114,16 +113,10 @@ public class PineconeQueryOpDispenser extends PineconeOpDispenser {
rFunc = l -> finalFunc.apply(l).addAllVector(alf.apply(l));
}
Optional<LongFunction<String>> filterFunction = op.getAsOptionalFunction("filter", String.class);
Optional<LongFunction<Map>> filterFunction = op.getAsOptionalFunction("filter", Map.class);
if (filterFunction.isPresent()) {
LongFunction<QueryRequest.Builder> finalFunc = rFunc;
LongFunction<Struct> builtFilter = l -> {
String[] filterFields = filterFunction.get().apply(l).split(" ");
return Struct.newBuilder().putFields(filterFields[0],
Value.newBuilder().setStructValue(Struct.newBuilder().putFields(filterFields[1],
Value.newBuilder().setNumberValue(Integer.parseInt(filterFields[2])).build()))
.build()).build();
};
LongFunction<Struct> builtFilter = buildFilterStruct(filterFunction.get());
rFunc = l -> finalFunc.apply(l).setFilter(builtFilter.apply(l));
}
@ -133,7 +126,7 @@ public class PineconeQueryOpDispenser extends PineconeOpDispenser {
/**
* @param op the ParsedOp from which the Query Vector objects will be built
* @return an Iterable Collection of QueryVector objects to be added to a Pinecone QueryRequest
*
* <p>
* This method interrogates the subsection of the ParsedOp defined for QueryVector parameters and constructs
* a list of QueryVectors based on the included values, or returns null if this section is not populated. The
* base function returns either the List of vectors or null, while the interior function builds the vectors
@ -158,11 +151,10 @@ public class PineconeQueryOpDispenser extends PineconeOpDispenser {
qvb.setTopK((Integer) vector.get("top_k"));
}
if (vector.containsKey("filter")) {
String[] rawVals = ((String)vector.get("filter")).split(" ");
qvb.setFilter(Struct.newBuilder().putFields(rawVals[0],
Value.newBuilder().setStructValue(Struct.newBuilder().putFields(rawVals[1],
Value.newBuilder().setNumberValue(Integer.parseInt(rawVals[2])).build()))
.build()).build());
LongFunction<Struct> builtFilter = buildFilterStruct(l2 -> {
return (Map) vector.get("filter");
});
qvb.setFilter(builtFilter.apply(l));
}
if (vector.containsKey("sparse_values")) {
Map<String,String> sparse_values = (Map<String, String>) vector.get("sparse_values");

View File

@ -46,6 +46,9 @@ public class PineconeQueryOp extends PineconeOp {
public void run() {
QueryResponse response = connection.getBlockingStub().query(request);
if (logger.isDebugEnabled()) {
for (ScoredVector scored : response.getMatchesList()) {
logger.debug(scored.getId() + ": " + scored.getScore());
}
for (SingleQueryResults results : response.getResultsList()) {
for (ScoredVector scored : results.getMatchesList()) {
logger.debug(scored.getId() + ": " + scored.getScore());

View File

@ -8,5 +8,9 @@ blocks:
op1:
delete: "test-index"
namespace: "example_namespace"
ids: "item_0,item_6"
#ids: "item_0,item_6"
deleteall: false
filter:
filterfield: "category"
operator: "$eq"
comparator: "sports"

View File

@ -12,3 +12,7 @@ blocks:
topk: 10
include_values: true
include_metadata: true
filter:
filterfield: "category"
operator: "$eq"
comparator: "movies"

View File

@ -0,0 +1,10 @@
scenarios:
default:
fetch: run driver=pinecone tags==blocks:describeindexstats cycles=1
blocks:
describeindexstats:
ops:
op1:
describeindexstats: "test-index"
#filter: ""

View File

@ -43,7 +43,10 @@ ops:
# The number of results to return for each query.
top_k: int_query_topk
# You can use vector metadata to limit your search. See https://www.pinecone.io/docs/metadata-filtering/
filter: <field operator compval>
filter:
filterfield: metadata_field
operator: [$lt, $eq, $gt, ...]
comparator: value
# Indicates whether vector values are included in the response.
include_values: boolean
# Indicates whether metadata is included in the response as well as the ids.
@ -53,7 +56,10 @@ ops:
values: csv_separated_floats
top_k: int_val
namespace: string_val
filter: <field operator compval>
filter:
filterfield: metadata_field
operator: [$lt, $eq, $gt, ...]
comparator: value
sparse_values:
indices: list_of_ints
values: list_of_floats
@ -61,7 +67,10 @@ ops:
values: csv_separated_floats
top_k: int_val
namespace: string_val
filter: <field operator compval>
filter:
filterfield: metadata_field
operator: [$lt, $eq, $gt, ...]
comparator: value
sparse_values:
indices: list_of_ints
values: list_of_floats
@ -76,13 +85,19 @@ ops:
namespace: delete_namespace
ids: csv_list_of_vectors_to_delete
deleteall: [true,false]
filter: <field operator compval>
filter:
filterfield: metadata_field
operator: [$lt, $eq, $gt, ...]
comparator: value
# A describe index stats op. Specify metadata filters to narrow the range of indices described.
describe-index-stats-example:
type: describe-index-stats
index: describe_index
filter: <field operator compval>
filter:
filterfield: metadata_field
operator: [$lt, $eq, $gt, ...]
comparator: value
# A pinecone fetch op
fetch-example:

View File

@ -64,31 +64,40 @@ public class PineconeOpMapperTest {
@Test
public void testQueryOpDispenserSimple() {
ParsedOp pop = parsedOpFor("""
ops:
op1:
type: "query"
index: "test-index"
vector: "1.0,2.0,3.0"
ops:
op1:
type: "query"
index: "test-index"
vector: "1.0,2.0,3.0"
namespace: "test-namespace"
top_k: 10
filter:
filterfield: "field"
operator: "$gt"
comparator: 2.0
include_values: true
include_metadata: true
query_vectors:
- id: 1
values: "1.0,2.0,3.0"
top_k: 8
namespace: "test-namespace"
top_k: 10
filter: "value $lt 2"
include_values: true
include_metadata: true
query_vectors:
- id: 1
values: "1.0,2.0,3.0"
top_k: 8
namespace: "test-namespace"
filter: "value $lt 2"
sparse_values:
indices: "1,2,3"
values: "1.0,2.0,3.0"
- id: 2
values: "4.0,5.0,6.0"
top_k: 11
namespace: "test-namespace"
filter: "value $gt 10"
""");
filter:
filterfield: "field"
operator: "$lt"
comparator: 9.0
sparse_values:
indices: "1,2,3"
values: "1.0,2.0,3.0"
- id: 2
values: "4.0,5.0,6.0"
top_k: 11
namespace: "test-namespace"
filter:
filterfield: "field"
operator: "$eq"
comparator: "val"
""");
OpDispenser<? extends PineconeOp> dispenser = mapper.apply(pop);
assert(dispenser instanceof PineconeQueryOpDispenser);
PineconeOp op = dispenser.apply(0);
@ -105,7 +114,10 @@ public class PineconeOpMapperTest {
ids: "1.0,2.0,3.0"
namespace: "test-namespace"
deleteall: true
filter: "value $gt 10"
filter:
filterfield: "key"
operator: "$eq"
comparator: "val"
""");
OpDispenser<? extends PineconeOp> dispenser = mapper.apply(pop);
assert(dispenser instanceof PineconeDeleteOpDispenser);
@ -120,7 +132,13 @@ public class PineconeOpMapperTest {
op1:
type: "describeindexstats"
index: "test-index"
filter: "value $gt 10"
filter:
filterfield: "color"
operator: "$eq"
comparator:
- "green"
- "yellow"
- "red"
""");
OpDispenser<? extends PineconeOp> dispenser = mapper.apply(pop);
assert(dispenser instanceof PineconeDescribeIndexStatsOpDispenser);