mirror of
https://github.com/nosqlbench/nosqlbench.git
synced 2025-02-25 18:55:28 -06:00
Added functionality to handle vector values supplied as text or as a list of text, float, or double values
This commit is contained in:
@@ -65,7 +65,7 @@ public class PineconeSpace {
|
||||
}
|
||||
|
||||
/**
|
||||
* Connections are index-specific so we need to allow for multiple connection management across indices.
|
||||
* Connections are index-specific, so we need to allow for multiple connection management across indices.
|
||||
* However, note that a single connection object is thread safe and can be used by multiple clients.
|
||||
*
|
||||
* @param index The database index for which a connection is being requested
|
||||
|
||||
@@ -57,7 +57,7 @@ public class PineconeDescribeIndexStatsOpDispenser extends PineconeOpDispenser {
|
||||
* The pattern used here is to accommodate the way Request types are constructed for Pinecone.
|
||||
* Requests use a Builder pattern, so at time of instantiation the methods should be chained together.
|
||||
* For each method in the chain a function is created here and added to the chain of functions
|
||||
* called at time of instantiation. Additionally some of the arguments to the builder methods require
|
||||
* called at time of instantiation. Additionally, some of the arguments to the builder methods require
|
||||
* creation through their own builder process. In these cases the pattern adopted includes multiple layers of
|
||||
* functions in order to build all objects in the correct manner and ordering.
|
||||
*/
|
||||
|
||||
@@ -76,15 +76,8 @@ public abstract class PineconeOpDispenser extends BaseOpDispenser<PineconeOp, Pi
|
||||
return listValueBuilder.build();
|
||||
}
|
||||
|
||||
protected LongFunction<ArrayList<Float>> extractFloatVals(LongFunction<String> af) {
|
||||
return l -> {
|
||||
String[] vals = af.apply(l).split(",");
|
||||
ArrayList<Float> fVals = new ArrayList<>();
|
||||
for (String val : vals) {
|
||||
fVals.add(Float.valueOf(val));
|
||||
}
|
||||
return fVals;
|
||||
};
|
||||
protected LongFunction<List<Float>> extractFloatVals(LongFunction<Object> af) {
|
||||
return l -> this.getVectorValues(af.apply(l));
|
||||
}
|
||||
|
||||
protected Map<String, Value> generateMetadataMap(Map<String, Object> metadata_values_map) {
|
||||
@@ -102,4 +95,41 @@ public abstract class PineconeOpDispenser extends BaseOpDispenser<PineconeOp, Pi
|
||||
return metadata_map;
|
||||
}
|
||||
|
||||
protected List<Float> getVectorValues(Object rawVectorValues) {
|
||||
List<Float> floatValues;
|
||||
if (rawVectorValues instanceof String) {
|
||||
floatValues = new ArrayList<>();
|
||||
String[] rawValues = (((String) rawVectorValues).split(","));
|
||||
for (String val : rawValues) {
|
||||
floatValues.add(Float.valueOf(val));
|
||||
}
|
||||
} else if (rawVectorValues instanceof List) {
|
||||
floatValues = switch (((List<?>) rawVectorValues).get(0).getClass().getSimpleName()) {
|
||||
case "Float" -> (List<Float>) rawVectorValues;
|
||||
case "Double" -> ((List<Double>) rawVectorValues).stream().map(Double::floatValue).toList();
|
||||
case "String" -> ((List<String>) rawVectorValues).stream().map(Float::parseFloat).toList();
|
||||
default -> throw new RuntimeException("Invalid type specified for values");
|
||||
};
|
||||
} else {
|
||||
throw new RuntimeException("Invalid type specified for values");
|
||||
}
|
||||
return floatValues;
|
||||
}
|
||||
|
||||
protected List<Integer> getIndexValues(Object rawIndexValues) {
|
||||
List<Integer> intValues;
|
||||
if (rawIndexValues instanceof String) {
|
||||
intValues = new ArrayList<>();
|
||||
String[] rawValues = (((String) rawIndexValues).split(","));
|
||||
for (String val : rawValues) {
|
||||
intValues.add(Integer.valueOf(val));
|
||||
}
|
||||
} else if (rawIndexValues instanceof List) {
|
||||
intValues = (List<Integer>) rawIndexValues;
|
||||
}else {
|
||||
throw new RuntimeException("Invalid type specified for Index values");
|
||||
}
|
||||
return intValues;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -98,12 +98,12 @@ public class PineconeQueryOpDispenser extends PineconeOpDispenser {
|
||||
rFunc = l -> finalFunc.apply(l).setIncludeValues(af.apply(l));
|
||||
}
|
||||
|
||||
Optional<LongFunction<String>> vFunc = op.getAsOptionalFunction("vector", String.class);
|
||||
Optional<LongFunction<Object>> vFunc = op.getAsOptionalFunction("vector", Object.class);
|
||||
if (vFunc.isPresent()) {
|
||||
LongFunction<QueryRequest.Builder> finalFunc = rFunc;
|
||||
LongFunction<String> af = vFunc.get();
|
||||
LongFunction<Object> af = vFunc.get();
|
||||
|
||||
LongFunction<ArrayList<Float>> alf = extractFloatVals(af);
|
||||
LongFunction<List<Float>> alf = extractFloatVals(af);
|
||||
rFunc = l -> finalFunc.apply(l).addAllVector(alf.apply(l));
|
||||
}
|
||||
|
||||
@@ -135,12 +135,7 @@ public class PineconeQueryOpDispenser extends PineconeOpDispenser {
|
||||
List<Map<String, Object>> vectors = listLongFunction.apply(l);
|
||||
for (Map<String, Object> vector : vectors) {
|
||||
QueryVector.Builder qvb = QueryVector.newBuilder();
|
||||
String[] rawValues = ((String) vector.get("values")).split(",");
|
||||
ArrayList<Float> floatValues = new ArrayList<>();
|
||||
for (String val : rawValues) {
|
||||
floatValues.add(Float.valueOf(val));
|
||||
}
|
||||
qvb.addAllValues(floatValues);
|
||||
qvb.addAllValues(getVectorValues(vector.get("values")));
|
||||
qvb.setNamespace((String) vector.get("namespace"));
|
||||
if (vector.containsKey("top_k")) {
|
||||
qvb.setTopK((Integer) vector.get("top_k"));
|
||||
@@ -151,19 +146,9 @@ public class PineconeQueryOpDispenser extends PineconeOpDispenser {
|
||||
}
|
||||
if (vector.containsKey("sparse_values")) {
|
||||
Map<String,String> sparse_values = (Map<String, String>) vector.get("sparse_values");
|
||||
rawValues = sparse_values.get("values").split(",");
|
||||
floatValues = new ArrayList<>();
|
||||
for (String val : rawValues) {
|
||||
floatValues.add(Float.valueOf(val));
|
||||
}
|
||||
rawValues = sparse_values.get("indices").split(",");
|
||||
List<Integer> intValues = new ArrayList<>();
|
||||
for (String val : rawValues) {
|
||||
intValues.add(Integer.valueOf(val));
|
||||
}
|
||||
qvb.setSparseValues(SparseValues.newBuilder()
|
||||
.addAllValues(floatValues)
|
||||
.addAllIndices(intValues)
|
||||
.addAllValues(getVectorValues(sparse_values.get("values")))
|
||||
.addAllIndices(getIndexValues(sparse_values.get("indices")))
|
||||
.build());
|
||||
}
|
||||
returnVectors.add(qvb.build());
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
package io.nosqlbench.adapter.pinecone.opdispensers;
|
||||
|
||||
import com.google.protobuf.Struct;
|
||||
import com.google.protobuf.Value;
|
||||
import io.nosqlbench.adapter.pinecone.PineconeDriverAdapter;
|
||||
import io.nosqlbench.adapter.pinecone.PineconeSpace;
|
||||
import io.nosqlbench.adapter.pinecone.ops.PineconeOp;
|
||||
@@ -28,8 +27,9 @@ import io.pinecone.proto.UpdateRequest;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.function.BiConsumer;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.function.LongFunction;
|
||||
|
||||
public class PineconeUpdateOpDispenser extends PineconeOpDispenser {
|
||||
@@ -69,19 +69,9 @@ public class PineconeUpdateOpDispenser extends PineconeOpDispenser {
|
||||
Optional<LongFunction<Map>> mFunc = op.getAsOptionalFunction("sparse_values", Map.class);
|
||||
return mFunc.<LongFunction<SparseValues>>map(mapLongFunction -> l -> {
|
||||
Map<String, String> sparse_values_map = mapLongFunction.apply(l);
|
||||
String[] rawValues = (sparse_values_map.get("values")).split(",");
|
||||
ArrayList floatValues = new ArrayList<>();
|
||||
for (String val : rawValues) {
|
||||
floatValues.add(Float.valueOf(val));
|
||||
}
|
||||
rawValues = sparse_values_map.get("indices").split(",");
|
||||
List<Integer> intValues = new ArrayList<>();
|
||||
for (String val : rawValues) {
|
||||
intValues.add(Integer.valueOf(val));
|
||||
}
|
||||
return SparseValues.newBuilder()
|
||||
.addAllValues(floatValues)
|
||||
.addAllIndices(intValues)
|
||||
.addAllValues(getVectorValues(sparse_values_map.get("values")))
|
||||
.addAllIndices(getIndexValues(sparse_values_map.get("indices")))
|
||||
.build();
|
||||
}).orElse(null);
|
||||
}
|
||||
@@ -134,11 +124,11 @@ public class PineconeUpdateOpDispenser extends PineconeOpDispenser {
|
||||
rFunc = l -> finalFunc.apply(l).setId(af.apply(l));
|
||||
}
|
||||
|
||||
Optional<LongFunction<String>> vFunc = op.getAsOptionalFunction("values", String.class);
|
||||
Optional<LongFunction<Object>> vFunc = op.getAsOptionalFunction("values", Object.class);
|
||||
if (vFunc.isPresent()) {
|
||||
LongFunction<UpdateRequest.Builder> finalFunc = rFunc;
|
||||
LongFunction<String> af = vFunc.get();
|
||||
LongFunction<ArrayList<Float>> alf = extractFloatVals(af);
|
||||
LongFunction<Object> af = vFunc.get();
|
||||
LongFunction<List<Float>> alf = extractFloatVals(af);
|
||||
rFunc = l -> finalFunc.apply(l).addAllValues(alf.apply(l));
|
||||
}
|
||||
|
||||
|
||||
@@ -16,9 +16,7 @@
|
||||
|
||||
package io.nosqlbench.adapter.pinecone.opdispensers;
|
||||
|
||||
import com.google.protobuf.ListValue;
|
||||
import com.google.protobuf.Struct;
|
||||
import com.google.protobuf.Value;
|
||||
import io.nosqlbench.adapter.pinecone.PineconeDriverAdapter;
|
||||
import io.nosqlbench.adapter.pinecone.PineconeSpace;
|
||||
import io.nosqlbench.adapter.pinecone.ops.PineconeOp;
|
||||
@@ -31,7 +29,6 @@ import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.function.BiConsumer;
|
||||
import java.util.function.LongFunction;
|
||||
|
||||
public class PineconeUpsertOpDispenser extends PineconeOpDispenser {
|
||||
@@ -75,27 +72,12 @@ public class PineconeUpsertOpDispenser extends PineconeOpDispenser {
|
||||
Vector.Builder vb = Vector.newBuilder();
|
||||
// No need to check for key, it is invalid if id is not there, let it throw an exception
|
||||
vb.setId(vector.get("id").toString());
|
||||
String[] rawValues = ((String) vector.get("values")).split(",");
|
||||
ArrayList<Float> floatValues = new ArrayList<>();
|
||||
for (String val : rawValues) {
|
||||
floatValues.add(Float.valueOf(val));
|
||||
}
|
||||
vb.addAllValues(floatValues);
|
||||
vb.addAllValues(getVectorValues(vector.get("values")));
|
||||
if (vector.containsKey("sparse_values")) {
|
||||
Map<String,String> sparse_values = (Map<String, String>) vector.get("sparse_values");
|
||||
rawValues = sparse_values.get("values").split(",");
|
||||
floatValues = new ArrayList<>();
|
||||
for (String val : rawValues) {
|
||||
floatValues.add(Float.valueOf(val));
|
||||
}
|
||||
rawValues = sparse_values.get("indices").split(",");
|
||||
List<Integer> intValues = new ArrayList<>();
|
||||
for (String val : rawValues) {
|
||||
intValues.add(Integer.valueOf(val));
|
||||
}
|
||||
vb.setSparseValues(SparseValues.newBuilder()
|
||||
.addAllValues(floatValues)
|
||||
.addAllIndices(intValues)
|
||||
.addAllValues(getVectorValues(sparse_values.get("values")))
|
||||
.addAllIndices(getIndexValues(sparse_values.get("indices")))
|
||||
.build());
|
||||
}
|
||||
if (vector.containsKey("metadata")) {
|
||||
|
||||
@@ -18,7 +18,17 @@ blocks:
|
||||
operator: "$eq"
|
||||
comparator: "movies"
|
||||
- id: "2"
|
||||
values: "0.8602578079921012,0.12103044768221516,0.7737329191858439,0.4521093269320254,0.29351661477669416,0.4261807015226558,0.14131665592103335,0.882370813029422,0.4412833140430886,0.9916525700115515"
|
||||
values:
|
||||
- 0.8602578079921012
|
||||
- 0.12103044768221516
|
||||
- 0.7737329191858439
|
||||
- 0.4521093269320254
|
||||
- 0.29351661477669416
|
||||
- 0.4261807015226558
|
||||
- 0.14131665592103335
|
||||
- 0.882370813029422
|
||||
- 0.4412833140430886
|
||||
- 0.9916525700115515
|
||||
namespace: "example_namespace"
|
||||
top_k: 3
|
||||
filter:
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
scenarios:
|
||||
default:
|
||||
upsert: run driver=pinecone tags==blocks:upsert cycles=1000 cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
|
||||
upsert: run driver=pinecone tags==block:upsert cycles===TEMPLATE(upsert-cycles,10000000) threads=auto
|
||||
query: run driver=pinecone tags==block:query cycles===TEMPLATE(query-cycles,10000000) threads=auto
|
||||
|
||||
bindings:
|
||||
id: Mod(<<keycount:1000000000>>); ToString() -> String
|
||||
vector_value: ListSizedHashed(<<dimensions:5>>,HashRange(0.0f,100.0f) -> float); ToString() -> String
|
||||
vector_value: ListSizedHashed(<<dimensions:5>>,HashRange(0.0f,100.0f) -> float)
|
||||
city: Cities()
|
||||
|
||||
blocks:
|
||||
@@ -14,7 +15,7 @@ blocks:
|
||||
upsert: "test-index"
|
||||
namespace: "example_namespace"
|
||||
upsert_vectors:
|
||||
- id: {id}
|
||||
values: {vector_value}
|
||||
- id: "{id}"
|
||||
values: "{vector_value}"
|
||||
metadata:
|
||||
city: {city}
|
||||
city: "{city}"
|
||||
|
||||
@@ -10,7 +10,17 @@ blocks:
|
||||
namespace: "example_namespace"
|
||||
upsert_vectors:
|
||||
- id: "item_0"
|
||||
values: "0.8238042071878214,0.6579519242642494,0.2772098082162267,0.11434681368630942,0.7496610470760962,0.08914691629812001,0.13249049306247204,0.8875869003282576,0.21615924382746318,0.8454796243176312"
|
||||
values:
|
||||
- 0.8238042071878214
|
||||
- 0.6579519242642494
|
||||
- 0.2772098082162267
|
||||
- 0.11434681368630942
|
||||
- 0.7496610470760962
|
||||
- 0.08914691629812001
|
||||
- 0.13249049306247204
|
||||
- 0.8875869003282576
|
||||
- 0.21615924382746318
|
||||
- 0.8454796243176312
|
||||
metadata:
|
||||
category: "sports"
|
||||
colors: "blue,red,green"
|
||||
|
||||
@@ -198,7 +198,10 @@ public class PineconeOpMapperTest {
|
||||
index: "test-index"
|
||||
upsert_vectors:
|
||||
- id: 1
|
||||
values: "1.0,2.0,3.0"
|
||||
values:
|
||||
- 1.0
|
||||
- 2.0
|
||||
- 3.0
|
||||
sparse_values:
|
||||
indices: "1,2,3"
|
||||
values: "4.0,5.0,6.0"
|
||||
@@ -206,7 +209,10 @@ public class PineconeOpMapperTest {
|
||||
key1: "val1"
|
||||
key2: 2
|
||||
- id: 2
|
||||
values: "7.0,8.0,9.0"
|
||||
values:
|
||||
- 7.0
|
||||
- 8.0
|
||||
- 9.0
|
||||
sparse_values:
|
||||
indices: "4,5,6"
|
||||
values: "1.1,2.2,3.3"
|
||||
|
||||
Reference in New Issue
Block a user