Added functionality to handle vector values supplied either as text or as a list of text, float, or double values

This commit is contained in:
Mark Wolters
2023-06-02 17:31:34 +00:00
parent 5551c1fb13
commit 9023afac39
10 changed files with 94 additions and 80 deletions

View File

@@ -65,7 +65,7 @@ public class PineconeSpace {
}
/**
* Connections are index-specific so we need to allow for multiple connection management across indices.
* Connections are index-specific, so we need to allow for multiple connection management across indices.
* However, note that a single connection object is thread safe and can be used by multiple clients.
*
* @param index The database index for which a connection is being requested

View File

@@ -57,7 +57,7 @@ public class PineconeDescribeIndexStatsOpDispenser extends PineconeOpDispenser {
* The pattern used here is to accommodate the way Request types are constructed for Pinecone.
* Requests use a Builder pattern, so at time of instantiation the methods should be chained together.
* For each method in the chain a function is created here and added to the chain of functions
* called at time of instantiation. Additionally some of the arguments to the builder methods require
* called at time of instantiation. Additionally, some of the arguments to the builder methods require
* creation through their own builder process. In these cases the pattern adopted includes multiple layers of
* functions in order to build all objects in the correct manner and ordering.
*/

View File

@@ -76,15 +76,8 @@ public abstract class PineconeOpDispenser extends BaseOpDispenser<PineconeOp, Pi
return listValueBuilder.build();
}
protected LongFunction<ArrayList<Float>> extractFloatVals(LongFunction<String> af) {
return l -> {
String[] vals = af.apply(l).split(",");
ArrayList<Float> fVals = new ArrayList<>();
for (String val : vals) {
fVals.add(Float.valueOf(val));
}
return fVals;
};
/**
 * Wraps a function producing raw vector data so that it yields float lists instead.
 * Each invocation of the returned function evaluates {@code af} for the same input
 * and hands the result to {@link #getVectorValues(Object)} for conversion.
 *
 * @param af function supplying the raw vector values for a given long input
 * @return a function supplying the converted {@code List<Float>} for a given long input
 */
protected LongFunction<List<Float>> extractFloatVals(LongFunction<Object> af) {
    return input -> {
        Object rawVector = af.apply(input);
        return getVectorValues(rawVector);
    };
}
protected Map<String, Value> generateMetadataMap(Map<String, Object> metadata_values_map) {
@@ -102,4 +95,41 @@ public abstract class PineconeOpDispenser extends BaseOpDispenser<PineconeOp, Pi
return metadata_map;
}
/**
 * Converts a raw vector specification into a list of floats.
 * <p>
 * Accepted inputs:
 * <ul>
 * <li>a {@link String} of comma-separated numbers, e.g. {@code "0.1,0.2,0.3"}</li>
 * <li>a {@link List} whose elements are each a {@link Number} (Float, Double, Integer, ...)
 * or a numeric {@link String}; element types may be mixed within one list</li>
 * </ul>
 * An empty list yields an empty result.
 *
 * @param rawVectorValues the raw vector values, as a String or a List
 * @return a new list holding the values converted to Float, in their original order
 * @throws NumberFormatException if a textual value cannot be parsed as a float
 * @throws RuntimeException if the input, or one of the list elements, has an unsupported type
 */
protected List<Float> getVectorValues(Object rawVectorValues) {
    if (rawVectorValues instanceof String) {
        String[] rawValues = ((String) rawVectorValues).split(",");
        List<Float> floatValues = new ArrayList<>(rawValues.length);
        for (String val : rawValues) {
            // Float.valueOf ignores leading/trailing whitespace, so "1.0, 2.0" is accepted.
            floatValues.add(Float.valueOf(val));
        }
        return floatValues;
    }
    if (rawVectorValues instanceof List) {
        List<?> rawList = (List<?>) rawVectorValues;
        List<Float> floatValues = new ArrayList<>(rawList.size());
        // Convert element by element instead of trusting the type of the first element:
        // this handles empty lists, mixed numeric types and integer literals safely.
        for (Object element : rawList) {
            if (element instanceof Number) {
                floatValues.add(((Number) element).floatValue());
            } else if (element instanceof String) {
                floatValues.add(Float.parseFloat((String) element));
            } else {
                // Also reached for null elements, which have no usable numeric value.
                throw new RuntimeException("Invalid type specified for values");
            }
        }
        return floatValues;
    }
    throw new RuntimeException("Invalid type specified for values");
}
/**
 * Converts a raw sparse-index specification into a list of integers.
 * <p>
 * Accepted inputs:
 * <ul>
 * <li>a {@link String} of comma-separated integers, e.g. {@code "1,2,3"}; whitespace around
 * each entry is ignored</li>
 * <li>a {@link List} whose elements are each an integral {@link Number}
 * (Integer, Long, Short, Byte) or an integer-valued {@link String}</li>
 * </ul>
 *
 * @param rawIndexValues the raw index values, as a String or a List
 * @return a new list holding the values converted to Integer, in their original order
 * @throws NumberFormatException if a textual value cannot be parsed as an int
 * @throws ArithmeticException if an integral value does not fit into an int
 * @throws RuntimeException if the input, or one of the list elements, has an unsupported type
 */
protected List<Integer> getIndexValues(Object rawIndexValues) {
    if (rawIndexValues instanceof String) {
        String[] rawValues = ((String) rawIndexValues).split(",");
        List<Integer> intValues = new ArrayList<>(rawValues.length);
        for (String val : rawValues) {
            // Unlike Float.valueOf, Integer.valueOf rejects surrounding whitespace, so trim first
            // to accept the same "1, 2, 3" formatting that vector values allow.
            intValues.add(Integer.valueOf(val.trim()));
        }
        return intValues;
    }
    if (rawIndexValues instanceof List) {
        List<?> rawList = (List<?>) rawIndexValues;
        List<Integer> intValues = new ArrayList<>(rawList.size());
        // Validate and convert each element rather than blindly casting the list, so a bad
        // element fails here with a clear error instead of later at the point of use.
        for (Object element : rawList) {
            if (element instanceof Integer) {
                intValues.add((Integer) element);
            } else if (element instanceof Long || element instanceof Short || element instanceof Byte) {
                intValues.add(Math.toIntExact(((Number) element).longValue()));
            } else if (element instanceof String) {
                intValues.add(Integer.valueOf(((String) element).trim()));
            } else {
                throw new RuntimeException("Invalid type specified for Index values");
            }
        }
        return intValues;
    }
    throw new RuntimeException("Invalid type specified for Index values");
}
}

View File

@@ -98,12 +98,12 @@ public class PineconeQueryOpDispenser extends PineconeOpDispenser {
rFunc = l -> finalFunc.apply(l).setIncludeValues(af.apply(l));
}
Optional<LongFunction<String>> vFunc = op.getAsOptionalFunction("vector", String.class);
Optional<LongFunction<Object>> vFunc = op.getAsOptionalFunction("vector", Object.class);
if (vFunc.isPresent()) {
LongFunction<QueryRequest.Builder> finalFunc = rFunc;
LongFunction<String> af = vFunc.get();
LongFunction<Object> af = vFunc.get();
LongFunction<ArrayList<Float>> alf = extractFloatVals(af);
LongFunction<List<Float>> alf = extractFloatVals(af);
rFunc = l -> finalFunc.apply(l).addAllVector(alf.apply(l));
}
@@ -135,12 +135,7 @@ public class PineconeQueryOpDispenser extends PineconeOpDispenser {
List<Map<String, Object>> vectors = listLongFunction.apply(l);
for (Map<String, Object> vector : vectors) {
QueryVector.Builder qvb = QueryVector.newBuilder();
String[] rawValues = ((String) vector.get("values")).split(",");
ArrayList<Float> floatValues = new ArrayList<>();
for (String val : rawValues) {
floatValues.add(Float.valueOf(val));
}
qvb.addAllValues(floatValues);
qvb.addAllValues(getVectorValues(vector.get("values")));
qvb.setNamespace((String) vector.get("namespace"));
if (vector.containsKey("top_k")) {
qvb.setTopK((Integer) vector.get("top_k"));
@@ -151,19 +146,9 @@ public class PineconeQueryOpDispenser extends PineconeOpDispenser {
}
if (vector.containsKey("sparse_values")) {
Map<String,String> sparse_values = (Map<String, String>) vector.get("sparse_values");
rawValues = sparse_values.get("values").split(",");
floatValues = new ArrayList<>();
for (String val : rawValues) {
floatValues.add(Float.valueOf(val));
}
rawValues = sparse_values.get("indices").split(",");
List<Integer> intValues = new ArrayList<>();
for (String val : rawValues) {
intValues.add(Integer.valueOf(val));
}
qvb.setSparseValues(SparseValues.newBuilder()
.addAllValues(floatValues)
.addAllIndices(intValues)
.addAllValues(getVectorValues(sparse_values.get("values")))
.addAllIndices(getIndexValues(sparse_values.get("indices")))
.build());
}
returnVectors.add(qvb.build());

View File

@@ -17,7 +17,6 @@
package io.nosqlbench.adapter.pinecone.opdispensers;
import com.google.protobuf.Struct;
import com.google.protobuf.Value;
import io.nosqlbench.adapter.pinecone.PineconeDriverAdapter;
import io.nosqlbench.adapter.pinecone.PineconeSpace;
import io.nosqlbench.adapter.pinecone.ops.PineconeOp;
@@ -28,8 +27,9 @@ import io.pinecone.proto.UpdateRequest;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.*;
import java.util.function.BiConsumer;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.LongFunction;
public class PineconeUpdateOpDispenser extends PineconeOpDispenser {
@@ -69,19 +69,9 @@ public class PineconeUpdateOpDispenser extends PineconeOpDispenser {
Optional<LongFunction<Map>> mFunc = op.getAsOptionalFunction("sparse_values", Map.class);
return mFunc.<LongFunction<SparseValues>>map(mapLongFunction -> l -> {
Map<String, String> sparse_values_map = mapLongFunction.apply(l);
String[] rawValues = (sparse_values_map.get("values")).split(",");
ArrayList floatValues = new ArrayList<>();
for (String val : rawValues) {
floatValues.add(Float.valueOf(val));
}
rawValues = sparse_values_map.get("indices").split(",");
List<Integer> intValues = new ArrayList<>();
for (String val : rawValues) {
intValues.add(Integer.valueOf(val));
}
return SparseValues.newBuilder()
.addAllValues(floatValues)
.addAllIndices(intValues)
.addAllValues(getVectorValues(sparse_values_map.get("values")))
.addAllIndices(getIndexValues(sparse_values_map.get("indices")))
.build();
}).orElse(null);
}
@@ -134,11 +124,11 @@ public class PineconeUpdateOpDispenser extends PineconeOpDispenser {
rFunc = l -> finalFunc.apply(l).setId(af.apply(l));
}
Optional<LongFunction<String>> vFunc = op.getAsOptionalFunction("values", String.class);
Optional<LongFunction<Object>> vFunc = op.getAsOptionalFunction("values", Object.class);
if (vFunc.isPresent()) {
LongFunction<UpdateRequest.Builder> finalFunc = rFunc;
LongFunction<String> af = vFunc.get();
LongFunction<ArrayList<Float>> alf = extractFloatVals(af);
LongFunction<Object> af = vFunc.get();
LongFunction<List<Float>> alf = extractFloatVals(af);
rFunc = l -> finalFunc.apply(l).addAllValues(alf.apply(l));
}

View File

@@ -16,9 +16,7 @@
package io.nosqlbench.adapter.pinecone.opdispensers;
import com.google.protobuf.ListValue;
import com.google.protobuf.Struct;
import com.google.protobuf.Value;
import io.nosqlbench.adapter.pinecone.PineconeDriverAdapter;
import io.nosqlbench.adapter.pinecone.PineconeSpace;
import io.nosqlbench.adapter.pinecone.ops.PineconeOp;
@@ -31,7 +29,6 @@ import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.*;
import java.util.function.BiConsumer;
import java.util.function.LongFunction;
public class PineconeUpsertOpDispenser extends PineconeOpDispenser {
@@ -75,27 +72,12 @@ public class PineconeUpsertOpDispenser extends PineconeOpDispenser {
Vector.Builder vb = Vector.newBuilder();
// No need to check for key, it is invalid if id is not there, let it throw an exception
vb.setId(vector.get("id").toString());
String[] rawValues = ((String) vector.get("values")).split(",");
ArrayList<Float> floatValues = new ArrayList<>();
for (String val : rawValues) {
floatValues.add(Float.valueOf(val));
}
vb.addAllValues(floatValues);
vb.addAllValues(getVectorValues(vector.get("values")));
if (vector.containsKey("sparse_values")) {
Map<String,String> sparse_values = (Map<String, String>) vector.get("sparse_values");
rawValues = sparse_values.get("values").split(",");
floatValues = new ArrayList<>();
for (String val : rawValues) {
floatValues.add(Float.valueOf(val));
}
rawValues = sparse_values.get("indices").split(",");
List<Integer> intValues = new ArrayList<>();
for (String val : rawValues) {
intValues.add(Integer.valueOf(val));
}
vb.setSparseValues(SparseValues.newBuilder()
.addAllValues(floatValues)
.addAllIndices(intValues)
.addAllValues(getVectorValues(sparse_values.get("values")))
.addAllIndices(getIndexValues(sparse_values.get("indices")))
.build());
}
if (vector.containsKey("metadata")) {

View File

@@ -18,7 +18,17 @@ blocks:
operator: "$eq"
comparator: "movies"
- id: "2"
values: "0.8602578079921012,0.12103044768221516,0.7737329191858439,0.4521093269320254,0.29351661477669416,0.4261807015226558,0.14131665592103335,0.882370813029422,0.4412833140430886,0.9916525700115515"
values:
- 0.8602578079921012
- 0.12103044768221516
- 0.7737329191858439
- 0.4521093269320254
- 0.29351661477669416
- 0.4261807015226558
- 0.14131665592103335
- 0.882370813029422
- 0.4412833140430886
- 0.9916525700115515
namespace: "example_namespace"
top_k: 3
filter:

View File

@@ -1,10 +1,11 @@
scenarios:
default:
upsert: run driver=pinecone tags==blocks:upsert cycles=1000 cycles===TEMPLATE(rampup-cycles,10000000) threads=auto
upsert: run driver=pinecone tags==block:upsert cycles===TEMPLATE(upsert-cycles,10000000) threads=auto
query: run driver=pinecone tags==block:query cycles===TEMPLATE(query-cycles,10000000) threads=auto
bindings:
id: Mod(<<keycount:1000000000>>); ToString() -> String
vector_value: ListSizedHashed(<<dimensions:5>>,HashRange(0.0f,100.0f) -> float); ToString() -> String
vector_value: ListSizedHashed(<<dimensions:5>>,HashRange(0.0f,100.0f) -> float)
city: Cities()
blocks:
@@ -14,7 +15,7 @@ blocks:
upsert: "test-index"
namespace: "example_namespace"
upsert_vectors:
- id: {id}
values: {vector_value}
- id: "{id}"
values: "{vector_value}"
metadata:
city: {city}
city: "{city}"

View File

@@ -10,7 +10,17 @@ blocks:
namespace: "example_namespace"
upsert_vectors:
- id: "item_0"
values: "0.8238042071878214,0.6579519242642494,0.2772098082162267,0.11434681368630942,0.7496610470760962,0.08914691629812001,0.13249049306247204,0.8875869003282576,0.21615924382746318,0.8454796243176312"
values:
- 0.8238042071878214
- 0.6579519242642494
- 0.2772098082162267
- 0.11434681368630942
- 0.7496610470760962
- 0.08914691629812001
- 0.13249049306247204
- 0.8875869003282576
- 0.21615924382746318
- 0.8454796243176312
metadata:
category: "sports"
colors: "blue,red,green"

View File

@@ -198,7 +198,10 @@ public class PineconeOpMapperTest {
index: "test-index"
upsert_vectors:
- id: 1
values: "1.0,2.0,3.0"
values:
- 1.0
- 2.0
- 3.0
sparse_values:
indices: "1,2,3"
values: "4.0,5.0,6.0"
@@ -206,7 +209,10 @@ public class PineconeOpMapperTest {
key1: "val1"
key2: 2
- id: 2
values: "7.0,8.0,9.0"
values:
- 7.0
- 8.0
- 9.0
sparse_values:
indices: "4,5,6"
values: "1.1,2.2,3.3"