adding vectormath for pinecone adapter

This commit is contained in:
Mark Wolters 2023-08-29 15:01:44 -04:00
parent f15a410ea9
commit a0c7687cf9
4 changed files with 205 additions and 26 deletions

View File

@ -0,0 +1,83 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.engine.extensions.vectormath;
import java.util.Arrays;
/**
 * Helpers for intersecting two ascending-sorted primitive arrays.
 *
 * <p>The {@code find} methods walk both arrays in lockstep, always advancing past the
 * smaller of the two current elements. That walk only discovers every common element
 * when both inputs are sorted in ascending order; unsorted input may miss matches.
 */
public class PineconeIntersections {

    /**
     * Returns the elements common to both arrays, in ascending order.
     *
     * <p>A value that occurs several times in both inputs is reported once per matched
     * pair, i.e. as many times as the smaller of its two occurrence counts.
     *
     * @param reference ascending-sorted array; not modified
     * @param sample    ascending-sorted array; not modified
     * @return a new array holding the common elements (empty when there are none)
     * @throws NullPointerException if either argument is {@code null}
     */
    public static long[] find(long[] reference, long[] sample) {
        // An intersection can never be longer than the shorter input, so a single
        // up-front allocation is enough; no growth is needed while matching.
        long[] matches = new long[Math.min(reference.length, sample.length)];
        int refPos = 0;
        int samplePos = 0;
        int matchCount = 0;
        while (refPos < reference.length && samplePos < sample.length) {
            long refValue = reference[refPos];
            long sampleValue = sample[samplePos];
            if (refValue == sampleValue) {
                matches[matchCount++] = refValue;
                refPos++;
                samplePos++;
            } else if (sampleValue < refValue) {
                samplePos++;
            } else {
                refPos++;
            }
        }
        // Trim the scratch buffer down to the number of matches actually recorded.
        return Arrays.copyOf(matches, matchCount);
    }

    /**
     * Returns the elements common to both arrays, in ascending order.
     *
     * <p>A value that occurs several times in both inputs is reported once per matched
     * pair, i.e. as many times as the smaller of its two occurrence counts.
     *
     * @param reference ascending-sorted array; not modified
     * @param sample    ascending-sorted array; not modified
     * @return a new array holding the common elements (empty when there are none)
     * @throws NullPointerException if either argument is {@code null}
     */
    public static int[] find(int[] reference, int[] sample) {
        // An intersection can never be longer than the shorter input, so a single
        // up-front allocation is enough; no growth is needed while matching.
        int[] matches = new int[Math.min(reference.length, sample.length)];
        int refPos = 0;
        int samplePos = 0;
        int matchCount = 0;
        while (refPos < reference.length && samplePos < sample.length) {
            int refValue = reference[refPos];
            int sampleValue = sample[samplePos];
            if (refValue == sampleValue) {
                matches[matchCount++] = refValue;
                refPos++;
                samplePos++;
            } else if (sampleValue < refValue) {
                samplePos++;
            } else {
                refPos++;
            }
        }
        // Trim the scratch buffer down to the number of matches actually recorded.
        return Arrays.copyOf(matches, matchCount);
    }

    /**
     * Returns a copy of {@code arr} that is one element longer; the extra trailing
     * slot holds {@code 0}.
     *
     * @param arr the array to copy; not modified
     * @return a new array of length {@code arr.length + 1}
     */
    public static int[] resize(int[] arr) {
        return Arrays.copyOf(arr, arr.length + 1);
    }

    /**
     * Returns a copy of {@code arr} that is one element longer; the extra trailing
     * slot holds {@code 0}.
     *
     * @param arr the array to copy; not modified
     * @return a new array of length {@code arr.length + 1}
     */
    public static long[] resize(long[] arr) {
        return Arrays.copyOf(arr, arr.length + 1);
    }
}

View File

@ -0,0 +1,64 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.engine.extensions.vectormath;
import io.pinecone.proto.QueryResponse;
import io.pinecone.proto.ScoredVector;
import java.util.Arrays;
/**
 * Static helpers for turning Pinecone query results into id arrays and for scoring
 * them against a reference set of ids.
 */
public class PineconeVectorMath {

    /**
     * Parses every element of {@code strings} as a decimal {@code long}.
     *
     * @param strings the textual values to convert
     * @return a new array of the same length holding the parsed values, in order
     * @throws NumberFormatException if any element is not a parsable {@code long}
     */
    public static long[] stringArrayAsALongArray(String[] strings) {
        long[] longs = new long[strings.length];
        for (int i = 0; i < longs.length; i++) {
            longs[i] = Long.parseLong(strings[i]);
        }
        return longs;
    }

    /**
     * Parses every element of {@code strings} as a decimal {@code int}.
     *
     * @param strings the textual values to convert
     * @return a new array of the same length holding the parsed values, in order
     * @throws NumberFormatException if any element is not a parsable {@code int}
     */
    public static int[] stringArrayAsIntArray(String[] strings) {
        int[] ints = new int[strings.length];
        for (int i = 0; i < ints.length; i++) {
            ints[i] = Integer.parseInt(strings[i]);
        }
        return ints;
    }

    /**
     * Collects the id of every match in {@code response}, in the order the matches
     * are listed by {@code getMatchesList()}.
     *
     * @param response the query response to read
     * @return the match ids as strings
     */
    public static String[] idsToStringArray(QueryResponse response) {
        return response.getMatchesList().stream().map(ScoredVector::getId).toArray(String[]::new);
    }

    /**
     * Collects the id of every match in {@code response}, parsed as an {@code int},
     * in the order the matches are listed by {@code getMatchesList()}.
     *
     * @param response the query response to read
     * @return the match ids as ints
     * @throws NumberFormatException if any id is not a parsable {@code int}
     */
    public static int[] idsToIntArray(QueryResponse response) {
        return response.getMatchesList().stream().mapToInt(r -> Integer.parseInt(r.getId())).toArray();
    }

    /**
     * Computes the fraction of {@code referenceIndexes} that also appears in
     * {@code sampleIndexes}: {@code |intersection| / referenceIndexes.length}.
     *
     * <p>Neither argument is modified; sorted copies are used for the intersection.
     * An empty reference array yields {@code NaN} (0.0 / 0.0).
     *
     * @param referenceIndexes the ids the result is measured against; any order
     * @param sampleIndexes    the ids being scored; any order
     * @return the intersection size divided by the reference length
     */
    public static double computeRecall(long[] referenceIndexes, long[] sampleIndexes) {
        // Sort copies so the caller's arrays keep their original order.
        long[] sortedReference = referenceIndexes.clone();
        long[] sortedSample = sampleIndexes.clone();
        Arrays.sort(sortedReference);
        Arrays.sort(sortedSample);
        long[] intersection = PineconeIntersections.find(sortedReference, sortedSample);
        return (double) intersection.length / (double) referenceIndexes.length;
    }

    /**
     * Computes the fraction of {@code referenceIndexes} that also appears in
     * {@code sampleIndexes}: {@code |intersection| / referenceIndexes.length}.
     *
     * <p>Neither argument is modified; sorted copies are used for the intersection.
     * An empty reference array yields {@code NaN} (0.0 / 0.0).
     *
     * @param referenceIndexes the ids the result is measured against; any order
     * @param sampleIndexes    the ids being scored; any order
     * @return the intersection size divided by the reference length
     */
    public static double computeRecall(int[] referenceIndexes, int[] sampleIndexes) {
        // Sort copies so the caller's arrays keep their original order.
        int[] sortedReference = referenceIndexes.clone();
        int[] sortedSample = sampleIndexes.clone();
        Arrays.sort(sortedReference);
        Arrays.sort(sortedSample);
        int[] intersection = PineconeIntersections.find(sortedReference, sortedSample);
        return (double) intersection.length / (double) referenceIndexes.length;
    }
}

View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.engine.extensions.vectormath;
import com.codahale.metrics.MetricRegistry;
import io.nosqlbench.api.config.LabeledScenarioContext;
import io.nosqlbench.api.extensions.ScriptingPluginInfo;
import io.nosqlbench.nb.annotations.Service;
import org.apache.logging.log4j.Logger;
/**
 * Scripting plugin descriptor that exposes {@link PineconeVectorMath}, registered as a
 * {@link ScriptingPluginInfo} service under the selector {@code pinecone_vectormath}.
 */
@Service(value = ScriptingPluginInfo.class, selector = "pinecone_vectormath")
public class PineconeVectorMathPluginInfo implements ScriptingPluginInfo<PineconeVectorMath> {

    /**
     * Creates the extension object handed to scripts. None of the arguments are used;
     * a new {@link PineconeVectorMath} is returned on every call.
     */
    @Override
    public PineconeVectorMath getExtensionObject(
        Logger logger,
        MetricRegistry metricRegistry,
        LabeledScenarioContext scriptContext
    ) {
        final PineconeVectorMath extension = new PineconeVectorMath();
        return extension;
    }

    /** Returns the human-readable summary of this plugin. */
    @Override
    public String getDescription() {
        final String description =
            "various methods and utilities for working with vector math in a scripted environment";
        return description;
    }
}

View File

@ -1,36 +1,32 @@
# Named scenarios. Each step is a `run` command line for the pinecone driver that
# selects op blocks via its `tags` filter.
# NOTE(review): the apiKey values are written inline in these command lines. If they
# are real credentials they should be rotated and supplied at run time instead - TODO confirm.
scenarios:
default:
mixed: run driver=pinecone cycles=1000 apiKey=2f55b2f0-670f-4c51-9073-4d37142b761a projectName=a850334 environment=us-east-1-aws tags='block:main-.*'
verify: run driver=pinecone cycles=10 threads=10 apiKey=6503c344-5967-421d-b19a-3e7955842253 projectName=f88a480 environment=eu-west4-gcp tags=block:verify
# Binding recipes. Op templates and the verifier refer to them by name in braces,
# e.g. {vector} or {validation_set}.
bindings:
id: Mod(<<keycount:1000000000>>); ToString() -> String
vector_value: CircleVectors(100000, "io.nosqlbench.virtdata.library.basics.shared.vectors.algorithms.GoldenAngle")
state: StateCodes()
# Read from the "/test" dataset of glove-25-angular.hdf5.
vector: HdfFileToFloatList("glove-25-angular.hdf5", "/test")
# Read from the "/neighbors" dataset of the same file; presumably the expected
# neighbor ids for each test vector - TODO confirm.
validation_set: HdfFileToIntArray("glove-25-angular.hdf5", "/neighbors")
# Op templates grouped into named blocks; the scenarios pick blocks with
# tags=block:<name>.
blocks:
# main-write:
# params:
# ratio: 1
# ops:
# op1:
# upsert: "circles"
# namespace: "example_namespace"
# upsert_vectors:
# - id: "{id}"
# values: "{vector_value}"
# metadata:
# state: "{state}"
main-read:
params:
ratio: 1
verify:
ops:
op1:
query: "circles"
vector: "{vector_value}"
ops1:
query: "glove25"
namespace: "example_namespace"
top_k: 1
vector: "{vector}"
top_k: 100
include_values: true
include_metadata: true
#result should be type QueryResponse
include_metadata: false
# Verifier section: the script below converts the ids in the query result to ints,
# computes recall against {validation_set} through the pinecone_vectormath extension,
# and records recall * 1000000 in the "recall-histo" histogram created by verifier-init.
verifier-imports:
- "io.nosqlbench.api.engine.metrics.ActivityMetrics"
- "io.nosqlbench.engine.extensions.vectormath.Intersections"
verifier-init: |
recallHisto = ActivityMetrics.histogram(_parsed_op,"recall-histo",4);
verifier: |
result.getMatchesList().get(0).getValuesList()=={vector_value}
found_string_ids=pinecone_vectormath.idsToStringArray(result);
found_int_ids=pinecone_vectormath.stringArrayAsIntArray(found_string_ids);
print(found_int_ids)
print({validation_set})
recall = pinecone_vectormath.computeRecall(found_int_ids, {validation_set})
print(recall)
recallHisto.update((long)(recall*1000000.0));
return true;