mirror of
https://github.com/nosqlbench/nosqlbench.git
synced 2025-02-16 17:34:52 -06:00
adding vectormath for pinecone adapter
This commit is contained in:
parent
f15a410ea9
commit
a0c7687cf9
@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright (c) 2023 nosqlbench
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.nosqlbench.engine.extensions.vectormath;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
 * Merge-style intersection helpers for primitive arrays, plus small array-growing utilities.
 *
 * <p>The {@code find} methods walk both inputs with a single forward pass and therefore
 * only produce a complete intersection when both inputs are sorted in ascending order.
 */
public class PineconeIntersections {

    /**
     * Returns the values that occur in both arrays.
     *
     * <p>Both arrays must already be sorted in ascending order; the method does not sort
     * or modify them. A value repeated in both inputs is reported once per matched pair.
     *
     * @param reference ascending-sorted values to match against
     * @param sample ascending-sorted values to look up
     * @return a new array holding the common values in ascending order; empty if none match
     * @throws NullPointerException if either argument is {@code null}
     */
    public static long[] find(long[] reference, long[] sample) {
        // Each match consumes one element from both inputs, so the shorter input
        // bounds the result size; allocating once avoids regrowing on every match.
        long[] matches = new long[Math.min(reference.length, sample.length)];
        int refPos = 0;
        int samplePos = 0;
        int matchCount = 0;
        while (refPos < reference.length && samplePos < sample.length) {
            long refValue = reference[refPos];
            long sampleValue = sample[samplePos];
            if (refValue == sampleValue) {
                matches[matchCount++] = refValue;
                refPos++;
                samplePos++;
            } else if (sampleValue < refValue) {
                // Advance whichever side currently holds the smaller value.
                samplePos++;
            } else {
                refPos++;
            }
        }
        return Arrays.copyOf(matches, matchCount);
    }

    /**
     * Returns the values that occur in both arrays.
     *
     * <p>Both arrays must already be sorted in ascending order; the method does not sort
     * or modify them. A value repeated in both inputs is reported once per matched pair.
     *
     * @param reference ascending-sorted values to match against
     * @param sample ascending-sorted values to look up
     * @return a new array holding the common values in ascending order; empty if none match
     * @throws NullPointerException if either argument is {@code null}
     */
    public static int[] find(int[] reference, int[] sample) {
        // Each match consumes one element from both inputs, so the shorter input
        // bounds the result size; allocating once avoids regrowing on every match.
        int[] matches = new int[Math.min(reference.length, sample.length)];
        int refPos = 0;
        int samplePos = 0;
        int matchCount = 0;
        while (refPos < reference.length && samplePos < sample.length) {
            int refValue = reference[refPos];
            int sampleValue = sample[samplePos];
            if (refValue == sampleValue) {
                matches[matchCount++] = refValue;
                refPos++;
                samplePos++;
            } else if (sampleValue < refValue) {
                // Advance whichever side currently holds the smaller value.
                samplePos++;
            } else {
                refPos++;
            }
        }
        return Arrays.copyOf(matches, matchCount);
    }

    /**
     * Returns a copy of the given array that is one element longer.
     *
     * @param arr the array to copy
     * @return a new array of length {@code arr.length + 1}; the extra trailing slot is zero
     * @throws NullPointerException if {@code arr} is {@code null}
     */
    public static int[] resize(int[] arr) {
        return Arrays.copyOf(arr, arr.length + 1);
    }

    /**
     * Returns a copy of the given array that is one element longer.
     *
     * @param arr the array to copy
     * @return a new array of length {@code arr.length + 1}; the extra trailing slot is zero
     * @throws NullPointerException if {@code arr} is {@code null}
     */
    public static long[] resize(long[] arr) {
        return Arrays.copyOf(arr, arr.length + 1);
    }

}
|
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright (c) 2023 nosqlbench
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.nosqlbench.engine.extensions.vectormath;
|
||||
|
||||
import io.pinecone.proto.QueryResponse;
|
||||
import io.pinecone.proto.ScoredVector;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class PineconeVectorMath {
|
||||
|
||||
public static long[] stringArrayAsALongArray(String[] strings) {
|
||||
long[] longs = new long[strings.length];
|
||||
for (int i = 0; i < longs.length; i++) {
|
||||
longs[i]=Long.parseLong(strings[i]);
|
||||
}
|
||||
return longs;
|
||||
}
|
||||
|
||||
public static int[] stringArrayAsIntArray(String[] strings) {
|
||||
int[] ints = new int[strings.length];
|
||||
for (int i = 0; i < ints.length; i++) {
|
||||
ints[i]=Integer.parseInt(strings[i]);
|
||||
}
|
||||
return ints;
|
||||
}
|
||||
|
||||
public static String[] idsToStringArray(QueryResponse response) {
|
||||
return response.getMatchesList().stream().map(ScoredVector::getId).toArray(String[]::new);
|
||||
}
|
||||
|
||||
public static int[] idsToIntArray(QueryResponse response) {
|
||||
return response.getMatchesList().stream().mapToInt(r -> Integer.parseInt(r.getId())).toArray();
|
||||
}
|
||||
|
||||
public static double computeRecall(long[] referenceIndexes, long[] sampleIndexes) {
|
||||
Arrays.sort(referenceIndexes);
|
||||
Arrays.sort(sampleIndexes);
|
||||
long[] intersection = PineconeIntersections.find(referenceIndexes,sampleIndexes);
|
||||
return (double)intersection.length/(double)referenceIndexes.length;
|
||||
}
|
||||
|
||||
public static double computeRecall(int[] referenceIndexes, int[] sampleIndexes) {
|
||||
Arrays.sort(referenceIndexes);
|
||||
Arrays.sort(sampleIndexes);
|
||||
int[] intersection = PineconeIntersections.find(referenceIndexes,sampleIndexes);
|
||||
return (double)intersection.length/(double)referenceIndexes.length;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2023 nosqlbench
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.nosqlbench.engine.extensions.vectormath;
|
||||
|
||||
import com.codahale.metrics.MetricRegistry;
|
||||
import io.nosqlbench.api.config.LabeledScenarioContext;
|
||||
import io.nosqlbench.api.extensions.ScriptingPluginInfo;
|
||||
import io.nosqlbench.nb.annotations.Service;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
@Service(value = ScriptingPluginInfo.class,selector = "pinecone_vectormath")
|
||||
public class PineconeVectorMathPluginInfo implements ScriptingPluginInfo<PineconeVectorMath> {
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return "various methods and utilities for working with vector math in a scripted environment";
|
||||
}
|
||||
|
||||
@Override
|
||||
public PineconeVectorMath getExtensionObject(Logger logger, MetricRegistry metricRegistry, LabeledScenarioContext scriptContext) {
|
||||
return new PineconeVectorMath();
|
||||
}
|
||||
}
|
@ -1,36 +1,32 @@
|
||||
scenarios:
|
||||
default:
|
||||
mixed: run driver=pinecone cycles=1000 apiKey=2f55b2f0-670f-4c51-9073-4d37142b761a projectName=a850334 environment=us-east-1-aws tags='block:main-.*'
|
||||
verify: run driver=pinecone cycles=10 threads=10 apiKey=6503c344-5967-421d-b19a-3e7955842253 projectName=f88a480 environment=eu-west4-gcp tags=block:verify
|
||||
|
||||
bindings:
|
||||
id: Mod(<<keycount:1000000000>>); ToString() -> String
|
||||
vector_value: CircleVectors(100000, "io.nosqlbench.virtdata.library.basics.shared.vectors.algorithms.GoldenAngle")
|
||||
state: StateCodes()
|
||||
vector: HdfFileToFloatList("glove-25-angular.hdf5", "/test")
|
||||
validation_set: HdfFileToIntArray("glove-25-angular.hdf5", "/neighbors")
|
||||
|
||||
blocks:
|
||||
# main-write:
|
||||
# params:
|
||||
# ratio: 1
|
||||
# ops:
|
||||
# op1:
|
||||
# upsert: "circles"
|
||||
# namespace: "example_namespace"
|
||||
# upsert_vectors:
|
||||
# - id: "{id}"
|
||||
# values: "{vector_value}"
|
||||
# metadata:
|
||||
# state: "{state}"
|
||||
main-read:
|
||||
params:
|
||||
ratio: 1
|
||||
verify:
|
||||
ops:
|
||||
op1:
|
||||
query: "circles"
|
||||
vector: "{vector_value}"
|
||||
ops1:
|
||||
query: "glove25"
|
||||
namespace: "example_namespace"
|
||||
top_k: 1
|
||||
vector: "{vector}"
|
||||
top_k: 100
|
||||
include_values: true
|
||||
include_metadata: true
|
||||
# result should be of type QueryResponse
|
||||
include_metadata: false
|
||||
verifier-imports:
|
||||
- "io.nosqlbench.api.engine.metrics.ActivityMetrics"
|
||||
- "io.nosqlbench.engine.extensions.vectormath.Intersections"
|
||||
verifier-init: |
|
||||
recallHisto = ActivityMetrics.histogram(_parsed_op,"recall-histo",4);
|
||||
verifier: |
|
||||
result.getMatchesList().get(0).getValuesList()=={vector_value}
|
||||
found_string_ids=pinecone_vectormath.idsToStringArray(result);
|
||||
found_int_ids=pinecone_vectormath.stringArrayAsIntArray(found_string_ids);
|
||||
print(found_int_ids)
|
||||
print({validation_set})
|
||||
recall = pinecone_vectormath.computeRecall(found_int_ids, {validation_set})
|
||||
print(recall)
|
||||
recallHisto.update((long)(recall*1000000.0));
|
||||
return true;
|
||||
|
Loading…
Reference in New Issue
Block a user