From 8f4aa5cec5a81db1987f8f0afe7a5c0200f91e7c Mon Sep 17 00:00:00 2001
From: Mark Wolters
Date: Thu, 8 Feb 2024 19:23:04 -0400
Subject: [PATCH] adding ann_benchmarks.yaml

---
 .../resources/activities/ann_benchmarks.yaml  | 82 +++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 adapter-aws-opensearch/src/main/resources/activities/ann_benchmarks.yaml

diff --git a/adapter-aws-opensearch/src/main/resources/activities/ann_benchmarks.yaml b/adapter-aws-opensearch/src/main/resources/activities/ann_benchmarks.yaml
new file mode 100644
index 000000000..bfdcd474a
--- /dev/null
+++ b/adapter-aws-opensearch/src/main/resources/activities/ann_benchmarks.yaml
@@ -0,0 +1,82 @@
+description: |
+  basic test for vectors in open search
+  https://www.elastic.co/guide/en/elasticsearch/reference/current/rest-apis.html
+  template vars:
+  TEMPLATE(indexname,vectors_index)
+  TEMPLATE(dimensions,25)
+
+params:
+  driver: opensearch
+  instrument: true
+
+scenarios:
+
+  vectors:
+    drop: run tags='block:drop' labels='target:opensearch' threads===1 cycles===UNDEF
+    schema: run tags='block:schema' labels='target:opensearch' threads===1 cycles===UNDEF
+    rampup: run tags='block:rampup' labels='target:opensearch' threads=TEMPLATE(rampup_threads,10) cycles=TEMPLATE(trainsize)
+    search: run tags='block:search' labels='target:opensearch' threads=TEMPLATE(search_threads,10) cycles=TEMPLATE(testsize)
+  # errors=counter,warn,log
+
+bindings:
+  id: ToString()
+  test_floatlist: HdfFileToFloatList("testdata/TEMPLATE(dataset).hdf5", "/test")
+  relevant_indices: HdfFileToIntArray("testdata/TEMPLATE(dataset).hdf5", "/neighbors")
+  distance_floatlist: HdfFileToFloatList("testdata/TEMPLATE(dataset).hdf5", "/distance")
+  train_floatlist: HdfFileToFloatList("testdata/TEMPLATE(dataset).hdf5", "/train")
+
+blocks:
+  drop: # deletes the target index
+    ops:
+      drop_index:
+        delete_index: TEMPLATE(indexname,vectors_index)
+
+  schema: # creates the target index
+    ops:
+      create_index:
+        create_index: TEMPLATE(indexname,vectors_index)
+        mappings:
+          m1: v1 # NOTE(review): looks like a placeholder mapping - confirm
+  search: # k-NN query against field "value" using the /test vectors
+    ops:
+      search:
+        knn_search: TEMPLATE(indexname,vectors_index)
+        k: TEMPLATE(k,100)
+        vector: "{test_floatlist}"
+        field: value
+  search_and_verify: # same k-NN query, plus relevancy measures against {relevant_indices}
+    ops:
+      select_ann_limit_TEMPLATE(k,100):
+        knn_search: TEMPLATE(indexname,vectors_index)
+        k: TEMPLATE(k,100)
+        vector: "{test_floatlist}"
+        field: value
+        verifier-init: |
+          relevancy=new io.nosqlbench.nb.api.engine.metrics.wrappers.RelevancyMeasures(_parsed_op)
+          for (int k in List.of(TEMPLATE(k,100))) {
+            relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.recall("recall",k));
+            relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.precision("precision",k));
+            relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.F1("F1",k));
+            relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.reciprocal_rank("RR",k));
+            relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.average_precision("AP",k));
+          }
+        verifier: |
+          // driver-specific function; NOTE(review): pinecone_utils is named after the Pinecone adapter - confirm it is bound for the opensearch driver
+          actual_indices=pinecone_utils.responseIdsToIntArray(result)
+          // driver-agnostic function
+          relevancy.accept({relevant_indices},actual_indices);
+          return true;
+  bulkrampup: # bulk variant of rampup; not referenced by the "vectors" scenario above
+    ops:
+      bulk_index:
+        bulk_index:
+          - '{"key":"{id}","value":"{train_floatlist}"}'
+          - # NOTE(review): empty (null) list entry kept as-is - confirm the intended bulk payload
+
+  rampup: # indexes documents of the form {key, value} from the /train vectors
+    ops:
+      index:
+        index: TEMPLATE(indexname,vectors_index)
+        document:
+          key: "{id}"
+          value: "{train_floatlist}"