mirror of
https://github.com/nosqlbench/nosqlbench.git
synced 2024-12-26 08:41:05 -06:00
update for dataset urls and database name on neo
This commit is contained in:
parent
fa39a64846
commit
72da2b0d43
@ -0,0 +1,135 @@
|
|||||||
|
min_version: 5.21.1
|
||||||
|
description: |
|
||||||
|
Vector workload for Neo4J
|
||||||
|
|
||||||
|
Template Variables:
|
||||||
|
TEMPLATE(dataset)
|
||||||
|
TEMPLATE(node_label,Node)
|
||||||
|
TEMPLATE(k,100)
|
||||||
|
TEMPLATE(batch_size)
|
||||||
|
TEMPLATE(delete_batch_size,1000)
|
||||||
|
|
||||||
|
params:
|
||||||
|
driver: neo4j
|
||||||
|
instrument: true
|
||||||
|
labels:
|
||||||
|
target: TEMPLATE(targetname,neo4j)
|
||||||
|
database: TEMPLATE(database,neo4j)
|
||||||
|
|
||||||
|
scenarios:
|
||||||
|
default:
|
||||||
|
# Remove any existing data
|
||||||
|
drop: >-
|
||||||
|
run tags='block:drop' threads===1 cycles===UNDEF
|
||||||
|
labels='target:TEMPLATE(targetname,neo4j)'
|
||||||
|
errors=count
|
||||||
|
# Install the schema required to run the test
|
||||||
|
schema: >-
|
||||||
|
run tags='block:schema' threads===1 cycles===UNDEF
|
||||||
|
labels='target:TEMPLATE(targetname,neo4j)'
|
||||||
|
# Load training data, measure how long it takes to load
|
||||||
|
rampup: >-
|
||||||
|
run tags='block:rampup_batch' threads=TEMPLATE(rampup_threads,auto)
|
||||||
|
cycles===TEMPLATE(rampup_cycles,TEMPLATE(trainsize))
|
||||||
|
errors=count,warn
|
||||||
|
labels='target:TEMPLATE(targetname,neo4j)'
|
||||||
|
# Measure how the system responds to queries under a read-only workload
|
||||||
|
search_and_verify: >-
|
||||||
|
run alias=search_and_verify tags='block:search_and_verify'
|
||||||
|
threads=TEMPLATE(search_threads,auto) cycles===TEMPLATE(search_cycles,TEMPLATE(testsize))
|
||||||
|
errors=count,warn
|
||||||
|
labels='target:TEMPLATE(targetname,neo4j)'
|
||||||
|
verify_recall: >-
|
||||||
|
run alias=verify_recall tags='block:search_and_verify'
|
||||||
|
threads=TEMPLATE(search_threads,auto) cycles===TEMPLATE(search_cycles,TEMPLATE(testsize))
|
||||||
|
errors=count,warn
|
||||||
|
labels='target:TEMPLATE(targetname,neo4j)'
|
||||||
|
|
||||||
|
|
||||||
|
bindings:
|
||||||
|
id: ToString()
|
||||||
|
id_batch: Mul(TEMPLATE(batch_size)L); ListSizedStepped(TEMPLATE(batch_size),long->ToString());
|
||||||
|
train_vector: HdfFileToFloatList("TEMPLATE(dataset)", "/train");
|
||||||
|
train_vector_batch: Mul(TEMPLATE(batch_size)L); ListSizedStepped(TEMPLATE(batch_size),HdfFileToFloatList("TEMPLATE(dataset)", "/train"));
|
||||||
|
test_vector: HdfFileToFloatList("TEMPLATE(dataset)", "/test");
|
||||||
|
relevant_indices: HdfFileToIntArray("TEMPLATE(dataset)", "/neighbors")
|
||||||
|
|
||||||
|
blocks:
|
||||||
|
# TODO: Node deletion times out; attempt this in future: CREATE OR REPLACE DATABASE neo4j
|
||||||
|
drop:
|
||||||
|
ops:
|
||||||
|
# Reference: https://support.neo4j.com/s/article/360059882854-Deleting-large-numbers-of-nodes#h_01H95CXNJ8TN4126T3Y01BRWKS
|
||||||
|
delete_nodes:
|
||||||
|
sync_autocommit: |
|
||||||
|
MATCH (n)
|
||||||
|
CALL { WITH n
|
||||||
|
DETACH DELETE n
|
||||||
|
} IN TRANSACTIONS OF $delete_batch_size ROWS;
|
||||||
|
query_params:
|
||||||
|
delete_batch_size: TEMPLATE(delete_batch_size,1000)
|
||||||
|
drop_index:
|
||||||
|
sync_autocommit: DROP INDEX $index_name IF EXISTS
|
||||||
|
query_params:
|
||||||
|
index_name: vector_index
|
||||||
|
|
||||||
|
schema:
|
||||||
|
ops:
|
||||||
|
create_vector_index:
|
||||||
|
sync_autocommit: |
|
||||||
|
CREATE VECTOR INDEX $index_name IF NOT EXISTS FOR (n:TEMPLATE(node_label,Node))
|
||||||
|
ON (n.embedding) OPTIONS
|
||||||
|
{indexConfig: {`vector.dimensions`: $dimensions, `vector.similarity_function`: $similarity_function}}
|
||||||
|
query_params:
|
||||||
|
index_name: vector_index
|
||||||
|
dimensions: TEMPLATE(dimensions)
|
||||||
|
similarity_function: TEMPLATE(similarity_function,cosine)
|
||||||
|
|
||||||
|
rampup:
|
||||||
|
ops:
|
||||||
|
insert_node:
|
||||||
|
async_write_transaction: |
|
||||||
|
CREATE (v:TEMPLATE(node_label,Node) {id: $id, embedding: $vector})
|
||||||
|
query_params:
|
||||||
|
id: '{id}'
|
||||||
|
vector: '{train_vector}'
|
||||||
|
|
||||||
|
rampup_batch:
|
||||||
|
ops:
|
||||||
|
# Reference: https://community.neo4j.com/t/unwind-multiple-arrays-to-set-property/59908/5
|
||||||
|
insert_nodes:
|
||||||
|
async_write_transaction: |
|
||||||
|
WITH $id_list as ids, $vector_list as vectors
|
||||||
|
UNWIND RANGE(0, size(ids) - 1) as idx
|
||||||
|
CREATE (v:TEMPLATE(node_label,Node) {id: ids[idx], embedding: vectors[idx]})
|
||||||
|
query_params:
|
||||||
|
id_list: '{id_batch}'
|
||||||
|
vector_list: '{train_vector_batch}'
|
||||||
|
|
||||||
|
search_and_verify:
|
||||||
|
ops:
|
||||||
|
search:
|
||||||
|
async_read_transaction: |
|
||||||
|
WITH $query_vector AS queryVector
|
||||||
|
CALL db.index.vector.queryNodes($index_name, $k, queryVector)
|
||||||
|
YIELD node
|
||||||
|
RETURN node.id
|
||||||
|
query_params:
|
||||||
|
query_vector: '{test_vector}'
|
||||||
|
index_name: vector_index
|
||||||
|
k: TEMPLATE(k,100)
|
||||||
|
verifier-init: |
  // Groovy init script for this op's verifier: creates the `relevancy` measures object
  // (constructed from _parsed_op) that the `verifier` script later feeds via relevancy.accept(knn, ann).
  relevancy = new io.nosqlbench.nb.api.engine.metrics.wrappers.RelevancyMeasures(_parsed_op);
  // Score at the same k that the search op passes as its `k` query parameter (TEMPLATE(k,100)),
  // so overriding k on the command line keeps the metrics aligned with the requested result count.
  // With the default of 100 this is identical to the previous hard-coded List.of(100).
  for (int k in List.of(TEMPLATE(k,100))) {
    relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.recall("recall",k));
    relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.precision("precision",k));
    relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.F1("F1",k));
    relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.reciprocal_rank("RR",k));
    relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.average_precision("AP",k));
  }
|
||||||
|
verifier: |
|
||||||
|
// result is a Record[]
|
||||||
|
values = io.nosqlbench.adapter.neo4j.Neo4JAdapterUtils.getFieldForAllRecords(result, "node.id")
|
||||||
|
ann = values.collect { it.toString().toInteger() }.toArray(new Integer[values.size()])
|
||||||
|
knn = {relevant_indices}
|
||||||
|
relevancy.accept(knn, ann);
|
||||||
|
return true;
|
Loading…
Reference in New Issue
Block a user