Merge branch 'nosqlbench:main' into main

Authored by yabinmeng on 2023-10-18 08:42:46 -05:00; committed by GitHub.
9 changed files with 375 additions and 27 deletions

View File

@@ -0,0 +1,66 @@
package io.nosqlbench.adapter.http;
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import java.util.ArrayList;
import java.util.List;
public class JsonElementUtils {
/**
* Extracts the integer "key" field from each hit of an Elasticsearch-style
* search response shaped like:
* <pre>{@code
* "hits": {
*   "hits": [
*     {
*       "_score": 1,
*       "_id": "doGwOYsBv7KeAUqukb5D",
*       "_source": {
*         "key": 550,
*         "value": [
*           -0.34495,
*           1.0193,
*           0.87505,
* }</pre>
* @param jsonElement the parsed JSON response body
* @return the "key" value of each hit, in order, or null if the response has no hits
*/
public static int[] getIntArrayFromHits(JsonElement jsonElement) {
JsonObject json = jsonElement.getAsJsonObject();
if (!json.has("hits") || !json.getAsJsonObject("hits").has("hits")) {
return null;
}
JsonArray hits = json.getAsJsonObject("hits").getAsJsonArray("hits");
int count = hits.size();
int[] keys = new int[count];
int i = 0;
for (JsonElement element : hits) {
JsonObject hit = element.getAsJsonObject();
keys[i] = hit.getAsJsonObject("_source").get("key").getAsInt();
i++;
}
return keys;
}
}
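A minimal usage sketch for this helper (illustrative only; the response string below is a hand-built Elasticsearch-style body, not real output):

import com.google.gson.JsonElement;
import com.google.gson.JsonParser;

public class JsonElementUtilsExample {
    public static void main(String[] args) {
        // Hand-built sample body with two hits; the keys 550 and 12 are arbitrary.
        String body = "{\"hits\":{\"hits\":[{\"_source\":{\"key\":550}},{\"_source\":{\"key\":12}}]}}";
        JsonElement parsed = JsonParser.parseString(body);
        int[] keys = JsonElementUtils.getIntArrayFromHits(parsed);
        System.out.println(java.util.Arrays.toString(keys)); // prints [550, 12]
    }
}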

View File

@@ -46,14 +46,24 @@ public class HttpOp implements CycleOp {
private final HttpClient client;
private final HttpSpace space;
private final long cycle;
private final HttpResultType resultType;
public HttpOp(
HttpClient client,
HttpRequest request,
Pattern ok_status,
Pattern ok_body,
HttpSpace space,
long cycle,
HttpResultType resultType
) {
this.client = client;
this.request = request;
this.ok_status = ok_status;
this.ok_body = ok_body;
this.space = space;
this.cycle = cycle;
this.resultType = resultType;
}
@Override
@@ -91,31 +101,14 @@ public class HttpOp implements CycleOp {
System.out.println();
}
// propagate exception so main error handling logic can take over
if (error != null) {
throw new RuntimeException(error);
}
}
return switch (resultType) {
case string -> response.body();
case json_element -> JsonParser.parseString(response.body()).getAsJsonObject();
case none -> null;
};
}
}
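As a rough sketch of what each branch of that switch yields (illustrative only, not part of the commit; "body" stands in for response.body()):

String body = "{\"took\":3,\"hits\":{\"hits\":[]}}";
Object asString = body;                                          // result-type: string -> the raw body, unparsed
Object asJson = JsonParser.parseString(body).getAsJsonObject();  // result-type: json_element -> parsed JSON
Object asNone = null;                                            // result-type: none (the default)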

View File

@@ -93,7 +93,7 @@ public class HttpOpDispenser extends BaseOpDispenser<HttpOp, HttpSpace> {
.filter(n -> n.charAt(0) >= 'A')
.filter(n -> n.charAt(0) <= 'Z')
.toList();
if (!headerNames.isEmpty()) {
for (String headerName : headerNames) {
initBuilderF = op.enhanceFunc(initBuilderF, headerName, String.class, (b, h) -> b.header(headerName, h));
}
@@ -113,12 +113,16 @@ public class HttpOpDispenser extends BaseOpDispenser<HttpOp, HttpSpace> {
.map(Pattern::compile)
.orElse(null);
HttpResultType resultType = op.getOptionalEnumFromField(HttpResultType.class, "result-type").orElse(HttpResultType.none);
LongFunction<HttpOp> opFunc = cycle -> new HttpOp(
ctxF.apply(cycle).getClient(),
reqF.apply(cycle),
ok_status,
ok_body,
ctxF.apply(cycle),
cycle,
resultType
);
return opFunc;
}

View File

@@ -0,0 +1,33 @@
package io.nosqlbench.adapter.http.core;
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import com.google.gson.JsonElement;
public enum HttpResultType {
none(Void.class),
string(String.class),
json_element(JsonElement.class);
public final Class<?> resultClass;
HttpResultType(Class<?> resultClass) {
this.resultClass = resultClass;
}
}

View File

@@ -0,0 +1,235 @@
min_version: "5.17.3"
description: |
A workload which reads ann-benchmarks vector data from the HDF5 file format.
scenarios:
#main: run driver=http tags=='block:main.*' cycles===1 stride=1 threads=1
schema: run driver=http tags==block:schema threads==1 cycles==UNDEF
load:
schema: run driver=http tags==block:schema threads==1 cycles==UNDEF diag=all
rampup: run driver=http tags==block:rampup cycles===TEMPLATE(rampup-cycles,100) threads=100
#rampup: run driver=http tags==block:rampup cycles===47341 threads=1
#rampup: run driver=http tags==block:rampup cycles===1183514 threads=10
drop-tables:
schema: run driver=http tags==block:drop-tables threads==1 cycles==UNDEF
truncate: run driver=http tags==block:truncate-tables cycles===1 threads=1
#reads: run driver=http tags==block:main-read cycles===TEMPLATE(read-cycles,100) threads=100
reads: run driver=http tags==block:main-read cycles===TEMPLATE(read-cycles,100) threads=10
bindings:
rw_key1: Mul(25); ToString()
train_vector1: Mul(25); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key2: Mul(25); Add(1); ToString()
train_vector2: Mul(25); Add(1); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key3: Mul(25); Add(2); ToString()
train_vector3: Mul(25); Add(2); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key4: Mul(25); Add(3); ToString()
train_vector4: Mul(25); Add(3); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key5: Mul(25); Add(4); ToString()
train_vector5: Mul(25); Add(4); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key6: Mul(25); Add(5); ToString()
train_vector6: Mul(25); Add(5); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key7: Mul(25); Add(6); ToString()
train_vector7: Mul(25); Add(6); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key8: Mul(25); Add(7); ToString()
train_vector8: Mul(25); Add(7); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key9: Mul(25); Add(8); ToString()
train_vector9: Mul(25); Add(8); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key10: Mul(25); Add(9); ToString()
train_vector10: Mul(25); Add(9); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key11: Mul(25); Add(10); ToString()
train_vector11: Mul(25); Add(10); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key12: Mul(25); Add(11); ToString()
train_vector12: Mul(25); Add(11); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key13: Mul(25); Add(12); ToString()
train_vector13: Mul(25); Add(12); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key14: Mul(25); Add(13); ToString()
train_vector14: Mul(25); Add(13); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key15: Mul(25); Add(14); ToString()
train_vector15: Mul(25); Add(14); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key16: Mul(25); Add(15); ToString()
train_vector16: Mul(25); Add(15); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key17: Mul(25); Add(16); ToString()
train_vector17: Mul(25); Add(16); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key18: Mul(25); Add(17); ToString()
train_vector18: Mul(25); Add(17); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key19: Mul(25); Add(18); ToString()
train_vector19: Mul(25); Add(18); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key20: Mul(25); Add(19); ToString()
train_vector20: Mul(25); Add(19); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key21: Mul(25); Add(20); ToString()
train_vector21: Mul(25); Add(20); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key22: Mul(25); Add(21); ToString()
train_vector22: Mul(25); Add(21); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key23: Mul(25); Add(22); ToString()
train_vector23: Mul(25); Add(22); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key24: Mul(25); Add(23); ToString()
train_vector24: Mul(25); Add(23); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key25: Mul(25); Add(24); ToString()
train_vector25: Mul(25); Add(24); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key: ToString()
train_vector: HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
test_vector: HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/test") ; ToCqlVector()
validation_set: HdfFileToIntArray("TEMPLATE(hdf5_path)", "/neighbors")
ops:
vop1:
stmt: "number:{number} name:{number_name}\n"
verifier-imports:
- "io.nosqlbench.api.engine.metrics.ActivityMetrics"
verifier-init: |
recallHisto = ActivityMetrics.histogram(_parsed_op,"recall-histo",4);
verifier: |
// double recall = vectormath.computeRecall(result,result)
recallHisto.update(cycle);
return true;
blocks:
reset-schema:
ops:
- delete-index: |
DELETE TEMPLATE(url, https://TODO.com)/TEMPLATE(index,vector)
Authorization: ApiKey TEMPLATE(apikey, required)
Content-Type: application/json
schema:
ops:
- create-index: |
PUT TEMPLATE(url, https://TODO.com)/TEMPLATE(index, vector)
Authorization: ApiKey TEMPLATE(apikey, required)
Content-Type: application/json
{
"mappings": {
"properties": {
"value": {
"type": "dense_vector",
"dims": TEMPLATE(dimensions, 25),
"index": true,
"similarity": "TEMPLATE(similarity_function, cosine)"
},
"key": {
"type": "text"
}
}
}
}
rampup:
ops:
- rampup-bulk-insert: |
POST TEMPLATE(url, https://TODO.com)/TEMPLATE(index,vector)/_bulk?refresh=true
Authorization: ApiKey TEMPLATE(apikey, required)
Content-Type: application/json
{"index": {} }
{"value": {train_vector1},"key": {rw_key1}}
{"index": {} }
{"value": {train_vector2},"key": {rw_key2}}
{"index": {} }
{"value": {train_vector3},"key": {rw_key3}}
{"index": {} }
{"value": {train_vector4},"key": {rw_key4}}
{"index": {} }
{"value": {train_vector5},"key": {rw_key5}}
{"index": {} }
{"value": {train_vector6},"key": {rw_key6}}
{"index": {} }
{"value": {train_vector7},"key": {rw_key7}}
{"index": {} }
{"value": {train_vector8},"key": {rw_key8}}
{"index": {} }
{"value": {train_vector9},"key": {rw_key9}}
{"index": {} }
{"value": {train_vector10},"key": {rw_key10}}
{"index": {} }
{"value": {train_vector11},"key": {rw_key11}}
{"index": {} }
{"value": {train_vector12},"key": {rw_key12}}
{"index": {} }
{"value": {train_vector13},"key": {rw_key13}}
{"index": {} }
{"value": {train_vector14},"key": {rw_key14}}
{"index": {} }
{"value": {train_vector15},"key": {rw_key15}}
{"index": {} }
{"value": {train_vector16},"key": {rw_key16}}
{"index": {} }
{"value": {train_vector17},"key": {rw_key17}}
{"index": {} }
{"value": {train_vector18},"key": {rw_key18}}
{"index": {} }
{"value": {train_vector19},"key": {rw_key19}}
{"index": {} }
{"value": {train_vector20},"key": {rw_key20}}
{"index": {} }
{"value": {train_vector21},"key": {rw_key21}}
{"index": {} }
{"value": {train_vector22},"key": {rw_key22}}
{"index": {} }
{"value": {train_vector23},"key": {rw_key23}}
{"index": {} }
{"value": {train_vector24},"key": {rw_key24}}
{"index": {} }
{"value": {train_vector25},"key": {rw_key25}}
#- rampup-insert: |
# POST TEMPLATE(url, https://TODO.com)/TEMPLATE(index,vector)//_doc?refresh=true
# Authorization: ApiKey TEMPLATE(apikey, required)
# Content-Type: application/json
# {
# "value": {train_vector},
# "key": {rw_key}
# }
main-read:
params:
ratio: TEMPLATE(read_ratio,90)
instrument: true
ops:
- main-select-ann-limit:
op: |
POST TEMPLATE(url, https://TODO.com)/TEMPLATE(index,vector)/_search
Authorization: ApiKey TEMPLATE(apikey, required)
Content-Type: application/json
{
"from" : 0,
"size" : TEMPLATE(k,100),
"knn": {
"field": "value",
"query_vector": {test_vector},
"k": TEMPLATE(k, 100),
"num_candidates": TEMPLATE(k,100)
}
}
result-type: json_element
verifier-imports:
- io.nosqlbench.adapter.http.JsonElementUtils
verifier-init: |
k=TEMPLATE(top_k,100)
relevancy=scriptingmetrics.newRelevancyMeasures(_parsed_op);
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.recall("recall",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.precision("precision",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.F1("F1",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.reciprocal_rank("RR",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.average_precision("AP",k));
verifier: |
actual_indices=JsonElementUtils.getIntArrayFromHits(result);
relevancy.accept({relevant_indices},actual_indices);
return true;
main-write:
params:
ratio: TEMPLATE(write_ratio,10)
cl: TEMPLATE(write_cl,LOCAL_QUORUM)
instrument: true
prepared: true
ops:
- main-insert: |
POST TEMPLATE(url, https://TODO.com)/TEMPLATE(index,vector)/_doc?refresh=true
Authorization: ApiKey TEMPLATE(apikey, required)
Content-Type: application/json
{
"value": {train_data},
"key": {rw_key}
}

View File

@@ -163,6 +163,11 @@ defaults:
- **ok-body** - An optional regex pattern which will be applied to the
body to verify that it is a valid response. If this is not provided,
then content bodies are read, but any content is considered valid.
- **result-type** - Optional; one of `none`, `string`, or `json_element`.
  By default this is set to `none`, and the http op produces no result.
  If you use `string`, the raw response body is returned; if you use
  `json_element`, the body is presumed to be valid JSON, and it is parsed
  and returned as a JsonElement. An illustrative op template follows below.
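For example, a search op that wants the parsed body available to its verifier could be written as follows (an illustrative sketch; the URL and index name are placeholders, not from this commit):

    ops:
      example-search:
        op: |
          POST http://localhost:9200/myindex/_search
          Content-Type: application/json
          {"query": {"match_all": {}}}
        result-type: json_element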
Any other statement parameter which is capitalized is taken as a request
header. If additional fields are provided which are not included in the

View File

@@ -26,7 +26,7 @@
<properties>
<revision>5.17.6-SNAPSHOT</revision>
<!-- Set this level to override the logging level for tests during build -->
<project.testlevel>INFO</project.testlevel>
<!-- Set this level to override the logging level for tests logging configuration during build -->

View File

@@ -70,4 +70,8 @@ public class DoubleSummaryGauge implements NBMetricGauge<Double>, DoubleConsumer
return labels;
}
@Override
public String toString() {
return this.labels.toString() + ":" + this.stats.toString();
}
}

View File

@@ -71,4 +71,12 @@ public class RelevancyMeasures implements NBLabeledElement {
}
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
for (DoubleSummaryGauge gauge : gauges) {
sb.append(gauge.toString()).append("\n");
}
return sb.toString();
}
}