Merge branch 'nosqlbench:main' into main

Authored by yabinmeng on 2023-10-18 08:42:46 -05:00; committed by GitHub.
9 changed files with 375 additions and 27 deletions

View File

@@ -0,0 +1,66 @@
package io.nosqlbench.adapter.http;
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import java.util.ArrayList;
import java.util.List;
public class JsonElementUtils {
/**
* Extracts the integer "key" field from each hit of an Elasticsearch-style
* search response shaped like:
* <pre>{@code
* "hits": {
*   "hits": [
*     {
*       "_score": 1,
*       "_id": "doGwOYsBv7KeAUqukb5D",
*       "_source": {
*         "key": 550,
*         "value": [
*           -0.34495,
*           1.0193,
*           0.87505,
* }</pre>
* @param jsonElement the parsed JSON response body
* @return the "key" value of each hit, in order, or null if the response has no hits
*/
public static int[] getIntArrayFromHits(JsonElement jsonElement) {
JsonObject json = jsonElement.getAsJsonObject();
if (!json.has("hits") || !json.getAsJsonObject("hits").has("hits")) {
return null;
}
JsonArray hits = json.getAsJsonObject("hits").getAsJsonArray("hits");
int count = hits.size();
int[] keys = new int[count];
int i = 0;
for (JsonElement element : hits) {
JsonObject hit = element.getAsJsonObject();
keys[i] = hit.getAsJsonObject("_source").get("key").getAsInt();
i++;
}
return keys;
}
}
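A minimal usage sketch for this helper (illustrative only; the response string below is a hand-built Elasticsearch-style body, not real output):

import com.google.gson.JsonElement;
import com.google.gson.JsonParser;

public class JsonElementUtilsExample {
    public static void main(String[] args) {
        // Hand-built sample body with two hits; the keys 550 and 12 are arbitrary.
        String body = "{\"hits\":{\"hits\":[{\"_source\":{\"key\":550}},{\"_source\":{\"key\":12}}]}}";
        JsonElement parsed = JsonParser.parseString(body);
        int[] keys = JsonElementUtils.getIntArrayFromHits(parsed);
        System.out.println(java.util.Arrays.toString(keys)); // prints [550, 12]
    }
}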

View File

@@ -46,14 +46,24 @@ public class HttpOp implements CycleOp {
private final HttpClient client;
private final HttpSpace space;
private final long cycle;
private final HttpResultType resultType;
public HttpOp(
HttpClient client,
HttpRequest request,
Pattern ok_status,
Pattern ok_body,
HttpSpace space,
long cycle,
HttpResultType resultType
) {
this.client = client;
this.request = request;
this.ok_status = ok_status;
this.ok_body = ok_body;
this.space = space;
this.cycle = cycle;
this.resultType = resultType;
}
@Override
@@ -91,31 +101,14 @@ public class HttpOp implements CycleOp {
System.out.println();
}
// propagate exception so main error handling logic can take over
if (error != null) {
throw new RuntimeException(error);
}
}
return switch (resultType) {
case string -> response.body();
case json_element -> JsonParser.parseString(response.body()).getAsJsonObject();
case none -> null;
};
}
}
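As a rough sketch of what each branch of that switch yields (illustrative only, not part of the commit; "body" stands in for response.body()):

String body = "{\"took\":3,\"hits\":{\"hits\":[]}}";
Object asString = body;                                          // result-type: string -> the raw body, unparsed
Object asJson = JsonParser.parseString(body).getAsJsonObject();  // result-type: json_element -> parsed JSON
Object asNone = null;                                            // result-type: none (the default)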

View File

@@ -93,7 +93,7 @@ public class HttpOpDispenser extends BaseOpDispenser<HttpOp, HttpSpace> {
.filter(n -> n.charAt(0) >= 'A')
.filter(n -> n.charAt(0) <= 'Z')
.toList();
if (!headerNames.isEmpty()) {
for (String headerName : headerNames) {
initBuilderF = op.enhanceFunc(initBuilderF, headerName, String.class, (b, h) -> b.header(headerName, h));
}
@@ -113,12 +113,16 @@ public class HttpOpDispenser extends BaseOpDispenser<HttpOp, HttpSpace> {
.map(Pattern::compile)
.orElse(null);
HttpResultType resultType = op.getOptionalEnumFromField(HttpResultType.class, "result-type").orElse(HttpResultType.none);
LongFunction<HttpOp> opFunc = cycle -> new HttpOp(
ctxF.apply(cycle).getClient(),
reqF.apply(cycle),
ok_status,
ok_body,
ctxF.apply(cycle),
cycle,
resultType
);
return opFunc;
}

View File

@@ -0,0 +1,33 @@
package io.nosqlbench.adapter.http.core;
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import com.google.gson.JsonElement;
public enum HttpResultType {
none(Void.class),
string(String.class),
json_element(JsonElement.class);
public final Class<?> resultClass;
HttpResultType(Class<?> resultClass) {
this.resultClass = resultClass;
}
}

View File

@@ -0,0 +1,235 @@
min_version: "5.17.3"
description: |
A workload which reads ann-benchmarks vector data from the HDF5 file format.
scenarios:
#main: run driver=http tags=='block:main.*' cycles===1 stride=1 threads=1
schema: run driver=http tags==block:schema threads==1 cycles==UNDEF
load:
schema: run driver=http tags==block:schema threads==1 cycles==UNDEF diag=all
rampup: run driver=http tags==block:rampup cycles===TEMPLATE(rampup-cycles,100) threads=100
#rampup: run driver=http tags==block:rampup cycles===47341 threads=1
#rampup: run driver=http tags==block:rampup cycles===1183514 threads=10
drop-tables:
schema: run driver=http tags==block:drop-tables threads==1 cycles==UNDEF
truncate: run driver=http tags==block:truncate-tables cycles===1 threads=1
#reads: run driver=http tags==block:main-read cycles===TEMPLATE(read-cycles,100) threads=100
reads: run driver=http tags==block:main-read cycles===TEMPLATE(read-cycles,100) threads=10
bindings:
rw_key1: Mul(25); ToString()
train_vector1: Mul(25); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key2: Mul(25); Add(1); ToString()
train_vector2: Mul(25); Add(1); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key3: Mul(25); Add(2); ToString()
train_vector3: Mul(25); Add(2); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key4: Mul(25); Add(3); ToString()
train_vector4: Mul(25); Add(3); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key5: Mul(25); Add(4); ToString()
train_vector5: Mul(25); Add(4); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key6: Mul(25); Add(5); ToString()
train_vector6: Mul(25); Add(5); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key7: Mul(25); Add(6); ToString()
train_vector7: Mul(25); Add(6); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key8: Mul(25); Add(7); ToString()
train_vector8: Mul(25); Add(7); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key9: Mul(25); Add(8); ToString()
train_vector9: Mul(25); Add(8); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key10: Mul(25); Add(9); ToString()
train_vector10: Mul(25); Add(9); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key11: Mul(25); Add(10); ToString()
train_vector11: Mul(25); Add(10); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key12: Mul(25); Add(11); ToString()
train_vector12: Mul(25); Add(11); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key13: Mul(25); Add(12); ToString()
train_vector13: Mul(25); Add(12); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key14: Mul(25); Add(13); ToString()
train_vector14: Mul(25); Add(13); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key15: Mul(25); Add(14); ToString()
train_vector15: Mul(25); Add(14); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key16: Mul(25); Add(15); ToString()
train_vector16: Mul(25); Add(15); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key17: Mul(25); Add(16); ToString()
train_vector17: Mul(25); Add(16); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key18: Mul(25); Add(17); ToString()
train_vector18: Mul(25); Add(17); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key19: Mul(25); Add(18); ToString()
train_vector19: Mul(25); Add(18); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key20: Mul(25); Add(19); ToString()
train_vector20: Mul(25); Add(19); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key21: Mul(25); Add(20); ToString()
train_vector21: Mul(25); Add(20); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key22: Mul(25); Add(21); ToString()
train_vector22: Mul(25); Add(21); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key23: Mul(25); Add(22); ToString()
train_vector23: Mul(25); Add(22); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key24: Mul(25); Add(23); ToString()
train_vector24: Mul(25); Add(23); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key25: Mul(25); Add(24); ToString()
train_vector25: Mul(25); Add(24); HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
rw_key: ToString()
train_vector: HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/train") ; ToCqlVector()
test_vector: HdfFileToFloatArray("TEMPLATE(hdf5_path)", "/test") ; ToCqlVector()
validation_set: HdfFileToIntArray("TEMPLATE(hdf5_path)", "/neighbors")
ops:
vop1:
stmt: "number:{number} name:{number_name}\n"
verifier-imports:
- "io.nosqlbench.api.engine.metrics.ActivityMetrics"
verifier-init: |
recallHisto = ActivityMetrics.histogram(_parsed_op,"recall-histo",4);
verifier: |
// double recall = vectormath.computeRecall(result,result)
recallHisto.update(cycle);
return true;
blocks:
reset-schema:
ops:
- delete-index: |
DELETE TEMPLATE(url, https://TODO.com)/TEMPLATE(index,vector)
Authorization: ApiKey TEMPLATE(apikey, required)
Content-Type: application/json
schema:
ops:
- create-index: |
PUT TEMPLATE(url, https://TODO.com)/TEMPLATE(index, vector)
Authorization: ApiKey TEMPLATE(apikey, required)
Content-Type: application/json
{
"mappings": {
"properties": {
"value": {
"type": "dense_vector",
"dims": TEMPLATE(dimensions, 25),
"index": true,
"similarity": "TEMPLATE(similarity_function, cosine)"
},
"key": {
"type": "text"
}
}
}
}
rampup:
ops:
- rampup-bulk-insert: |
POST TEMPLATE(url, https://TODO.com)/TEMPLATE(index,vector)/_bulk?refresh=true
Authorization: ApiKey TEMPLATE(apikey, required)
Content-Type: application/json
{"index": {} }
{"value": {train_vector1},"key": {rw_key1}}
{"index": {} }
{"value": {train_vector2},"key": {rw_key2}}
{"index": {} }
{"value": {train_vector3},"key": {rw_key3}}
{"index": {} }
{"value": {train_vector4},"key": {rw_key4}}
{"index": {} }
{"value": {train_vector5},"key": {rw_key5}}
{"index": {} }
{"value": {train_vector6},"key": {rw_key6}}
{"index": {} }
{"value": {train_vector7},"key": {rw_key7}}
{"index": {} }
{"value": {train_vector8},"key": {rw_key8}}
{"index": {} }
{"value": {train_vector9},"key": {rw_key9}}
{"index": {} }
{"value": {train_vector10},"key": {rw_key10}}
{"index": {} }
{"value": {train_vector11},"key": {rw_key11}}
{"index": {} }
{"value": {train_vector12},"key": {rw_key12}}
{"index": {} }
{"value": {train_vector13},"key": {rw_key13}}
{"index": {} }
{"value": {train_vector14},"key": {rw_key14}}
{"index": {} }
{"value": {train_vector15},"key": {rw_key15}}
{"index": {} }
{"value": {train_vector16},"key": {rw_key16}}
{"index": {} }
{"value": {train_vector17},"key": {rw_key17}}
{"index": {} }
{"value": {train_vector18},"key": {rw_key18}}
{"index": {} }
{"value": {train_vector19},"key": {rw_key19}}
{"index": {} }
{"value": {train_vector20},"key": {rw_key20}}
{"index": {} }
{"value": {train_vector21},"key": {rw_key21}}
{"index": {} }
{"value": {train_vector22},"key": {rw_key22}}
{"index": {} }
{"value": {train_vector23},"key": {rw_key23}}
{"index": {} }
{"value": {train_vector24},"key": {rw_key24}}
{"index": {} }
{"value": {train_vector25},"key": {rw_key25}}
#- rampup-insert: |
# POST TEMPLATE(url, https://TODO.com)/TEMPLATE(index,vector)//_doc?refresh=true
# Authorization: ApiKey TEMPLATE(apikey, required)
# Content-Type: application/json
# {
# "value": {train_vector},
# "key": {rw_key}
# }
main-read:
params:
ratio: TEMPLATE(read_ratio,90)
instrument: true
ops:
- main-select-ann-limit:
op: |
POST TEMPLATE(url, https://TODO.com)/TEMPLATE(index,vector)/_search
Authorization: ApiKey TEMPLATE(apikey, required)
Content-Type: application/json
{
"from" : 0,
"size" : TEMPLATE(k,100),
"knn": {
"field": "value",
"query_vector": {test_vector},
"k": TEMPLATE(k, 100),
"num_candidates": TEMPLATE(k,100)
}
}
result-type: json_element
verifier-imports:
- io.nosqlbench.adapter.http.JsonElementUtils
verifier-init: |
k=TEMPLATE(top_k,100)
relevancy=scriptingmetrics.newRelevancyMeasures(_parsed_op);
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.recall("recall",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.precision("precision",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.F1("F1",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.reciprocal_rank("RR",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.average_precision("AP",k));
verifier: |
actual_indices=JsonElementUtils.getIntArrayFromHits(result);
relevancy.accept({relevant_indices},actual_indices);
return true;
main-write:
params:
ratio: TEMPLATE(write_ratio,10)
cl: TEMPLATE(write_cl,LOCAL_QUORUM)
instrument: true
prepared: true
ops:
- main-insert: |
POST TEMPLATE(url, https://TODO.com)/TEMPLATE(index,vector)/_doc?refresh=true
Authorization: ApiKey TEMPLATE(apikey, required)
Content-Type: application/json
{
"value": {train_data},
"key": {rw_key}
}

View File

@@ -163,6 +163,11 @@ defaults:
- **ok-body** - An optional regex pattern which will be applied to the
body to verify that it is a valid response. If this is not provided,
then content bodies are read, but any content is considered valid.
- **result-type** - Optional; one of `none`, `string`, or `json_element`.
  By default this is set to `none`, and the http op produces no result.
  If you use `string`, the raw response body is returned; if you use
  `json_element`, the body is presumed to be valid JSON, and it is parsed
  and returned as a JsonElement. An illustrative op template follows below.
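For example, a search op that wants the parsed body available to its verifier could be written as follows (an illustrative sketch; the URL and index name are placeholders, not from this commit):

    ops:
      example-search:
        op: |
          POST http://localhost:9200/myindex/_search
          Content-Type: application/json
          {"query": {"match_all": {}}}
        result-type: json_element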
Any other statement parameter which is capitalized is taken as a request
header. If additional fields are provided which are not included in the

View File

@@ -26,7 +26,7 @@
<properties>
<revision>5.17.6-SNAPSHOT</revision>
<!-- Set this level to override the logging level for tests during build -->
<project.testlevel>INFO</project.testlevel>
<!-- Set this level to override the logging level for tests logging configuration during build -->

View File

@@ -70,4 +70,8 @@ public class DoubleSummaryGauge implements NBMetricGauge<Double>, DoubleConsumer
return labels;
}
@Override
public String toString() {
return this.labels.toString() + ":" + this.stats.toString();
}
}

View File

@@ -71,4 +71,12 @@ public class RelevancyMeasures implements NBLabeledElement {
}
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
for (DoubleSummaryGauge gauge : gauges) {
sb.append(gauge.toString()).append("\n");
}
return sb.toString();
}
}