miscellaneous fixes for relevancy metrics

This commit is contained in:
Jonathan Shook 2023-09-08 17:13:56 -05:00
parent ea847fed37
commit 340e5ac483
13 changed files with 127 additions and 40 deletions

View File

@ -19,6 +19,7 @@ package io.nosqlbench.engine.extensions.vectormath;
import com.datastax.oss.driver.api.core.cql.Row;
import java.util.List;
import java.util.Objects;
public class CqlUtils {
@ -34,5 +35,9 @@ public class CqlUtils {
return rows.stream().mapToInt(r -> r.getInt(fieldName)).toArray();
}
/**
 * Reads the named string column from every row and parses each value as an int.
 *
 * @param fieldName name of the string column to read from each row
 * @param rows the rows to convert, processed in list order
 * @return one parsed int per row, in the same order as {@code rows}
 * @throws NullPointerException if the column value of any row is null
 * @throws NumberFormatException if a column value is not a parseable integer
 */
public static int[] cqlStringColumnToIntArray(String fieldName, List<Row> rows) {
    final int[] parsed = new int[rows.size()];
    int slot = 0;
    for (Row row : rows) {
        String text = Objects.requireNonNull(row.getString(fieldName));
        parsed[slot++] = Integer.parseInt(text);
    }
    return parsed;
}
}

View File

@ -25,7 +25,7 @@ scenarios:
# await_index: run tags='block:await_index' # This would need to exit when a condition is met
# stop_search_and_index: stop search_and_index
# only possible if we have a triggering event to indicate when to stop
live_search: run tags='block:search' labels='target:astra'
# live_search: run tags='block:search' labels='target:astra' threads=1 cycles=TEMPLATE(search_cycles,10000)
search_and_rewrite: run tags='block:search_and_rewrite' labels='target:astra'
search_and_invalidate: run tags='block:search_and_invalidate' labels='target:astra'
@ -73,29 +73,29 @@ blocks:
search_and_index:
ops:
select_ann_limit:
stmt: |
prepared: |
SELECT * FROM TEMPLATE(keyspace,baselines).TEMPLATE(table,vectors)
ORDER BY value ANN OF {test_floatlist} LIMIT TEMPLATE(select_limit,100);
tags:
optype: select
verifier-init: |
relevancy=scriptingmetrics.newRelevancyMeasures(_parsed_op,"group","relevancy");
for (int k in new int[]{1,2,5,10,25,50,100}) {
relevancy.addFunction(recall("recall",k));
relevancy.addFunction(precision("precision",k);
relevancy.addFunction(reciprocal_rank("RR",k));
relevancy.addFunction(average_precision("AP",k));
relevancy.addFunction(F1("F1",k));
for (int k in List.of(1,2,3,5,10,25,50,75,100)) {
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.recall("recall",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.precision("precision",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.reciprocal_rank("RR",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.average_precision("AP",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.F1("F1",k));
}
verifier: |
# driver-specific function
actual_indices=cqlRowListToIntArray("id",result))
# driver-agnostic function
// driver-specific function
actual_indices=cql_utils.cqlStringColumnToIntArray("key",result);
// driver-agnostic function
relevancy.accept({relevant_indices},actual_indices);
# because we are "verifying" although this needs to be reorganized
// because we are "verifying" although this needs to be reorganized
return true;
insert_rewrite:
stmt: |
prepared: |
INSERT INTO TEMPLATE(keyspace,baselines).TEMPLATE(table,vectors)
(key, value) VALUES ({id},{train_floatlist});
tags:

View File

@ -30,9 +30,9 @@ public class Intersections {
int foundAt = -1;
for (int index = 0; index < maxIndex; index++) {
foundAt = Arrays.binarySearch(reference, sample[index]);
if (foundAt >= 0) break;
if (foundAt >= 0) return index;
}
return foundAt;
return -1;
}
public static int firstMatchingIndex(int[] reference, int[] sample, int limit) {
@ -41,9 +41,9 @@ public class Intersections {
int foundAt = -1;
for (int index = 0; index < maxIndex; index++) {
foundAt = Arrays.binarySearch(reference, sample[index]);
if (foundAt >= 0) break;
if (foundAt >= 0) return index;
}
return foundAt;
return -1;
}
public static int count(int[] reference, int[] sample) {

View File

@ -22,19 +22,64 @@ import io.nosqlbench.engine.extensions.computefunctions.relavency.*;
import java.util.Map;
public class RelevancyFunctions {
public static Recall recall(String name, int k, Map<String,String> labels) {
/**
 * Creates a {@code Recall} function with labels given as a map.
 *
 * @param name the name of the function
 * @param k the k value handed to the {@code Recall} constructor
 * @param labels label map, forwarded unchanged to the {@code Recall} constructor
 * @return a new {@code Recall} instance
 */
public static Recall recall(String name, int k, Map<String, String> labels) {
    final Recall function = new Recall(name, k, labels);
    return function;
}
public static Precision precision(String name, int k, Map<String,String> labels) {
/**
 * Creates a {@code Recall} function with labels given as a varargs list.
 *
 * @param name the name of the function
 * @param k the k value handed to the {@code Recall} constructor
 * @param labels label data, forwarded unchanged to the {@code Recall} constructor
 *               (presumably alternating keys and values)
 * @return a new {@code Recall} instance
 */
public static Recall recall(String name, int k, Object... labels) {
    final Recall function = new Recall(name, k, labels);
    return function;
}
/**
 * Creates a {@code Recall} function without caller-supplied labels.
 *
 * @param name the name of the function
 * @param k the k value handed to the {@code Recall} constructor
 * @return a new {@code Recall} instance
 */
public static Recall recall(String name, int k) {
    final Recall function = new Recall(name, k);
    return function;
}
/**
 * Creates a {@code Precision} function with labels given as a map.
 *
 * @param name the name of the function
 * @param k the k value handed to the {@code Precision} constructor
 * @param labels label map, forwarded unchanged to the {@code Precision} constructor
 * @return a new {@code Precision} instance
 */
public static Precision precision(String name, int k, Map<String, String> labels) {
    final Precision function = new Precision(name, k, labels);
    return function;
}
public static F1 F1(String name, int k, Map<String,String> labels) {
/**
 * Creates a {@code Precision} function with labels given as a varargs list.
 *
 * @param name the name of the function
 * @param k the k value handed to the {@code Precision} constructor
 * @param labels label data, forwarded unchanged to the {@code Precision} constructor
 *               (presumably alternating keys and values)
 * @return a new {@code Precision} instance
 */
public static Precision precision(String name, int k, Object... labels) {
    final Precision function = new Precision(name, k, labels);
    return function;
}
/**
 * Creates a {@code Precision} function without caller-supplied labels.
 *
 * @param name the name of the function
 * @param k the k value handed to the {@code Precision} constructor
 * @return a new {@code Precision} instance
 */
public static Precision precision(String name, int k) {
    final Precision function = new Precision(name, k);
    return function;
}
/**
 * Creates an {@code F1} function with labels given as a map.
 *
 * @param name the name of the function
 * @param k the k value handed to the {@code F1} constructor
 * @param labels label map, forwarded unchanged to the {@code F1} constructor
 * @return a new {@code F1} instance
 */
public static F1 F1(String name, int k, Map<String, String> labels) {
    final F1 function = new F1(name, k, labels);
    return function;
}
public static AveragePrecision average_precision(String name, int k, Map<String,String> labels) {
/**
 * Creates an {@code F1} function with labels given as a varargs list.
 *
 * @param name the name of the function
 * @param k the k value handed to the {@code F1} constructor
 * @param labels label data, forwarded unchanged to the {@code F1} constructor
 *               (presumably alternating keys and values)
 * @return a new {@code F1} instance
 */
public static F1 F1(String name, int k, Object... labels) {
    final F1 function = new F1(name, k, labels);
    return function;
}
/**
 * Creates an {@code F1} function without caller-supplied labels.
 *
 * @param name the name of the function
 * @param k the k value handed to the {@code F1} constructor
 * @return a new {@code F1} instance
 */
public static F1 F1(String name, int k) {
    final F1 function = new F1(name, k);
    return function;
}
/**
 * Creates an {@code AveragePrecision} function with labels given as a map.
 *
 * @param name the name of the function
 * @param k the k value handed to the {@code AveragePrecision} constructor
 * @param labels label map, forwarded unchanged to the {@code AveragePrecision} constructor
 * @return a new {@code AveragePrecision} instance
 */
public static AveragePrecision average_precision(String name, int k, Map<String, String> labels) {
    final AveragePrecision function = new AveragePrecision(name, k, labels);
    return function;
}
public static ReciprocalRank rank_reciprocal(String name, int k, Map<String,String> labels) {
return new ReciprocalRank(name, k, NBLabels.forKV("k",k).andTypes(labels));
/**
 * Creates an {@code AveragePrecision} function with labels given as a varargs list.
 *
 * @param name the name of the function
 * @param k the k value handed to the {@code AveragePrecision} constructor
 * @param labels label data, forwarded unchanged to the {@code AveragePrecision} constructor
 *               (presumably alternating keys and values)
 * @return a new {@code AveragePrecision} instance
 */
public static AveragePrecision average_precision(String name, int k, Object... labels) {
    final AveragePrecision function = new AveragePrecision(name, k, labels);
    return function;
}
/**
 * Creates an {@code AveragePrecision} function without caller-supplied labels.
 *
 * @param name the name of the function
 * @param k the k value handed to the {@code AveragePrecision} constructor
 * @return a new {@code AveragePrecision} instance
 */
public static AveragePrecision average_precision(String name, int k) {
    final AveragePrecision function = new AveragePrecision(name, k);
    return function;
}
/**
 * Creates a {@code ReciprocalRank} function with labels given as a map.
 *
 * @param name the name of the function
 * @param k the k value handed to the {@code ReciprocalRank} constructor
 * @param labels label map, forwarded unchanged to the {@code ReciprocalRank} constructor
 * @return a new {@code ReciprocalRank} instance
 */
public static ReciprocalRank reciprocal_rank(String name, int k, Map<String, String> labels) {
    final ReciprocalRank function = new ReciprocalRank(name, k, labels);
    return function;
}
/**
 * Creates a {@code ReciprocalRank} function with labels given as a varargs list.
 *
 * @param name the name of the function
 * @param k the k value handed to the {@code ReciprocalRank} constructor
 * @param labels label data, forwarded unchanged to the {@code ReciprocalRank} constructor
 *               (presumably alternating keys and values)
 * @return a new {@code ReciprocalRank} instance
 */
public static ReciprocalRank reciprocal_rank(String name, int k, Object... labels) {
    final ReciprocalRank function = new ReciprocalRank(name, k, labels);
    return function;
}
/**
 * Creates a {@code ReciprocalRank} function without caller-supplied labels.
 *
 * <p>Note that this method is named {@code rank_reciprocal}, unlike the labelled
 * {@code reciprocal_rank} overloads; the name is kept so existing callers keep working.
 *
 * @param name the name of the function
 * @param k the k value handed to the {@code ReciprocalRank} constructor
 * @return a new {@code ReciprocalRank} instance
 */
public static ReciprocalRank rank_reciprocal(String name, int k) {
    // The varargs ReciprocalRank constructor attaches the "k" label itself and feeds its
    // trailing arguments to NBLabels.forKV(...). Passing a ready-made NBLabels instance as
    // the third argument would therefore be read as a single label element, not as labels.
    // Delegate the same way the other two-argument factories in this class do.
    return new ReciprocalRank(name, k);
}
}

View File

@ -28,7 +28,7 @@ public class AveragePrecision extends BaseRelevancyFunction {
this.k = k;
}
public AveragePrecision(String name, int k, Object... labels) {
super(name, NBLabels.forKV("k",k).andTypes(labels));
super(name, NBLabels.forKV("k",k).and(NBLabels.forKV(labels)));
this.k = k;
}
@ -37,4 +37,10 @@ public class AveragePrecision extends BaseRelevancyFunction {
return ComputeFunctions.average_precision(relevant,actual,k);
}
/**
 * Returns the base name joined to this instance's {@code k} by an underscore,
 * for example {@code AP_10}.
 *
 * @return {@code getName()}, an underscore, then {@code k}
 */
@Override
public String getUniqueName() {
    final String baseName = getName();
    return baseName + "_" + k;
}
}

View File

@ -19,19 +19,13 @@ package io.nosqlbench.engine.extensions.computefunctions.relavency;
import io.nosqlbench.api.config.NBLabels;
import io.nosqlbench.api.engine.metrics.wrappers.RelevancyFunction;
import java.util.Map;
public abstract class BaseRelevancyFunction implements RelevancyFunction {
private final String name;
private final NBLabels labels;
public BaseRelevancyFunction(String name, Object... labeldata) {
this.name = name;
this.labels = NBLabels.forKV(labeldata);
}
public BaseRelevancyFunction(String name, Map<String,String> labels) {
public BaseRelevancyFunction(String name, NBLabels labels) {
this.name = name;
this.labels = NBLabels.forMap(labels);
this.labels = labels;
}
@Override

View File

@ -29,7 +29,7 @@ public class F1 extends BaseRelevancyFunction {
this.k = k;
}
public F1(String name, int k, Object... labels) {
super(name, NBLabels.forKV("k",k).andTypes(labels));
super(name, NBLabels.forKV("k",k).and(NBLabels.forKV(labels)));
this.k = k;
}
@ -38,4 +38,10 @@ public class F1 extends BaseRelevancyFunction {
return ComputeFunctions.F1(relevant,actual,k);
}
/**
 * Returns the base name joined to this instance's {@code k} by an underscore,
 * for example {@code F1_10}.
 *
 * @return {@code getName()}, an underscore, then {@code k}
 */
@Override
public String getUniqueName() {
    final String baseName = getName();
    return baseName + "_" + k;
}
}

View File

@ -29,7 +29,7 @@ public class Precision extends BaseRelevancyFunction {
this.k = k;
}
public Precision(String name, int k, Object... labels) {
super(name, NBLabels.forKV("k",k).andTypes(labels));
super(name, NBLabels.forKV("k",k).and(NBLabels.forKV(labels)));
this.k = k;
}
@ -37,4 +37,10 @@ public class Precision extends BaseRelevancyFunction {
public double apply(int[] relevant, int[] actual) {
return ComputeFunctions.precision(relevant, actual, k);
}
/**
 * Returns the base name joined to this instance's {@code k} by an underscore,
 * for example {@code precision_10}.
 *
 * @return {@code getName()}, an underscore, then {@code k}
 */
@Override
public String getUniqueName() {
    final String baseName = getName();
    return baseName + "_" + k;
}
}

View File

@ -24,8 +24,8 @@ import java.util.Map;
public class Recall extends BaseRelevancyFunction {
private final int k;
public Recall(String name, int k, Object...labeldata) {
super(name, NBLabels.forKV("k",k).andTypes(labeldata));
public Recall(String name, int k, Object...labels) {
super(name, NBLabels.forKV("k",k).and(NBLabels.forKV(labels)));
this.k = k;
}
public Recall(String name, int k, Map<String,String> labels) {
@ -37,4 +37,9 @@ public class Recall extends BaseRelevancyFunction {
public double apply(int[] relevant, int[] actual) {
return ComputeFunctions.recall(relevant,actual,k);
}
/**
 * Returns the base name joined to this instance's {@code k} by an underscore,
 * for example {@code recall_10}.
 *
 * @return {@code getName()}, an underscore, then {@code k}
 */
@Override
public String getUniqueName() {
    final String baseName = getName();
    return baseName + "_" + k;
}
}

View File

@ -29,7 +29,7 @@ public class ReciprocalRank extends BaseRelevancyFunction {
this.k = k;
}
public ReciprocalRank(String name, int k, Object... labels) {
super(name, NBLabels.forKV("k",k).andTypes(labels));
super(name, NBLabels.forKV("k",k).and(NBLabels.forKV(labels)));
this.k = k;
}
@ -37,4 +37,10 @@ public class ReciprocalRank extends BaseRelevancyFunction {
public double apply(int[] relevant, int[] actual) {
return ComputeFunctions.reciprocal_rank(relevant,actual,k);
}
/**
 * Returns the base name joined to this instance's {@code k} by an underscore,
 * for example {@code RR_10}.
 *
 * @return {@code getName()}, an underscore, then {@code k}
 */
@Override
public String getUniqueName() {
    final String baseName = getName();
    return baseName + "_" + k;
}
}

View File

@ -59,6 +59,9 @@ public class ScriptingMetrics {
public RelevancyMeasures newRelevancyMeasures(NBLabeledElement parent, Map<String,String> labels) {
return new RelevancyMeasures(parent,labels);
}
/**
 * Creates a {@code RelevancyMeasures} instance under the given parent, with labels given
 * as a varargs list.
 *
 * @param parent the labeled element passed to the {@code RelevancyMeasures} constructor
 * @param labels label data, forwarded unchanged to the {@code RelevancyMeasures} constructor
 * @return a new {@code RelevancyMeasures} instance
 */
public RelevancyMeasures newRelevancyMeasures(NBLabeledElement parent, Object... labels) {
    final RelevancyMeasures measures = new RelevancyMeasures(parent, labels);
    return measures;
}

View File

@ -20,5 +20,11 @@ import io.nosqlbench.api.config.NBLabeledElement;
/**
* A named, labeled function which computes a double value from two int arrays.
*/
public interface RelevancyFunction extends NBLabeledElement {
/**
* Compute this function's value for one pair of index arrays.
* @param relevant presumably the indices considered relevant (ground truth) - confirm with callers
* @param actual presumably the indices actually returned by the operation under test - confirm with callers
* @return the computed value
*/
double apply(int[] relevant, int[] actual);
/**
* @return The name of this function. This is not required to differ between instances which
* vary only by their parameters; see {@link #getUniqueName()} for that.
*/
String getName();
/**
* Return a name which identifies this function in terms of its parameters. This is a temporary
* work-around until the graphite uniqueness semantics are removed and label set uniqueness works.
* @return A string which can be used to identify if the metric exists yet.
*/
String getUniqueName();
}

View File

@ -40,6 +40,11 @@ public class RelevancyMeasures implements NBLabeledElement {
this.parent = parent;
this.labels = labels;
}
public RelevancyMeasures(NBLabeledElement parent, Object... labels) {
this.parent = parent;
this.labels = NBLabels.forKV(labels);
}
public RelevancyMeasures(NBLabeledElement parent, Map<String,String> labels) {
this(parent,NBLabels.forMap(labels));
}
@ -50,9 +55,9 @@ public class RelevancyMeasures implements NBLabeledElement {
}
public RelevancyMeasures addFunction(RelevancyFunction... f) {
for (RelevancyFunction function : this.functions) {
for (RelevancyFunction function : f) {
this.functions.add(function);
DoubleSummaryGauge gauge = ActivityMetrics.summaryGauge(function, function.getName());
DoubleSummaryGauge gauge = ActivityMetrics.summaryGauge(function, function.getUniqueName());
this.gauges.add(gauge);
}
return this;