binding improvements

2025-01-11 00:12:04 -06:00 · 2024-10-30 13:08:55 -05:00 · 2024-10-30 13:08:55 -05:00 · d0cd231a44
commit d0cd231a44
parent 5a737c72ef
4 changed files with 38 additions and 10 deletions
--- a/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/distributions/CSVSampler.java
+++ b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/distributions/CSVSampler.java
@ -47,7 +47,10 @@ import java.util.stream.Collectors;
 * you can use some combining functions to tabulate these prior to sampling. In that case, you can use
 * any of "sum", "avg", "count", "min", or "max" as the reducing function on the value in the weight column.
 * If none are specified, then "sum" is used by default. All modes except "count" and "name" require a valid weight
- * column to be specified.
+ * column to be specified. These functions apply to the reduction of labels in the selected CSV column, and
+ * only apply when there is more than one row with the same value in that named column. Thus, the row-by-row
+ * order of appearance is preserved when all values in that column are distinct. This means that if you have
+ * multiple associated values on a given row, you can sample them from separate columns with the same input.
 *
 * <UL>
 *     <LI>sum, avg, min, max - takes the given stat for the weight of each distinct label</LI>
@ -133,7 +136,7 @@ public class CSVSampler implements LongFunction<String> {

        final Function<LabeledStatistic, Double> valFunc = weightFunc;

-        Map<String, LabeledStatistic> entries = new HashMap<>();
+        Map<String, LabeledStatistic> entries = new LinkedHashMap<>();

        for (String filename : data) {
            if (!filename.endsWith(".csv")) {
--- a/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_string/MatchRegex.java
+++ b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_string/MatchRegex.java
@ -40,7 +40,7 @@ public class MatchRegex implements Function<String,String>  {
    private final MatchEntry[] entries;

    @Example({"MatchRegex('.*(25|6to4).*','$1')","Match 25 or 6 to 4 and set the output to only that"})
-    @Example({"MatchRegex('([0-9]+)-([0-9]+)-([0-9]+)','$1 $2 $3'", "replaced dashes with spaces in a 10 digit US phone number."})
+    @Example({"MatchRegex('([0-9]+)-([0-9]+)-([0-9]+)','$1 $2 $3'", "replace dashes with spaces in a 10 digit US phone number."})
    @SuppressWarnings("unchecked")
    public MatchRegex(String... specs) {
        if ((specs.length%2)!=0) {
--- a/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/distributions/CSVSamplerTest.java
+++ b/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/distributions/CSVSamplerTest.java
@ -22,6 +22,7 @@ import org.assertj.core.data.Percentage;
 import org.junit.jupiter.api.Test;

 import java.util.HashMap;
+import java.util.LinkedHashMap;
 import java.util.Map;

 import static org.assertj.core.api.Assertions.assertThat;
@ -148,6 +149,30 @@ public class CSVSamplerTest {
        assertThat(results.get("alpha")).isCloseTo(20000, Percentage.withPercentage(2.0d));
    }

+    /**
+     * When every label in the sampled column is distinct (so nothing is aggregated), samplers bound to
+     * different named columns of the same file should produce values from the same line for the same input
+     */
+    @Test
+    public void testStablePairingForSum() {
+        Map<String,String> expected = new LinkedHashMap<>() {{
+            put("1","one");
+            put("2","two");
+            put("3","three");
+            put("4","four");
+            put("5","five");
+            put("6","six");
+        }};
+        CSVSampler sampler1 = new CSVSampler("weight", "does not matter", "name", "basicdata");
+        CSVSampler sampler2 = new CSVSampler("wname", "does not matter", "name", "basicdata");
+
+        for (int i = 0; i < 1000; i++) {
+            String v1 = sampler1.apply(i);
+            String v2 = sampler2.apply(i);
+            assertThat(expected.get(v1)).isEqualTo(v2);
+        }
+
+    }


 }
--- a/nb-virtdata/virtdata-lib-basics/src/test/resources/basicdata.csv
+++ b/nb-virtdata/virtdata-lib-basics/src/test/resources/basicdata.csv
@ -1,7 +1,7 @@
-NAME,WEIGHT,MEMO
-alpha,1,this is sparta
-beta,2,this is sparta
-gamma,3,this is sparta
-delta,4,this is sparta
-epsilon,5,this is sparta
-alpha,6,this is sparta
+NAME,WEIGHT,MEMO,WNAME
+alpha,1,this is sparta,one
+beta,2,this is sparta,two
+gamma,3,this is sparta,three
+delta,4,this is sparta,four
+epsilon,5,this is sparta,five
+alpha,6,this is sparta,six