binding improvements

This commit is contained in:
Jonathan Shook 2024-10-30 13:08:55 -05:00
parent 5a737c72ef
commit d0cd231a44
4 changed files with 38 additions and 10 deletions

View File

@ -47,7 +47,10 @@ import java.util.stream.Collectors;
* you can use some combining functions to tabulate these prior to sampling. In that case, you can use
* any of "sum", "avg", "count", "min", or "max" as the reducing function on the value in the weight column.
* If none are specified, then "sum" is used by default. All modes except "count" and "name" require a valid weight
* column to be specified.
* column to be specified. These functions apply to the reduction of labels in the selected CSV column, and
* only apply when there is more than one row with the same value in that named column. Thus, the order
* of appearance row-by-row will be preserved in cases where all values in that column are distinct. This means
* that if you have multiple associated values on a given row, you can use the same input value with samplers
* over different columns to select values from the same row.
*
* <UL>
* <LI>sum, avg, min, max - takes the given stat for the weight of each distinct label</LI>
@ -133,7 +136,7 @@ public class CSVSampler implements LongFunction<String> {
final Function<LabeledStatistic, Double> valFunc = weightFunc;
Map<String, LabeledStatistic> entries = new HashMap<>();
Map<String, LabeledStatistic> entries = new LinkedHashMap<>();
for (String filename : data) {
if (!filename.endsWith(".csv")) {

View File

@ -40,7 +40,7 @@ public class MatchRegex implements Function<String,String> {
private final MatchEntry[] entries;
@Example({"MatchRegex('.*(25|6to4).*','$1')","Match 25 or 6 to 4 and set the output to only that"})
@Example({"MatchRegex('([0-9]+)-([0-9]+)-([0-9]+)','$1 $2 $3'", "replaced dashes with spaces in a 10 digit US phone number."})
@Example({"MatchRegex('([0-9]+)-([0-9]+)-([0-9]+)','$1 $2 $3'", "replace dashes with spaces in a 10 digit US phone number."})
@SuppressWarnings("unchecked")
public MatchRegex(String... specs) {
if ((specs.length%2)!=0) {

View File

@ -22,6 +22,7 @@ import org.assertj.core.data.Percentage;
import org.junit.jupiter.api.Test;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import static org.assertj.core.api.Assertions.assertThat;
@ -148,6 +149,30 @@ public class CSVSamplerTest {
assertThat(results.get("alpha")).isCloseTo(20000, Percentage.withPercentage(2.0d));
}
/**
 * If there is no variation in aggregation, then bindings for different named columns
 * should produce values from the same line for the same input.
 *
 * <p>Two samplers are built over the same {@code basicdata} source, one selecting the
 * {@code weight} column and one selecting the {@code wname} column. For every input in
 * {@code [0, 1000)}, the numeral produced by the first sampler must map to the number
 * word produced by the second sampler.</p>
 */
@Test
public void testStablePairingForSum() {
    // Numeral -> number word lookup. Only get() is used, so an immutable Map.of(...)
    // replaces the double-brace-initialized anonymous LinkedHashMap subclass.
    Map<String, String> expected = Map.of(
        "1", "one",
        "2", "two",
        "3", "three",
        "4", "four",
        "5", "five",
        "6", "six"
    );

    // Both samplers share every argument except the selected column.
    CSVSampler sampler1 = new CSVSampler("weight", "does not matter", "name", "basicdata");
    CSVSampler sampler2 = new CSVSampler("wname", "does not matter", "name", "basicdata");

    for (int i = 0; i < 1000; i++) {
        String v1 = sampler1.apply(i);
        String v2 = sampler2.apply(i);
        // The same input must yield values that belong to the same CSV row.
        assertThat(expected.get(v1)).isEqualTo(v2);
    }
}
}

View File

@ -1,7 +1,7 @@
NAME,WEIGHT,MEMO
alpha,1,this is sparta
beta,2,this is sparta
gamma,3,this is sparta
delta,4,this is sparta
epsilon,5,this is sparta
alpha,6,this is sparta
NAME,WEIGHT,MEMO,WNAME
alpha,1,this is sparta,one
beta,2,this is sparta,two
gamma,3,this is sparta,three
delta,4,this is sparta,four
epsilon,5,this is sparta,five
alpha,6,this is sparta,six

1 NAME WEIGHT MEMO WNAME
2 alpha 1 this is sparta one
3 beta 2 this is sparta two
4 gamma 3 this is sparta three
5 delta 4 this is sparta four
6 epsilon 5 this is sparta five
7 alpha 6 this is sparta six