diff --git a/nb-virtdata/virtdata-api/src/main/java/io/nosqlbench/virtdata/core/templates/BindPointParser.java b/nb-virtdata/virtdata-api/src/main/java/io/nosqlbench/virtdata/core/templates/BindPointParser.java index 7e2b9fe84..712f2d44b 100644 --- a/nb-virtdata/virtdata-api/src/main/java/io/nosqlbench/virtdata/core/templates/BindPointParser.java +++ b/nb-virtdata/virtdata-api/src/main/java/io/nosqlbench/virtdata/core/templates/BindPointParser.java @@ -26,6 +26,8 @@ import java.util.function.BiFunction; import java.util.regex.Matcher; import java.util.regex.Pattern; +import static java.util.regex.Matcher.quoteReplacement; + /** * BindPointParser parses a user-provide string template into spans. It builds a simple list of * BindPoints, and provides both the parsed spans and the BindPoints in a result. @@ -51,10 +53,10 @@ public class BindPointParser implements BiFunction, List spans = new ArrayList<>(); List bindpoints = new ArrayList<>(); - int genid=0; + int genid = 0; while (m.find()) { String pre = template.substring(lastMatch, m.start()); - spans.add(pre); + spans.add(unescape(pre)); lastMatch = m.end(); String reference = m.group("reference"); @@ -64,21 +66,29 @@ public class BindPointParser implements BiFunction, bindpoints.add(BindPoint.of(reference, bindings.getOrDefault(reference, null), BindPoint.Type.reference)); spans.add(reference); } else if (inline1 != null) { - bindpoints.add(BindPoint.of(DEFINITION, inline1, BindPoint.Type.definition)); + bindpoints.add(BindPoint.of(DEFINITION, unescape(inline1), BindPoint.Type.definition)); spans.add(inline1); } else if (inline2 != null) { - bindpoints.add(BindPoint.of(DEFINITION, inline2, BindPoint.Type.definition)); + bindpoints.add(BindPoint.of(DEFINITION, unescape(inline2), BindPoint.Type.definition)); spans.add(inline2); } else { throw new BasicError("Unable to parse: " + template); } } - spans.add(lastMatch >= 0 ? template.substring(lastMatch) : template); + spans.add(lastMatch >= 0 ? 
unescape(template.substring(lastMatch)) : unescape(template)); + for (String span : spans) { + + } return new Result(spans, bindpoints); } + private static String unescape(String s) { + String s1 = s.replaceAll("\\\\n", "\n"); + return s1; + } + public final static class Result { private final List spans; diff --git a/nb-virtdata/virtdata-api/src/test/java/io/nosqlbench/virtdata/core/templates/BindPointParserTest.java b/nb-virtdata/virtdata-api/src/test/java/io/nosqlbench/virtdata/core/templates/BindPointParserTest.java index f30afda4b..86ed9a9f6 100644 --- a/nb-virtdata/virtdata-api/src/test/java/io/nosqlbench/virtdata/core/templates/BindPointParserTest.java +++ b/nb-virtdata/virtdata-api/src/test/java/io/nosqlbench/virtdata/core/templates/BindPointParserTest.java @@ -88,4 +88,18 @@ public class BindPointParserTest { } + @Test + public void testUnescapingNewlines() { + BindPointParser bpp = new BindPointParser(); + assertThat(bpp.apply("a{{Template(\"-{}-\",Combinations(\"a-z\"))}}\\nb", Map.of())).isEqualTo( + new BindPointParser.Result( + List.of("a","Template(\"-{}-\",Combinations(\"a-z\"))","\nb"), + List.of( + BindPoint.of(BindPointParser.DEFINITION,"Template(\"-{}-\",Combinations(\"a-z\"))", BindPoint.Type.definition) + ) + ) + ); + + } + } diff --git a/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/core/stathelpers/AliasSamplerDoubleInt.java b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/core/stathelpers/AliasSamplerDoubleInt.java index 693aedc29..bcd789451 100644 --- a/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/core/stathelpers/AliasSamplerDoubleInt.java +++ b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/core/stathelpers/AliasSamplerDoubleInt.java @@ -38,9 +38,9 @@ public class AliasSamplerDoubleInt implements DoubleToIntFunction { private final ByteBuffer stats; // tuples of double,int,int (unfair 
coin, direct pointers to referents) private final double slotCount; // The number of fair die-roll slotCount that contain unfair coin probabilities private static final int _r0=0; - private static final int _r1=_r0+Double.BYTES; - private static final int _r2=_r1+Integer.BYTES; - public static int RECORD_LEN = _r2 + Integer.BYTES; // Record size for the above. + private static final int _r1=_r0+Double.BYTES; // unfair coin + private static final int _r2=_r1+Integer.BYTES; // + referent 1 + public static int RECORD_LEN = _r2 + Integer.BYTES; // + referent 2 = Record size for the above. // for testing AliasSamplerDoubleInt(ByteBuffer stats) { diff --git a/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/distributions/CSVSampler.java b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/distributions/CSVSampler.java index 561d2141d..2e80ae8c0 100644 --- a/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/distributions/CSVSampler.java +++ b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/distributions/CSVSampler.java @@ -61,7 +61,9 @@ import java.util.stream.Collectors; * the values will appear monotonically as you scan through the unit interval of all long values. * Specifically, 0L represents 0.0d in the unit interval on input, and Long.MAX_VALUE represents * 1.0 on the unit interval.) This mode is only recommended for advanced scenarios and should otherwise be - * avoided. You will know if you need this mode. + * avoided. You will know if you need this mode. For alias sampling, the values may not always occur + * in the order specified due to the alias table construction. However, the values will be clustered in the order + * they appear in that table. 
* */ @Categories(Category.general) diff --git a/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/distributions/WeightedInts.java b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/distributions/WeightedInts.java new file mode 100644 index 000000000..f3cadfb7c --- /dev/null +++ b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/distributions/WeightedInts.java @@ -0,0 +1,95 @@ +package io.nosqlbench.virtdata.library.basics.shared.distributions; + +/* + * Copyright (c) 2022 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + + +import io.nosqlbench.nb.api.errors.BasicError; +import io.nosqlbench.virtdata.api.annotations.Categories; +import io.nosqlbench.virtdata.api.annotations.Category; +import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper; +import io.nosqlbench.virtdata.library.basics.core.stathelpers.AliasSamplerDoubleInt; +import io.nosqlbench.virtdata.library.basics.core.stathelpers.EvProbD; +import io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.HashInterval; +import io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.HashRange; +import io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.ScaledDouble; +import io.nosqlbench.virtdata.library.basics.shared.from_long.to_long.Hash; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.LongToDoubleFunction; +import java.util.function.LongToIntFunction; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +@ThreadSafeMapper +@Categories(Category.distributions) +public class WeightedInts implements LongToIntFunction { + private final AliasSamplerDoubleInt sampler; + private final LongToIntFunction function; + + public WeightedInts(String spec, String... 
modifiers) { + sampler = new AliasSamplerDoubleInt(parseWeights(spec)); + this.function = applyModifiers(sampler, modifiers); + } + + private LongToIntFunction applyModifiers(AliasSamplerDoubleInt aliasSampler, String[] modifiers) { + + String mode = "hash"; + + for (String modifier : modifiers) { + switch (modifier) { + case "map": + mode = "map"; + break; + default: + throw new RuntimeException("Unrecognized modifier: " + modifier); + } + } + + if (mode.equals("hash")) { + HashInterval f2 = new HashInterval(0.0d, 1.0d); + return (long l) -> aliasSampler.applyAsInt(f2.applyAsDouble(l)); + } else if (mode.equals("map")) { + ScaledDouble f1 = new ScaledDouble(); + return (long l) -> aliasSampler.applyAsInt(f1.applyAsDouble(l)); + } else { + throw new BasicError("Unable to determine mapping mode for weighted ints function"); + } + } + + private final static Pattern weight = Pattern.compile( + "(?\\d+)(:(?[0-9.]+))?([; ,]+)?" + ); + + private List parseWeights(String spec) { + List events = new ArrayList<>(); + Matcher matcher = weight.matcher(spec); + while (matcher.find()) { + int value = Integer.parseInt(matcher.group("value")); + String weightSpec = matcher.group("weight"); + double weight = (weightSpec != null) ? 
Double.parseDouble(weightSpec) : 1.0d; + events.add(new EvProbD(value, weight)); + } + return events; + } + + @Override + public int applyAsInt(long value) { + return function.applyAsInt(value); + } +} diff --git a/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_double/HashInterval.java b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_double/HashInterval.java new file mode 100644 index 000000000..1d83b80e8 --- /dev/null +++ b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_double/HashInterval.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2022 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.virtdata.library.basics.shared.from_long.to_double; + +import io.nosqlbench.virtdata.api.annotations.Categories; +import io.nosqlbench.virtdata.api.annotations.Category; +import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper; +import io.nosqlbench.virtdata.library.basics.shared.from_long.to_long.Hash; + +import java.util.function.LongToDoubleFunction; + +/** + * Create a double value from a hashed long, over the valid range of long inputs. + * This version provides a strict unit interval value, not a unit range value. + * That is, it can yield any value between 0.0 and 1.0, EXCEPT 1.0. 
+ */ +@ThreadSafeMapper +@Categories({Category.general}) +public class HashInterval implements LongToDoubleFunction { + + private final double min; + private final double max; + private final double interval; + private final static double MAX_DOUBLE_VIA_LONG_PHI = ((double) Long.MAX_VALUE)+1026d; + private final Hash hash = new Hash(); + + public HashInterval(double min, double max) { + this.min = min; + this.max = max; + this.interval = max - min; + if (min>max) { + throw new RuntimeException("min must be less than or equal to max"); + } + } + + @Override + public double applyAsDouble(long value) { + long hashed = hash.applyAsLong(value); + double unitScale = ((double) hashed) / MAX_DOUBLE_VIA_LONG_PHI; + double valueScaled =interval*unitScale + min; + return valueScaled; + } + +} diff --git a/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_double/ScaledDouble.java b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_double/ScaledDouble.java new file mode 100644 index 000000000..7e8f8e8b1 --- /dev/null +++ b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_double/ScaledDouble.java @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2022-2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.nosqlbench.virtdata.library.basics.shared.from_long.to_double; + +import io.nosqlbench.virtdata.api.annotations.Categories; +import io.nosqlbench.virtdata.api.annotations.Category; +import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper; +import io.nosqlbench.virtdata.library.basics.shared.from_long.to_long.Hash; + +import java.util.function.LongToDoubleFunction; + +/* + *

This function attempts to take a double + * unit interval value from a long/long division over the whole + * range of long values but via double value types, thus providing + * a very linear sample. This means that the range of double + * values to be accessed will not fall along all possible doubles, + * but will still provide suitable values for ranges close to + * high-precision points in the IEEE floating point number line. + * This suffices for most reasonable ranges in practice outside + * of scientific computing, where large exponents put adjacent + * IEEE floating point values much further apart.

+ * + *

This should be considered the default double range sampling + * function for most uses, when the exponent is not needed for + * readability.

+ */ + +/** + * Return the double value closest to the fraction (input) / (Long.MAX_VALUE). + * This is essentially a scaling function from Long to Double over the range of + * positive longs to the double unit interval, so [0.0d - 1.0d) + */ +@ThreadSafeMapper +@Categories({Category.general}) +public class ScaledDouble implements LongToDoubleFunction { + + public final static double MAX_DOUBLE_VIA_LONG_PHI = ((double) Long.MAX_VALUE)+1026d; + + public ScaledDouble(){} + + @Override + public double applyAsDouble(long value) { + double unitScaled = ((double) value) / MAX_DOUBLE_VIA_LONG_PHI; + return unitScaled; + } +} diff --git a/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/distributions/WeightedIntsTest.java b/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/distributions/WeightedIntsTest.java new file mode 100644 index 000000000..f23b7c258 --- /dev/null +++ b/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/distributions/WeightedIntsTest.java @@ -0,0 +1,74 @@ +package io.nosqlbench.virtdata.library.basics.shared.distributions; + +/* + * Copyright (c) 2022 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + + +import org.assertj.core.data.Offset; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.*; + +public class WeightedIntsTest { + @Test + public void testWeightedInts() { + WeightedInts weightedInts = new WeightedInts("10:10 20:20: 30:30 40:40", "map"); + assertThat(weightedInts.applyAsInt(0L)).isEqualTo(10); + assertThat(weightedInts.applyAsInt(1L)).isEqualTo(10); + assertThat(weightedInts.applyAsInt(Long.MAX_VALUE)).isEqualTo(40); + assertThat(weightedInts.applyAsInt(Long.MAX_VALUE-1L)).isEqualTo(40); + } + + @Test + public void testDistributionError() { + WeightedInts weightedInts = new WeightedInts("10:10 20:20: 30:30 40:40"); + double[] weights =new double[100]; + + long count = 1000000; + for (long i = 0; i < count; i++) { + int value = weightedInts.applyAsInt(i); + weights[value]++; + } + + // Verify that each label has been sampled at a frequency which is within + // 0.1% of the expected value. 
+ Offset offset = Offset.offset(((double)count)/1000d); + + assertThat(weights[10]).isCloseTo(((double)count)*(10.d/100.d), offset); + assertThat(weights[20]).isCloseTo(((double)count)*(20.d/100.d), offset); + assertThat(weights[30]).isCloseTo(((double)count)*(30.d/100.d), offset); + assertThat(weights[40]).isCloseTo(((double)count)*(40.d/100.d), offset); + + } + + @Test + @Disabled("leaving here to show boundary check logic for PHI") + public void boundaryCheck() { + for (long i = 0; i < 100000000; i++) { + double pad = ((double) i)*1.0; + double denominator = ((double) Long.MAX_VALUE) + pad; + double scaled = ((double) Long.MAX_VALUE) / denominator; + if (scaled < 1.0d) { + System.out.println("phi:" + i); + break; + } + } + + } +} diff --git a/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/HashIntervalTest.java b/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/HashIntervalTest.java index c7ac496ed..c21b8c5ff 100644 --- a/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/HashIntervalTest.java +++ b/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/HashIntervalTest.java @@ -17,6 +17,7 @@ package io.nosqlbench.virtdata.library.basics.shared.from_long.to_long; import io.nosqlbench.nb.api.errors.BasicError; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import static org.assertj.core.api.Assertions.assertThat; @@ -37,4 +38,15 @@ public class HashIntervalTest { assertThatExceptionOfType(BasicError.class) .isThrownBy(() -> new HashInterval(3L, 3L)); } + + @Test + @Disabled("exhaustive boundary check, expensive") + public void testDoubleHashIntervalBounds() { + io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.HashInterval hi = new 
io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.HashInterval(0.0, 1.0); + for (long i = 0; i < 100000000; i++) { + double v = hi.applyAsDouble(i); + assertThat(v).isGreaterThanOrEqualTo(0.0d); + assertThat(v).isLessThan(1.0d); + } + } } diff --git a/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/SequenceOfTest.java b/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/SequenceOfTest.java index 0f1b6fb10..78e15c06a 100644 --- a/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/SequenceOfTest.java +++ b/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/SequenceOfTest.java @@ -2,13 +2,13 @@ package io.nosqlbench.virtdata.library.basics.shared.from_long.to_long; /* * Copyright (c) 2022 nosqlbench - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -26,7 +26,7 @@ import static org.junit.jupiter.api.Assertions.*; public class SequenceOfTest { @Test - public void testSequenceSimple() { + public void testSequenceSimpleToLong() { SequenceOf so = new SequenceOf(1L,"0 1 2 3 4 5 6 7 8 9"); long[] results = new long[10]; for (int i = 0; i < 10; i++) { @@ -38,7 +38,20 @@ public class SequenceOfTest { } @Test - public void testSequenceWeighted() { + public void testSequenceSimpleToInt() { + io.nosqlbench.virtdata.library.basics.shared.from_long.to_int.SequenceOf so = new io.nosqlbench.virtdata.library.basics.shared.from_long.to_int.SequenceOf(1,"0 1 2 3 4 5 6 7 8 9"); + long[] results = new long[10]; + for (int i = 0; i < 10; i++) { + results[i] = so.applyAsInt(i); + } + for (int i = 0; i < 10; i++) { + assertEquals(i,results[i]); + } + } + + + @Test + public void testSequenceWeightedToLong() { SequenceOf so = new SequenceOf(1L,"0:6 1 2 3 4"); int samples = 100; long[] results = new long[samples]; @@ -56,7 +69,29 @@ public class SequenceOfTest { assertThat(results[8]).isEqualTo(3); assertThat(results[9]).isEqualTo(4); assertThat(results[10]).isEqualTo(0); + } + + @Test + public void testSequenceWeightedToInt() { + io.nosqlbench.virtdata.library.basics.shared.from_long.to_int.SequenceOf so = new io.nosqlbench.virtdata.library.basics.shared.from_long.to_int.SequenceOf(1,"0:6 1 2 3 4"); + int samples = 100; + long[] results = new long[samples]; + for (int i = 0; i < samples; i++) { + results[i]=so.applyAsInt(i); + } + assertThat(results[0]).isEqualTo(0); + assertThat(results[1]).isEqualTo(0); + assertThat(results[2]).isEqualTo(0); + assertThat(results[3]).isEqualTo(0); + assertThat(results[4]).isEqualTo(0); + 
+ assertThat(results[5]).isEqualTo(0); + assertThat(results[6]).isEqualTo(1); + assertThat(results[7]).isEqualTo(2); + assertThat(results[8]).isEqualTo(3); + assertThat(results[9]).isEqualTo(4); + assertThat(results[10]).isEqualTo(0); } + } diff --git a/nb-virtdata/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/int_int/EnumeratedInts.java b/nb-virtdata/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/int_int/EnumeratedInts.java index 8609c24a3..3965b70b7 100644 --- a/nb-virtdata/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/int_int/EnumeratedInts.java +++ b/nb-virtdata/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/int_int/EnumeratedInts.java @@ -21,6 +21,45 @@ import io.nosqlbench.virtdata.api.annotations.Category; import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper; import org.apache.commons.math4.legacy.distribution.EnumeratedIntegerDistribution; +/** + * Create a sampler based on enumeration of integer values and sample over them + * using the EnumeratedInts distribution curve provided by Apache Commons Math. + * This version will roughly produce the distribution, but since it also relies on + * interpolation by default, non-step values may appear at low frequencies. If this + * is a desired effect, then this function is suitable. For example: consider this + * result: + *
{@code
+ *  nb5 run driver=stdout op="{{EnumeratedInts('10:10 20:20 30:30 40:40')}}\n" cycles=10000 | sort -n | uniq -c
+ *       1 STDOUT0 (pending,current,complete)=(0,0,10000) 100.00% (last report)
+ *       1 9
+ *    1036 10
+ *       2 11
+ *       2 13
+ *       1 14
+ *       3 15
+ *       2 16
+ *       1 18
+ *       1 19
+ *    1937 20
+ *       1 21
+ *       1 23
+ *       1 24
+ *       1 25
+ *       1 28
+ *       1 29
+ *    3077 30
+ *       1 31
+ *       1 33
+ *       1 34
+ *       2 35
+ *       1 37
+ *       1 39
+ *    3924 40
+ * }
+ * + * The values here which are not multiples of 10 are not specified, yet they appear. For some testing, this is + * helpful as a fuzzer, but for more precise step-value sampling, see {@link AliasSampler} + */ @ThreadSafeMapper @Categories({Category.distributions}) public class EnumeratedInts extends IntToIntDiscreteCurve {