From ff142fd0b2b85ea2c9c5586c0c24575989443932 Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Thu, 18 Nov 2021 16:35:20 -0600 Subject: [PATCH] refactor/improve stat samplers for efficiency and correctness --- devdocs/sketches/stat_samplers.md | 65 ++++++++++++++++ .../from_long/to_double/Interpolate.java | 74 ++++++++++++++----- .../shared/from_long/to_long/Interpolate.java | 17 +---- .../from_long/to_long/InterpolateTest.java | 24 +++++- .../common/InterpolatingIntDoubleSampler.java | 48 ++++++------ .../InterpolatingLongDoubleSampler.java | 59 +++++++++------ .../common/RealIntDoubleSampler.java | 10 ++- .../common/RealLongDoubleSampler.java | 12 +-- .../IntToDoubleContinuousCurve.java | 5 +- .../LongToDoubleContinuousCurve.java | 2 +- .../common/InterpolatingIntIntSampler.java | 42 +++++------ .../common/InterpolatingIntLongSampler.java | 40 +++++----- .../common/InterpolatingLongIntSampler.java | 36 +++++---- .../common/InterpolatingLongLongSampler.java | 36 +++++---- .../library/curves4/continuous/LevyTest.java | 2 +- .../RealDistributionsValuesTest.java | 21 ++++-- .../IntegerDistributionsConcurrencyTest.java | 2 +- .../IntegerDistributionsValuesTest.java | 13 +++- .../category_blurbs/funcref_distributions.md | 29 ++++---- .../io/virtdata/IntegratedCurvesTest.java | 4 +- 20 files changed, 336 insertions(+), 205 deletions(-) create mode 100644 devdocs/sketches/stat_samplers.md diff --git a/devdocs/sketches/stat_samplers.md b/devdocs/sketches/stat_samplers.md new file mode 100644 index 000000000..25fa6d063 --- /dev/null +++ b/devdocs/sketches/stat_samplers.md @@ -0,0 +1,65 @@ +This diagram shows the base implementations of all the statistical sampler +wrappers, the types they implement, and the helper functions which are key +to their operation. 
+ + +```plantuml + +digraph samplers { + rankdir=LR; + node[shape=box]; + + subgraph cluster0 { + label="continuous" + subgraph cluster3 { + label="int->double" + IntToDoubleContinuousCurve[shape=box] + IntToDoubleContinuousCurve -> IntToDoubleFunction[style=dashed] + IntToDoubleContinuousCurve -> InterpolatingIntDoubleSampler + IntToDoubleContinuousCurve -> RealIntDoubleSampler + } + subgraph cluster4 { + label="long->double" + LongToDoubleContinuousCurve[shape=box] + LongToDoubleContinuousCurve -> LongToDoubleFunction[style=dashed] + LongToDoubleContinuousCurve -> InterpolatingLongDoubleSampler + LongToDoubleContinuousCurve -> RealLongDoubleSampler + } + } + subgraph cluster1 { + label="discrete" + subgraph cluster5 { + label="int->int" + IntToIntDiscreteCurve[shape=box] + IntToIntDiscreteCurve -> IntUnaryOperator[style=dashed] + IntToIntDiscreteCurve -> InterpolatingIntIntSampler + IntToIntDiscreteCurve -> DiscreteIntIntSampler + } + + subgraph cluster6 { + label="int->long" + IntToLongDiscreteCurve[shape=box] + IntToLongDiscreteCurve -> IntToLongFunction[style=dashed] + IntToLongDiscreteCurve -> InterpolatingIntLongSampler + IntToLongDiscreteCurve -> DiscreteIntLongSampler + } + + subgraph cluster7 { + label="long->int" + LongToIntDiscreteCurve[shape=box] + LongToIntDiscreteCurve -> LongToIntFunction[style=dashed] + LongToIntDiscreteCurve ->InterpolatingLongIntSampler + LongToIntDiscreteCurve ->DiscreteLongIntSampler + } + + subgraph cluster8 { + label="long->long" + LongToLongDiscreteCurve[shape=box] + LongToLongDiscreteCurve -> LongUnaryOperator[style=dashed] + LongToLongDiscreteCurve ->InterpolatingLongLongSampler + LongToLongDiscreteCurve ->DiscreteLongLongSampler + } + + } +} +``` diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_double/Interpolate.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_double/Interpolate.java index 4d509f7e5..1a9b68e13 100644 
--- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_double/Interpolate.java +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_double/Interpolate.java @@ -8,36 +8,76 @@ import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper; import java.util.function.LongToDoubleFunction; +/** + * Return a value along an interpolation curve. This allows you to sketch a basic + * density curve and describe it simply with just a few values. The number of values + * provided determines the resolution of the internal lookup table that is used for + * interpolation. The first value is always the 0.0 anchoring point on the unit interval. + * The last value is always the 1.0 anchoring point on the unit interval. This means + * that in order to subdivide the density curve in an interesting way, you need to provide + * a few more values in between them. Providing two values simply provides a uniform + * sample between a minimum and maximum value. + * + * The input range of this function is, as many of the other functions in this library, + * based on the valid range of positive long values, between 0L and Long.MAX_VALUE inclusive. + * This means that if you want to combine interpolation on this curve with the effect of + * pseudo-random sampling, you need to put a hash function ahead of it in the flow. + * + * Developer Note: This is the canonical implementation of LERPing in NoSQLBench, so is + * heavily documented. Any other LERP implementations should borrow directly from this, + * embedding by default. 
+ */ @ThreadSafeMapper @Categories({Category.general}) public class Interpolate implements LongToDoubleFunction { - private final double scale; + // How many values we have to pick from + private final double resolution; + + // The lookup table private final double[] lut; - private final static double maxLongAsDouble = (double) Long.MAX_VALUE; + + /** + * The scale of Long.MAX_VALUE and the unit interval scale factor are pre-combined + * here to reduce the number of operations later. + * + * The LUT size is retained as the number of elements provided (resolution) + 1. + * The +1 element serves as the N+1 index for when the unit interval sample is + * 1.0. In other words, the maximum value is not a special case, as a duplicate + * value is appended to the LUT instead. + * + * This size is the scale factor from the unit interval to the array index. Since + * the input comes in as a long value, it is mapped from [0L, Long.MAX_VALUE] to + * [0.0D, 1.0D] by multiplying by (1.0/(double)Long.MAX_VALUE). The long input + * value can then be multiplied directly to yield a double in the range of + * [0,LUT.length-1], which simplifies all remaining LERP math. + * + */ + private final double scaleToLongInterval; - @Example({"Interpolate(0.0d,100.0d)","return a uniform double value between 0.0d and 100.0d"}) - @Example({"Interpolate(0.0d,90.0d,95.0d,98.0d,100.0d)","return a weighted double value where the first second and third quartiles are 90.0D, 95.0D, and 98.0D"}) + @Example({"Interpolate(0.0d,100.0d)", "return a uniform double value between 0.0d and 100.0d"}) + @Example({"Interpolate(0.0d,90.0d,95.0d,98.0d,100.0d)", "return a weighted double value where the first second and third quartiles are 90.0D, 95.0D, and 98.0D"}) public Interpolate(double... 
values) { - double[] doubles = new double[values.length+1]; - for (int i = 0; i < values.length; i++) { // not a ranging error - doubles[i]=values[i]; - } - doubles[doubles.length-1]=doubles[doubles.length-2]; - this.scale=values.length-1; + this.resolution = values.length; + double[] doubles = new double[values.length + 1]; + System.arraycopy(values,0,doubles,0,values.length); + doubles[doubles.length - 1] = doubles[doubles.length - 2]; this.lut = doubles; + this.scaleToLongInterval = (this.resolution - 1) * (1.0d / (double) Long.MAX_VALUE); } @Override public double applyAsDouble(long input) { - long value = input; - double samplePoint = ((double)input / maxLongAsDouble) * scale; - int leftidx = (int)samplePoint; - double fractional = samplePoint - (long)samplePoint; - double leftComponent = lut[leftidx]* (1.0d-fractional); - double rightComponent = lut[leftidx+1] * fractional; - double sample = (leftComponent + rightComponent); + // scale the input from [0,Long.MAX_VALUE] to [0.0,lut.length-1] + double samplePoint = scaleToLongInterval * input; + // truncate the sample point to the left index + int leftidx = (int) samplePoint; + // isolate the fractional component + double fractional = samplePoint - leftidx; + // take the sum of the left component and right component + // scaled by closeness to fractional point within the interval, respectively + double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx + 1] * fractional); return sample; } diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/Interpolate.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/Interpolate.java index e10a26c82..5d5ad1d7e 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/Interpolate.java +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/Interpolate.java @@ 
-6,25 +6,10 @@ import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper; import java.util.function.LongUnaryOperator; -/** - * Return a value along an interpolation curve. This allows you to sketch a basic - * density curve and describe it simply with just a few values. The number of values - * provided determines the resolution of the internal lookup table that is used for - * interpolation. The first value is always the 0.0 anchoring point on the unit interval. - * The last value is always the 1.0 anchoring point on the unit interval. This means - * that in order to subdivide the density curve in an interesting way, you need to provide - * a few more values in between them. Providing two values simply provides a uniform - * sample between a minimum and maximum value. - * - * The input range of this function is, as many of the other functions in this library, - * based on the valid range of positive long values, between 0L and Long.MAX_VALUE inclusive. - * This means that if you want to combine interpolation on this curve with the effect of - * pseudo-random sampling, you need to put a hash function ahead of it in the flow. 
- */ @ThreadSafeMapper public class Interpolate implements LongUnaryOperator { - private io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate basefunc; + private final io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate basefunc; @Example({"Interpolate(0.0d,100.0d)","return a uniform long value between 0L and 100L"}) @Example({"Interpolate(0.0d,90.0d,95.0d,98.0d,100.0d)","return a weighted long value where the first second and third quartiles are 90.0D, 95.0D, and 98.0D"}) diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/InterpolateTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/InterpolateTest.java index bc76114a3..71ca3568c 100644 --- a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/InterpolateTest.java +++ b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/InterpolateTest.java @@ -1,11 +1,32 @@ package io.nosqlbench.virtdata.library.basics.shared.from_long.to_long; +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; +import org.assertj.core.data.Offset; import org.junit.jupiter.api.Test; import static org.assertj.core.api.Assertions.assertThat; public class InterpolateTest { + @Test + public void testRanging() { + io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate interpolate = + new io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate (0.0d, 1.0d); + Hash hf = new Hash(); + DescriptiveStatistics dss = new DescriptiveStatistics(); + long count=10000000; + for (long i = 0; i < count; i++) { + long input = (long) (Long.MAX_VALUE * ((double)i/(double)count)); + long prn = hf.applyAsLong(input); + double v = interpolate.applyAsDouble(prn); + dss.addValue(v); + } + assertThat(dss.getPercentile(0.000001)).isCloseTo(0.0, 
Offset.offset(0.01)); + assertThat(dss.getPercentile(99.99999)).isCloseTo(1.0, Offset.offset(0.01)); + } + + + @Test public void testDeciles() { long topvalue = 1_000_000_000L; @@ -26,6 +47,7 @@ public class InterpolateTest { long highvalue = (long) (Long.MAX_VALUE * 0.98d); long high = f.applyAsLong(highvalue); assertThat(high).isEqualTo(expected); + System.out.println(" -> was " + high); long highervalue = (long) (Long.MAX_VALUE * 0.9999d); long higher = f.applyAsLong(highervalue); @@ -35,4 +57,4 @@ public class InterpolateTest { assertThat(max).isEqualTo(1000000000L); } -} \ No newline at end of file +} diff --git a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/common/InterpolatingIntDoubleSampler.java b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/common/InterpolatingIntDoubleSampler.java index d4999853e..c2b21620b 100644 --- a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/common/InterpolatingIntDoubleSampler.java +++ b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/common/InterpolatingIntDoubleSampler.java @@ -1,6 +1,6 @@ package io.nosqlbench.virtdata.library.curves4.continuous.common; -import io.nosqlbench.virtdata.library.curves4.discrete.common.ThreadSafeHash; +import io.nosqlbench.virtdata.library.basics.shared.unary_int.Hash; import java.util.Arrays; import java.util.function.DoubleUnaryOperator; @@ -10,11 +10,11 @@ public class InterpolatingIntDoubleSampler implements IntToDoubleFunction{ private final double[] lut; private final DoubleUnaryOperator f; - private final int resolution; private final boolean clamp; private final double clampMin; private final double clampMax; - private ThreadSafeHash hash; + private final double scaleToIntRanged; + private Hash hash; public InterpolatingIntDoubleSampler(DoubleUnaryOperator icdSource, int resolution, boolean hash, boolean clamp, double clampMin, double 
clampMax, boolean finite) { this.f = icdSource; @@ -22,47 +22,43 @@ public class InterpolatingIntDoubleSampler implements IntToDoubleFunction{ this.clampMin = clampMin; this.clampMax = clampMax; if (hash) { - this.hash = new ThreadSafeHash(); + this.hash = new Hash(); } - double[] lut = precompute(resolution); + double[] computed = precompute(resolution); if (finite) { - while (lut.length>0 && Double.isInfinite(lut[0])) { - lut = Arrays.copyOfRange(lut,1,lut.length-1); + while (computed.length>0 && Double.isInfinite(computed[0])) { + computed = Arrays.copyOfRange(computed,1,computed.length-1); } - while (lut.length>0 && Double.isInfinite(lut[lut.length-1])) { - lut = Arrays.copyOfRange(lut,0,lut.length-2); + while (computed.length>0 && Double.isInfinite(computed[computed.length-1])) { + computed = Arrays.copyOfRange(computed,0,computed.length-2); } } - this.lut = lut; - this.resolution=lut.length-1; + double[] padded = new double[computed.length+1]; + System.arraycopy(computed,0,padded,0,computed.length); + this.scaleToIntRanged = (1.0d/(double)Integer.MAX_VALUE) * ((padded.length-2)); + this.lut = padded; } private double[] precompute(int resolution) { - double[] precomputed = new double[resolution+1]; - for (int s = 0; s <= resolution; s++) { // not a ranging error + double[] precomputed = new double[resolution]; + for (int s = 0; s < resolution; s++) { // not a ranging error double rangedToUnit = (double) s / (double) resolution; - double sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,f.applyAsDouble(rangedToUnit))) : f.applyAsDouble(rangedToUnit); + double sampleValue = f.applyAsDouble(rangedToUnit); + sampleValue = clamp ? 
Double.max(clampMin,Double.min(clampMax,sampleValue)) : sampleValue; precomputed[s] = sampleValue; } - precomputed[precomputed.length-1]=precomputed[precomputed.length-2]; // only for right of max, when S==Max in the rare case return precomputed; } @Override public double applyAsDouble(int input) { - long value = input; if (hash!=null) { - value = hash.applyAsLong(value); + input = hash.applyAsInt(input); } - double unit = (double) value / (double) Long.MAX_VALUE; - double samplePoint = unit * resolution; - int leftidx = (int) samplePoint; - double leftPartial = samplePoint - leftidx; - - double leftComponent=(lut[leftidx] * (1.0-leftPartial)); - double rightComponent = (lut[leftidx+1] * leftPartial); - - double sample = leftComponent + rightComponent; + double samplePoint = scaleToIntRanged * input; + int leftidx = (int)samplePoint; + double fractional = samplePoint - leftidx; + double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional); return sample; } } diff --git a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/common/InterpolatingLongDoubleSampler.java b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/common/InterpolatingLongDoubleSampler.java index 060a9c762..3ca84c1b3 100644 --- a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/common/InterpolatingLongDoubleSampler.java +++ b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/common/InterpolatingLongDoubleSampler.java @@ -6,14 +6,30 @@ import java.util.Arrays; import java.util.function.DoubleUnaryOperator; import java.util.function.LongToDoubleFunction; +/** + * See {@link io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate} for + * details on implementation. 
+ * + * For the 6 implementations of interpolating samplers which use inverse cumulative distribution tables, + * care should be given to the following: + * > + */ public class InterpolatingLongDoubleSampler implements LongToDoubleFunction { + private static final double MAX_LONG_AS_DOUBLE = Long.MAX_VALUE; + private final double[] lut; private final DoubleUnaryOperator f; - private final int resolution; private final boolean clamp; private final double clampMin; private final double clampMax; + private final double scaleToLong; private ThreadSafeHash hash; public InterpolatingLongDoubleSampler(DoubleUnaryOperator icdSource, int resolution, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) { @@ -24,44 +40,41 @@ public class InterpolatingLongDoubleSampler implements LongToDoubleFunction { this.clamp=clamp; this.clampMin=clampMin; this.clampMax=clampMax; - double[] lut = precompute(resolution); + double[] computed = precompute(resolution); if (finite) { - while (lut.length>0 && Double.isInfinite(lut[0])) { - lut = Arrays.copyOfRange(lut,1,lut.length-1); + while (computed.length>0 && Double.isInfinite(computed[0])) { + computed = Arrays.copyOfRange(computed,1,computed.length-1); } - while (lut.length>0 && Double.isInfinite(lut[lut.length-1])) { - lut = Arrays.copyOfRange(lut,0,lut.length-2); + while (computed.length>0 && Double.isInfinite(computed[computed.length-1])) { + computed = Arrays.copyOfRange(computed,0,computed.length-2); } } - this.lut = lut; - this.resolution = lut.length-1; + double[] padded = new double[computed.length+1]; + System.arraycopy(computed,0,padded,0,computed.length); + this.scaleToLong = (1.0d / (double) Long.MAX_VALUE) * (padded.length-2); + this.lut = padded; } private double[] precompute(int resolution) { - double[] precomputed = new double[resolution+1]; - for (int s = 0; s <= resolution; s++) { // not a ranging error + double[] precomputed = new double[resolution]; + for (int s = 0; s < resolution; s++) { // 
not a ranging error double rangedToUnit = (double) s / (double) resolution; - double sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,f.applyAsDouble(rangedToUnit))) : f.applyAsDouble(rangedToUnit); + double sampleValue = f.applyAsDouble(rangedToUnit); + sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,sampleValue)) : sampleValue ; precomputed[s] = sampleValue; } - precomputed[precomputed.length-1]=precomputed[precomputed.length-2]; // only for right of max, when S==Max in the rare case return precomputed; } @Override - public double applyAsDouble(long value) { + public double applyAsDouble(long input) { if (hash!=null) { - value = hash.applyAsLong(value); + input = hash.applyAsLong(input); } - double unit = (double) value / (double) Long.MAX_VALUE; - double samplePoint = unit * resolution; - int leftidx = (int) samplePoint; - double leftPartial = samplePoint - leftidx; - - double leftComponent=(lut[leftidx] * (1.0-leftPartial)); - double rightComponent = (lut[leftidx+1] * leftPartial); - - double sample = leftComponent + rightComponent; + double samplePoint = scaleToLong * input; + int leftidx = (int)samplePoint; + double fractional = samplePoint - leftidx; + double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional); return sample; } } diff --git a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/common/RealIntDoubleSampler.java b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/common/RealIntDoubleSampler.java index 8dfd49c85..23f886824 100644 --- a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/common/RealIntDoubleSampler.java +++ b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/common/RealIntDoubleSampler.java @@ -10,15 +10,17 @@ public class RealIntDoubleSampler implements IntToDoubleFunction { private final DoubleUnaryOperator f; private final boolean clamp; 
private final double clampMax; + private final double clampMin; private ThreadSafeHash hash; - public RealIntDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMax) { + public RealIntDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) { this.f = parentFunc; if (hash) { this.hash = new ThreadSafeHash(); } - this.clamp = clamp; - this.clampMax = clampMax; + this.clamp = clamp | finite; + this.clampMin = Double.max(clampMin,Double.MIN_VALUE); + this.clampMax = Double.min(clampMax,Double.MAX_VALUE); } @Override @@ -28,7 +30,7 @@ public class RealIntDoubleSampler implements IntToDoubleFunction { value = hash.applyAsLong(value); } double unit = (double) value / (double) Long.MAX_VALUE; - double sample =clamp ? Double.min(clampMax,f.applyAsDouble(unit)) : f.applyAsDouble(unit); + double sample =clamp ? Double.max(Double.min(clampMax,f.applyAsDouble(unit)),clampMin): f.applyAsDouble(unit); return sample; } } diff --git a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/common/RealLongDoubleSampler.java b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/common/RealLongDoubleSampler.java index e7ee0088d..7614d526f 100644 --- a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/common/RealLongDoubleSampler.java +++ b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/common/RealLongDoubleSampler.java @@ -10,24 +10,26 @@ public class RealLongDoubleSampler implements LongToDoubleFunction { private final DoubleUnaryOperator f; private final boolean clamp; private final double clampMax; + private final double clampMin; private ThreadSafeHash hash; - public RealLongDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMax) { + public RealLongDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, 
boolean clamp, double clampMin, double clampMax, boolean finite) { this.f = parentFunc; if (hash) { this.hash = new ThreadSafeHash(); } - this.clamp = clamp; - this.clampMax=clampMax; + this.clamp = clamp | finite; + this.clampMin = Double.max(clampMin,Double.MIN_VALUE); + this.clampMax = Double.min(clampMax,Double.MAX_VALUE); } @Override public double applyAsDouble(long value) { - if (hash!=null) { + if (hash != null) { value = hash.applyAsLong(value); } double unit = (double) value / (double) Long.MAX_VALUE; - double sample =clamp ? Double.min(clampMax,f.applyAsDouble(unit)) : f.applyAsDouble(unit); + double sample = clamp ? Double.max(clampMin, Double.min(clampMax, f.applyAsDouble(unit))) : f.applyAsDouble(unit); return sample; } } diff --git a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/int_double/IntToDoubleContinuousCurve.java b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/int_double/IntToDoubleContinuousCurve.java index 1d72983aa..fef6fc318 100644 --- a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/int_double/IntToDoubleContinuousCurve.java +++ b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/int_double/IntToDoubleContinuousCurve.java @@ -109,7 +109,6 @@ public class IntToDoubleContinuousCurve implements IntToDoubleFunction { throw new RuntimeException("mods must not contain both "+ INFINITE +" and "+FINITE+"."); } - for (String s : modslist) { if (!validModifiers.contains(s)) { throw new RuntimeException("modifier '" + s + "' is not a valid modifier. Use one of " + validModifiers + " instead."); @@ -122,9 +121,9 @@ public class IntToDoubleContinuousCurve implements IntToDoubleFunction { boolean finite = ( mods.contains(FINITE) || !mods.contains(INFINITE)); function = interpolate ? 
- new InterpolatingIntDoubleSampler(icdSource, 1000, hash, clamp, Integer.MIN_VALUE, Long.MAX_VALUE, finite) + new InterpolatingIntDoubleSampler(icdSource, 1000, hash, clamp, Integer.MIN_VALUE, Integer.MAX_VALUE, finite) : - new RealIntDoubleSampler(icdSource, hash, clamp, (double) Long.MAX_VALUE); + new RealIntDoubleSampler(icdSource, hash, clamp, Integer.MIN_VALUE, Integer.MAX_VALUE, true); } diff --git a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/long_double/LongToDoubleContinuousCurve.java b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/long_double/LongToDoubleContinuousCurve.java index c66a78078..7f51e3f84 100644 --- a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/long_double/LongToDoubleContinuousCurve.java +++ b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/continuous/long_double/LongToDoubleContinuousCurve.java @@ -124,7 +124,7 @@ public class LongToDoubleContinuousCurve implements LongToDoubleFunction { function = interpolate ? 
new InterpolatingLongDoubleSampler(icdSource, 1000, hash, clamp, Long.MIN_VALUE, Long.MAX_VALUE, finite) : - new RealLongDoubleSampler(icdSource, hash, clamp, (double) Long.MAX_VALUE); + new RealLongDoubleSampler(icdSource, hash, clamp, Long.MIN_VALUE, Long.MAX_VALUE, true); } diff --git a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/common/InterpolatingIntIntSampler.java b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/common/InterpolatingIntIntSampler.java index fba6a6819..7d0cd1eb5 100644 --- a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/common/InterpolatingIntIntSampler.java +++ b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/common/InterpolatingIntIntSampler.java @@ -1,5 +1,7 @@ package io.nosqlbench.virtdata.library.curves4.discrete.common; +import io.nosqlbench.virtdata.library.basics.shared.unary_int.Hash; + import java.util.function.DoubleToIntFunction; import java.util.function.IntUnaryOperator; @@ -7,46 +9,42 @@ public class InterpolatingIntIntSampler implements IntUnaryOperator { private final double[] lut; private final DoubleToIntFunction f; - private int resolution; - private ThreadSafeHash hash; + private Hash hash; + private final double scaleToIntRanged; public InterpolatingIntIntSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) { this.f = icdSource; - this.resolution = resolution; if (hash) { - this.hash = new ThreadSafeHash(); + this.hash = new Hash(); } - this.lut = precompute(); + double[] computed = precompute(resolution); + double[] padded = new double[computed.length+1]; + System.arraycopy(computed,0,padded,0,computed.length); + padded[padded.length-1] = padded[padded.length-2]; + scaleToIntRanged = (1.0d/Integer.MAX_VALUE)*(padded.length-2); + this.lut=padded; + } - private double[] precompute() { - double[] precomputed = new double[resolution+2]; - for (int s = 0; s <= 
resolution; s++) { // not a ranging error + private double[] precompute(int resolution) { + double[] precomputed = new double[resolution]; + for (int s = 0; s < resolution; s++) { // not a ranging error double rangedToUnit = (double) s / (double) resolution; int sampleValue = f.applyAsInt(rangedToUnit); precomputed[s] = sampleValue; } - precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case return precomputed; } @Override public int applyAsInt(int input) { - int value = input; - if (hash!=null) { - value = (int) (hash.applyAsLong(input) % Integer.MAX_VALUE); + input = hash.applyAsInt(input); } - - double unit = (double) value / (double) Integer.MAX_VALUE; - double samplePoint = unit * resolution; - int leftidx = (int) samplePoint; - double leftPartial = samplePoint - leftidx; - - double leftComponent=(lut[leftidx] * (1.0-leftPartial)); - double rightComponent = (lut[leftidx+1] * leftPartial); - - double sample = leftComponent + rightComponent; + double samplePoint = scaleToIntRanged * input; + int leftidx = (int)samplePoint; + double fractional = samplePoint - leftidx; + double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional); return (int) sample; } } diff --git a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/common/InterpolatingIntLongSampler.java b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/common/InterpolatingIntLongSampler.java index b7962e640..b8a87df8f 100644 --- a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/common/InterpolatingIntLongSampler.java +++ b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/common/InterpolatingIntLongSampler.java @@ -1,5 +1,7 @@ package io.nosqlbench.virtdata.library.curves4.discrete.common; +import io.nosqlbench.virtdata.library.basics.shared.unary_int.Hash; + import java.util.function.DoubleToIntFunction; import 
java.util.function.IntToLongFunction; @@ -7,46 +9,40 @@ public class InterpolatingIntLongSampler implements IntToLongFunction { private final double[] lut; private final DoubleToIntFunction f; - private int resolution; - private ThreadSafeHash hash; + private Hash hash; + private final double scaleToIntRanged; public InterpolatingIntLongSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) { this.f = icdSource; - this.resolution = resolution; if (hash) { - this.hash = new ThreadSafeHash(); + this.hash = new Hash(); } - this.lut = precompute(); + double[] computed = precompute(resolution); + double[] padded = new double[computed.length+1]; + System.arraycopy(computed,0,padded,0,computed.length); + this.scaleToIntRanged = (1.0d / Integer.MAX_VALUE) * (padded.length-2); + this.lut=padded; } - private double[] precompute() { - double[] precomputed = new double[resolution+2]; - for (int s = 0; s <= resolution; s++) { // not a ranging error + private double[] precompute(int resolution) { + double[] precomputed = new double[resolution]; + for (int s = 0; s < resolution; s++) { // not a ranging error double rangedToUnit = (double) s / (double) resolution; int sampleValue = f.applyAsInt(rangedToUnit); precomputed[s] = sampleValue; } - precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case return precomputed; } @Override public long applyAsLong(int input) { - int value = input; - if (hash!=null) { - value = (int) (hash.applyAsLong(input) % Integer.MAX_VALUE); + input = hash.applyAsInt(input); } - - double unit = (double) value / (double) Integer.MAX_VALUE; - double samplePoint = unit * resolution; - int leftidx = (int) samplePoint; - double leftPartial = samplePoint - leftidx; - - double leftComponent=(lut[leftidx] * (1.0-leftPartial)); - double rightComponent = (lut[leftidx+1] * leftPartial); - - double sample = leftComponent + rightComponent; + double samplePoint = scaleToIntRanged * input; + int leftidx = 
(int)samplePoint; + double fractional = samplePoint - leftidx; + double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional); return (long) sample; } } diff --git a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/common/InterpolatingLongIntSampler.java b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/common/InterpolatingLongIntSampler.java index e18acefb0..7781085e2 100644 --- a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/common/InterpolatingLongIntSampler.java +++ b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/common/InterpolatingLongIntSampler.java @@ -7,43 +7,41 @@ public class InterpolatingLongIntSampler implements LongToIntFunction { private final double[] lut; private final DoubleToIntFunction f; - private int resolution; private ThreadSafeHash hash; + private final double scaleToLong; public InterpolatingLongIntSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) { this.f = icdSource; - this.resolution = resolution; if (hash) { this.hash = new ThreadSafeHash(); } - this.lut = precompute(); + double[] computed = precompute(resolution); + double[] padded = new double[computed.length+1]; + System.arraycopy(computed,0,padded,0,computed.length); + padded[padded.length-1] = padded[padded.length-2]; + scaleToLong=(1.0d/Long.MAX_VALUE) * (padded.length-2); + this.lut=padded; } - private double[] precompute() { - double[] precomputed = new double[resolution+2]; - for (int s = 0; s <= resolution; s++) { // not a ranging error + private double[] precompute(int resolution) { + double[] precomputed = new double[resolution]; + for (int s = 0; s < resolution; s++) { // not a ranging error double rangedToUnit = (double) s / (double) resolution; int sampleValue = f.applyAsInt(rangedToUnit); precomputed[s] = sampleValue; } - precomputed[precomputed.length-1]=0.0D; // only for right of max, 
when S==Max in the rare case return precomputed; } @Override - public int applyAsInt(long value) { + public int applyAsInt(long input) { if (hash!=null) { - value = hash.applyAsLong(value); + input = hash.applyAsLong(input); } - double unit = (double) value / (double) Long.MAX_VALUE; - double samplePoint = unit * resolution; - int leftidx = (int) samplePoint; - double leftPartial = samplePoint - leftidx; - - double leftComponent=(lut[leftidx] * (1.0-leftPartial)); - double rightComponent = (lut[leftidx+1] * leftPartial); - - double sample = leftComponent + rightComponent; - return (int) sample; + double samplePoint = scaleToLong * input; + int leftidx = (int)samplePoint; + double fractional = samplePoint - leftidx; + double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional); + return (int)sample; } } diff --git a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/common/InterpolatingLongLongSampler.java b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/common/InterpolatingLongLongSampler.java index ac96cd964..a8f76ae7f 100644 --- a/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/common/InterpolatingLongLongSampler.java +++ b/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/common/InterpolatingLongLongSampler.java @@ -7,43 +7,41 @@ public class InterpolatingLongLongSampler implements LongUnaryOperator { private final double[] lut; private final DoubleToIntFunction f; - private int resolution; private ThreadSafeHash hash; + private final double scaleToLong; public InterpolatingLongLongSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) { this.f = icdSource; - this.resolution = resolution; if (hash) { this.hash = new ThreadSafeHash(); } - this.lut = precompute(); + double[] computed = precompute(resolution); + double[] padded = new double[computed.length+1]; + 
System.arraycopy(computed,0,padded,0,computed.length); + padded[padded.length-1] = padded[padded.length-2]; + scaleToLong = (1.0d/Long.MAX_VALUE) * ((double)(padded.length-2)); + this.lut = padded; } - private double[] precompute() { - double[] precomputed = new double[resolution+2]; - for (int s = 0; s <= resolution; s++) { // not a ranging error + private double[] precompute(int resolution) { + double[] precomputed = new double[resolution]; + for (int s = 0; s < resolution; s++) { // not a ranging error double rangedToUnit = (double) s / (double) resolution; int sampleValue = f.applyAsInt(rangedToUnit); precomputed[s] = sampleValue; } - precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case return precomputed; } @Override - public long applyAsLong(long value) { + public long applyAsLong(long input) { if (hash!=null) { - value = hash.applyAsLong(value); + input = hash.applyAsLong(input); } - double unit = (double) value / (double) Long.MAX_VALUE; - double samplePoint = unit * resolution; - int leftidx = (int) samplePoint; - double leftPartial = samplePoint - leftidx; - - double leftComponent=(lut[leftidx] * (1.0-leftPartial)); - double rightComponent = (lut[leftidx+1] * leftPartial); - - double sample = leftComponent + rightComponent; - return (long) sample; + double samplePoint = scaleToLong * input; + int leftidx = (int)samplePoint; + double fractional = samplePoint - leftidx; + double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional); + return (long)sample; } } diff --git a/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/continuous/LevyTest.java b/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/continuous/LevyTest.java index 326872551..62d36072e 100644 --- a/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/continuous/LevyTest.java +++ 
b/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/continuous/LevyTest.java @@ -11,7 +11,7 @@ public class LevyTest { @Test public void testLevy() { Levy levy = new Levy(2.3d, 1.0d); - assertThat(levy.applyAsDouble(10L)).isCloseTo(2.938521849905433, Offset.offset(0.000001d)); + assertThat(levy.applyAsDouble(10L)).isCloseTo(2.9379325000660304, Offset.offset(0.000001d)); } } diff --git a/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/continuous/RealDistributionsValuesTest.java b/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/continuous/RealDistributionsValuesTest.java index 114551d40..7200f009f 100644 --- a/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/continuous/RealDistributionsValuesTest.java +++ b/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/continuous/RealDistributionsValuesTest.java @@ -17,7 +17,7 @@ public class RealDistributionsValuesTest { @Test public void testComputedNormal() { - RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"compute"), 1000000); + RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"compute"), 1000000,1); System.out.println(runData); assertThat(runData.getFractionalPercentile(0.5D)) .isCloseTo(10.0D, Offset.offset(0.01D)); @@ -29,7 +29,7 @@ public class RealDistributionsValuesTest { @Test public void testInterpolatedNormal() { - RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"interpolate"), 1000000); + RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"interpolate"), 1000000,1); System.out.println(runData); assertThat(runData.getFractionalPercentile(0.5D)) .isCloseTo(10.0D, Offset.offset(0.01D)); @@ -41,7 +41,7 @@ public class RealDistributionsValuesTest { @Test public void testComputedUniform() { - RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"compute"), 1000000); + RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"compute"), 1000000,1); 
assertThat(runData.getFractionalPercentile(0.33D)) .isCloseTo(33.33D, Offset.offset(1.0D)); assertThat(runData.getFractionalPercentile(0.5D)) @@ -53,7 +53,7 @@ public class RealDistributionsValuesTest { @Test public void testInterpolatedUniform() { - RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"interpolate"), 1000000); + RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"interpolate"), 1000000,1); assertThat(runData.getFractionalPercentile(0.33D)) .isCloseTo(33.33D, Offset.offset(1.0D)); assertThat(runData.getFractionalPercentile(0.5D)) @@ -66,22 +66,27 @@ public class RealDistributionsValuesTest { @Test public void testInterpolatedMappedUniform() { Uniform mapper = new Uniform(0.0, 100.0, "map", "interpolate"); - RunData runData = iterateMapperDouble(mapper,10000000); + RunData runData = iterateMapperDouble(mapper,10000000,Long.MAX_VALUE/10000000L); + + assertThat(runData.getFractionalPercentile(0.001D)) + .isCloseTo(0.0D, Offset.offset(1.0D)); + assertThat(runData.getFractionalPercentile(0.999D)) - .isCloseTo(0.0D, Offset.offset(1.0D)); + .isCloseTo(099.99D, Offset.offset(1.0D)); assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0D, Offset.offset(1.0D)); + System.out.println(runData); } - private RunData iterateMapperDouble(LongToDoubleFunction mapper, int iterations) { + private RunData iterateMapperDouble(LongToDoubleFunction mapper, int iterations, long funcstep) { assertThat(mapper).isNotNull(); double[] samples = new double[iterations]; long time_generating = System.nanoTime(); for (int i = 0; i < iterations; i++) { - samples[i] = mapper.applyAsDouble(i); + samples[i] = mapper.applyAsDouble(i*funcstep); } long time_generated = System.nanoTime(); diff --git a/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/discrete/IntegerDistributionsConcurrencyTest.java b/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/discrete/IntegerDistributionsConcurrencyTest.java index 
7aaef004f..f8bf56573 100644 --- a/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/discrete/IntegerDistributionsConcurrencyTest.java +++ b/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/discrete/IntegerDistributionsConcurrencyTest.java @@ -46,7 +46,7 @@ public class IntegerDistributionsConcurrencyTest { // threshold test against CDF expected = mapper.get((long) (0.03515d * (double) Long.MAX_VALUE)); assertThat(expected).isEqualTo(1); - expected = mapper.get((long) (0.03600d * (double) Long.MAX_VALUE)); + expected = mapper.get((long) (0.03700d * (double) Long.MAX_VALUE)); assertThat(expected).isEqualTo(2); } diff --git a/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/discrete/IntegerDistributionsValuesTest.java b/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/discrete/IntegerDistributionsValuesTest.java index 5fdac781e..6e3ece037 100644 --- a/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/discrete/IntegerDistributionsValuesTest.java +++ b/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/discrete/IntegerDistributionsValuesTest.java @@ -16,6 +16,7 @@ import static org.assertj.core.api.Assertions.assertThat; public class IntegerDistributionsValuesTest { + @Disabled @Test public void testComputedZipf() { @@ -70,13 +71,19 @@ public class IntegerDistributionsValuesTest { System.out.println(runData); } + @Test + public void testMaximumValue() { + Uniform mapper = new Uniform(0.0d, 100.0d, "interpolate", "map"); + assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0d,Offset.offset(0.1D)); + } + @Test public void testInterpolatedMappedUniform() { Uniform mapper = new Uniform(0.0d, 100.0d, "interpolate", "map"); RunData runData = iterateMapperDouble(mapper,10000000); assertThat(runData.getFractionalPercentile(0.999D)) .isCloseTo(0.0D, Offset.offset(1.0D)); - 
assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0d,Offset.offset(0.0001D)); + assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0d,Offset.offset(0.1D)); } @@ -101,7 +108,11 @@ public class IntegerDistributionsValuesTest { double[] samples = new double[iterations]; long time_generating = System.nanoTime(); + int readout = iterations/10; for (int i = 0; i < iterations; i++) { + if ((i%readout)==0) { + System.out.println("i="+i+"/"+iterations); + } samples[i] = mapper.applyAsDouble(i); } long time_generated = System.nanoTime(); diff --git a/virtdata-userlibs/src/main/resources/docs/category_blurbs/funcref_distributions.md b/virtdata-userlibs/src/main/resources/docs/category_blurbs/funcref_distributions.md index 6476f40ad..a3ed315ed 100644 --- a/virtdata-userlibs/src/main/resources/docs/category_blurbs/funcref_distributions.md +++ b/virtdata-userlibs/src/main/resources/docs/category_blurbs/funcref_distributions.md @@ -69,23 +69,24 @@ little loss in accuracy, but the difference is generally negligible for nearly a #### Infinite or Finite -For interpolated samples, you also have the option of including or excluding infinite values -which may occur in some distributions. If you want to include them, use `infinite`, or `finite` -to explicitly avoid them (the default). Specifying 'infinite' doesn't guarantee that you will -see +Infinity or -Infinity, only that they are allowed. The Normal distribution often contains --Infinity and +Infinity, for example, due to the function used to estimate its cumulative -distribution. These values can often be valuable in finding corner cases which should be treated -uniformly according to [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754). +For interpolated samples from continuous distributions, you also have the option of including or +excluding infinite values which may occur in some distributions. If you want to include them, +use `infinite`, or `finite` to explicitly avoid them (the default). 
Specifying 'infinite' +doesn't guarantee that you will see +Infinity or -Infinity, only that they are allowed. The +Normal distribution often contains -Infinity and +Infinity, for example, due to the function +used to estimate its cumulative distribution. These values can often be valuable in finding +corner cases which should be treated uniformly according to +[IEEE 754](https://en.wikipedia.org/wiki/IEEE_754). #### Clamp or Noclamp -For interpolated samples, you also have the option of clamping the allowed values to the valid -range for the integral data type used as input. To clamp the output values to the range -(Long.MIN_VALUE,Long.MAX_VALUE) for long->double functions, or to (Integer.MIN_VALUE,Integer. -MAX_VALUE) for int-double functions, specify `clamp`, which is also the default. To explicitly -disable this, use `noclamp`. This is useful when you know the downstream functions will only -work with a certain range of values without truncating conversions. When you are using double -values natively on the downstream functions, use `noclamp` to avoid limiting the domain of +For interpolated samples from continuous distributions, you also have the option of clamping the +allowed values to the valid range for the integral data type used as input. To clamp the output +values to the range (Long.MIN_VALUE,Long.MAX_VALUE) for long->double functions, or to +(Integer.MIN_VALUE,Integer.MAX_VALUE) for int->double functions, specify `clamp`, which is also the default. +To explicitly disable this, use `noclamp`. This is useful when you know the downstream functions +will only work with a certain range of values without truncating conversions. When you are using +double values natively on the downstream functions, use `noclamp` to avoid limiting the domain of values in your test data. (In this case, you might also consider `infinite`).
### Computed Samples diff --git a/virtdata-userlibs/src/test/java/io/virtdata/IntegratedCurvesTest.java b/virtdata-userlibs/src/test/java/io/virtdata/IntegratedCurvesTest.java index db6d3af9a..73b07729a 100644 --- a/virtdata-userlibs/src/test/java/io/virtdata/IntegratedCurvesTest.java +++ b/virtdata-userlibs/src/test/java/io/virtdata/IntegratedCurvesTest.java @@ -25,8 +25,8 @@ public class IntegratedCurvesTest { assertThat(runData.getStats().getPercentile(1.0d)).isCloseTo(1.0, Offset.offset(0.01d)); assertThat(runData.getStats().getPercentile(10.0d)).isCloseTo(1.0, Offset.offset(0.01d)); assertThat(runData.getStats().getPercentile(90.0d)).isCloseTo(6.0, Offset.offset(0.01d)); - assertThat(runData.getStats().getPercentile(99.0d)).isCloseTo(61.0, Offset.offset(0.01d)); - assertThat(runData.getStats().getPercentile(99.9d)).isCloseTo(311.0, Offset.offset(0.01d)); + assertThat(runData.getStats().getPercentile(99.0d)).isCloseTo(55.989, Offset.offset(0.01d)); + assertThat(runData.getStats().getPercentile(99.9d)).isCloseTo(202.999, Offset.offset(0.01d)); }