refactor/improve stat samplers for efficiency and correctness

This commit is contained in:
Jonathan Shook 2021-11-18 16:35:20 -06:00
parent d31fbd45fe
commit ff142fd0b2
20 changed files with 336 additions and 205 deletions

View File

@ -0,0 +1,65 @@
This diagram shows the base implementations of all the statistical sampler
wrappers, the types they implement, and the helper functions which are key
to their operation.
```plantuml
digraph samplers {
rankdir=LR;
node[shape=box];
subgraph cluster0 {
label="continuous"
subgraph cluster3 {
label="int->double"
IntToDoubleContinuousCurve[shape=box]
IntToDoubleContinuousCurve -> IntToDoubleFunction[style=dashed]
IntToDoubleContinuousCurve -> InterpolatingIntDoubleSampler
IntToDoubleContinuousCurve -> RealIntDoubleSampler
}
subgraph cluster4 {
label="long->double"
LongToDoubleContinuousCurve[shape=box]
LongToDoubleContinuousCurve -> LongToDoubleFunction[style=dashed]
LongToDoubleContinuousCurve -> InterpolatingLongDoubleSampler
LongToDoubleContinuousCurve -> RealLongDoubleSampler
}
}
subgraph cluster1 {
label="discrete"
subgraph cluster5 {
label="int->int"
IntToIntDiscreteCurve[shape=box]
IntToIntDiscreteCurve -> IntUnaryOperator[style=dashed]
IntToIntDiscreteCurve -> InterpolatingIntIntSampler
IntToIntDiscreteCurve -> DiscreteIntIntSampler
}
subgraph cluster6 {
label="int->long"
IntToLongDiscreteCurve[shape=box]
IntToLongDiscreteCurve -> IntToLongFunction[style=dashed]
IntToLongDiscreteCurve -> InterpolatingIntLongSampler
IntToLongDiscreteCurve -> DiscreteIntLongSampler
}
subgraph cluster7 {
label="long->int"
LongToIntDiscreteCurve[shape=box]
LongToIntDiscreteCurve -> LongToIntFunction[style=dashed]
LongToIntDiscreteCurve -> InterpolatingLongIntSampler
LongToIntDiscreteCurve -> DiscreteLongIntSampler
}
subgraph cluster8 {
label="long->long"
LongToLongDiscreteCurve[shape=box]
LongToLongDiscreteCurve -> LongUnaryOperator[style=dashed]
LongToLongDiscreteCurve -> InterpolatingLongLongSampler
LongToLongDiscreteCurve -> DiscreteLongLongSampler
}
}
}
```

View File

@ -8,36 +8,76 @@ import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.LongToDoubleFunction;
/**
* Return a value along an interpolation curve. This allows you to sketch a basic
* density curve and describe it simply with just a few values. The number of values
* provided determines the resolution of the internal lookup table that is used for
* interpolation. The first value is always the 0.0 anchoring point on the unit interval.
* The last value is always the 1.0 anchoring point on the unit interval. This means
* that in order to subdivide the density curve in an interesting way, you need to provide
* a few more values in between them. Providing only two values yields a uniform
* sample between a minimum and a maximum value.
*
* The input range of this function, like that of many other functions in this library, is
* the valid range of non-negative long values, between 0L and Long.MAX_VALUE inclusive.
* This means that if you want to combine interpolation on this curve with the effect of
* pseudo-random sampling, you need to put a hash function ahead of it in the flow.
*
* Developer Note: This is the canonical implementation of LERPing in NoSQLBench, so it is
* heavily documented. Any other LERP implementation should borrow directly from this one,
* embedding the same logic by default.
*/
@ThreadSafeMapper
@Categories({Category.general})
public class Interpolate implements LongToDoubleFunction {
private final double scale;
// How many values we have to pick from
private final double resolution;
// The lookup table
private final double[] lut;
private final static double maxLongAsDouble = (double) Long.MAX_VALUE;
/**
* The scale factor for Long.MAX_VALUE and the unit interval scale factor are pre-combined
* here to reduce the number of operations later.
*
* The LUT size is retained as the number of elements provided (resolution) + 1.
* The +1 element serves as the N+1 index for when the unit interval sample is
* 1.0. In other words, the maximum value is not a special case, as a duplicate
* value is appended to the LUT instead.
*
* The scale factor from the unit interval to the array index is (resolution - 1). Since
* the input comes in as a long value, it is mapped from [0L, Long.MAX_VALUE] to
* [0.0D, 1.0D] by multiplying by (1.0/(double)Long.MAX_VALUE). Both factors are folded
* into a single multiplier, so the long input value can be multiplied directly to yield
* a double in the range [0,LUT.length-2], which simplifies all remaining LERP math.
*
*/
private final double scaleToLongInterval;
@Example({"Interpolate(0.0d,100.0d)","return a uniform double value between 0.0d and 100.0d"})
@Example({"Interpolate(0.0d,90.0d,95.0d,98.0d,100.0d)","return a weighted double value where the first second and third quartiles are 90.0D, 95.0D, and 98.0D"})
@Example({"Interpolate(0.0d,100.0d)", "return a uniform double value between 0.0d and 100.0d"})
@Example({"Interpolate(0.0d,90.0d,95.0d,98.0d,100.0d)", "return a weighted double value where the first second and third quartiles are 90.0D, 95.0D, and 98.0D"})
public Interpolate(double... values) {
double[] doubles = new double[values.length+1];
for (int i = 0; i < values.length; i++) { // not a ranging error
doubles[i]=values[i];
}
doubles[doubles.length-1]=doubles[doubles.length-2];
this.scale=values.length-1;
this.resolution = values.length;
double[] doubles = new double[values.length + 1];
System.arraycopy(values,0,doubles,0,values.length);
doubles[doubles.length - 1] = doubles[doubles.length - 2];
this.lut = doubles;
this.scaleToLongInterval = (this.resolution - 1) * (1.0d / (double) Long.MAX_VALUE);
}
@Override
public double applyAsDouble(long input) {
long value = input;
double samplePoint = ((double)input / maxLongAsDouble) * scale;
int leftidx = (int)samplePoint;
double fractional = samplePoint - (long)samplePoint;
double leftComponent = lut[leftidx]* (1.0d-fractional);
double rightComponent = lut[leftidx+1] * fractional;
double sample = (leftComponent + rightComponent);
// scale the input from [0,Long.MAX_VALUE] to [0.0,lut.length-2]
double samplePoint = scaleToLongInterval * input;
// truncate the sample point to the left index
int leftidx = (int) samplePoint;
// isolate the fractional component
double fractional = samplePoint - leftidx;
// combine the left and right components, each weighted by the
// sample point's proximity to its side of the interval
double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx + 1] * fractional);
return sample;
}
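
As the javadoc above notes, pseudo-random sampling requires a hash stage ahead of the interpolation. A minimal sketch of that flow, using the library's Hash and Interpolate functions (the surrounding harness class is illustrative, not part of this change):

```java
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate;
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_long.Hash;

public class InterpolateFlowSketch {
    public static void main(String[] args) {
        // anchor points at u=0.0, 0.25, 0.5, 0.75, 1.0: quartiles at 90.0, 95.0, 98.0
        Interpolate curve = new Interpolate(0.0d, 90.0d, 95.0d, 98.0d, 100.0d);
        Hash hash = new Hash();
        for (long i = 0; i < 5; i++) {
            // hash first, so sequential inputs become pseudo-random sample points
            double sample = curve.applyAsDouble(hash.applyAsLong(i));
            System.out.println(i + " -> " + sample);
        }
    }
}
```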

View File

@ -6,25 +6,10 @@ import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.LongUnaryOperator;
/**
* Return a value along an interpolation curve. This allows you to sketch a basic
* density curve and describe it simply with just a few values. The number of values
* provided determines the resolution of the internal lookup table that is used for
* interpolation. The first value is always the 0.0 anchoring point on the unit interval.
* The last value is always the 1.0 anchoring point on the unit interval. This means
* that in order to subdivide the density curve in an interesting way, you need to provide
* a few more values in between them. Providing two values simply provides a uniform
* sample between a minimum and maximum value.
*
* The input range of this function is, as many of the other functions in this library,
* based on the valid range of positive long values, between 0L and Long.MAX_VALUE inclusive.
* This means that if you want to combine interpolation on this curve with the effect of
* pseudo-random sampling, you need to put a hash function ahead of it in the flow.
*/
@ThreadSafeMapper
public class Interpolate implements LongUnaryOperator {
private io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate basefunc;
private final io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate basefunc;
@Example({"Interpolate(0.0d,100.0d)","return a uniform long value between 0L and 100L"})
@Example({"Interpolate(0.0d,90.0d,95.0d,98.0d,100.0d)","return a weighted long value where the first second and third quartiles are 90.0D, 95.0D, and 98.0D"})

View File

@ -1,11 +1,32 @@
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_long;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.assertj.core.data.Offset;
import org.junit.jupiter.api.Test;
import static org.assertj.core.api.Assertions.assertThat;
public class InterpolateTest {
@Test
public void testRanging() {
io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate interpolate =
new io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate (0.0d, 1.0d);
Hash hf = new Hash();
DescriptiveStatistics dss = new DescriptiveStatistics();
long count=10000000;
for (long i = 0; i < count; i++) {
long input = (long) (Long.MAX_VALUE * ((double)i/(double)count));
long prn = hf.applyAsLong(input);
double v = interpolate.applyAsDouble(prn);
dss.addValue(v);
}
assertThat(dss.getPercentile(0.000001)).isCloseTo(0.0, Offset.offset(0.01));
assertThat(dss.getPercentile(99.99999)).isCloseTo(1.0, Offset.offset(0.01));
}
@Test
public void testDeciles() {
long topvalue = 1_000_000_000L;
@ -26,6 +47,7 @@ public class InterpolateTest {
long highvalue = (long) (Long.MAX_VALUE * 0.98d);
long high = f.applyAsLong(highvalue);
assertThat(high).isEqualTo(expected);
System.out.println(" -> was " + high);
long highervalue = (long) (Long.MAX_VALUE * 0.9999d);
long higher = f.applyAsLong(highervalue);
@ -35,4 +57,4 @@ public class InterpolateTest {
assertThat(max).isEqualTo(1000000000L);
}
}
}

View File

@ -1,6 +1,6 @@
package io.nosqlbench.virtdata.library.curves4.continuous.common;
import io.nosqlbench.virtdata.library.curves4.discrete.common.ThreadSafeHash;
import io.nosqlbench.virtdata.library.basics.shared.unary_int.Hash;
import java.util.Arrays;
import java.util.function.DoubleUnaryOperator;
@ -10,11 +10,11 @@ public class InterpolatingIntDoubleSampler implements IntToDoubleFunction{
private final double[] lut;
private final DoubleUnaryOperator f;
private final int resolution;
private final boolean clamp;
private final double clampMin;
private final double clampMax;
private ThreadSafeHash hash;
private final double scaleToIntRanged;
private Hash hash;
public InterpolatingIntDoubleSampler(DoubleUnaryOperator icdSource, int resolution, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) {
this.f = icdSource;
@ -22,47 +22,43 @@ public class InterpolatingIntDoubleSampler implements IntToDoubleFunction{
this.clampMin = clampMin;
this.clampMax = clampMax;
if (hash) {
this.hash = new ThreadSafeHash();
this.hash = new Hash();
}
double[] lut = precompute(resolution);
double[] computed = precompute(resolution);
if (finite) {
while (lut.length>0 && Double.isInfinite(lut[0])) {
lut = Arrays.copyOfRange(lut,1,lut.length-1);
while (computed.length>0 && Double.isInfinite(computed[0])) {
computed = Arrays.copyOfRange(computed,1,computed.length); // drop only the leading entry
}
while (lut.length>0 && Double.isInfinite(lut[lut.length-1])) {
lut = Arrays.copyOfRange(lut,0,lut.length-2);
while (computed.length>0 && Double.isInfinite(computed[computed.length-1])) {
computed = Arrays.copyOfRange(computed,0,computed.length-1); // drop only the trailing entry
}
}
this.lut = lut;
this.resolution=lut.length-1;
double[] padded = new double[computed.length+1];
System.arraycopy(computed,0,padded,0,computed.length);
padded[padded.length-1] = padded[padded.length-2]; // duplicate the top entry for the u=1.0 slot
this.scaleToIntRanged = (1.0d/(double)Integer.MAX_VALUE) * (padded.length-2);
this.lut = padded;
}
private double[] precompute(int resolution) {
double[] precomputed = new double[resolution+1];
for (int s = 0; s <= resolution; s++) { // not a ranging error
double[] precomputed = new double[resolution];
for (int s = 0; s < resolution; s++) { // not a ranging error
double rangedToUnit = (double) s / (double) resolution;
double sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,f.applyAsDouble(rangedToUnit))) : f.applyAsDouble(rangedToUnit);
double sampleValue = f.applyAsDouble(rangedToUnit);
sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,sampleValue)) : sampleValue;
precomputed[s] = sampleValue;
}
precomputed[precomputed.length-1]=precomputed[precomputed.length-2]; // only for right of max, when S==Max in the rare case
return precomputed;
}
@Override
public double applyAsDouble(int input) {
long value = input;
if (hash!=null) {
value = hash.applyAsLong(value);
input = hash.applyAsInt(input);
}
double unit = (double) value / (double) Long.MAX_VALUE;
double samplePoint = unit * resolution;
int leftidx = (int) samplePoint;
double leftPartial = samplePoint - leftidx;
double leftComponent=(lut[leftidx] * (1.0-leftPartial));
double rightComponent = (lut[leftidx+1] * leftPartial);
double sample = leftComponent + rightComponent;
double samplePoint = scaleToIntRanged * input;
int leftidx = (int)samplePoint;
double fractional = samplePoint - leftidx;
double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional);
return sample;
}
}
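
To make the pre-scaling concrete, a small worked check (standalone; the resolution of 1000 matches the default used by the curve wrappers):

```java
// with resolution=1000 the padded LUT has 1001 entries, so the scale factor
// maps [0, Integer.MAX_VALUE] onto [0.0, 999.0], i.e. [0, lut.length-2]
int resolution = 1000;
int lutLength = resolution + 1;
double scaleToIntRanged = (1.0d / (double) Integer.MAX_VALUE) * (lutLength - 2);
System.out.println(scaleToIntRanged * Integer.MAX_VALUE); // ~999.0, the last interpolable index
```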

View File

@ -6,14 +6,30 @@ import java.util.Arrays;
import java.util.function.DoubleUnaryOperator;
import java.util.function.LongToDoubleFunction;
/**
* See {@link io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate} for
* details on implementation.
*
* For the 6 implementations of interpolating samplers which use inverse cumulative distribution tables,
* care should be given to the following:
* <UL>
* <LI>Input ranging - ensure that the input type is appropriate for the curve; the pre-scaling
* factor needs to be matched to the input type</LI>
* <LI>Consistency among resolution, scale factor, and LUT length</LI>
* <LI>+1 LUT padding for U=1.0</LI>
* <LI>Uniform LERP code in the main sampling function</LI>
* </UL>
*/
public class InterpolatingLongDoubleSampler implements LongToDoubleFunction {
private static final double MAX_LONG_AS_DOUBLE = Long.MAX_VALUE;
private final double[] lut;
private final DoubleUnaryOperator f;
private final int resolution;
private final boolean clamp;
private final double clampMin;
private final double clampMax;
private final double scaleToLong;
private ThreadSafeHash hash;
public InterpolatingLongDoubleSampler(DoubleUnaryOperator icdSource, int resolution, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) {
@ -24,44 +40,41 @@ public class InterpolatingLongDoubleSampler implements LongToDoubleFunction {
this.clamp=clamp;
this.clampMin=clampMin;
this.clampMax=clampMax;
double[] lut = precompute(resolution);
double[] computed = precompute(resolution);
if (finite) {
while (lut.length>0 && Double.isInfinite(lut[0])) {
lut = Arrays.copyOfRange(lut,1,lut.length-1);
while (computed.length>0 && Double.isInfinite(computed[0])) {
computed = Arrays.copyOfRange(computed,1,computed.length); // drop only the leading entry
}
while (lut.length>0 && Double.isInfinite(lut[lut.length-1])) {
lut = Arrays.copyOfRange(lut,0,lut.length-2);
while (computed.length>0 && Double.isInfinite(computed[computed.length-1])) {
computed = Arrays.copyOfRange(computed,0,computed.length-1); // drop only the trailing entry
}
}
this.lut = lut;
this.resolution = lut.length-1;
double[] padded = new double[computed.length+1];
System.arraycopy(computed,0,padded,0,computed.length);
padded[padded.length-1] = padded[padded.length-2]; // duplicate the top entry for the u=1.0 slot
this.scaleToLong = (1.0d / (double) Long.MAX_VALUE) * (padded.length-2);
this.lut = padded;
}
private double[] precompute(int resolution) {
double[] precomputed = new double[resolution+1];
for (int s = 0; s <= resolution; s++) { // not a ranging error
double[] precomputed = new double[resolution];
for (int s = 0; s < resolution; s++) { // not a ranging error
double rangedToUnit = (double) s / (double) resolution;
double sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,f.applyAsDouble(rangedToUnit))) : f.applyAsDouble(rangedToUnit);
double sampleValue = f.applyAsDouble(rangedToUnit);
sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,sampleValue)) : sampleValue ;
precomputed[s] = sampleValue;
}
precomputed[precomputed.length-1]=precomputed[precomputed.length-2]; // only for right of max, when S==Max in the rare case
return precomputed;
}
@Override
public double applyAsDouble(long value) {
public double applyAsDouble(long input) {
if (hash!=null) {
value = hash.applyAsLong(value);
input = hash.applyAsLong(input);
}
double unit = (double) value / (double) Long.MAX_VALUE;
double samplePoint = unit * resolution;
int leftidx = (int) samplePoint;
double leftPartial = samplePoint - leftidx;
double leftComponent=(lut[leftidx] * (1.0-leftPartial));
double rightComponent = (lut[leftidx+1] * leftPartial);
double sample = leftComponent + rightComponent;
double samplePoint = scaleToLong * input;
int leftidx = (int)samplePoint;
double fractional = samplePoint - leftidx;
double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional);
return sample;
}
}
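
The checklist in the class comment above reduces to a small shared recipe. A condensed standalone sketch (hypothetical names, not part of the library):

```java
import java.util.function.DoubleUnaryOperator;

// sketch of the shared interpolating-sampler recipe: precompute an inverse-CDF LUT,
// pad one duplicate slot for the unit-interval value 1.0, and pre-combine the
// long-to-index scale factor so sampling is a single multiply plus one LERP
class LerpRecipe {
    final double[] lut;        // resolution entries plus one padding slot
    final double scaleToLong;  // maps [0, Long.MAX_VALUE] onto [0, lut.length-2]

    LerpRecipe(DoubleUnaryOperator icd, int resolution) {
        double[] padded = new double[resolution + 1];
        for (int s = 0; s < resolution; s++) {
            padded[s] = icd.applyAsDouble((double) s / (double) resolution);
        }
        padded[padded.length - 1] = padded[padded.length - 2]; // N+1 slot for u=1.0
        this.lut = padded;
        this.scaleToLong = (1.0d / (double) Long.MAX_VALUE) * (padded.length - 2);
    }

    double sample(long input) {
        double samplePoint = scaleToLong * input;
        int leftidx = (int) samplePoint;
        double fractional = samplePoint - leftidx;
        return lut[leftidx] * (1.0d - fractional) + lut[leftidx + 1] * fractional;
    }
}

// usage: new LerpRecipe(u -> u * 100.0d, 1000).sample(someHashedLong)
```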

View File

@ -10,15 +10,17 @@ public class RealIntDoubleSampler implements IntToDoubleFunction {
private final DoubleUnaryOperator f;
private final boolean clamp;
private final double clampMax;
private final double clampMin;
private ThreadSafeHash hash;
public RealIntDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMax) {
public RealIntDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) {
this.f = parentFunc;
if (hash) {
this.hash = new ThreadSafeHash();
}
this.clamp = clamp;
this.clampMax = clampMax;
this.clamp = clamp | finite;
this.clampMin = Double.max(clampMin,-Double.MAX_VALUE); // -Double.MAX_VALUE is the lowest finite double
this.clampMax = Double.min(clampMax,Double.MAX_VALUE);
}
@Override
@ -28,7 +30,7 @@ public class RealIntDoubleSampler implements IntToDoubleFunction {
value = hash.applyAsLong(value);
}
double unit = (double) value / (double) Long.MAX_VALUE;
double sample =clamp ? Double.min(clampMax,f.applyAsDouble(unit)) : f.applyAsDouble(unit);
double sample = clamp ? Double.max(Double.min(clampMax, f.applyAsDouble(unit)), clampMin) : f.applyAsDouble(unit);
return sample;
}
}

View File

@ -10,24 +10,26 @@ public class RealLongDoubleSampler implements LongToDoubleFunction {
private final DoubleUnaryOperator f;
private final boolean clamp;
private final double clampMax;
private final double clampMin;
private ThreadSafeHash hash;
public RealLongDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMax) {
public RealLongDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) {
this.f = parentFunc;
if (hash) {
this.hash = new ThreadSafeHash();
}
this.clamp = clamp;
this.clampMax=clampMax;
this.clamp = clamp | finite;
this.clampMin = Double.max(clampMin,-Double.MAX_VALUE); // -Double.MAX_VALUE is the lowest finite double
this.clampMax = Double.min(clampMax,Double.MAX_VALUE);
}
@Override
public double applyAsDouble(long value) {
if (hash!=null) {
if (hash != null) {
value = hash.applyAsLong(value);
}
double unit = (double) value / (double) Long.MAX_VALUE;
double sample =clamp ? Double.min(clampMax,f.applyAsDouble(unit)) : f.applyAsDouble(unit);
double sample = clamp ? Double.max(clampMin, Double.min(clampMax, f.applyAsDouble(unit))) : f.applyAsDouble(unit);
return sample;
}
}
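
One subtlety in the clamp bounds above: in Java, `Double.MIN_VALUE` is the smallest positive double, not the most negative finite value, so flooring the lower bound requires `-Double.MAX_VALUE`. A quick check:

```java
System.out.println(Double.MIN_VALUE);   // 4.9E-324, the smallest positive double
System.out.println(-Double.MAX_VALUE);  // -1.7976931348623157E308, the lowest finite double
```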

View File

@ -109,7 +109,6 @@ public class IntToDoubleContinuousCurve implements IntToDoubleFunction {
throw new RuntimeException("mods must not contain both "+ INFINITE +" and "+FINITE+".");
}
for (String s : modslist) {
if (!validModifiers.contains(s)) {
throw new RuntimeException("modifier '" + s + "' is not a valid modifier. Use one of " + validModifiers + " instead.");
@ -122,9 +121,9 @@ public class IntToDoubleContinuousCurve implements IntToDoubleFunction {
boolean finite = ( mods.contains(FINITE) || !mods.contains(INFINITE));
function = interpolate ?
new InterpolatingIntDoubleSampler(icdSource, 1000, hash, clamp, Integer.MIN_VALUE, Long.MAX_VALUE, finite)
new InterpolatingIntDoubleSampler(icdSource, 1000, hash, clamp, Integer.MIN_VALUE, Integer.MAX_VALUE, finite)
:
new RealIntDoubleSampler(icdSource, hash, clamp, (double) Long.MAX_VALUE);
new RealIntDoubleSampler(icdSource, hash, clamp, Integer.MIN_VALUE, Integer.MAX_VALUE, true);
}

View File

@ -124,7 +124,7 @@ public class LongToDoubleContinuousCurve implements LongToDoubleFunction {
function = interpolate ?
new InterpolatingLongDoubleSampler(icdSource, 1000, hash, clamp, Long.MIN_VALUE, Long.MAX_VALUE, finite)
:
new RealLongDoubleSampler(icdSource, hash, clamp, (double) Long.MAX_VALUE);
new RealLongDoubleSampler(icdSource, hash, clamp, Long.MIN_VALUE, Long.MAX_VALUE, true);
}

View File

@ -1,5 +1,7 @@
package io.nosqlbench.virtdata.library.curves4.discrete.common;
import io.nosqlbench.virtdata.library.basics.shared.unary_int.Hash;
import java.util.function.DoubleToIntFunction;
import java.util.function.IntUnaryOperator;
@ -7,46 +9,42 @@ public class InterpolatingIntIntSampler implements IntUnaryOperator {
private final double[] lut;
private final DoubleToIntFunction f;
private int resolution;
private ThreadSafeHash hash;
private Hash hash;
private final double scaleToIntRanged;
public InterpolatingIntIntSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) {
this.f = icdSource;
this.resolution = resolution;
if (hash) {
this.hash = new ThreadSafeHash();
this.hash = new Hash();
}
this.lut = precompute();
double[] computed = precompute(resolution);
double[] padded = new double[computed.length+1];
System.arraycopy(computed,0,padded,0,computed.length);
padded[padded.length-1] = padded[padded.length-2];
scaleToIntRanged = (1.0d/Integer.MAX_VALUE)*(padded.length-2);
this.lut=padded;
}
private double[] precompute() {
double[] precomputed = new double[resolution+2];
for (int s = 0; s <= resolution; s++) { // not a ranging error
private double[] precompute(int resolution) {
double[] precomputed = new double[resolution];
for (int s = 0; s < resolution; s++) { // not a ranging error
double rangedToUnit = (double) s / (double) resolution;
int sampleValue = f.applyAsInt(rangedToUnit);
precomputed[s] = sampleValue;
}
precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case
return precomputed;
}
@Override
public int applyAsInt(int input) {
int value = input;
if (hash!=null) {
value = (int) (hash.applyAsLong(input) % Integer.MAX_VALUE);
input = hash.applyAsInt(input);
}
double unit = (double) value / (double) Integer.MAX_VALUE;
double samplePoint = unit * resolution;
int leftidx = (int) samplePoint;
double leftPartial = samplePoint - leftidx;
double leftComponent=(lut[leftidx] * (1.0-leftPartial));
double rightComponent = (lut[leftidx+1] * leftPartial);
double sample = leftComponent + rightComponent;
double samplePoint = scaleToIntRanged * input;
int leftidx = (int)samplePoint;
double fractional = samplePoint - leftidx;
double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional);
return (int) sample;
}
}

View File

@ -1,5 +1,7 @@
package io.nosqlbench.virtdata.library.curves4.discrete.common;
import io.nosqlbench.virtdata.library.basics.shared.unary_int.Hash;
import java.util.function.DoubleToIntFunction;
import java.util.function.IntToLongFunction;
@ -7,46 +9,40 @@ public class InterpolatingIntLongSampler implements IntToLongFunction {
private final double[] lut;
private final DoubleToIntFunction f;
private int resolution;
private ThreadSafeHash hash;
private Hash hash;
private final double scaleToIntRanged;
public InterpolatingIntLongSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) {
this.f = icdSource;
this.resolution = resolution;
if (hash) {
this.hash = new ThreadSafeHash();
this.hash = new Hash();
}
this.lut = precompute();
double[] computed = precompute(resolution);
double[] padded = new double[computed.length+1];
System.arraycopy(computed,0,padded,0,computed.length);
padded[padded.length-1] = padded[padded.length-2]; // duplicate the top entry for the u=1.0 slot
this.scaleToIntRanged = (1.0d / Integer.MAX_VALUE) * (padded.length-2);
this.lut=padded;
}
private double[] precompute() {
double[] precomputed = new double[resolution+2];
for (int s = 0; s <= resolution; s++) { // not a ranging error
private double[] precompute(int resolution) {
double[] precomputed = new double[resolution];
for (int s = 0; s < resolution; s++) { // not a ranging error
double rangedToUnit = (double) s / (double) resolution;
int sampleValue = f.applyAsInt(rangedToUnit);
precomputed[s] = sampleValue;
}
precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case
return precomputed;
}
@Override
public long applyAsLong(int input) {
int value = input;
if (hash!=null) {
value = (int) (hash.applyAsLong(input) % Integer.MAX_VALUE);
input = hash.applyAsInt(input);
}
double unit = (double) value / (double) Integer.MAX_VALUE;
double samplePoint = unit * resolution;
int leftidx = (int) samplePoint;
double leftPartial = samplePoint - leftidx;
double leftComponent=(lut[leftidx] * (1.0-leftPartial));
double rightComponent = (lut[leftidx+1] * leftPartial);
double sample = leftComponent + rightComponent;
double samplePoint = scaleToIntRanged * input;
int leftidx = (int)samplePoint;
double fractional = samplePoint - leftidx;
double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional);
return (long) sample;
}
}

View File

@ -7,43 +7,41 @@ public class InterpolatingLongIntSampler implements LongToIntFunction {
private final double[] lut;
private final DoubleToIntFunction f;
private int resolution;
private ThreadSafeHash hash;
private final double scaleToLong;
public InterpolatingLongIntSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) {
this.f = icdSource;
this.resolution = resolution;
if (hash) {
this.hash = new ThreadSafeHash();
}
this.lut = precompute();
double[] computed = precompute(resolution);
double[] padded = new double[computed.length+1];
System.arraycopy(computed,0,padded,0,computed.length);
padded[padded.length-1] = padded[padded.length-2];
scaleToLong=(1.0d/Long.MAX_VALUE) * (padded.length-2);
this.lut=padded;
}
private double[] precompute() {
double[] precomputed = new double[resolution+2];
for (int s = 0; s <= resolution; s++) { // not a ranging error
private double[] precompute(int resolution) {
double[] precomputed = new double[resolution];
for (int s = 0; s < resolution; s++) { // not a ranging error
double rangedToUnit = (double) s / (double) resolution;
int sampleValue = f.applyAsInt(rangedToUnit);
precomputed[s] = sampleValue;
}
precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case
return precomputed;
}
@Override
public int applyAsInt(long value) {
public int applyAsInt(long input) {
if (hash!=null) {
value = hash.applyAsLong(value);
input = hash.applyAsLong(input);
}
double unit = (double) value / (double) Long.MAX_VALUE;
double samplePoint = unit * resolution;
int leftidx = (int) samplePoint;
double leftPartial = samplePoint - leftidx;
double leftComponent=(lut[leftidx] * (1.0-leftPartial));
double rightComponent = (lut[leftidx+1] * leftPartial);
double sample = leftComponent + rightComponent;
return (int) sample;
double samplePoint = scaleToLong * input;
int leftidx = (int)samplePoint;
double fractional = samplePoint - leftidx;
double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional);
return (int)sample;
}
}

View File

@ -7,43 +7,41 @@ public class InterpolatingLongLongSampler implements LongUnaryOperator {
private final double[] lut;
private final DoubleToIntFunction f;
private int resolution;
private ThreadSafeHash hash;
private final double scaleToLong;
public InterpolatingLongLongSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) {
this.f = icdSource;
this.resolution = resolution;
if (hash) {
this.hash = new ThreadSafeHash();
}
this.lut = precompute();
double[] computed = precompute(resolution);
double[] padded = new double[computed.length+1];
System.arraycopy(computed,0,padded,0,computed.length);
padded[padded.length-1] = padded[padded.length-2];
scaleToLong = (1.0d/Long.MAX_VALUE) * ((double)(padded.length-2));
this.lut = padded;
}
private double[] precompute() {
double[] precomputed = new double[resolution+2];
for (int s = 0; s <= resolution; s++) { // not a ranging error
private double[] precompute(int resolution) {
double[] precomputed = new double[resolution];
for (int s = 0; s < resolution; s++) { // not a ranging error
double rangedToUnit = (double) s / (double) resolution;
int sampleValue = f.applyAsInt(rangedToUnit);
precomputed[s] = sampleValue;
}
precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case
return precomputed;
}
@Override
public long applyAsLong(long value) {
public long applyAsLong(long input) {
if (hash!=null) {
value = hash.applyAsLong(value);
input = hash.applyAsLong(input);
}
double unit = (double) value / (double) Long.MAX_VALUE;
double samplePoint = unit * resolution;
int leftidx = (int) samplePoint;
double leftPartial = samplePoint - leftidx;
double leftComponent=(lut[leftidx] * (1.0-leftPartial));
double rightComponent = (lut[leftidx+1] * leftPartial);
double sample = leftComponent + rightComponent;
return (long) sample;
double samplePoint = scaleToLong * input;
int leftidx = (int)samplePoint;
double fractional = samplePoint - leftidx;
double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional);
return (long)sample;
}
}

View File

@ -11,7 +11,7 @@ public class LevyTest {
@Test
public void testLevy() {
Levy levy = new Levy(2.3d, 1.0d);
assertThat(levy.applyAsDouble(10L)).isCloseTo(2.938521849905433, Offset.offset(0.000001d));
assertThat(levy.applyAsDouble(10L)).isCloseTo(2.9379325000660304, Offset.offset(0.000001d));
}
}

View File

@ -17,7 +17,7 @@ public class RealDistributionsValuesTest {
@Test
public void testComputedNormal() {
RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"compute"), 1000000);
RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"compute"), 1000000,1);
System.out.println(runData);
assertThat(runData.getFractionalPercentile(0.5D))
.isCloseTo(10.0D, Offset.offset(0.01D));
@ -29,7 +29,7 @@ public class RealDistributionsValuesTest {
@Test
public void testInterpolatedNormal() {
RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"interpolate"), 1000000);
RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"interpolate"), 1000000,1);
System.out.println(runData);
assertThat(runData.getFractionalPercentile(0.5D))
.isCloseTo(10.0D, Offset.offset(0.01D));
@ -41,7 +41,7 @@ public class RealDistributionsValuesTest {
@Test
public void testComputedUniform() {
RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"compute"), 1000000);
RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"compute"), 1000000,1);
assertThat(runData.getFractionalPercentile(0.33D))
.isCloseTo(33.33D, Offset.offset(1.0D));
assertThat(runData.getFractionalPercentile(0.5D))
@ -53,7 +53,7 @@ public class RealDistributionsValuesTest {
@Test
public void testInterpolatedUniform() {
RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"interpolate"), 1000000);
RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"interpolate"), 1000000,1);
assertThat(runData.getFractionalPercentile(0.33D))
.isCloseTo(33.33D, Offset.offset(1.0D));
assertThat(runData.getFractionalPercentile(0.5D))
@ -66,22 +66,27 @@ public class RealDistributionsValuesTest {
@Test
public void testInterpolatedMappedUniform() {
Uniform mapper = new Uniform(0.0, 100.0, "map", "interpolate");
RunData runData = iterateMapperDouble(mapper,10000000);
RunData runData = iterateMapperDouble(mapper,10000000,Long.MAX_VALUE/10000000L);
assertThat(runData.getFractionalPercentile(0.001D))
.isCloseTo(0.0D, Offset.offset(1.0D));
assertThat(runData.getFractionalPercentile(0.999D))
.isCloseTo(0.0D, Offset.offset(1.0D));
.isCloseTo(99.99D, Offset.offset(1.0D));
assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0D, Offset.offset(1.0D));
System.out.println(runData);
}
private RunData iterateMapperDouble(LongToDoubleFunction mapper, int iterations) {
private RunData iterateMapperDouble(LongToDoubleFunction mapper, int iterations, long funcstep) {
assertThat(mapper).isNotNull();
double[] samples = new double[iterations];
long time_generating = System.nanoTime();
for (int i = 0; i < iterations; i++) {
samples[i] = mapper.applyAsDouble(i);
samples[i] = mapper.applyAsDouble(i*funcstep);
}
long time_generated = System.nanoTime();

View File

@ -46,7 +46,7 @@ public class IntegerDistributionsConcurrencyTest {
// threshold test against CDF
expected = mapper.get((long) (0.03515d * (double) Long.MAX_VALUE));
assertThat(expected).isEqualTo(1);
expected = mapper.get((long) (0.03600d * (double) Long.MAX_VALUE));
expected = mapper.get((long) (0.03700d * (double) Long.MAX_VALUE));
assertThat(expected).isEqualTo(2);
}

View File

@ -16,6 +16,7 @@ import static org.assertj.core.api.Assertions.assertThat;
public class IntegerDistributionsValuesTest {
@Disabled
@Test
public void testComputedZipf() {
@ -70,13 +71,19 @@ public class IntegerDistributionsValuesTest {
System.out.println(runData);
}
@Test
public void testMaximumValue() {
Uniform mapper = new Uniform(0.0d, 100.0d, "interpolate", "map");
assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0d,Offset.offset(0.1D));
}
@Test
public void testInterpolatedMappedUniform() {
Uniform mapper = new Uniform(0.0d, 100.0d, "interpolate", "map");
RunData runData = iterateMapperDouble(mapper,10000000);
assertThat(runData.getFractionalPercentile(0.999D))
.isCloseTo(0.0D, Offset.offset(1.0D));
assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0d,Offset.offset(0.0001D));
assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0d,Offset.offset(0.1D));
}
@ -101,7 +108,11 @@ public class IntegerDistributionsValuesTest {
double[] samples = new double[iterations];
long time_generating = System.nanoTime();
int readout = iterations/10;
for (int i = 0; i < iterations; i++) {
if ((i%readout)==0) {
System.out.println("i="+i+"/"+iterations);
}
samples[i] = mapper.applyAsDouble(i);
}
long time_generated = System.nanoTime();

View File

@ -69,23 +69,24 @@ little loss in accuracy, but the difference is generally negligible for nearly a
#### Infinite or Finite
For interpolated samples, you also have the option of including or excluding infinite values
which may occur in some distributions. If you want to include them, use `infinite`, or `finite`
to explicitly avoid them (the default). Specifying 'infinite' doesn't guarantee that you will
see +Infinity or -Infinity, only that they are allowed. The Normal distribution often contains
-Infinity and +Infinity, for example, due to the function used to estimate its cumulative
distribution. These values can often be valuable in finding corner cases which should be treated
uniformly according to [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754).
For interpolated samples from continuous distributions, you also have the option of including or
excluding infinite values which may occur in some distributions. Use `infinite` to include them,
or `finite` (the default) to explicitly avoid them. Specifying `infinite`
doesn't guarantee that you will see +Infinity or -Infinity, only that they are allowed. The
Normal distribution often contains -Infinity and +Infinity, for example, due to the function
used to estimate its cumulative distribution. These values can often be valuable in finding
corner cases which should be treated uniformly according to
[IEEE 754](https://en.wikipedia.org/wiki/IEEE_754).
#### Clamp or Noclamp
For interpolated samples, you also have the option of clamping the allowed values to the valid
range for the integral data type used as input. To clamp the output values to the range
(Long.MIN_VALUE,Long.MAX_VALUE) for long->double functions, or to (Integer.MIN_VALUE,Integer.
MAX_VALUE) for int-double functions, specify `clamp`, which is also the default. To explicitly
disable this, use `noclamp`. This is useful when you know the downstream functions will only
work with a certain range of values without truncating conversions. When you are using double
values natively on the downstream functions, use `noclamp` to avoid limiting the domain of
For interpolated samples from continuous distributions, you also have the option of clamping the
allowed values to the valid range for the integral data type used as input. To clamp the output
values to the range (Long.MIN_VALUE,Long.MAX_VALUE) for long->double functions, or to
(Integer.MIN_VALUE,Integer.MAX_VALUE) for int->double functions, specify `clamp`, which is also
the default. To explicitly disable this, use `noclamp`. This is useful when you know the
downstream functions will only work with a certain range of values without truncating
conversions. When you are using double values natively in the downstream functions, use
`noclamp` to avoid limiting the domain of values in your test data. (In this case, you might
also consider `infinite`.) A short example follows.
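
For example, drawing interpolated, finite, clamped samples from a normal curve looks like this in a test harness (a sketch; the package path and parameter values are assumptions for illustration):

```java
import io.nosqlbench.virtdata.library.curves4.continuous.long_double.Normal;

// 'map' disables input hashing so a mid-range input lands on the median;
// 'finite' and 'clamp' are shown explicitly even though they are defaults
Normal normal = new Normal(10.0d, 2.0d, "map", "interpolate", "finite", "clamp");
double v = normal.applyAsDouble(Long.MAX_VALUE / 2); // ~10.0
```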
### Computed Samples

View File

@ -25,8 +25,8 @@ public class IntegratedCurvesTest {
assertThat(runData.getStats().getPercentile(1.0d)).isCloseTo(1.0, Offset.offset(0.01d));
assertThat(runData.getStats().getPercentile(10.0d)).isCloseTo(1.0, Offset.offset(0.01d));
assertThat(runData.getStats().getPercentile(90.0d)).isCloseTo(6.0, Offset.offset(0.01d));
assertThat(runData.getStats().getPercentile(99.0d)).isCloseTo(61.0, Offset.offset(0.01d));
assertThat(runData.getStats().getPercentile(99.9d)).isCloseTo(311.0, Offset.offset(0.01d));
assertThat(runData.getStats().getPercentile(99.0d)).isCloseTo(55.989, Offset.offset(0.01d));
assertThat(runData.getStats().getPercentile(99.9d)).isCloseTo(202.999, Offset.offset(0.01d));
}