refactor/improve stat samplers for efficiency and correctness

Jonathan Shook 2021-11-18 16:35:20 -06:00
parent d31fbd45fe
commit ff142fd0b2
20 changed files with 336 additions and 205 deletions

View File

@@ -0,0 +1,65 @@
This diagram shows the base implementations of all the statistical sampler
wrappers, the types they implement, and the helper functions which are key
to their operation.
```plantuml
digraph samplers {
rankdir=LR;
node[shape=box];
subgraph cluster0 {
label="continuous"
subgraph cluster3 {
label="int->double"
IntToDoubleContinuousCurve[shape=box]
IntToDoubleContinuousCurve -> IntToDoubleFunction[style=dashed]
IntToDoubleContinuousCurve -> InterpolatingIntDoubleSampler
IntToDoubleContinuousCurve -> RealIntDoubleSampler
}
subgraph cluster4 {
label="long->double"
LongToDoubleContinuousCurve[shape=box]
LongToDoubleContinuousCurve -> LongToDoubleFunction[style=dashed]
LongToDoubleContinuousCurve -> InterpolatingLongDoubleSampler
LongToDoubleContinuousCurve -> RealLongDoubleSampler
}
}
subgraph cluster1 {
label="discrete"
subgraph cluster5 {
label="int->int"
IntToIntDiscreteCurve[shape=box]
IntToIntDiscreteCurve -> IntUnaryOperator[style=dashed]
IntToIntDiscreteCurve -> InterpolatingIntIntSampler
IntToIntDiscreteCurve -> DiscreteIntIntSampler
}
subgraph cluster6 {
label="int->long"
IntToLongDiscreteCurve[shape=box]
IntToLongDiscreteCurve -> IntToLongFunction[style=dashed]
IntToLongDiscreteCurve -> InterpolatingIntLongSampler
IntToLongDiscreteCurve -> DiscreteIntLongSampler
}
subgraph cluster7 {
label="long->int"
LongToIntDiscreteCurve[shape=box]
LongToIntDiscreteCurve -> LongToIntFunction[style=dashed]
LongToIntDiscreteCurve -> InterpolatingLongIntSampler
LongToIntDiscreteCurve -> DiscreteLongIntSampler
}
subgraph cluster8 {
label="long->long"
LongToLongDiscreteCurve[shape=box]
LongToLongDiscreteCurve -> LongUnaryOperator[style=dashed]
LongToLongDiscreteCurve -> InterpolatingLongLongSampler
LongToLongDiscreteCurve -> DiscreteLongLongSampler
}
}
}
```

View File

@@ -8,36 +8,76 @@ import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;

 import java.util.function.LongToDoubleFunction;

+/**
+ * Return a value along an interpolation curve. This allows you to sketch a basic
+ * density curve and describe it simply with just a few values. The number of values
+ * provided determines the resolution of the internal lookup table that is used for
+ * interpolation. The first value is always the 0.0 anchoring point on the unit interval.
+ * The last value is always the 1.0 anchoring point on the unit interval. This means
+ * that in order to subdivide the density curve in an interesting way, you need to provide
+ * a few more values in between them. Providing two values simply provides a uniform
+ * sample between a minimum and maximum value.
+ *
+ * The input range of this function is, like many of the other functions in this library,
+ * based on the valid range of positive long values, between 0L and Long.MAX_VALUE inclusive.
+ * This means that if you want to combine interpolation on this curve with the effect of
+ * pseudo-random sampling, you need to put a hash function ahead of it in the flow.
+ *
+ * Developer Note: This is the canonical implementation of LERPing in NoSQLBench, so it is
+ * heavily documented. Any other LERP implementations should borrow directly from this one,
+ * embedding it by default.
+ */
 @ThreadSafeMapper
 @Categories({Category.general})
 public class Interpolate implements LongToDoubleFunction {

-    private final double scale;
+    // How many values we have to pick from
+    private final double resolution;
+
+    // The lookup table
     private final double[] lut;
-    private final static double maxLongAsDouble = (double) Long.MAX_VALUE;
+
+    /**
+     * The scale of Long.MAX_VALUE and the unit interval scale factor are pre-combined
+     * here to reduce the number of operations later.
+     *
+     * The LUT size is retained as the number of elements provided (resolution) + 1.
+     * The +1 element serves as the N+1 index for when the unit interval sample is
+     * 1.0. In other words, the maximum value is not a special case, as a duplicate
+     * value is appended to the LUT instead.
+     *
+     * This size is the scale factor from the unit interval to the array index. Since
+     * the input comes in as a long value, it is mapped from [0L, Long.MAX_VALUE] to
+     * [0.0D, 1.0D] by multiplying by (1.0/(double)Long.MAX_VALUE). The long input
+     * value can then be multiplied directly to yield a double in the range of
+     * [0,LUT.length-1], which simplifies all remaining LERP math.
+     */
+    private final double scaleToLongInterval;

     @Example({"Interpolate(0.0d,100.0d)", "return a uniform double value between 0.0d and 100.0d"})
     @Example({"Interpolate(0.0d,90.0d,95.0d,98.0d,100.0d)", "return a weighted double value where the first, second, and third quartiles are 90.0D, 95.0D, and 98.0D"})
     public Interpolate(double... values) {
-        double[] doubles = new double[values.length+1];
-        for (int i = 0; i < values.length; i++) { // not a ranging error
-            doubles[i]=values[i];
-        }
-        doubles[doubles.length-1]=doubles[doubles.length-2];
-        this.scale=values.length-1;
+        this.resolution = values.length;
+        double[] doubles = new double[values.length + 1];
+        System.arraycopy(values,0,doubles,0,values.length);
+        doubles[doubles.length - 1] = doubles[doubles.length - 2];
         this.lut = doubles;
+        this.scaleToLongInterval = (this.resolution - 1) * (1.0d / (double) Long.MAX_VALUE);
     }

     @Override
     public double applyAsDouble(long input) {
-        long value = input;
-        double samplePoint = ((double)input / maxLongAsDouble) * scale;
-        int leftidx = (int)samplePoint;
-        double fractional = samplePoint - (long)samplePoint;
-        double leftComponent = lut[leftidx]* (1.0d-fractional);
-        double rightComponent = lut[leftidx+1] * fractional;
-        double sample = (leftComponent + rightComponent);
+        // scale the input from [0,Long.MAX_VALUE] to [0.0,lut.length-1]
+        double samplePoint = scaleToLongInterval * input;
+        // truncate the sample point to the left index
+        int leftidx = (int) samplePoint;
+        // isolate the fractional component
+        double fractional = samplePoint - leftidx;
+        // take the sum of the left component and right component, each scaled by
+        // its closeness to the fractional point within the interval
+        double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx + 1] * fractional);
         return sample;
     }
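
To make the LERP scheme documented above concrete, here is a minimal, self-contained sketch of the same math. It is an illustration of the technique, not code from this changeset, and the anchor values are made up.

```java
// A standalone sketch of the LERP scheme described above; values are illustrative only.
public class LerpSketch {
    public static void main(String[] args) {
        double[] values = {0.0d, 90.0d, 95.0d, 98.0d, 100.0d}; // quartile anchors
        // Pad with a duplicate of the last value so a unit-interval sample of 1.0
        // can read lut[length-2] and lut[length-1] without a special case.
        double[] lut = new double[values.length + 1];
        System.arraycopy(values, 0, lut, 0, values.length);
        lut[lut.length - 1] = lut[lut.length - 2];
        // Pre-combine [0,Long.MAX_VALUE] -> [0.0,1.0] with [0.0,1.0] -> [0,resolution-1].
        double scaleToLongInterval = (values.length - 1) * (1.0d / (double) Long.MAX_VALUE);
        long input = Long.MAX_VALUE / 2; // midpoint of the input range
        double samplePoint = scaleToLongInterval * input;
        int leftidx = (int) samplePoint;
        double fractional = samplePoint - leftidx;
        double sample = lut[leftidx] * (1.0d - fractional) + lut[leftidx + 1] * fractional;
        System.out.println(sample); // ~95.0, the 0.5 anchor of the sketched curve
    }
}
```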

View File

@@ -6,25 +6,10 @@ import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;

 import java.util.function.LongUnaryOperator;

-/**
- * Return a value along an interpolation curve. This allows you to sketch a basic
- * density curve and describe it simply with just a few values. The number of values
- * provided determines the resolution of the internal lookup table that is used for
- * interpolation. The first value is always the 0.0 anchoring point on the unit interval.
- * The last value is always the 1.0 anchoring point on the unit interval. This means
- * that in order to subdivide the density curve in an interesting way, you need to provide
- * a few more values in between them. Providing two values simply provides a uniform
- * sample between a minimum and maximum value.
- *
- * The input range of this function is, as many of the other functions in this library,
- * based on the valid range of positive long values, between 0L and Long.MAX_VALUE inclusive.
- * This means that if you want to combine interpolation on this curve with the effect of
- * pseudo-random sampling, you need to put a hash function ahead of it in the flow.
- */
 @ThreadSafeMapper
 public class Interpolate implements LongUnaryOperator {

-    private io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate basefunc;
+    private final io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate basefunc;

     @Example({"Interpolate(0.0d,100.0d)","return a uniform long value between 0L and 100L"})
     @Example({"Interpolate(0.0d,90.0d,95.0d,98.0d,100.0d)","return a weighted long value where the first, second, and third quartiles are 90.0D, 95.0D, and 98.0D"})

View File

@@ -1,11 +1,32 @@
 package io.nosqlbench.virtdata.library.basics.shared.from_long.to_long;

+import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+import org.assertj.core.data.Offset;
 import org.junit.jupiter.api.Test;

 import static org.assertj.core.api.Assertions.assertThat;

 public class InterpolateTest {

+    @Test
+    public void testRanging() {
+        io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate interpolate =
+            new io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate(0.0d, 1.0d);
+        Hash hf = new Hash();
+        DescriptiveStatistics dss = new DescriptiveStatistics();
+        long count = 10000000;
+        for (long i = 0; i < count; i++) {
+            long input = (long) (Long.MAX_VALUE * ((double) i / (double) count));
+            long prn = hf.applyAsLong(input);
+            double v = interpolate.applyAsDouble(prn);
+            dss.addValue(v);
+        }
+        assertThat(dss.getPercentile(0.000001)).isCloseTo(0.0, Offset.offset(0.01));
+        assertThat(dss.getPercentile(99.99999)).isCloseTo(1.0, Offset.offset(0.01));
+    }
+
     @Test
     public void testDeciles() {
         long topvalue = 1_000_000_000L;
@@ -26,6 +47,7 @@ public class InterpolateTest {
         long highvalue = (long) (Long.MAX_VALUE * 0.98d);
         long high = f.applyAsLong(highvalue);
         assertThat(high).isEqualTo(expected);
+        System.out.println(" -> was " + high);

         long highervalue = (long) (Long.MAX_VALUE * 0.9999d);
         long higher = f.applyAsLong(highervalue);
@@ -35,4 +57,4 @@ public class InterpolateTest {
         assertThat(max).isEqualTo(1000000000L);
     }
 }

View File

@@ -1,6 +1,6 @@
 package io.nosqlbench.virtdata.library.curves4.continuous.common;

-import io.nosqlbench.virtdata.library.curves4.discrete.common.ThreadSafeHash;
+import io.nosqlbench.virtdata.library.basics.shared.unary_int.Hash;

 import java.util.Arrays;
 import java.util.function.DoubleUnaryOperator;
@@ -10,11 +10,11 @@ public class InterpolatingIntDoubleSampler implements IntToDoubleFunction{

     private final double[] lut;
     private final DoubleUnaryOperator f;
-    private final int resolution;
     private final boolean clamp;
     private final double clampMin;
     private final double clampMax;
-    private ThreadSafeHash hash;
+    private final double scaleToIntRanged;
+    private Hash hash;

     public InterpolatingIntDoubleSampler(DoubleUnaryOperator icdSource, int resolution, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) {
         this.f = icdSource;
@@ -22,47 +22,43 @@ public class InterpolatingIntDoubleSampler implements IntToDoubleFunction{
         this.clampMin = clampMin;
         this.clampMax = clampMax;
         if (hash) {
-            this.hash = new ThreadSafeHash();
+            this.hash = new Hash();
         }
-        double[] lut = precompute(resolution);
+        double[] computed = precompute(resolution);
         if (finite) {
-            while (lut.length>0 && Double.isInfinite(lut[0])) {
-                lut = Arrays.copyOfRange(lut,1,lut.length-1);
+            while (computed.length>0 && Double.isInfinite(computed[0])) {
+                computed = Arrays.copyOfRange(computed,1,computed.length-1);
             }
-            while (lut.length>0 && Double.isInfinite(lut[lut.length-1])) {
-                lut = Arrays.copyOfRange(lut,0,lut.length-2);
+            while (computed.length>0 && Double.isInfinite(computed[computed.length-1])) {
+                computed = Arrays.copyOfRange(computed,0,computed.length-2);
             }
         }
-        this.lut = lut;
-        this.resolution=lut.length-1;
+        double[] padded = new double[computed.length+1];
+        System.arraycopy(computed,0,padded,0,computed.length);
+        this.scaleToIntRanged = (1.0d / (double) Integer.MAX_VALUE) * (padded.length-2);
+        this.lut = padded;
     }

     private double[] precompute(int resolution) {
-        double[] precomputed = new double[resolution+1];
-        for (int s = 0; s <= resolution; s++) { // not a ranging error
+        double[] precomputed = new double[resolution];
+        for (int s = 0; s < resolution; s++) { // not a ranging error
             double rangedToUnit = (double) s / (double) resolution;
-            double sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,f.applyAsDouble(rangedToUnit))) : f.applyAsDouble(rangedToUnit);
+            double sampleValue = f.applyAsDouble(rangedToUnit);
+            sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,sampleValue)) : sampleValue;
             precomputed[s] = sampleValue;
         }
-        precomputed[precomputed.length-1]=precomputed[precomputed.length-2]; // only for right of max, when S==Max in the rare case
         return precomputed;
     }

     @Override
     public double applyAsDouble(int input) {
-        long value = input;
         if (hash!=null) {
-            value = hash.applyAsLong(value);
+            input = hash.applyAsInt(input);
         }
-        double unit = (double) value / (double) Long.MAX_VALUE;
-        double samplePoint = unit * resolution;
-        int leftidx = (int) samplePoint;
-        double leftPartial = samplePoint - leftidx;
-        double leftComponent=(lut[leftidx] * (1.0-leftPartial));
-        double rightComponent = (lut[leftidx+1] * leftPartial);
-        double sample = leftComponent + rightComponent;
+        double samplePoint = scaleToIntRanged * input;
+        int leftidx = (int) samplePoint;
+        double fractional = samplePoint - leftidx;
+        double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx+1] * fractional);
         return sample;
     }
 }

View File

@@ -6,14 +6,30 @@ import java.util.Arrays;

 import java.util.function.DoubleUnaryOperator;
 import java.util.function.LongToDoubleFunction;

+/**
+ * See {@link io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate} for
+ * details on implementation.
+ *
+ * For the 6 implementations of interpolating samplers which use inverse cumulative distribution tables,
+ * care should be given to the following:
+ * <UL>
+ *     <LI>Input Ranging - ensure that the input type is appropriate for the curve; pre-scaling needs to be matched
+ *     to the input type</LI>
+ *     <LI>resolution, scale, and LUT length</LI>
+ *     <LI>+1 LUT padding for U=1.0</LI>
+ *     <LI>Uniform LERP code in the main function</LI>
+ * </UL>
+ */
 public class InterpolatingLongDoubleSampler implements LongToDoubleFunction {

-    private static final double MAX_LONG_AS_DOUBLE = Long.MAX_VALUE;
     private final double[] lut;
     private final DoubleUnaryOperator f;
-    private final int resolution;
     private final boolean clamp;
     private final double clampMin;
     private final double clampMax;
+    private final double scaleToLong;
     private ThreadSafeHash hash;

     public InterpolatingLongDoubleSampler(DoubleUnaryOperator icdSource, int resolution, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) {
@@ -24,44 +40,41 @@ public class InterpolatingLongDoubleSampler implements LongToDoubleFunction {
         this.clamp=clamp;
         this.clampMin=clampMin;
         this.clampMax=clampMax;
-        double[] lut = precompute(resolution);
+        double[] computed = precompute(resolution);
         if (finite) {
-            while (lut.length>0 && Double.isInfinite(lut[0])) {
-                lut = Arrays.copyOfRange(lut,1,lut.length-1);
+            while (computed.length>0 && Double.isInfinite(computed[0])) {
+                computed = Arrays.copyOfRange(computed,1,computed.length-1);
             }
-            while (lut.length>0 && Double.isInfinite(lut[lut.length-1])) {
-                lut = Arrays.copyOfRange(lut,0,lut.length-2);
+            while (computed.length>0 && Double.isInfinite(computed[computed.length-1])) {
+                computed = Arrays.copyOfRange(computed,0,computed.length-2);
             }
         }
-        this.lut = lut;
-        this.resolution = lut.length-1;
+        double[] padded = new double[computed.length+1];
+        System.arraycopy(computed,0,padded,0,computed.length);
+        this.scaleToLong = (1.0d / (double) Long.MAX_VALUE) * (padded.length-2);
+        this.lut = padded;
     }

     private double[] precompute(int resolution) {
-        double[] precomputed = new double[resolution+1];
-        for (int s = 0; s <= resolution; s++) { // not a ranging error
+        double[] precomputed = new double[resolution];
+        for (int s = 0; s < resolution; s++) { // not a ranging error
             double rangedToUnit = (double) s / (double) resolution;
-            double sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,f.applyAsDouble(rangedToUnit))) : f.applyAsDouble(rangedToUnit);
+            double sampleValue = f.applyAsDouble(rangedToUnit);
+            sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,sampleValue)) : sampleValue;
             precomputed[s] = sampleValue;
         }
-        precomputed[precomputed.length-1]=precomputed[precomputed.length-2]; // only for right of max, when S==Max in the rare case
         return precomputed;
     }

     @Override
-    public double applyAsDouble(long value) {
+    public double applyAsDouble(long input) {
         if (hash!=null) {
-            value = hash.applyAsLong(value);
+            input = hash.applyAsLong(input);
         }
-        double unit = (double) value / (double) Long.MAX_VALUE;
-        double samplePoint = unit * resolution;
-        int leftidx = (int) samplePoint;
-        double leftPartial = samplePoint - leftidx;
-        double leftComponent=(lut[leftidx] * (1.0-leftPartial));
-        double rightComponent = (lut[leftidx+1] * leftPartial);
-        double sample = leftComponent + rightComponent;
+        double samplePoint = scaleToLong * input;
+        int leftidx = (int) samplePoint;
+        double fractional = samplePoint - leftidx;
+        double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx+1] * fractional);
         return sample;
     }
 }
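
To make the checklist in the javadoc above concrete, here is a minimal usage sketch against the constructor signature shown in this diff. The quadratic inverse-CDF lambda is a made-up stand-in, not a distribution from this module.

```java
import java.util.function.DoubleUnaryOperator;

// A hedged usage sketch of the sampler above; the ICD function is illustrative only.
public class SamplerUsageSketch {
    public static void main(String[] args) {
        // Hypothetical inverse CDF on the unit interval; replace with a real ICD source.
        DoubleUnaryOperator icd = u -> 100.0d * u * u;
        // resolution=1000, hash=true, clamp to [0,100], finite=true, per the signature above
        InterpolatingLongDoubleSampler sampler =
            new InterpolatingLongDoubleSampler(icd, 1000, true, true, 0.0d, 100.0d, true);
        // Hashed inputs behave like pseudo-random unit-interval samples.
        for (long i = 0; i < 3; i++) {
            System.out.println(sampler.applyAsDouble(i * (Long.MAX_VALUE / 3)));
        }
    }
}
```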

View File

@@ -10,15 +10,17 @@ public class RealIntDoubleSampler implements IntToDoubleFunction {

     private final DoubleUnaryOperator f;
     private final boolean clamp;
     private final double clampMax;
+    private final double clampMin;
     private ThreadSafeHash hash;

-    public RealIntDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMax) {
+    public RealIntDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) {
         this.f = parentFunc;
         if (hash) {
             this.hash = new ThreadSafeHash();
         }
-        this.clamp = clamp;
-        this.clampMax = clampMax;
+        this.clamp = clamp | finite;
+        this.clampMin = Double.max(clampMin,Double.MIN_VALUE);
+        this.clampMax = Double.min(clampMax,Double.MAX_VALUE);
     }

     @Override
@@ -28,7 +30,7 @@ public class RealIntDoubleSampler implements IntToDoubleFunction {
             value = hash.applyAsLong(value);
         }
         double unit = (double) value / (double) Long.MAX_VALUE;
-        double sample = clamp ? Double.min(clampMax,f.applyAsDouble(unit)) : f.applyAsDouble(unit);
+        double sample = clamp ? Double.max(Double.min(clampMax,f.applyAsDouble(unit)),clampMin) : f.applyAsDouble(unit);
         return sample;
     }
 }

View File

@@ -10,24 +10,26 @@ public class RealLongDoubleSampler implements LongToDoubleFunction {

     private final DoubleUnaryOperator f;
     private final boolean clamp;
     private final double clampMax;
+    private final double clampMin;
     private ThreadSafeHash hash;

-    public RealLongDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMax) {
+    public RealLongDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) {
         this.f = parentFunc;
         if (hash) {
             this.hash = new ThreadSafeHash();
         }
-        this.clamp = clamp;
-        this.clampMax=clampMax;
+        this.clamp = clamp | finite;
+        this.clampMin = Double.max(clampMin,Double.MIN_VALUE);
+        this.clampMax = Double.min(clampMax,Double.MAX_VALUE);
     }

     @Override
     public double applyAsDouble(long value) {
         if (hash != null) {
             value = hash.applyAsLong(value);
         }
         double unit = (double) value / (double) Long.MAX_VALUE;
-        double sample = clamp ? Double.min(clampMax,f.applyAsDouble(unit)) : f.applyAsDouble(unit);
+        double sample = clamp ? Double.max(clampMin, Double.min(clampMax, f.applyAsDouble(unit))) : f.applyAsDouble(unit);
         return sample;
     }
 }

View File

@@ -109,7 +109,6 @@ public class IntToDoubleContinuousCurve implements IntToDoubleFunction {
             throw new RuntimeException("mods must not contain both "+ INFINITE +" and "+FINITE+".");
         }
-
         for (String s : modslist) {
             if (!validModifiers.contains(s)) {
                 throw new RuntimeException("modifier '" + s + "' is not a valid modifier. Use one of " + validModifiers + " instead.");
@@ -122,9 +121,9 @@ public class IntToDoubleContinuousCurve implements IntToDoubleFunction {
         boolean finite = ( mods.contains(FINITE) || !mods.contains(INFINITE));

         function = interpolate ?
-            new InterpolatingIntDoubleSampler(icdSource, 1000, hash, clamp, Integer.MIN_VALUE, Long.MAX_VALUE, finite)
+            new InterpolatingIntDoubleSampler(icdSource, 1000, hash, clamp, Integer.MIN_VALUE, Integer.MAX_VALUE, finite)
             :
-            new RealIntDoubleSampler(icdSource, hash, clamp, (double) Long.MAX_VALUE);
+            new RealIntDoubleSampler(icdSource, hash, clamp, Integer.MIN_VALUE, Integer.MAX_VALUE, true);
     }

View File

@@ -124,7 +124,7 @@ public class LongToDoubleContinuousCurve implements LongToDoubleFunction {
         function = interpolate ?
             new InterpolatingLongDoubleSampler(icdSource, 1000, hash, clamp, Long.MIN_VALUE, Long.MAX_VALUE, finite)
             :
-            new RealLongDoubleSampler(icdSource, hash, clamp, (double) Long.MAX_VALUE);
+            new RealLongDoubleSampler(icdSource, hash, clamp, Long.MIN_VALUE, Long.MAX_VALUE, true);
     }

View File

@@ -1,5 +1,7 @@
 package io.nosqlbench.virtdata.library.curves4.discrete.common;

+import io.nosqlbench.virtdata.library.basics.shared.unary_int.Hash;
+
 import java.util.function.DoubleToIntFunction;
 import java.util.function.IntUnaryOperator;
@@ -7,46 +9,42 @@ public class InterpolatingIntIntSampler implements IntUnaryOperator {

     private final double[] lut;
     private final DoubleToIntFunction f;
-    private int resolution;
-    private ThreadSafeHash hash;
+    private Hash hash;
+    private final double scaleToIntRanged;

     public InterpolatingIntIntSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) {
         this.f = icdSource;
-        this.resolution = resolution;
         if (hash) {
-            this.hash = new ThreadSafeHash();
+            this.hash = new Hash();
         }
-        this.lut = precompute();
+        double[] computed = precompute(resolution);
+        double[] padded = new double[computed.length+1];
+        System.arraycopy(computed,0,padded,0,computed.length);
+        padded[padded.length-1] = padded[padded.length-2];
+        scaleToIntRanged = (1.0d / Integer.MAX_VALUE) * (padded.length-2);
+        this.lut = padded;
     }

-    private double[] precompute() {
-        double[] precomputed = new double[resolution+2];
-        for (int s = 0; s <= resolution; s++) { // not a ranging error
+    private double[] precompute(int resolution) {
+        double[] precomputed = new double[resolution];
+        for (int s = 0; s < resolution; s++) { // not a ranging error
             double rangedToUnit = (double) s / (double) resolution;
             int sampleValue = f.applyAsInt(rangedToUnit);
             precomputed[s] = sampleValue;
         }
-        precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case
         return precomputed;
     }

     @Override
     public int applyAsInt(int input) {
-        int value = input;
         if (hash!=null) {
-            value = (int) (hash.applyAsLong(input) % Integer.MAX_VALUE);
+            input = hash.applyAsInt(input);
         }
-        double unit = (double) value / (double) Integer.MAX_VALUE;
-        double samplePoint = unit * resolution;
-        int leftidx = (int) samplePoint;
-        double leftPartial = samplePoint - leftidx;
-        double leftComponent=(lut[leftidx] * (1.0-leftPartial));
-        double rightComponent = (lut[leftidx+1] * leftPartial);
-        double sample = leftComponent + rightComponent;
+        double samplePoint = scaleToIntRanged * input;
+        int leftidx = (int) samplePoint;
+        double fractional = samplePoint - leftidx;
+        double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx+1] * fractional);
         return (int) sample;
     }
 }

View File

@@ -1,5 +1,7 @@
 package io.nosqlbench.virtdata.library.curves4.discrete.common;

+import io.nosqlbench.virtdata.library.basics.shared.unary_int.Hash;
+
 import java.util.function.DoubleToIntFunction;
 import java.util.function.IntToLongFunction;
@@ -7,46 +9,40 @@ public class InterpolatingIntLongSampler implements IntToLongFunction {

     private final double[] lut;
     private final DoubleToIntFunction f;
-    private int resolution;
-    private ThreadSafeHash hash;
+    private Hash hash;
+    private final double scaleToIntRanged;

     public InterpolatingIntLongSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) {
         this.f = icdSource;
-        this.resolution = resolution;
         if (hash) {
-            this.hash = new ThreadSafeHash();
+            this.hash = new Hash();
         }
-        this.lut = precompute();
+        double[] computed = precompute(resolution);
+        double[] padded = new double[computed.length+1];
+        System.arraycopy(computed,0,padded,0,computed.length);
+        this.scaleToIntRanged = (1.0d / Integer.MAX_VALUE) * (padded.length-2);
+        this.lut = padded;
     }

-    private double[] precompute() {
-        double[] precomputed = new double[resolution+2];
-        for (int s = 0; s <= resolution; s++) { // not a ranging error
+    private double[] precompute(int resolution) {
+        double[] precomputed = new double[resolution];
+        for (int s = 0; s < resolution; s++) { // not a ranging error
             double rangedToUnit = (double) s / (double) resolution;
             int sampleValue = f.applyAsInt(rangedToUnit);
             precomputed[s] = sampleValue;
         }
-        precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case
         return precomputed;
     }

     @Override
     public long applyAsLong(int input) {
-        int value = input;
         if (hash!=null) {
-            value = (int) (hash.applyAsLong(input) % Integer.MAX_VALUE);
+            input = hash.applyAsInt(input);
         }
-        double unit = (double) value / (double) Integer.MAX_VALUE;
-        double samplePoint = unit * resolution;
-        int leftidx = (int) samplePoint;
-        double leftPartial = samplePoint - leftidx;
-        double leftComponent=(lut[leftidx] * (1.0-leftPartial));
-        double rightComponent = (lut[leftidx+1] * leftPartial);
-        double sample = leftComponent + rightComponent;
+        double samplePoint = scaleToIntRanged * input;
+        int leftidx = (int) samplePoint;
+        double fractional = samplePoint - leftidx;
+        double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx+1] * fractional);
         return (long) sample;
     }
 }

View File

@@ -7,43 +7,41 @@ public class InterpolatingLongIntSampler implements LongToIntFunction {

     private final double[] lut;
     private final DoubleToIntFunction f;
-    private int resolution;
     private ThreadSafeHash hash;
+    private final double scaleToLong;

     public InterpolatingLongIntSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) {
         this.f = icdSource;
-        this.resolution = resolution;
         if (hash) {
             this.hash = new ThreadSafeHash();
         }
-        this.lut = precompute();
+        double[] computed = precompute(resolution);
+        double[] padded = new double[computed.length+1];
+        System.arraycopy(computed,0,padded,0,computed.length);
+        padded[padded.length-1] = padded[padded.length-2];
+        scaleToLong = (1.0d / Long.MAX_VALUE) * (padded.length-2);
+        this.lut = padded;
     }

-    private double[] precompute() {
-        double[] precomputed = new double[resolution+2];
-        for (int s = 0; s <= resolution; s++) { // not a ranging error
+    private double[] precompute(int resolution) {
+        double[] precomputed = new double[resolution];
+        for (int s = 0; s < resolution; s++) {
             double rangedToUnit = (double) s / (double) resolution;
             int sampleValue = f.applyAsInt(rangedToUnit);
             precomputed[s] = sampleValue;
         }
-        precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case
         return precomputed;
     }

     @Override
-    public int applyAsInt(long value) {
+    public int applyAsInt(long input) {
         if (hash!=null) {
-            value = hash.applyAsLong(value);
+            input = hash.applyAsLong(input);
         }
-        double unit = (double) value / (double) Long.MAX_VALUE;
-        double samplePoint = unit * resolution;
-        int leftidx = (int) samplePoint;
-        double leftPartial = samplePoint - leftidx;
-        double leftComponent=(lut[leftidx] * (1.0-leftPartial));
-        double rightComponent = (lut[leftidx+1] * leftPartial);
-        double sample = leftComponent + rightComponent;
+        double samplePoint = scaleToLong * input;
+        int leftidx = (int) samplePoint;
+        double fractional = samplePoint - leftidx;
+        double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx+1] * fractional);
         return (int) sample;
     }
 }

View File

@@ -7,43 +7,41 @@ public class InterpolatingLongLongSampler implements LongUnaryOperator {

     private final double[] lut;
     private final DoubleToIntFunction f;
-    private int resolution;
     private ThreadSafeHash hash;
+    private final double scaleToLong;

     public InterpolatingLongLongSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) {
         this.f = icdSource;
-        this.resolution = resolution;
         if (hash) {
             this.hash = new ThreadSafeHash();
         }
-        this.lut = precompute();
+        double[] computed = precompute(resolution);
+        double[] padded = new double[computed.length+1];
+        System.arraycopy(computed,0,padded,0,computed.length);
+        padded[padded.length-1] = padded[padded.length-2];
+        scaleToLong = (1.0d / Long.MAX_VALUE) * ((double)(padded.length-2));
+        this.lut = padded;
     }

-    private double[] precompute() {
-        double[] precomputed = new double[resolution+2];
-        for (int s = 0; s <= resolution; s++) { // not a ranging error
+    private double[] precompute(int resolution) {
+        double[] precomputed = new double[resolution];
+        for (int s = 0; s < resolution; s++) {
             double rangedToUnit = (double) s / (double) resolution;
             int sampleValue = f.applyAsInt(rangedToUnit);
             precomputed[s] = sampleValue;
         }
-        precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case
         return precomputed;
     }

     @Override
-    public long applyAsLong(long value) {
+    public long applyAsLong(long input) {
         if (hash!=null) {
-            value = hash.applyAsLong(value);
+            input = hash.applyAsLong(input);
         }
-        double unit = (double) value / (double) Long.MAX_VALUE;
-        double samplePoint = unit * resolution;
-        int leftidx = (int) samplePoint;
-        double leftPartial = samplePoint - leftidx;
-        double leftComponent=(lut[leftidx] * (1.0-leftPartial));
-        double rightComponent = (lut[leftidx+1] * leftPartial);
-        double sample = leftComponent + rightComponent;
+        double samplePoint = scaleToLong * input;
+        int leftidx = (int) samplePoint;
+        double fractional = samplePoint - leftidx;
+        double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx+1] * fractional);
         return (long) sample;
     }
 }

View File

@@ -11,7 +11,7 @@ public class LevyTest {
     @Test
     public void testLevy() {
         Levy levy = new Levy(2.3d, 1.0d);
-        assertThat(levy.applyAsDouble(10L)).isCloseTo(2.938521849905433, Offset.offset(0.000001d));
+        assertThat(levy.applyAsDouble(10L)).isCloseTo(2.9379325000660304, Offset.offset(0.000001d));
     }
 }

View File

@@ -17,7 +17,7 @@ public class RealDistributionsValuesTest {
     @Test
     public void testComputedNormal() {
-        RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"compute"), 1000000);
+        RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"compute"), 1000000,1);
         System.out.println(runData);
         assertThat(runData.getFractionalPercentile(0.5D))
             .isCloseTo(10.0D, Offset.offset(0.01D));
@@ -29,7 +29,7 @@ public class RealDistributionsValuesTest {
     @Test
     public void testInterpolatedNormal() {
-        RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"interpolate"), 1000000);
+        RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"interpolate"), 1000000,1);
         System.out.println(runData);
         assertThat(runData.getFractionalPercentile(0.5D))
             .isCloseTo(10.0D, Offset.offset(0.01D));
@@ -41,7 +41,7 @@ public class RealDistributionsValuesTest {
     @Test
     public void testComputedUniform() {
-        RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"compute"), 1000000);
+        RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"compute"), 1000000,1);
         assertThat(runData.getFractionalPercentile(0.33D))
             .isCloseTo(33.33D, Offset.offset(1.0D));
         assertThat(runData.getFractionalPercentile(0.5D))
@@ -53,7 +53,7 @@ public class RealDistributionsValuesTest {
     @Test
     public void testInterpolatedUniform() {
-        RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"interpolate"), 1000000);
+        RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"interpolate"), 1000000,1);
         assertThat(runData.getFractionalPercentile(0.33D))
             .isCloseTo(33.33D, Offset.offset(1.0D));
         assertThat(runData.getFractionalPercentile(0.5D))
@@ -66,22 +66,27 @@ public class RealDistributionsValuesTest {
     @Test
     public void testInterpolatedMappedUniform() {
         Uniform mapper = new Uniform(0.0, 100.0, "map", "interpolate");
-        RunData runData = iterateMapperDouble(mapper,10000000);
+        RunData runData = iterateMapperDouble(mapper,10000000,Long.MAX_VALUE/10000000L);
+        assertThat(runData.getFractionalPercentile(0.001D))
+            .isCloseTo(0.0D, Offset.offset(1.0D));
         assertThat(runData.getFractionalPercentile(0.999D))
-            .isCloseTo(0.0D, Offset.offset(1.0D));
+            .isCloseTo(99.99D, Offset.offset(1.0D));
         assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0D, Offset.offset(1.0D));
+        System.out.println(runData);
     }

-    private RunData iterateMapperDouble(LongToDoubleFunction mapper, int iterations) {
+    private RunData iterateMapperDouble(LongToDoubleFunction mapper, int iterations, long funcstep) {
         assertThat(mapper).isNotNull();
         double[] samples = new double[iterations];
         long time_generating = System.nanoTime();
         for (int i = 0; i < iterations; i++) {
-            samples[i] = mapper.applyAsDouble(i);
+            samples[i] = mapper.applyAsDouble(i*funcstep);
         }
         long time_generated = System.nanoTime();

View File

@@ -46,7 +46,7 @@ public class IntegerDistributionsConcurrencyTest {
         // threshold test against CDF
         expected = mapper.get((long) (0.03515d * (double) Long.MAX_VALUE));
         assertThat(expected).isEqualTo(1);
-        expected = mapper.get((long) (0.03600d * (double) Long.MAX_VALUE));
+        expected = mapper.get((long) (0.03700d * (double) Long.MAX_VALUE));
         assertThat(expected).isEqualTo(2);
     }

View File

@@ -16,6 +16,7 @@ import static org.assertj.core.api.Assertions.assertThat;

 public class IntegerDistributionsValuesTest {

+    @Disabled
     @Test
     public void testComputedZipf() {
@@ -70,13 +71,19 @@ public class IntegerDistributionsValuesTest {
         System.out.println(runData);
     }

+    @Test
+    public void testMaximumValue() {
+        Uniform mapper = new Uniform(0.0d, 100.0d, "interpolate", "map");
+        assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0d,Offset.offset(0.1D));
+    }
+
     @Test
     public void testInterpolatedMappedUniform() {
         Uniform mapper = new Uniform(0.0d, 100.0d, "interpolate", "map");
         RunData runData = iterateMapperDouble(mapper,10000000);
         assertThat(runData.getFractionalPercentile(0.999D))
             .isCloseTo(0.0D, Offset.offset(1.0D));
-        assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0d,Offset.offset(0.0001D));
+        assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0d,Offset.offset(0.1D));
     }
@@ -101,7 +108,11 @@ public class IntegerDistributionsValuesTest {
         double[] samples = new double[iterations];
         long time_generating = System.nanoTime();

+        int readout = iterations/10;
         for (int i = 0; i < iterations; i++) {
+            if ((i%readout)==0) {
+                System.out.println("i="+i+"/"+iterations);
+            }
             samples[i] = mapper.applyAsDouble(i);
         }
         long time_generated = System.nanoTime();

View File

@@ -69,23 +69,24 @@ little loss in accuracy, but the difference is generally negligible for nearly a

 #### Infinite or Finite

-For interpolated samples, you also have the option of including or excluding infinite values
-which may occur in some distributions. If you want to include them, use `infinite`, or `finite`
-to explicitly avoid them (the default). Specifying 'infinite' doesn't guarantee that you will
-see +Infinity or -Infinity, only that they are allowed. The Normal distribution often contains
--Infinity and +Infinity, for example, due to the function used to estimate its cumulative
-distribution. These values can often be valuable in finding corner cases which should be treated
-uniformly according to [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754).
+For interpolated samples from continuous distributions, you also have the option of including or
+excluding infinite values which may occur in some distributions. If you want to include them,
+use `infinite`, or `finite` to explicitly avoid them (the default). Specifying 'infinite'
+doesn't guarantee that you will see +Infinity or -Infinity, only that they are allowed. The
+Normal distribution often contains -Infinity and +Infinity, for example, due to the function
+used to estimate its cumulative distribution. These values can often be valuable in finding
+corner cases which should be treated uniformly according to
+[IEEE 754](https://en.wikipedia.org/wiki/IEEE_754).

 #### Clamp or Noclamp

-For interpolated samples, you also have the option of clamping the allowed values to the valid
-range for the integral data type used as input. To clamp the output values to the range
-(Long.MIN_VALUE,Long.MAX_VALUE) for long->double functions, or to (Integer.MIN_VALUE,Integer.
-MAX_VALUE) for int-double functions, specify `clamp`, which is also the default. To explicitly
-disable this, use `noclamp`. This is useful when you know the downstream functions will only
-work with a certain range of values without truncating conversions. When you are using double
-values natively on the downstream functions, use `noclamp` to avoid limiting the domain of
-values in your test data. (In this case, you might also consider `infinite`).
+For interpolated samples from continuous distributions, you also have the option of clamping the
+allowed values to the valid range for the integral data type used as input. To clamp the output
+values to the range (Long.MIN_VALUE,Long.MAX_VALUE) for long->double functions, or to (Integer.
+MIN_VALUE,Integer.MAX_VALUE) for int->double functions, specify `clamp`, which is also the default.
+To explicitly disable this, use `noclamp`. This is useful when you know the downstream functions
+will only work with a certain range of values without truncating conversions. When you are using
+double values natively on the downstream functions, use `noclamp` to avoid limiting the domain of
+values in your test data. (In this case, you might also consider `infinite`).

 ### Computed Samples
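
As a concrete illustration of the `interpolate`, `map`, `clamp`, and `finite` modifiers documented in the file above, here is a hedged sketch in the style of this changeset's tests. The import paths are assumed from the test classes in this commit and may need adjustment for your module layout.

```java
import io.nosqlbench.virtdata.library.curves4.continuous.long_double.Normal;
import io.nosqlbench.virtdata.library.curves4.continuous.long_double.Uniform;

// An illustrative sketch of the modifier names above; not code from this changeset.
public class ModifierSketch {
    public static void main(String[] args) {
        // 'map' scales the input directly instead of hashing it first;
        // 'clamp' (the default) bounds outputs to the configured range.
        Uniform uniform = new Uniform(0.0, 100.0, "interpolate", "map", "clamp");
        // 'finite' (the default) trims infinite LUT endpoints such as Normal's tails.
        Normal normal = new Normal(10.0, 2.0, "interpolate", "map", "finite");
        System.out.println(uniform.applyAsDouble(Long.MAX_VALUE / 2)); // ~50.0
        System.out.println(normal.applyAsDouble(Long.MAX_VALUE / 2));  // ~10.0, the median
    }
}
```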

View File

@@ -25,8 +25,8 @@ public class IntegratedCurvesTest {
         assertThat(runData.getStats().getPercentile(1.0d)).isCloseTo(1.0, Offset.offset(0.01d));
         assertThat(runData.getStats().getPercentile(10.0d)).isCloseTo(1.0, Offset.offset(0.01d));
         assertThat(runData.getStats().getPercentile(90.0d)).isCloseTo(6.0, Offset.offset(0.01d));
-        assertThat(runData.getStats().getPercentile(99.0d)).isCloseTo(61.0, Offset.offset(0.01d));
-        assertThat(runData.getStats().getPercentile(99.9d)).isCloseTo(311.0, Offset.offset(0.01d));
+        assertThat(runData.getStats().getPercentile(99.0d)).isCloseTo(55.989, Offset.offset(0.01d));
+        assertThat(runData.getStats().getPercentile(99.9d)).isCloseTo(202.999, Offset.offset(0.01d));
     }