refactor/improve stat samplers for efficiency and correctness

Jonathan Shook 2021-11-18 16:35:20 -06:00
parent d31fbd45fe
commit ff142fd0b2
20 changed files with 336 additions and 205 deletions

View File

@@ -0,0 +1,65 @@
This diagram shows the base implementations of all the statistical sampler
wrappers, the types they implement, and the helper functions which are key
to their operation.
```plantuml
digraph samplers {
rankdir=LR;
node[shape=box];
subgraph cluster0 {
label="continuous"
subgraph cluster3 {
label="int->double"
IntToDoubleContinuousCurve[shape=box]
IntToDoubleContinuousCurve -> IntToDoubleFunction[style=dashed]
IntToDoubleContinuousCurve -> InterpolatingIntDoubleSampler
IntToDoubleContinuousCurve -> RealIntDoubleSampler
}
subgraph cluster4 {
label="long->double"
LongToDoubleContinuousCurve[shape=box]
LongToDoubleContinuousCurve -> LongToDoubleFunction[style=dashed]
LongToDoubleContinuousCurve -> InterpolatingLongDoubleSampler
LongToDoubleContinuousCurve -> RealLongDoubleSampler
}
}
subgraph cluster1 {
label="discrete"
subgraph cluster5 {
label="int->int"
IntToIntDiscreteCurve[shape=box]
IntToIntDiscreteCurve -> IntUnaryOperator[style=dashed]
IntToIntDiscreteCurve -> InterpolatingIntIntSampler
IntToIntDiscreteCurve -> DiscreteIntIntSampler
}
subgraph cluster6 {
label="int->long"
IntToLongDiscreteCurve[shape=box]
IntToLongDiscreteCurve -> IntToLongFunction[style=dashed]
IntToLongDiscreteCurve -> InterpolatingIntLongSampler
IntToLongDiscreteCurve -> DiscreteIntLongSampler
}
subgraph cluster7 {
label="long->int"
LongToIntDiscreteCurve[shape=box]
LongToIntDiscreteCurve -> LongToIntFunction[style=dashed]
LongToIntDiscreteCurve -> InterpolatingLongIntSampler
LongToIntDiscreteCurve -> DiscreteLongIntSampler
}
subgraph cluster8 {
label="long->long"
LongToLongDiscreteCurve[shape=box]
LongToLongDiscreteCurve -> LongUnaryOperator[style=dashed]
LongToLongDiscreteCurve -> InterpolatingLongLongSampler
LongToLongDiscreteCurve -> DiscreteLongLongSampler
}
}
}
```

View File

@@ -8,36 +8,76 @@ import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;

 import java.util.function.LongToDoubleFunction;

+/**
+ * Return a value along an interpolation curve. This allows you to sketch a basic
+ * density curve and describe it simply with just a few values. The number of values
+ * provided determines the resolution of the internal lookup table that is used for
+ * interpolation. The first value is always the 0.0 anchoring point on the unit interval.
+ * The last value is always the 1.0 anchoring point on the unit interval. This means
+ * that in order to subdivide the density curve in an interesting way, you need to provide
+ * a few more values in between them. Providing two values simply provides a uniform
+ * sample between a minimum and maximum value.
+ *
+ * The input range of this function is, like many of the other functions in this library,
+ * based on the valid range of positive long values, between 0L and Long.MAX_VALUE inclusive.
+ * This means that if you want to combine interpolation on this curve with the effect of
+ * pseudo-random sampling, you need to put a hash function ahead of it in the flow.
+ *
+ * Developer Note: This is the canonical implementation of LERPing in NoSQLBench, so it is
+ * heavily documented. Any other LERP implementations should borrow directly from this one,
+ * embedding it by default.
+ */
 @ThreadSafeMapper
 @Categories({Category.general})
 public class Interpolate implements LongToDoubleFunction {

-    private final double scale;
+    // How many values we have to pick from
+    private final double resolution;
+
+    // The lookup table
     private final double[] lut;
-    private final static double maxLongAsDouble = (double) Long.MAX_VALUE;
+
+    /**
+     * The scale of Long.MAX_VALUE and the unit interval scale factor are pre-combined
+     * here to reduce the number of operations later.
+     *
+     * The LUT size is retained as the number of elements provided (resolution) + 1.
+     * The +1 element serves as the N+1 index for when the unit interval sample is
+     * 1.0. In other words, the maximum value is not a special case, as a duplicate
+     * value is appended to the LUT instead.
+     *
+     * This size is the scale factor from the unit interval to the array index. Since
+     * the input comes in as a long value, it is mapped from [0L, Long.MAX_VALUE] to
+     * [0.0D, 1.0D] by multiplying by (1.0/(double)Long.MAX_VALUE). The long input
+     * value can then be multiplied directly to yield a double in the range of
+     * [0,LUT.length-1], which simplifies all remaining LERP math.
+     */
+    private final double scaleToLongInterval;

     @Example({"Interpolate(0.0d,100.0d)", "return a uniform double value between 0.0d and 100.0d"})
     @Example({"Interpolate(0.0d,90.0d,95.0d,98.0d,100.0d)", "return a weighted double value where the first, second, and third quartiles are 90.0D, 95.0D, and 98.0D"})
     public Interpolate(double... values) {
-        double[] doubles = new double[values.length+1];
-        for (int i = 0; i < values.length; i++) { // not a ranging error
-            doubles[i]=values[i];
-        }
-        doubles[doubles.length-1]=doubles[doubles.length-2];
-        this.scale=values.length-1;
+        this.resolution = values.length;
+        double[] doubles = new double[values.length + 1];
+        System.arraycopy(values,0,doubles,0,values.length);
+        doubles[doubles.length - 1] = doubles[doubles.length - 2];
         this.lut = doubles;
+        this.scaleToLongInterval = (this.resolution - 1) * (1.0d / (double) Long.MAX_VALUE);
     }

     @Override
     public double applyAsDouble(long input) {
-        long value = input;
-        double samplePoint = ((double)input / maxLongAsDouble) * scale;
-        int leftidx = (int)samplePoint;
-        double fractional = samplePoint - (long)samplePoint;
-        double leftComponent = lut[leftidx]* (1.0d-fractional);
-        double rightComponent = lut[leftidx+1] * fractional;
-        double sample = (leftComponent + rightComponent);
+        // scale the input from [0,Long.MAX_VALUE] to [0.0,lut.length-1]
+        double samplePoint = scaleToLongInterval * input;
+        // truncate the sample point to the left index
+        int leftidx = (int) samplePoint;
+        // isolate the fractional component
+        double fractional = samplePoint - leftidx;
+        // take the sum of the left component and right component, each scaled by
+        // its closeness to the fractional point within the interval
+        double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx + 1] * fractional);
         return sample;
     }
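
To make the LERP scheme documented above concrete, here is a minimal, self-contained sketch of the same math. It is an illustration of the technique, not code from this changeset, and the anchor values are made up.

```java
// A standalone sketch of the LERP scheme described above; values are illustrative only.
public class LerpSketch {
    public static void main(String[] args) {
        double[] values = {0.0d, 90.0d, 95.0d, 98.0d, 100.0d}; // quartile anchors
        // Pad with a duplicate of the last value so a unit-interval sample of 1.0
        // can read lut[length-2] and lut[length-1] without a special case.
        double[] lut = new double[values.length + 1];
        System.arraycopy(values, 0, lut, 0, values.length);
        lut[lut.length - 1] = lut[lut.length - 2];
        // Pre-combine [0,Long.MAX_VALUE] -> [0.0,1.0] with [0.0,1.0] -> [0,resolution-1].
        double scaleToLongInterval = (values.length - 1) * (1.0d / (double) Long.MAX_VALUE);
        long input = Long.MAX_VALUE / 2; // midpoint of the input range
        double samplePoint = scaleToLongInterval * input;
        int leftidx = (int) samplePoint;
        double fractional = samplePoint - leftidx;
        double sample = lut[leftidx] * (1.0d - fractional) + lut[leftidx + 1] * fractional;
        System.out.println(sample); // ~95.0, the 0.5 anchor of the sketched curve
    }
}
```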

View File

@@ -6,25 +6,10 @@ import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;

 import java.util.function.LongUnaryOperator;

-/**
- * Return a value along an interpolation curve. This allows you to sketch a basic
- * density curve and describe it simply with just a few values. The number of values
- * provided determines the resolution of the internal lookup table that is used for
- * interpolation. The first value is always the 0.0 anchoring point on the unit interval.
- * The last value is always the 1.0 anchoring point on the unit interval. This means
- * that in order to subdivide the density curve in an interesting way, you need to provide
- * a few more values in between them. Providing two values simply provides a uniform
- * sample between a minimum and maximum value.
- *
- * The input range of this function is, as many of the other functions in this library,
- * based on the valid range of positive long values, between 0L and Long.MAX_VALUE inclusive.
- * This means that if you want to combine interpolation on this curve with the effect of
- * pseudo-random sampling, you need to put a hash function ahead of it in the flow.
- */
 @ThreadSafeMapper
 public class Interpolate implements LongUnaryOperator {

-    private io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate basefunc;
+    private final io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate basefunc;

     @Example({"Interpolate(0.0d,100.0d)","return a uniform long value between 0L and 100L"})
     @Example({"Interpolate(0.0d,90.0d,95.0d,98.0d,100.0d)","return a weighted long value where the first, second, and third quartiles are 90.0D, 95.0D, and 98.0D"})

View File

@@ -1,11 +1,32 @@
 package io.nosqlbench.virtdata.library.basics.shared.from_long.to_long;

+import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+import org.assertj.core.data.Offset;
 import org.junit.jupiter.api.Test;

 import static org.assertj.core.api.Assertions.assertThat;

 public class InterpolateTest {

+    @Test
+    public void testRanging() {
+        io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate interpolate =
+            new io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate(0.0d, 1.0d);
+        Hash hf = new Hash();
+        DescriptiveStatistics dss = new DescriptiveStatistics();
+        long count = 10000000;
+        for (long i = 0; i < count; i++) {
+            long input = (long) (Long.MAX_VALUE * ((double) i / (double) count));
+            long prn = hf.applyAsLong(input);
+            double v = interpolate.applyAsDouble(prn);
+            dss.addValue(v);
+        }
+        assertThat(dss.getPercentile(0.000001)).isCloseTo(0.0, Offset.offset(0.01));
+        assertThat(dss.getPercentile(99.99999)).isCloseTo(1.0, Offset.offset(0.01));
+    }
+
     @Test
     public void testDeciles() {
         long topvalue = 1_000_000_000L;
@@ -26,6 +47,7 @@ public class InterpolateTest {
         long highvalue = (long) (Long.MAX_VALUE * 0.98d);
         long high = f.applyAsLong(highvalue);
         assertThat(high).isEqualTo(expected);
+        System.out.println(" -> was " + high);

         long highervalue = (long) (Long.MAX_VALUE * 0.9999d);
         long higher = f.applyAsLong(highervalue);
@@ -35,4 +57,4 @@ public class InterpolateTest {
         assertThat(max).isEqualTo(1000000000L);
     }
 }

View File

@@ -1,6 +1,6 @@
 package io.nosqlbench.virtdata.library.curves4.continuous.common;

-import io.nosqlbench.virtdata.library.curves4.discrete.common.ThreadSafeHash;
+import io.nosqlbench.virtdata.library.basics.shared.unary_int.Hash;

 import java.util.Arrays;
 import java.util.function.DoubleUnaryOperator;
@@ -10,11 +10,11 @@ public class InterpolatingIntDoubleSampler implements IntToDoubleFunction{

     private final double[] lut;
     private final DoubleUnaryOperator f;
-    private final int resolution;
     private final boolean clamp;
     private final double clampMin;
     private final double clampMax;
-    private ThreadSafeHash hash;
+    private final double scaleToIntRanged;
+    private Hash hash;

     public InterpolatingIntDoubleSampler(DoubleUnaryOperator icdSource, int resolution, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) {
         this.f = icdSource;
@@ -22,47 +22,43 @@ public class InterpolatingIntDoubleSampler implements IntToDoubleFunction{
         this.clampMin = clampMin;
         this.clampMax = clampMax;
         if (hash) {
-            this.hash = new ThreadSafeHash();
+            this.hash = new Hash();
         }
-        double[] lut = precompute(resolution);
+        double[] computed = precompute(resolution);
         if (finite) {
-            while (lut.length>0 && Double.isInfinite(lut[0])) {
-                lut = Arrays.copyOfRange(lut,1,lut.length-1);
+            while (computed.length>0 && Double.isInfinite(computed[0])) {
+                computed = Arrays.copyOfRange(computed,1,computed.length-1);
             }
-            while (lut.length>0 && Double.isInfinite(lut[lut.length-1])) {
-                lut = Arrays.copyOfRange(lut,0,lut.length-2);
+            while (computed.length>0 && Double.isInfinite(computed[computed.length-1])) {
+                computed = Arrays.copyOfRange(computed,0,computed.length-2);
             }
         }
-        this.lut = lut;
-        this.resolution=lut.length-1;
+        double[] padded = new double[computed.length+1];
+        System.arraycopy(computed,0,padded,0,computed.length);
+        this.scaleToIntRanged = (1.0d / (double) Integer.MAX_VALUE) * (padded.length-2);
+        this.lut = padded;
     }

     private double[] precompute(int resolution) {
-        double[] precomputed = new double[resolution+1];
-        for (int s = 0; s <= resolution; s++) { // not a ranging error
+        double[] precomputed = new double[resolution];
+        for (int s = 0; s < resolution; s++) { // not a ranging error
             double rangedToUnit = (double) s / (double) resolution;
-            double sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,f.applyAsDouble(rangedToUnit))) : f.applyAsDouble(rangedToUnit);
+            double sampleValue = f.applyAsDouble(rangedToUnit);
+            sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,sampleValue)) : sampleValue;
             precomputed[s] = sampleValue;
         }
-        precomputed[precomputed.length-1]=precomputed[precomputed.length-2]; // only for right of max, when S==Max in the rare case
         return precomputed;
     }

     @Override
     public double applyAsDouble(int input) {
-        long value = input;
         if (hash!=null) {
-            value = hash.applyAsLong(value);
+            input = hash.applyAsInt(input);
         }
-        double unit = (double) value / (double) Long.MAX_VALUE;
-        double samplePoint = unit * resolution;
-        int leftidx = (int) samplePoint;
-        double leftPartial = samplePoint - leftidx;
-        double leftComponent=(lut[leftidx] * (1.0-leftPartial));
-        double rightComponent = (lut[leftidx+1] * leftPartial);
-        double sample = leftComponent + rightComponent;
+        double samplePoint = scaleToIntRanged * input;
+        int leftidx = (int) samplePoint;
+        double fractional = samplePoint - leftidx;
+        double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx+1] * fractional);
         return sample;
     }
 }

View File

@@ -6,14 +6,30 @@ import java.util.Arrays;

 import java.util.function.DoubleUnaryOperator;
 import java.util.function.LongToDoubleFunction;

+/**
+ * See {@link io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate} for
+ * details on implementation.
+ *
+ * For the 6 implementations of interpolating samplers which use inverse cumulative distribution tables,
+ * care should be given to the following:
+ * <UL>
+ *     <LI>Input Ranging - ensure that the input type is appropriate for the curve; pre-scaling needs to be matched
+ *     to the input type</LI>
+ *     <LI>resolution, scale, and LUT length</LI>
+ *     <LI>+1 LUT padding for U=1.0</LI>
+ *     <LI>Uniform LERP code in the main function</LI>
+ * </UL>
+ */
 public class InterpolatingLongDoubleSampler implements LongToDoubleFunction {

-    private static final double MAX_LONG_AS_DOUBLE = Long.MAX_VALUE;
     private final double[] lut;
     private final DoubleUnaryOperator f;
-    private final int resolution;
     private final boolean clamp;
     private final double clampMin;
     private final double clampMax;
+    private final double scaleToLong;
     private ThreadSafeHash hash;

     public InterpolatingLongDoubleSampler(DoubleUnaryOperator icdSource, int resolution, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) {
@@ -24,44 +40,41 @@ public class InterpolatingLongDoubleSampler implements LongToDoubleFunction {
         this.clamp=clamp;
         this.clampMin=clampMin;
         this.clampMax=clampMax;
-        double[] lut = precompute(resolution);
+        double[] computed = precompute(resolution);
         if (finite) {
-            while (lut.length>0 && Double.isInfinite(lut[0])) {
-                lut = Arrays.copyOfRange(lut,1,lut.length-1);
+            while (computed.length>0 && Double.isInfinite(computed[0])) {
+                computed = Arrays.copyOfRange(computed,1,computed.length-1);
             }
-            while (lut.length>0 && Double.isInfinite(lut[lut.length-1])) {
-                lut = Arrays.copyOfRange(lut,0,lut.length-2);
+            while (computed.length>0 && Double.isInfinite(computed[computed.length-1])) {
+                computed = Arrays.copyOfRange(computed,0,computed.length-2);
             }
         }
-        this.lut = lut;
-        this.resolution = lut.length-1;
+        double[] padded = new double[computed.length+1];
+        System.arraycopy(computed,0,padded,0,computed.length);
+        this.scaleToLong = (1.0d / (double) Long.MAX_VALUE) * (padded.length-2);
+        this.lut = padded;
     }

     private double[] precompute(int resolution) {
-        double[] precomputed = new double[resolution+1];
-        for (int s = 0; s <= resolution; s++) { // not a ranging error
+        double[] precomputed = new double[resolution];
+        for (int s = 0; s < resolution; s++) { // not a ranging error
             double rangedToUnit = (double) s / (double) resolution;
-            double sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,f.applyAsDouble(rangedToUnit))) : f.applyAsDouble(rangedToUnit);
+            double sampleValue = f.applyAsDouble(rangedToUnit);
+            sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,sampleValue)) : sampleValue;
             precomputed[s] = sampleValue;
         }
-        precomputed[precomputed.length-1]=precomputed[precomputed.length-2]; // only for right of max, when S==Max in the rare case
         return precomputed;
     }

     @Override
-    public double applyAsDouble(long value) {
+    public double applyAsDouble(long input) {
         if (hash!=null) {
-            value = hash.applyAsLong(value);
+            input = hash.applyAsLong(input);
         }
-        double unit = (double) value / (double) Long.MAX_VALUE;
-        double samplePoint = unit * resolution;
-        int leftidx = (int) samplePoint;
-        double leftPartial = samplePoint - leftidx;
-        double leftComponent=(lut[leftidx] * (1.0-leftPartial));
-        double rightComponent = (lut[leftidx+1] * leftPartial);
-        double sample = leftComponent + rightComponent;
+        double samplePoint = scaleToLong * input;
+        int leftidx = (int) samplePoint;
+        double fractional = samplePoint - leftidx;
+        double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx+1] * fractional);
         return sample;
     }
 }
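
To make the checklist in the javadoc above concrete, here is a minimal usage sketch against the constructor signature shown in this diff. The quadratic inverse-CDF lambda is a made-up stand-in, not a distribution from this module.

```java
import java.util.function.DoubleUnaryOperator;

// A hedged usage sketch of the sampler above; the ICD function is illustrative only.
public class SamplerUsageSketch {
    public static void main(String[] args) {
        // Hypothetical inverse CDF on the unit interval; replace with a real ICD source.
        DoubleUnaryOperator icd = u -> 100.0d * u * u;
        // resolution=1000, hash=true, clamp to [0,100], finite=true, per the signature above
        InterpolatingLongDoubleSampler sampler =
            new InterpolatingLongDoubleSampler(icd, 1000, true, true, 0.0d, 100.0d, true);
        // Hashed inputs behave like pseudo-random unit-interval samples.
        for (long i = 0; i < 3; i++) {
            System.out.println(sampler.applyAsDouble(i * (Long.MAX_VALUE / 3)));
        }
    }
}
```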

View File

@@ -10,15 +10,17 @@ public class RealIntDoubleSampler implements IntToDoubleFunction {

     private final DoubleUnaryOperator f;
     private final boolean clamp;
     private final double clampMax;
+    private final double clampMin;
     private ThreadSafeHash hash;

-    public RealIntDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMax) {
+    public RealIntDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) {
         this.f = parentFunc;
         if (hash) {
             this.hash = new ThreadSafeHash();
         }
-        this.clamp = clamp;
-        this.clampMax = clampMax;
+        this.clamp = clamp | finite;
+        this.clampMin = Double.max(clampMin,Double.MIN_VALUE);
+        this.clampMax = Double.min(clampMax,Double.MAX_VALUE);
     }

     @Override
@@ -28,7 +30,7 @@ public class RealIntDoubleSampler implements IntToDoubleFunction {
             value = hash.applyAsLong(value);
         }
         double unit = (double) value / (double) Long.MAX_VALUE;
-        double sample = clamp ? Double.min(clampMax,f.applyAsDouble(unit)) : f.applyAsDouble(unit);
+        double sample = clamp ? Double.max(Double.min(clampMax,f.applyAsDouble(unit)),clampMin) : f.applyAsDouble(unit);
         return sample;
     }
 }

View File

@@ -10,24 +10,26 @@ public class RealLongDoubleSampler implements LongToDoubleFunction {

     private final DoubleUnaryOperator f;
     private final boolean clamp;
     private final double clampMax;
+    private final double clampMin;
     private ThreadSafeHash hash;

-    public RealLongDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMax) {
+    public RealLongDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) {
         this.f = parentFunc;
         if (hash) {
             this.hash = new ThreadSafeHash();
         }
-        this.clamp = clamp;
-        this.clampMax=clampMax;
+        this.clamp = clamp | finite;
+        this.clampMin = Double.max(clampMin,Double.MIN_VALUE);
+        this.clampMax = Double.min(clampMax,Double.MAX_VALUE);
     }

     @Override
     public double applyAsDouble(long value) {
         if (hash != null) {
             value = hash.applyAsLong(value);
         }
         double unit = (double) value / (double) Long.MAX_VALUE;
-        double sample = clamp ? Double.min(clampMax,f.applyAsDouble(unit)) : f.applyAsDouble(unit);
+        double sample = clamp ? Double.max(clampMin, Double.min(clampMax, f.applyAsDouble(unit))) : f.applyAsDouble(unit);
         return sample;
     }
 }

View File

@@ -109,7 +109,6 @@ public class IntToDoubleContinuousCurve implements IntToDoubleFunction {
             throw new RuntimeException("mods must not contain both "+ INFINITE +" and "+FINITE+".");
         }
-
         for (String s : modslist) {
             if (!validModifiers.contains(s)) {
                 throw new RuntimeException("modifier '" + s + "' is not a valid modifier. Use one of " + validModifiers + " instead.");
@@ -122,9 +121,9 @@ public class IntToDoubleContinuousCurve implements IntToDoubleFunction {
         boolean finite = ( mods.contains(FINITE) || !mods.contains(INFINITE));

         function = interpolate ?
-            new InterpolatingIntDoubleSampler(icdSource, 1000, hash, clamp, Integer.MIN_VALUE, Long.MAX_VALUE, finite)
+            new InterpolatingIntDoubleSampler(icdSource, 1000, hash, clamp, Integer.MIN_VALUE, Integer.MAX_VALUE, finite)
             :
-            new RealIntDoubleSampler(icdSource, hash, clamp, (double) Long.MAX_VALUE);
+            new RealIntDoubleSampler(icdSource, hash, clamp, Integer.MIN_VALUE, Integer.MAX_VALUE, true);
     }

View File

@@ -124,7 +124,7 @@ public class LongToDoubleContinuousCurve implements LongToDoubleFunction {
         function = interpolate ?
             new InterpolatingLongDoubleSampler(icdSource, 1000, hash, clamp, Long.MIN_VALUE, Long.MAX_VALUE, finite)
             :
-            new RealLongDoubleSampler(icdSource, hash, clamp, (double) Long.MAX_VALUE);
+            new RealLongDoubleSampler(icdSource, hash, clamp, Long.MIN_VALUE, Long.MAX_VALUE, true);
     }

View File

@@ -1,5 +1,7 @@
 package io.nosqlbench.virtdata.library.curves4.discrete.common;

+import io.nosqlbench.virtdata.library.basics.shared.unary_int.Hash;
+
 import java.util.function.DoubleToIntFunction;
 import java.util.function.IntUnaryOperator;
@@ -7,46 +9,42 @@ public class InterpolatingIntIntSampler implements IntUnaryOperator {

     private final double[] lut;
     private final DoubleToIntFunction f;
-    private int resolution;
-    private ThreadSafeHash hash;
+    private Hash hash;
+    private final double scaleToIntRanged;

     public InterpolatingIntIntSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) {
         this.f = icdSource;
-        this.resolution = resolution;
         if (hash) {
-            this.hash = new ThreadSafeHash();
+            this.hash = new Hash();
         }
-        this.lut = precompute();
+        double[] computed = precompute(resolution);
+        double[] padded = new double[computed.length+1];
+        System.arraycopy(computed,0,padded,0,computed.length);
+        padded[padded.length-1] = padded[padded.length-2];
+        scaleToIntRanged = (1.0d / Integer.MAX_VALUE) * (padded.length-2);
+        this.lut = padded;
     }

-    private double[] precompute() {
-        double[] precomputed = new double[resolution+2];
-        for (int s = 0; s <= resolution; s++) { // not a ranging error
+    private double[] precompute(int resolution) {
+        double[] precomputed = new double[resolution];
+        for (int s = 0; s < resolution; s++) { // not a ranging error
             double rangedToUnit = (double) s / (double) resolution;
             int sampleValue = f.applyAsInt(rangedToUnit);
             precomputed[s] = sampleValue;
         }
-        precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case
         return precomputed;
     }

     @Override
     public int applyAsInt(int input) {
-        int value = input;
         if (hash!=null) {
-            value = (int) (hash.applyAsLong(input) % Integer.MAX_VALUE);
+            input = hash.applyAsInt(input);
         }
-        double unit = (double) value / (double) Integer.MAX_VALUE;
-        double samplePoint = unit * resolution;
-        int leftidx = (int) samplePoint;
-        double leftPartial = samplePoint - leftidx;
-        double leftComponent=(lut[leftidx] * (1.0-leftPartial));
-        double rightComponent = (lut[leftidx+1] * leftPartial);
-        double sample = leftComponent + rightComponent;
+        double samplePoint = scaleToIntRanged * input;
+        int leftidx = (int) samplePoint;
+        double fractional = samplePoint - leftidx;
+        double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx+1] * fractional);
         return (int) sample;
     }
 }

View File

@@ -1,5 +1,7 @@
 package io.nosqlbench.virtdata.library.curves4.discrete.common;

+import io.nosqlbench.virtdata.library.basics.shared.unary_int.Hash;
+
 import java.util.function.DoubleToIntFunction;
 import java.util.function.IntToLongFunction;
@@ -7,46 +9,40 @@ public class InterpolatingIntLongSampler implements IntToLongFunction {

     private final double[] lut;
     private final DoubleToIntFunction f;
-    private int resolution;
-    private ThreadSafeHash hash;
+    private Hash hash;
+    private final double scaleToIntRanged;

     public InterpolatingIntLongSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) {
         this.f = icdSource;
-        this.resolution = resolution;
         if (hash) {
-            this.hash = new ThreadSafeHash();
+            this.hash = new Hash();
         }
-        this.lut = precompute();
+        double[] computed = precompute(resolution);
+        double[] padded = new double[computed.length+1];
+        System.arraycopy(computed,0,padded,0,computed.length);
+        this.scaleToIntRanged = (1.0d / Integer.MAX_VALUE) * (padded.length-2);
+        this.lut = padded;
     }

-    private double[] precompute() {
-        double[] precomputed = new double[resolution+2];
-        for (int s = 0; s <= resolution; s++) { // not a ranging error
+    private double[] precompute(int resolution) {
+        double[] precomputed = new double[resolution];
+        for (int s = 0; s < resolution; s++) { // not a ranging error
             double rangedToUnit = (double) s / (double) resolution;
             int sampleValue = f.applyAsInt(rangedToUnit);
             precomputed[s] = sampleValue;
         }
-        precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case
         return precomputed;
     }

     @Override
     public long applyAsLong(int input) {
-        int value = input;
         if (hash!=null) {
-            value = (int) (hash.applyAsLong(input) % Integer.MAX_VALUE);
+            input = hash.applyAsInt(input);
         }
-        double unit = (double) value / (double) Integer.MAX_VALUE;
-        double samplePoint = unit * resolution;
-        int leftidx = (int) samplePoint;
-        double leftPartial = samplePoint - leftidx;
-        double leftComponent=(lut[leftidx] * (1.0-leftPartial));
-        double rightComponent = (lut[leftidx+1] * leftPartial);
-        double sample = leftComponent + rightComponent;
+        double samplePoint = scaleToIntRanged * input;
+        int leftidx = (int) samplePoint;
+        double fractional = samplePoint - leftidx;
+        double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx+1] * fractional);
         return (long) sample;
     }
 }

View File

@@ -7,43 +7,41 @@ public class InterpolatingLongIntSampler implements LongToIntFunction {

     private final double[] lut;
     private final DoubleToIntFunction f;
-    private int resolution;
     private ThreadSafeHash hash;
+    private final double scaleToLong;

     public InterpolatingLongIntSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) {
         this.f = icdSource;
-        this.resolution = resolution;
         if (hash) {
             this.hash = new ThreadSafeHash();
         }
-        this.lut = precompute();
+        double[] computed = precompute(resolution);
+        double[] padded = new double[computed.length+1];
+        System.arraycopy(computed,0,padded,0,computed.length);
+        padded[padded.length-1] = padded[padded.length-2];
+        scaleToLong = (1.0d / Long.MAX_VALUE) * (padded.length-2);
+        this.lut = padded;
     }

-    private double[] precompute() {
-        double[] precomputed = new double[resolution+2];
-        for (int s = 0; s <= resolution; s++) { // not a ranging error
+    private double[] precompute(int resolution) {
+        double[] precomputed = new double[resolution];
+        for (int s = 0; s < resolution; s++) {
             double rangedToUnit = (double) s / (double) resolution;
             int sampleValue = f.applyAsInt(rangedToUnit);
             precomputed[s] = sampleValue;
         }
-        precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case
         return precomputed;
     }

     @Override
-    public int applyAsInt(long value) {
+    public int applyAsInt(long input) {
         if (hash!=null) {
-            value = hash.applyAsLong(value);
+            input = hash.applyAsLong(input);
         }
-        double unit = (double) value / (double) Long.MAX_VALUE;
-        double samplePoint = unit * resolution;
-        int leftidx = (int) samplePoint;
-        double leftPartial = samplePoint - leftidx;
-        double leftComponent=(lut[leftidx] * (1.0-leftPartial));
-        double rightComponent = (lut[leftidx+1] * leftPartial);
-        double sample = leftComponent + rightComponent;
+        double samplePoint = scaleToLong * input;
+        int leftidx = (int) samplePoint;
+        double fractional = samplePoint - leftidx;
+        double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx+1] * fractional);
         return (int) sample;
     }
 }

View File

@@ -7,43 +7,41 @@ public class InterpolatingLongLongSampler implements LongUnaryOperator {

     private final double[] lut;
     private final DoubleToIntFunction f;
-    private int resolution;
     private ThreadSafeHash hash;
+    private final double scaleToLong;

     public InterpolatingLongLongSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) {
         this.f = icdSource;
-        this.resolution = resolution;
         if (hash) {
             this.hash = new ThreadSafeHash();
         }
-        this.lut = precompute();
+        double[] computed = precompute(resolution);
+        double[] padded = new double[computed.length+1];
+        System.arraycopy(computed,0,padded,0,computed.length);
+        padded[padded.length-1] = padded[padded.length-2];
+        scaleToLong = (1.0d / Long.MAX_VALUE) * ((double)(padded.length-2));
+        this.lut = padded;
     }

-    private double[] precompute() {
-        double[] precomputed = new double[resolution+2];
-        for (int s = 0; s <= resolution; s++) { // not a ranging error
+    private double[] precompute(int resolution) {
+        double[] precomputed = new double[resolution];
+        for (int s = 0; s < resolution; s++) {
             double rangedToUnit = (double) s / (double) resolution;
             int sampleValue = f.applyAsInt(rangedToUnit);
             precomputed[s] = sampleValue;
         }
-        precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case
         return precomputed;
     }

     @Override
-    public long applyAsLong(long value) {
+    public long applyAsLong(long input) {
         if (hash!=null) {
-            value = hash.applyAsLong(value);
+            input = hash.applyAsLong(input);
         }
-        double unit = (double) value / (double) Long.MAX_VALUE;
-        double samplePoint = unit * resolution;
-        int leftidx = (int) samplePoint;
-        double leftPartial = samplePoint - leftidx;
-        double leftComponent=(lut[leftidx] * (1.0-leftPartial));
-        double rightComponent = (lut[leftidx+1] * leftPartial);
-        double sample = leftComponent + rightComponent;
+        double samplePoint = scaleToLong * input;
+        int leftidx = (int) samplePoint;
+        double fractional = samplePoint - leftidx;
+        double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx+1] * fractional);
         return (long) sample;
     }
 }

View File

@@ -11,7 +11,7 @@ public class LevyTest {
     @Test
     public void testLevy() {
         Levy levy = new Levy(2.3d, 1.0d);
-        assertThat(levy.applyAsDouble(10L)).isCloseTo(2.938521849905433, Offset.offset(0.000001d));
+        assertThat(levy.applyAsDouble(10L)).isCloseTo(2.9379325000660304, Offset.offset(0.000001d));
     }
 }

View File

@@ -17,7 +17,7 @@ public class RealDistributionsValuesTest {
     @Test
     public void testComputedNormal() {
-        RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"compute"), 1000000);
+        RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"compute"), 1000000,1);
         System.out.println(runData);
         assertThat(runData.getFractionalPercentile(0.5D))
             .isCloseTo(10.0D, Offset.offset(0.01D));
@@ -29,7 +29,7 @@ public class RealDistributionsValuesTest {
     @Test
     public void testInterpolatedNormal() {
-        RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"interpolate"), 1000000);
+        RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"interpolate"), 1000000,1);
         System.out.println(runData);
         assertThat(runData.getFractionalPercentile(0.5D))
             .isCloseTo(10.0D, Offset.offset(0.01D));
@@ -41,7 +41,7 @@ public class RealDistributionsValuesTest {
     @Test
     public void testComputedUniform() {
-        RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"compute"), 1000000);
+        RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"compute"), 1000000,1);
         assertThat(runData.getFractionalPercentile(0.33D))
             .isCloseTo(33.33D, Offset.offset(1.0D));
         assertThat(runData.getFractionalPercentile(0.5D))
@@ -53,7 +53,7 @@ public class RealDistributionsValuesTest {
     @Test
     public void testInterpolatedUniform() {
-        RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"interpolate"), 1000000);
+        RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"interpolate"), 1000000,1);
         assertThat(runData.getFractionalPercentile(0.33D))
             .isCloseTo(33.33D, Offset.offset(1.0D));
         assertThat(runData.getFractionalPercentile(0.5D))
@@ -66,22 +66,27 @@ public class RealDistributionsValuesTest {
     @Test
     public void testInterpolatedMappedUniform() {
         Uniform mapper = new Uniform(0.0, 100.0, "map", "interpolate");
-        RunData runData = iterateMapperDouble(mapper,10000000);
+        RunData runData = iterateMapperDouble(mapper,10000000,Long.MAX_VALUE/10000000L);
+        assertThat(runData.getFractionalPercentile(0.001D))
+            .isCloseTo(0.0D, Offset.offset(1.0D));
         assertThat(runData.getFractionalPercentile(0.999D))
-            .isCloseTo(0.0D, Offset.offset(1.0D));
+            .isCloseTo(99.99D, Offset.offset(1.0D));
         assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0D, Offset.offset(1.0D));
+        System.out.println(runData);
     }

-    private RunData iterateMapperDouble(LongToDoubleFunction mapper, int iterations) {
+    private RunData iterateMapperDouble(LongToDoubleFunction mapper, int iterations, long funcstep) {
         assertThat(mapper).isNotNull();
         double[] samples = new double[iterations];
         long time_generating = System.nanoTime();
         for (int i = 0; i < iterations; i++) {
-            samples[i] = mapper.applyAsDouble(i);
+            samples[i] = mapper.applyAsDouble(i*funcstep);
         }
         long time_generated = System.nanoTime();

View File

@@ -46,7 +46,7 @@ public class IntegerDistributionsConcurrencyTest {
         // threshold test against CDF
         expected = mapper.get((long) (0.03515d * (double) Long.MAX_VALUE));
         assertThat(expected).isEqualTo(1);
-        expected = mapper.get((long) (0.03600d * (double) Long.MAX_VALUE));
+        expected = mapper.get((long) (0.03700d * (double) Long.MAX_VALUE));
         assertThat(expected).isEqualTo(2);
     }

View File

@@ -16,6 +16,7 @@ import static org.assertj.core.api.Assertions.assertThat;

 public class IntegerDistributionsValuesTest {

+    @Disabled
     @Test
     public void testComputedZipf() {
@@ -70,13 +71,19 @@ public class IntegerDistributionsValuesTest {
         System.out.println(runData);
     }

+    @Test
+    public void testMaximumValue() {
+        Uniform mapper = new Uniform(0.0d, 100.0d, "interpolate", "map");
+        assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0d,Offset.offset(0.1D));
+    }
+
     @Test
     public void testInterpolatedMappedUniform() {
         Uniform mapper = new Uniform(0.0d, 100.0d, "interpolate", "map");
         RunData runData = iterateMapperDouble(mapper,10000000);
         assertThat(runData.getFractionalPercentile(0.999D))
             .isCloseTo(0.0D, Offset.offset(1.0D));
-        assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0d,Offset.offset(0.0001D));
+        assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0d,Offset.offset(0.1D));
     }
@@ -101,7 +108,11 @@ public class IntegerDistributionsValuesTest {
         double[] samples = new double[iterations];
         long time_generating = System.nanoTime();

+        int readout = iterations/10;
         for (int i = 0; i < iterations; i++) {
+            if ((i%readout)==0) {
+                System.out.println("i="+i+"/"+iterations);
+            }
             samples[i] = mapper.applyAsDouble(i);
         }
         long time_generated = System.nanoTime();

View File

@@ -69,23 +69,24 @@ little loss in accuracy, but the difference is generally negligible for nearly a

 #### Infinite or Finite

-For interpolated samples, you also have the option of including or excluding infinite values
-which may occur in some distributions. If you want to include them, use `infinite`, or `finite`
-to explicitly avoid them (the default). Specifying 'infinite' doesn't guarantee that you will
-see +Infinity or -Infinity, only that they are allowed. The Normal distribution often contains
--Infinity and +Infinity, for example, due to the function used to estimate its cumulative
-distribution. These values can often be valuable in finding corner cases which should be treated
-uniformly according to [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754).
+For interpolated samples from continuous distributions, you also have the option of including or
+excluding infinite values which may occur in some distributions. If you want to include them,
+use `infinite`, or `finite` to explicitly avoid them (the default). Specifying 'infinite'
+doesn't guarantee that you will see +Infinity or -Infinity, only that they are allowed. The
+Normal distribution often contains -Infinity and +Infinity, for example, due to the function
+used to estimate its cumulative distribution. These values can often be valuable in finding
+corner cases which should be treated uniformly according to
+[IEEE 754](https://en.wikipedia.org/wiki/IEEE_754).

 #### Clamp or Noclamp

-For interpolated samples, you also have the option of clamping the allowed values to the valid
-range for the integral data type used as input. To clamp the output values to the range
-(Long.MIN_VALUE,Long.MAX_VALUE) for long->double functions, or to (Integer.MIN_VALUE,Integer.
-MAX_VALUE) for int-double functions, specify `clamp`, which is also the default. To explicitly
-disable this, use `noclamp`. This is useful when you know the downstream functions will only
-work with a certain range of values without truncating conversions. When you are using double
-values natively on the downstream functions, use `noclamp` to avoid limiting the domain of
-values in your test data. (In this case, you might also consider `infinite`).
+For interpolated samples from continuous distributions, you also have the option of clamping the
+allowed values to the valid range for the integral data type used as input. To clamp the output
+values to the range (Long.MIN_VALUE,Long.MAX_VALUE) for long->double functions, or to (Integer.
+MIN_VALUE,Integer.MAX_VALUE) for int->double functions, specify `clamp`, which is also the default.
+To explicitly disable this, use `noclamp`. This is useful when you know the downstream functions
+will only work with a certain range of values without truncating conversions. When you are using
+double values natively on the downstream functions, use `noclamp` to avoid limiting the domain of
+values in your test data. (In this case, you might also consider `infinite`).

 ### Computed Samples
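
As a concrete illustration of the `interpolate`, `map`, `clamp`, and `finite` modifiers documented in the file above, here is a hedged sketch in the style of this changeset's tests. The import paths are assumed from the test classes in this commit and may need adjustment for your module layout.

```java
import io.nosqlbench.virtdata.library.curves4.continuous.long_double.Normal;
import io.nosqlbench.virtdata.library.curves4.continuous.long_double.Uniform;

// An illustrative sketch of the modifier names above; not code from this changeset.
public class ModifierSketch {
    public static void main(String[] args) {
        // 'map' scales the input directly instead of hashing it first;
        // 'clamp' (the default) bounds outputs to the configured range.
        Uniform uniform = new Uniform(0.0, 100.0, "interpolate", "map", "clamp");
        // 'finite' (the default) trims infinite LUT endpoints such as Normal's tails.
        Normal normal = new Normal(10.0, 2.0, "interpolate", "map", "finite");
        System.out.println(uniform.applyAsDouble(Long.MAX_VALUE / 2)); // ~50.0
        System.out.println(normal.applyAsDouble(Long.MAX_VALUE / 2));  // ~10.0, the median
    }
}
```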

View File

@@ -25,8 +25,8 @@ public class IntegratedCurvesTest {
         assertThat(runData.getStats().getPercentile(1.0d)).isCloseTo(1.0, Offset.offset(0.01d));
         assertThat(runData.getStats().getPercentile(10.0d)).isCloseTo(1.0, Offset.offset(0.01d));
         assertThat(runData.getStats().getPercentile(90.0d)).isCloseTo(6.0, Offset.offset(0.01d));
-        assertThat(runData.getStats().getPercentile(99.0d)).isCloseTo(61.0, Offset.offset(0.01d));
-        assertThat(runData.getStats().getPercentile(99.9d)).isCloseTo(311.0, Offset.offset(0.01d));
+        assertThat(runData.getStats().getPercentile(99.0d)).isCloseTo(55.989, Offset.offset(0.01d));
+        assertThat(runData.getStats().getPercentile(99.9d)).isCloseTo(202.999, Offset.offset(0.01d));
     }