refactor/improve stat samplers for efficiency and correctness

This commit is contained in:
Jonathan Shook 2021-11-18 16:35:20 -06:00
parent d31fbd45fe
commit ff142fd0b2
20 changed files with 336 additions and 205 deletions

View File

@ -0,0 +1,65 @@
This diagram shows the base implementations of all the statistical sampler
wrappers, the types they implement, and the helper functions which are key
to their operation.
```plantuml
digraph samplers {
rankdir=LR;
node[shape=box];
subgraph cluster0 {
label="continuous"
subgraph cluster3 {
label="int->double"
IntToDoubleContinuousCurve[shape=box]
IntToDoubleContinuousCurve -> IntToDoubleFunction[style=dashed]
IntToDoubleContinuousCurve -> InterpolatingIntDoubleSampler
IntToDoubleContinuousCurve -> RealIntDoubleSampler
}
subgraph cluster4 {
label="long->double"
LongToDoubleContinuousCurve[shape=box]
LongToDoubleContinuousCurve -> LongToDoubleFunction[style=dashed]
LongToDoubleContinuousCurve -> InterpolatingLongDoubleSampler
LongToDoubleContinuousCurve -> RealLongDoubleSampler
}
}
subgraph cluster1 {
label="discrete"
subgraph cluster5 {
label="int->int"
IntToIntDiscreteCurve[shape=box]
IntToIntDiscreteCurve -> IntUnaryOperator[style=dashed]
IntToIntDiscreteCurve -> InterpolatingIntIntSampler
IntToIntDiscreteCurve -> DiscreteIntIntSampler
}
subgraph cluster6 {
label="int->long"
IntToLongDiscreteCurve[shape=box]
IntToLongDiscreteCurve -> IntToLongFunction[style=dashed]
IntToLongDiscreteCurve -> InterpolatingIntLongSampler
IntToLongDiscreteCurve -> DiscreteIntLongSampler
}
subgraph cluster7 {
label="long->int"
LongToIntDiscreteCurve[shape=box]
LongToIntDiscreteCurve -> LongToIntFunction[style=dashed]
LongToIntDiscreteCurve -> InterpolatingLongIntSampler
LongToIntDiscreteCurve -> DiscreteLongIntSampler
}
subgraph cluster8 {
label="long->long"
LongToLongDiscreteCurve[shape=box]
LongToLongDiscreteCurve -> LongUnaryOperator[style=dashed]
LongToLongDiscreteCurve -> InterpolatingLongLongSampler
LongToLongDiscreteCurve -> DiscreteLongLongSampler
}
}
}
```

View File

@ -8,36 +8,76 @@ import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.LongToDoubleFunction;
/**
* Return a value along an interpolation curve. This allows you to sketch a basic
* density curve and describe it simply with just a few values. The number of values
* provided determines the resolution of the internal lookup table that is used for
* interpolation. The first value is always the 0.0 anchoring point on the unit interval.
* The last value is always the 1.0 anchoring point on the unit interval. This means
* that in order to subdivide the density curve in an interesting way, you need to provide
* a few more values in between them. Providing only two values yields a uniform
* sample between a minimum and a maximum value.
*
* The input range of this function, like that of many other functions in this library, is
* the valid range of non-negative long values, between 0L and Long.MAX_VALUE inclusive.
* This means that if you want to combine interpolation on this curve with the effect of
* pseudo-random sampling, you need to put a hash function ahead of it in the flow.
*
* Developer Note: This is the canonical implementation of LERPing in NoSQLBench, so it is
* heavily documented. Any other LERP implementation should borrow directly from this one,
* embedding the same logic by default.
*/
@ThreadSafeMapper
@Categories({Category.general})
public class Interpolate implements LongToDoubleFunction {
private final double scale;
// How many values we have to pick from
private final double resolution;
// The lookup table
private final double[] lut;
private final static double maxLongAsDouble = (double) Long.MAX_VALUE;
/**
* The scale factor for Long.MAX_VALUE and the unit interval scale factor are pre-combined
* here to reduce the number of operations later.
*
* The LUT size is retained as the number of elements provided (resolution) + 1.
* The +1 element serves as the N+1 index for when the unit interval sample is
* 1.0. In other words, the maximum value is not a special case, as a duplicate
* value is appended to the LUT instead.
*
* The scale factor from the unit interval to the array index is (resolution - 1). Since
* the input comes in as a long value, it is mapped from [0L, Long.MAX_VALUE] to
* [0.0D, 1.0D] by multiplying by (1.0/(double)Long.MAX_VALUE). Both factors are folded
* into a single multiplier, so the long input value can be multiplied directly to yield
* a double in the range [0,LUT.length-2], which simplifies all remaining LERP math.
*
*/
private final double scaleToLongInterval;
@Example({"Interpolate(0.0d,100.0d)","return a uniform double value between 0.0d and 100.0d"})
@Example({"Interpolate(0.0d,90.0d,95.0d,98.0d,100.0d)","return a weighted double value where the first second and third quartiles are 90.0D, 95.0D, and 98.0D"})
@Example({"Interpolate(0.0d,100.0d)", "return a uniform double value between 0.0d and 100.0d"})
@Example({"Interpolate(0.0d,90.0d,95.0d,98.0d,100.0d)", "return a weighted double value where the first second and third quartiles are 90.0D, 95.0D, and 98.0D"})
public Interpolate(double... values) {
double[] doubles = new double[values.length+1];
for (int i = 0; i < values.length; i++) { // not a ranging error
doubles[i]=values[i];
}
doubles[doubles.length-1]=doubles[doubles.length-2];
this.scale=values.length-1;
this.resolution = values.length;
double[] doubles = new double[values.length + 1];
System.arraycopy(values,0,doubles,0,values.length);
doubles[doubles.length - 1] = doubles[doubles.length - 2];
this.lut = doubles;
this.scaleToLongInterval = (this.resolution - 1) * (1.0d / (double) Long.MAX_VALUE);
}
@Override
public double applyAsDouble(long input) {
long value = input;
double samplePoint = ((double)input / maxLongAsDouble) * scale;
int leftidx = (int)samplePoint;
double fractional = samplePoint - (long)samplePoint;
double leftComponent = lut[leftidx]* (1.0d-fractional);
double rightComponent = lut[leftidx+1] * fractional;
double sample = (leftComponent + rightComponent);
// scale the input from [0,Long.MAX_VALUE] to [0.0,lut.length-2]
double samplePoint = scaleToLongInterval * input;
// truncate the sample point to the left index
int leftidx = (int) samplePoint;
// isolate the fractional component
double fractional = samplePoint - leftidx;
// combine the left and right components, each weighted by the
// sample point's proximity to its side of the interval
double sample = (lut[leftidx] * (1.0d - fractional)) + (lut[leftidx + 1] * fractional);
return sample;
}
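
As the javadoc above notes, pseudo-random sampling requires a hash stage ahead of the interpolation. A minimal sketch of that flow, using the library's Hash and Interpolate functions (the surrounding harness class is illustrative, not part of this change):

```java
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate;
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_long.Hash;

public class InterpolateFlowSketch {
    public static void main(String[] args) {
        // anchor points at u=0.0, 0.25, 0.5, 0.75, 1.0: quartiles at 90.0, 95.0, 98.0
        Interpolate curve = new Interpolate(0.0d, 90.0d, 95.0d, 98.0d, 100.0d);
        Hash hash = new Hash();
        for (long i = 0; i < 5; i++) {
            // hash first, so sequential inputs become pseudo-random sample points
            double sample = curve.applyAsDouble(hash.applyAsLong(i));
            System.out.println(i + " -> " + sample);
        }
    }
}
```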

View File

@ -6,25 +6,10 @@ import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.LongUnaryOperator;
/**
* Return a value along an interpolation curve. This allows you to sketch a basic
* density curve and describe it simply with just a few values. The number of values
* provided determines the resolution of the internal lookup table that is used for
* interpolation. The first value is always the 0.0 anchoring point on the unit interval.
* The last value is always the 1.0 anchoring point on the unit interval. This means
* that in order to subdivide the density curve in an interesting way, you need to provide
* a few more values in between them. Providing two values simply provides a uniform
* sample between a minimum and maximum value.
*
* The input range of this function is, as many of the other functions in this library,
* based on the valid range of positive long values, between 0L and Long.MAX_VALUE inclusive.
* This means that if you want to combine interpolation on this curve with the effect of
* pseudo-random sampling, you need to put a hash function ahead of it in the flow.
*/
@ThreadSafeMapper
public class Interpolate implements LongUnaryOperator {
private io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate basefunc;
private final io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate basefunc;
@Example({"Interpolate(0.0d,100.0d)","return a uniform long value between 0L and 100L"})
@Example({"Interpolate(0.0d,90.0d,95.0d,98.0d,100.0d)","return a weighted long value where the first second and third quartiles are 90.0D, 95.0D, and 98.0D"})

View File

@ -1,11 +1,32 @@
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_long;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.assertj.core.data.Offset;
import org.junit.jupiter.api.Test;
import static org.assertj.core.api.Assertions.assertThat;
public class InterpolateTest {
@Test
public void testRanging() {
io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate interpolate =
new io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate (0.0d, 1.0d);
Hash hf = new Hash();
DescriptiveStatistics dss = new DescriptiveStatistics();
long count=10000000;
for (long i = 0; i < count; i++) {
long input = (long) (Long.MAX_VALUE * ((double)i/(double)count));
long prn = hf.applyAsLong(input);
double v = interpolate.applyAsDouble(prn);
dss.addValue(v);
}
assertThat(dss.getPercentile(0.000001)).isCloseTo(0.0, Offset.offset(0.01));
assertThat(dss.getPercentile(99.99999)).isCloseTo(1.0, Offset.offset(0.01));
}
@Test
public void testDeciles() {
long topvalue = 1_000_000_000L;
@ -26,6 +47,7 @@ public class InterpolateTest {
long highvalue = (long) (Long.MAX_VALUE * 0.98d);
long high = f.applyAsLong(highvalue);
assertThat(high).isEqualTo(expected);
System.out.println(" -> was " + high);
long highervalue = (long) (Long.MAX_VALUE * 0.9999d);
long higher = f.applyAsLong(highervalue);
@ -35,4 +57,4 @@ public class InterpolateTest {
assertThat(max).isEqualTo(1000000000L);
}
}
}

View File

@ -1,6 +1,6 @@
package io.nosqlbench.virtdata.library.curves4.continuous.common;
import io.nosqlbench.virtdata.library.curves4.discrete.common.ThreadSafeHash;
import io.nosqlbench.virtdata.library.basics.shared.unary_int.Hash;
import java.util.Arrays;
import java.util.function.DoubleUnaryOperator;
@ -10,11 +10,11 @@ public class InterpolatingIntDoubleSampler implements IntToDoubleFunction{
private final double[] lut;
private final DoubleUnaryOperator f;
private final int resolution;
private final boolean clamp;
private final double clampMin;
private final double clampMax;
private ThreadSafeHash hash;
private final double scaleToIntRanged;
private Hash hash;
public InterpolatingIntDoubleSampler(DoubleUnaryOperator icdSource, int resolution, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) {
this.f = icdSource;
@ -22,47 +22,43 @@ public class InterpolatingIntDoubleSampler implements IntToDoubleFunction{
this.clampMin = clampMin;
this.clampMax = clampMax;
if (hash) {
this.hash = new ThreadSafeHash();
this.hash = new Hash();
}
double[] lut = precompute(resolution);
double[] computed = precompute(resolution);
if (finite) {
while (lut.length>0 && Double.isInfinite(lut[0])) {
lut = Arrays.copyOfRange(lut,1,lut.length-1);
while (computed.length>0 && Double.isInfinite(computed[0])) {
computed = Arrays.copyOfRange(computed,1,computed.length); // drop only the leading entry
}
while (lut.length>0 && Double.isInfinite(lut[lut.length-1])) {
lut = Arrays.copyOfRange(lut,0,lut.length-2);
while (computed.length>0 && Double.isInfinite(computed[computed.length-1])) {
computed = Arrays.copyOfRange(computed,0,computed.length-1); // drop only the trailing entry
}
}
this.lut = lut;
this.resolution=lut.length-1;
double[] padded = new double[computed.length+1];
System.arraycopy(computed,0,padded,0,computed.length);
padded[padded.length-1] = padded[padded.length-2]; // duplicate the top entry for the u=1.0 slot
this.scaleToIntRanged = (1.0d/(double)Integer.MAX_VALUE) * (padded.length-2);
this.lut = padded;
}
private double[] precompute(int resolution) {
double[] precomputed = new double[resolution+1];
for (int s = 0; s <= resolution; s++) { // not a ranging error
double[] precomputed = new double[resolution];
for (int s = 0; s < resolution; s++) { // not a ranging error
double rangedToUnit = (double) s / (double) resolution;
double sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,f.applyAsDouble(rangedToUnit))) : f.applyAsDouble(rangedToUnit);
double sampleValue = f.applyAsDouble(rangedToUnit);
sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,sampleValue)) : sampleValue;
precomputed[s] = sampleValue;
}
precomputed[precomputed.length-1]=precomputed[precomputed.length-2]; // only for right of max, when S==Max in the rare case
return precomputed;
}
@Override
public double applyAsDouble(int input) {
long value = input;
if (hash!=null) {
value = hash.applyAsLong(value);
input = hash.applyAsInt(input);
}
double unit = (double) value / (double) Long.MAX_VALUE;
double samplePoint = unit * resolution;
int leftidx = (int) samplePoint;
double leftPartial = samplePoint - leftidx;
double leftComponent=(lut[leftidx] * (1.0-leftPartial));
double rightComponent = (lut[leftidx+1] * leftPartial);
double sample = leftComponent + rightComponent;
double samplePoint = scaleToIntRanged * input;
int leftidx = (int)samplePoint;
double fractional = samplePoint - leftidx;
double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional);
return sample;
}
}
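
To make the pre-scaling concrete, a small worked check (standalone; the resolution of 1000 matches the default used by the curve wrappers):

```java
// with resolution=1000 the padded LUT has 1001 entries, so the scale factor
// maps [0, Integer.MAX_VALUE] onto [0.0, 999.0], i.e. [0, lut.length-2]
int resolution = 1000;
int lutLength = resolution + 1;
double scaleToIntRanged = (1.0d / (double) Integer.MAX_VALUE) * (lutLength - 2);
System.out.println(scaleToIntRanged * Integer.MAX_VALUE); // ~999.0, the last interpolable index
```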

View File

@ -6,14 +6,30 @@ import java.util.Arrays;
import java.util.function.DoubleUnaryOperator;
import java.util.function.LongToDoubleFunction;
/**
* See {@link io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.Interpolate} for
* details on implementation.
*
* For the 6 implementations of interpolating samplers which use inverse cumulative distribution tables,
* care should be given to the following:
* <UL>
* <LI>Input ranging - ensure that the input type is appropriate for the curve; the pre-scaling
* factor needs to be matched to the input type</LI>
* <LI>Consistency among resolution, scale factor, and LUT length</LI>
* <LI>+1 LUT padding for U=1.0</LI>
* <LI>Uniform LERP code in the main sampling function</LI>
* </UL>
*/
public class InterpolatingLongDoubleSampler implements LongToDoubleFunction {
private static final double MAX_LONG_AS_DOUBLE = Long.MAX_VALUE;
private final double[] lut;
private final DoubleUnaryOperator f;
private final int resolution;
private final boolean clamp;
private final double clampMin;
private final double clampMax;
private final double scaleToLong;
private ThreadSafeHash hash;
public InterpolatingLongDoubleSampler(DoubleUnaryOperator icdSource, int resolution, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) {
@ -24,44 +40,41 @@ public class InterpolatingLongDoubleSampler implements LongToDoubleFunction {
this.clamp=clamp;
this.clampMin=clampMin;
this.clampMax=clampMax;
double[] lut = precompute(resolution);
double[] computed = precompute(resolution);
if (finite) {
while (lut.length>0 && Double.isInfinite(lut[0])) {
lut = Arrays.copyOfRange(lut,1,lut.length-1);
while (computed.length>0 && Double.isInfinite(computed[0])) {
computed = Arrays.copyOfRange(computed,1,computed.length); // drop only the leading entry
}
while (lut.length>0 && Double.isInfinite(lut[lut.length-1])) {
lut = Arrays.copyOfRange(lut,0,lut.length-2);
while (computed.length>0 && Double.isInfinite(computed[computed.length-1])) {
computed = Arrays.copyOfRange(computed,0,computed.length-1); // drop only the trailing entry
}
}
this.lut = lut;
this.resolution = lut.length-1;
double[] padded = new double[computed.length+1];
System.arraycopy(computed,0,padded,0,computed.length);
padded[padded.length-1] = padded[padded.length-2]; // duplicate the top entry for the u=1.0 slot
this.scaleToLong = (1.0d / (double) Long.MAX_VALUE) * (padded.length-2);
this.lut = padded;
}
private double[] precompute(int resolution) {
double[] precomputed = new double[resolution+1];
for (int s = 0; s <= resolution; s++) { // not a ranging error
double[] precomputed = new double[resolution];
for (int s = 0; s < resolution; s++) { // not a ranging error
double rangedToUnit = (double) s / (double) resolution;
double sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,f.applyAsDouble(rangedToUnit))) : f.applyAsDouble(rangedToUnit);
double sampleValue = f.applyAsDouble(rangedToUnit);
sampleValue = clamp ? Double.max(clampMin,Double.min(clampMax,sampleValue)) : sampleValue ;
precomputed[s] = sampleValue;
}
precomputed[precomputed.length-1]=precomputed[precomputed.length-2]; // only for right of max, when S==Max in the rare case
return precomputed;
}
@Override
public double applyAsDouble(long value) {
public double applyAsDouble(long input) {
if (hash!=null) {
value = hash.applyAsLong(value);
input = hash.applyAsLong(input);
}
double unit = (double) value / (double) Long.MAX_VALUE;
double samplePoint = unit * resolution;
int leftidx = (int) samplePoint;
double leftPartial = samplePoint - leftidx;
double leftComponent=(lut[leftidx] * (1.0-leftPartial));
double rightComponent = (lut[leftidx+1] * leftPartial);
double sample = leftComponent + rightComponent;
double samplePoint = scaleToLong * input;
int leftidx = (int)samplePoint;
double fractional = samplePoint - leftidx;
double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional);
return sample;
}
}
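
The checklist in the class comment above reduces to a small shared recipe. A condensed standalone sketch (hypothetical names, not part of the library):

```java
import java.util.function.DoubleUnaryOperator;

// sketch of the shared interpolating-sampler recipe: precompute an inverse-CDF LUT,
// pad one duplicate slot for the unit-interval value 1.0, and pre-combine the
// long-to-index scale factor so sampling is a single multiply plus one LERP
class LerpRecipe {
    final double[] lut;        // resolution entries plus one padding slot
    final double scaleToLong;  // maps [0, Long.MAX_VALUE] onto [0, lut.length-2]

    LerpRecipe(DoubleUnaryOperator icd, int resolution) {
        double[] padded = new double[resolution + 1];
        for (int s = 0; s < resolution; s++) {
            padded[s] = icd.applyAsDouble((double) s / (double) resolution);
        }
        padded[padded.length - 1] = padded[padded.length - 2]; // N+1 slot for u=1.0
        this.lut = padded;
        this.scaleToLong = (1.0d / (double) Long.MAX_VALUE) * (padded.length - 2);
    }

    double sample(long input) {
        double samplePoint = scaleToLong * input;
        int leftidx = (int) samplePoint;
        double fractional = samplePoint - leftidx;
        return lut[leftidx] * (1.0d - fractional) + lut[leftidx + 1] * fractional;
    }
}

// usage: new LerpRecipe(u -> u * 100.0d, 1000).sample(someHashedLong)
```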

View File

@ -10,15 +10,17 @@ public class RealIntDoubleSampler implements IntToDoubleFunction {
private final DoubleUnaryOperator f;
private final boolean clamp;
private final double clampMax;
private final double clampMin;
private ThreadSafeHash hash;
public RealIntDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMax) {
public RealIntDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) {
this.f = parentFunc;
if (hash) {
this.hash = new ThreadSafeHash();
}
this.clamp = clamp;
this.clampMax = clampMax;
this.clamp = clamp | finite;
this.clampMin = Double.max(clampMin,-Double.MAX_VALUE); // -Double.MAX_VALUE is the lowest finite double
this.clampMax = Double.min(clampMax,Double.MAX_VALUE);
}
@Override
@ -28,7 +30,7 @@ public class RealIntDoubleSampler implements IntToDoubleFunction {
value = hash.applyAsLong(value);
}
double unit = (double) value / (double) Long.MAX_VALUE;
double sample =clamp ? Double.min(clampMax,f.applyAsDouble(unit)) : f.applyAsDouble(unit);
double sample = clamp ? Double.max(Double.min(clampMax, f.applyAsDouble(unit)), clampMin) : f.applyAsDouble(unit);
return sample;
}
}

View File

@ -10,24 +10,26 @@ public class RealLongDoubleSampler implements LongToDoubleFunction {
private final DoubleUnaryOperator f;
private final boolean clamp;
private final double clampMax;
private final double clampMin;
private ThreadSafeHash hash;
public RealLongDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMax) {
public RealLongDoubleSampler(DoubleUnaryOperator parentFunc, boolean hash, boolean clamp, double clampMin, double clampMax, boolean finite) {
this.f = parentFunc;
if (hash) {
this.hash = new ThreadSafeHash();
}
this.clamp = clamp;
this.clampMax=clampMax;
this.clamp = clamp | finite;
this.clampMin = Double.max(clampMin,-Double.MAX_VALUE); // -Double.MAX_VALUE is the lowest finite double
this.clampMax = Double.min(clampMax,Double.MAX_VALUE);
}
@Override
public double applyAsDouble(long value) {
if (hash!=null) {
if (hash != null) {
value = hash.applyAsLong(value);
}
double unit = (double) value / (double) Long.MAX_VALUE;
double sample =clamp ? Double.min(clampMax,f.applyAsDouble(unit)) : f.applyAsDouble(unit);
double sample = clamp ? Double.max(clampMin, Double.min(clampMax, f.applyAsDouble(unit))) : f.applyAsDouble(unit);
return sample;
}
}
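
One subtlety in the clamp bounds above: in Java, `Double.MIN_VALUE` is the smallest positive double, not the most negative finite value, so flooring the lower bound requires `-Double.MAX_VALUE`. A quick check:

```java
System.out.println(Double.MIN_VALUE);   // 4.9E-324, the smallest positive double
System.out.println(-Double.MAX_VALUE);  // -1.7976931348623157E308, the lowest finite double
```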

View File

@ -109,7 +109,6 @@ public class IntToDoubleContinuousCurve implements IntToDoubleFunction {
throw new RuntimeException("mods must not contain both "+ INFINITE +" and "+FINITE+".");
}
for (String s : modslist) {
if (!validModifiers.contains(s)) {
throw new RuntimeException("modifier '" + s + "' is not a valid modifier. Use one of " + validModifiers + " instead.");
@ -122,9 +121,9 @@ public class IntToDoubleContinuousCurve implements IntToDoubleFunction {
boolean finite = ( mods.contains(FINITE) || !mods.contains(INFINITE));
function = interpolate ?
new InterpolatingIntDoubleSampler(icdSource, 1000, hash, clamp, Integer.MIN_VALUE, Long.MAX_VALUE, finite)
new InterpolatingIntDoubleSampler(icdSource, 1000, hash, clamp, Integer.MIN_VALUE, Integer.MAX_VALUE, finite)
:
new RealIntDoubleSampler(icdSource, hash, clamp, (double) Long.MAX_VALUE);
new RealIntDoubleSampler(icdSource, hash, clamp, Integer.MIN_VALUE, Integer.MAX_VALUE, true);
}

View File

@ -124,7 +124,7 @@ public class LongToDoubleContinuousCurve implements LongToDoubleFunction {
function = interpolate ?
new InterpolatingLongDoubleSampler(icdSource, 1000, hash, clamp, Long.MIN_VALUE, Long.MAX_VALUE, finite)
:
new RealLongDoubleSampler(icdSource, hash, clamp, (double) Long.MAX_VALUE);
new RealLongDoubleSampler(icdSource, hash, clamp, Long.MIN_VALUE, Long.MAX_VALUE, true);
}

View File

@ -1,5 +1,7 @@
package io.nosqlbench.virtdata.library.curves4.discrete.common;
import io.nosqlbench.virtdata.library.basics.shared.unary_int.Hash;
import java.util.function.DoubleToIntFunction;
import java.util.function.IntUnaryOperator;
@ -7,46 +9,42 @@ public class InterpolatingIntIntSampler implements IntUnaryOperator {
private final double[] lut;
private final DoubleToIntFunction f;
private int resolution;
private ThreadSafeHash hash;
private Hash hash;
private final double scaleToIntRanged;
public InterpolatingIntIntSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) {
this.f = icdSource;
this.resolution = resolution;
if (hash) {
this.hash = new ThreadSafeHash();
this.hash = new Hash();
}
this.lut = precompute();
double[] computed = precompute(resolution);
double[] padded = new double[computed.length+1];
System.arraycopy(computed,0,padded,0,computed.length);
padded[padded.length-1] = padded[padded.length-2];
scaleToIntRanged = (1.0d/Integer.MAX_VALUE)*(padded.length-2);
this.lut=padded;
}
private double[] precompute() {
double[] precomputed = new double[resolution+2];
for (int s = 0; s <= resolution; s++) { // not a ranging error
private double[] precompute(int resolution) {
double[] precomputed = new double[resolution];
for (int s = 0; s < resolution; s++) { // not a ranging error
double rangedToUnit = (double) s / (double) resolution;
int sampleValue = f.applyAsInt(rangedToUnit);
precomputed[s] = sampleValue;
}
precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case
return precomputed;
}
@Override
public int applyAsInt(int input) {
int value = input;
if (hash!=null) {
value = (int) (hash.applyAsLong(input) % Integer.MAX_VALUE);
input = hash.applyAsInt(input);
}
double unit = (double) value / (double) Integer.MAX_VALUE;
double samplePoint = unit * resolution;
int leftidx = (int) samplePoint;
double leftPartial = samplePoint - leftidx;
double leftComponent=(lut[leftidx] * (1.0-leftPartial));
double rightComponent = (lut[leftidx+1] * leftPartial);
double sample = leftComponent + rightComponent;
double samplePoint = scaleToIntRanged * input;
int leftidx = (int)samplePoint;
double fractional = samplePoint - leftidx;
double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional);
return (int) sample;
}
}

View File

@ -1,5 +1,7 @@
package io.nosqlbench.virtdata.library.curves4.discrete.common;
import io.nosqlbench.virtdata.library.basics.shared.unary_int.Hash;
import java.util.function.DoubleToIntFunction;
import java.util.function.IntToLongFunction;
@ -7,46 +9,40 @@ public class InterpolatingIntLongSampler implements IntToLongFunction {
private final double[] lut;
private final DoubleToIntFunction f;
private int resolution;
private ThreadSafeHash hash;
private Hash hash;
private final double scaleToIntRanged;
public InterpolatingIntLongSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) {
this.f = icdSource;
this.resolution = resolution;
if (hash) {
this.hash = new ThreadSafeHash();
this.hash = new Hash();
}
this.lut = precompute();
double[] computed = precompute(resolution);
double[] padded = new double[computed.length+1];
System.arraycopy(computed,0,padded,0,computed.length);
padded[padded.length-1] = padded[padded.length-2]; // duplicate the top entry for the u=1.0 slot
this.scaleToIntRanged = (1.0d / Integer.MAX_VALUE) * (padded.length-2);
this.lut=padded;
}
private double[] precompute() {
double[] precomputed = new double[resolution+2];
for (int s = 0; s <= resolution; s++) { // not a ranging error
private double[] precompute(int resolution) {
double[] precomputed = new double[resolution];
for (int s = 0; s < resolution; s++) { // not a ranging error
double rangedToUnit = (double) s / (double) resolution;
int sampleValue = f.applyAsInt(rangedToUnit);
precomputed[s] = sampleValue;
}
precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case
return precomputed;
}
@Override
public long applyAsLong(int input) {
int value = input;
if (hash!=null) {
value = (int) (hash.applyAsLong(input) % Integer.MAX_VALUE);
input = hash.applyAsInt(input);
}
double unit = (double) value / (double) Integer.MAX_VALUE;
double samplePoint = unit * resolution;
int leftidx = (int) samplePoint;
double leftPartial = samplePoint - leftidx;
double leftComponent=(lut[leftidx] * (1.0-leftPartial));
double rightComponent = (lut[leftidx+1] * leftPartial);
double sample = leftComponent + rightComponent;
double samplePoint = scaleToIntRanged * input;
int leftidx = (int)samplePoint;
double fractional = samplePoint - leftidx;
double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional);
return (long) sample;
}
}

View File

@ -7,43 +7,41 @@ public class InterpolatingLongIntSampler implements LongToIntFunction {
private final double[] lut;
private final DoubleToIntFunction f;
private int resolution;
private ThreadSafeHash hash;
private final double scaleToLong;
public InterpolatingLongIntSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) {
this.f = icdSource;
this.resolution = resolution;
if (hash) {
this.hash = new ThreadSafeHash();
}
this.lut = precompute();
double[] computed = precompute(resolution);
double[] padded = new double[computed.length+1];
System.arraycopy(computed,0,padded,0,computed.length);
padded[padded.length-1] = padded[padded.length-2];
scaleToLong=(1.0d/Long.MAX_VALUE) * (padded.length-2);
this.lut=padded;
}
private double[] precompute() {
double[] precomputed = new double[resolution+2];
for (int s = 0; s <= resolution; s++) { // not a ranging error
private double[] precompute(int resolution) {
double[] precomputed = new double[resolution];
for (int s = 0; s < resolution; s++) { // not a ranging error
double rangedToUnit = (double) s / (double) resolution;
int sampleValue = f.applyAsInt(rangedToUnit);
precomputed[s] = sampleValue;
}
precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case
return precomputed;
}
@Override
public int applyAsInt(long value) {
public int applyAsInt(long input) {
if (hash!=null) {
value = hash.applyAsLong(value);
input = hash.applyAsLong(input);
}
double unit = (double) value / (double) Long.MAX_VALUE;
double samplePoint = unit * resolution;
int leftidx = (int) samplePoint;
double leftPartial = samplePoint - leftidx;
double leftComponent=(lut[leftidx] * (1.0-leftPartial));
double rightComponent = (lut[leftidx+1] * leftPartial);
double sample = leftComponent + rightComponent;
return (int) sample;
double samplePoint = scaleToLong * input;
int leftidx = (int)samplePoint;
double fractional = samplePoint - leftidx;
double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional);
return (int)sample;
}
}

View File

@ -7,43 +7,41 @@ public class InterpolatingLongLongSampler implements LongUnaryOperator {
private final double[] lut;
private final DoubleToIntFunction f;
private int resolution;
private ThreadSafeHash hash;
private final double scaleToLong;
public InterpolatingLongLongSampler(DoubleToIntFunction icdSource, int resolution, boolean hash) {
this.f = icdSource;
this.resolution = resolution;
if (hash) {
this.hash = new ThreadSafeHash();
}
this.lut = precompute();
double[] computed = precompute(resolution);
double[] padded = new double[computed.length+1];
System.arraycopy(computed,0,padded,0,computed.length);
padded[padded.length-1] = padded[padded.length-2];
scaleToLong = (1.0d/Long.MAX_VALUE) * ((double)(padded.length-2));
this.lut = padded;
}
private double[] precompute() {
double[] precomputed = new double[resolution+2];
for (int s = 0; s <= resolution; s++) { // not a ranging error
private double[] precompute(int resolution) {
double[] precomputed = new double[resolution];
for (int s = 0; s < resolution; s++) { // not a ranging error
double rangedToUnit = (double) s / (double) resolution;
int sampleValue = f.applyAsInt(rangedToUnit);
precomputed[s] = sampleValue;
}
precomputed[precomputed.length-1]=0.0D; // only for right of max, when S==Max in the rare case
return precomputed;
}
@Override
public long applyAsLong(long value) {
public long applyAsLong(long input) {
if (hash!=null) {
value = hash.applyAsLong(value);
input = hash.applyAsLong(input);
}
double unit = (double) value / (double) Long.MAX_VALUE;
double samplePoint = unit * resolution;
int leftidx = (int) samplePoint;
double leftPartial = samplePoint - leftidx;
double leftComponent=(lut[leftidx] * (1.0-leftPartial));
double rightComponent = (lut[leftidx+1] * leftPartial);
double sample = leftComponent + rightComponent;
return (long) sample;
double samplePoint = scaleToLong * input;
int leftidx = (int)samplePoint;
double fractional = samplePoint - leftidx;
double sample = (lut[leftidx]* (1.0d-fractional)) + (lut[leftidx+1] * fractional);
return (long)sample;
}
}

View File

@ -11,7 +11,7 @@ public class LevyTest {
@Test
public void testLevy() {
Levy levy = new Levy(2.3d, 1.0d);
assertThat(levy.applyAsDouble(10L)).isCloseTo(2.938521849905433, Offset.offset(0.000001d));
assertThat(levy.applyAsDouble(10L)).isCloseTo(2.9379325000660304, Offset.offset(0.000001d));
}
}

View File

@ -17,7 +17,7 @@ public class RealDistributionsValuesTest {
@Test
public void testComputedNormal() {
RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"compute"), 1000000);
RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"compute"), 1000000,1);
System.out.println(runData);
assertThat(runData.getFractionalPercentile(0.5D))
.isCloseTo(10.0D, Offset.offset(0.01D));
@ -29,7 +29,7 @@ public class RealDistributionsValuesTest {
@Test
public void testInterpolatedNormal() {
RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"interpolate"), 1000000);
RunData runData = iterateMapperDouble(new Normal(10.0,2.0,"interpolate"), 1000000,1);
System.out.println(runData);
assertThat(runData.getFractionalPercentile(0.5D))
.isCloseTo(10.0D, Offset.offset(0.01D));
@ -41,7 +41,7 @@ public class RealDistributionsValuesTest {
@Test
public void testComputedUniform() {
RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"compute"), 1000000);
RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"compute"), 1000000,1);
assertThat(runData.getFractionalPercentile(0.33D))
.isCloseTo(33.33D, Offset.offset(1.0D));
assertThat(runData.getFractionalPercentile(0.5D))
@ -53,7 +53,7 @@ public class RealDistributionsValuesTest {
@Test
public void testInterpolatedUniform() {
RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"interpolate"), 1000000);
RunData runData = iterateMapperDouble(new Uniform(0.0,100.0,"interpolate"), 1000000,1);
assertThat(runData.getFractionalPercentile(0.33D))
.isCloseTo(33.33D, Offset.offset(1.0D));
assertThat(runData.getFractionalPercentile(0.5D))
@ -66,22 +66,27 @@ public class RealDistributionsValuesTest {
@Test
public void testInterpolatedMappedUniform() {
Uniform mapper = new Uniform(0.0, 100.0, "map", "interpolate");
RunData runData = iterateMapperDouble(mapper,10000000);
RunData runData = iterateMapperDouble(mapper,10000000,Long.MAX_VALUE/10000000L);
assertThat(runData.getFractionalPercentile(0.001D))
.isCloseTo(0.0D, Offset.offset(1.0D));
assertThat(runData.getFractionalPercentile(0.999D))
.isCloseTo(0.0D, Offset.offset(1.0D));
.isCloseTo(99.99D, Offset.offset(1.0D));
assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0D, Offset.offset(1.0D));
System.out.println(runData);
}
private RunData iterateMapperDouble(LongToDoubleFunction mapper, int iterations) {
private RunData iterateMapperDouble(LongToDoubleFunction mapper, int iterations, long funcstep) {
assertThat(mapper).isNotNull();
double[] samples = new double[iterations];
long time_generating = System.nanoTime();
for (int i = 0; i < iterations; i++) {
samples[i] = mapper.applyAsDouble(i);
samples[i] = mapper.applyAsDouble(i*funcstep);
}
long time_generated = System.nanoTime();

View File

@ -46,7 +46,7 @@ public class IntegerDistributionsConcurrencyTest {
// threshold test against CDF
expected = mapper.get((long) (0.03515d * (double) Long.MAX_VALUE));
assertThat(expected).isEqualTo(1);
expected = mapper.get((long) (0.03600d * (double) Long.MAX_VALUE));
expected = mapper.get((long) (0.03700d * (double) Long.MAX_VALUE));
assertThat(expected).isEqualTo(2);
}

View File

@ -16,6 +16,7 @@ import static org.assertj.core.api.Assertions.assertThat;
public class IntegerDistributionsValuesTest {
@Disabled
@Test
public void testComputedZipf() {
@ -70,13 +71,19 @@ public class IntegerDistributionsValuesTest {
System.out.println(runData);
}
@Test
public void testMaximumValue() {
Uniform mapper = new Uniform(0.0d, 100.0d, "interpolate", "map");
assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0d,Offset.offset(0.1D));
}
@Test
public void testInterpolatedMappedUniform() {
Uniform mapper = new Uniform(0.0d, 100.0d, "interpolate", "map");
RunData runData = iterateMapperDouble(mapper,10000000);
assertThat(runData.getFractionalPercentile(0.999D))
.isCloseTo(0.0D, Offset.offset(1.0D));
assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0d,Offset.offset(0.0001D));
assertThat(mapper.applyAsDouble(Long.MAX_VALUE)).isCloseTo(100.0d,Offset.offset(0.1D));
}
@ -101,7 +108,11 @@ public class IntegerDistributionsValuesTest {
double[] samples = new double[iterations];
long time_generating = System.nanoTime();
int readout = iterations/10;
for (int i = 0; i < iterations; i++) {
if ((i%readout)==0) {
System.out.println("i="+i+"/"+iterations);
}
samples[i] = mapper.applyAsDouble(i);
}
long time_generated = System.nanoTime();

View File

@ -69,23 +69,24 @@ little loss in accuracy, but the difference is generally negligible for nearly a
#### Infinite or Finite
For interpolated samples, you also have the option of including or excluding infinite values
which may occur in some distributions. If you want to include them, use `infinite`, or `finite`
to explicitly avoid them (the default). Specifying 'infinite' doesn't guarantee that you will
see +Infinity or -Infinity, only that they are allowed. The Normal distribution often contains
-Infinity and +Infinity, for example, due to the function used to estimate its cumulative
distribution. These values can often be valuable in finding corner cases which should be treated
uniformly according to [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754).
For interpolated samples from continuous distributions, you also have the option of including or
excluding infinite values which may occur in some distributions. Use `infinite` to include them,
or `finite` (the default) to explicitly avoid them. Specifying `infinite`
doesn't guarantee that you will see +Infinity or -Infinity, only that they are allowed. The
Normal distribution often contains -Infinity and +Infinity, for example, due to the function
used to estimate its cumulative distribution. These values can often be valuable in finding
corner cases which should be treated uniformly according to
[IEEE 754](https://en.wikipedia.org/wiki/IEEE_754).
#### Clamp or Noclamp
For interpolated samples, you also have the option of clamping the allowed values to the valid
range for the integral data type used as input. To clamp the output values to the range
(Long.MIN_VALUE,Long.MAX_VALUE) for long->double functions, or to (Integer.MIN_VALUE,Integer.
MAX_VALUE) for int-double functions, specify `clamp`, which is also the default. To explicitly
disable this, use `noclamp`. This is useful when you know the downstream functions will only
work with a certain range of values without truncating conversions. When you are using double
values natively on the downstream functions, use `noclamp` to avoid limiting the domain of
For interpolated samples from continuous distributions, you also have the option of clamping the
allowed values to the valid range for the integral data type used as input. To clamp the output
values to the range (Long.MIN_VALUE,Long.MAX_VALUE) for long->double functions, or to
(Integer.MIN_VALUE,Integer.MAX_VALUE) for int->double functions, specify `clamp`, which is also
the default. To explicitly disable this, use `noclamp`. This is useful when you know the
downstream functions will only work with a certain range of values without truncating
conversions. When you are using double values natively in the downstream functions, use
`noclamp` to avoid limiting the domain of values in your test data. (In this case, you might
also consider `infinite`.) A short example follows.
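
For example, drawing interpolated, finite, clamped samples from a normal curve looks like this in a test harness (a sketch; the package path and parameter values are assumptions for illustration):

```java
import io.nosqlbench.virtdata.library.curves4.continuous.long_double.Normal;

// 'map' disables input hashing so a mid-range input lands on the median;
// 'finite' and 'clamp' are shown explicitly even though they are defaults
Normal normal = new Normal(10.0d, 2.0d, "map", "interpolate", "finite", "clamp");
double v = normal.applyAsDouble(Long.MAX_VALUE / 2); // ~10.0
```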
### Computed Samples

View File

@ -25,8 +25,8 @@ public class IntegratedCurvesTest {
assertThat(runData.getStats().getPercentile(1.0d)).isCloseTo(1.0, Offset.offset(0.01d));
assertThat(runData.getStats().getPercentile(10.0d)).isCloseTo(1.0, Offset.offset(0.01d));
assertThat(runData.getStats().getPercentile(90.0d)).isCloseTo(6.0, Offset.offset(0.01d));
assertThat(runData.getStats().getPercentile(99.0d)).isCloseTo(61.0, Offset.offset(0.01d));
assertThat(runData.getStats().getPercentile(99.9d)).isCloseTo(311.0, Offset.offset(0.01d));
assertThat(runData.getStats().getPercentile(99.0d)).isCloseTo(55.989, Offset.offset(0.01d));
assertThat(runData.getStats().getPercentile(99.9d)).isCloseTo(202.999, Offset.offset(0.01d));
}