diff --git a/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_double/EmpiricalDistribution.java b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_double/EmpiricalDistribution.java
new file mode 100644
index 000000000..6c0dd88ff
--- /dev/null
+++ b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_double/EmpiricalDistribution.java
@@ -0,0 +1,115 @@
+package io.nosqlbench.virtdata.library.basics.shared.from_long.to_double;
+
+import io.nosqlbench.nb.api.errors.BasicError;
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.Example;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+
+/// This distribution is an easy-to-use and easy-to-modify distribution which
+/// is simply based on observed or expected frequencies. If you imagine
+/// drawing a line across a chart and then being able to use that to
+/// model frequencies, that is what this function does.
+///
+/// Values must be specified as x,y points, alternating. The x points draw a line segment
+/// from left 0.0 to right 1.0 on the unit interval, and the y points
+/// plot the magnitude. The points are rendered into a LERP table with 1000 fixed
+/// intervals, which provides substantial precision for most systems testing purposes.
+///
+/// It is valid to have y values repeated, which is another way of saying that part
+/// of the sampled population will have identical values. x coordinates must be monotonically
+/// increasing, while y values may be any valid value, even out of order.
+@ThreadSafeMapper
+@Categories(Category.distributions)
+public class EmpiricalDistribution extends Interpolate {
+
+    /// Number of equal-width intervals in the lookup table. The table itself holds one
+    /// more entry than this, so that x=0.0 and x=1.0 each map onto their own slot.
+    private static final int LUT_SIZE = 1000;
+
+    /// Creates a distribution from alternating x,y points.
+    ///
+    /// @param values x0, y0, x1, y1, ... with at least two points; x values must not
+    ///     decrease, and must start at 0.0 and end at 1.0
+    /// @throws BasicError if the points do not satisfy those requirements
+    @Example({
+        "EmpiricalDistribution(0.0d, 0.0d, 1.0d, 1.0d)",
+        "Create a uniform distribution, from (x,y)=0,0 to (x,y) = 1,1"
+    })
+    @Example({
+        "EmpiricalDistribution(0.0d, 0.0d, 0.333d, 0.1d, 1.0d, 1.0d)",
+        "Create a distribution where 1/3 of values range from 0.0 to 0.1 "
+            + "and 2/3 range from 0.1 to 1.0"
+    })
+    public EmpiricalDistribution(double... values) {
+        super(genTable(values));
+    }
+
+    /// Renders the x,y points into the lookup table which is handed to [Interpolate].
+    ///
+    /// Each pair of neighboring points becomes one straight line segment in the table.
+    /// Table positions are derived by rounding each x value on its own, so neighboring
+    /// segments always meet on the same slot, even when the difference between two x
+    /// values is not exactly representable.
+    ///
+    /// @param values alternating x,y coordinates
+    /// @return LUT_SIZE + 1 magnitudes, evenly spaced over the unit interval
+    /// @throws BasicError if fewer than two points are given, if a y value is missing,
+    ///     if any x is outside of 0.0 to 1.0, if the x values decrease, or if they do
+    ///     not start at 0.0 and end at 1.0
+    private static double[] genTable(double[] values) {
+        if (values == null || values.length < 4) {
+            throw new BasicError("You must specify at least 2 x,y points, as in 0.0, 0.0, 1.0, "
+                + "1.0, which describes a uniform distribution");
+        }
+        if ((values.length & 1) != 0) {
+            throw new BasicError("values must be alternating x,y pairs, but an odd number ("
+                + values.length + ") of values was provided");
+        }
+
+        int points = values.length >> 1;
+        double[] offsets = new double[points];
+        double[] magnitudes = new double[points];
+        for (int idx = 0; idx < points; idx++) {
+            double offset = values[idx << 1];
+            // written as a negated range check so that NaN is rejected as well
+            if (!(offset >= 0.0d && offset <= 1.0d)) {
+                throw new BasicError("x values must be within 0.0 and 1.0, but found " + offset);
+            }
+            if (idx > 0 && offset < offsets[idx - 1]) {
+                throw new BasicError("offsets must be increasing");
+            }
+            offsets[idx] = offset;
+            magnitudes[idx] = values[(idx << 1) + 1];
+        }
+        // any part of the table which no segment covers would silently remain 0.0
+        if (tableIndex(offsets[0]) != 0 || tableIndex(offsets[points - 1]) != LUT_SIZE) {
+            throw new BasicError("x values must start at 0.0 and end at 1.0, but they span "
+                + offsets[0] + " to " + offsets[points - 1]);
+        }
+
+        double[] lut = new double[LUT_SIZE + 1];
+        for (int idx = 0; idx < points - 1; idx++) {
+            int startIdx = tableIndex(offsets[idx]);
+            int endIdx = tableIndex(offsets[idx + 1]);
+            int steps = endIdx - startIdx;
+            double endMagnitude = magnitudes[idx + 1];
+            Interpolate segmentLine = new Interpolate(magnitudes[idx], endMagnitude);
+            for (int step = 0; step < steps; step++) {
+                // step / steps stays below 1.0, so the scaled value stays below Long.MAX_VALUE
+                double frac = (double) step / (double) steps;
+                long sample = (long) (frac * (double) Long.MAX_VALUE);
+                lut[startIdx + step] = segmentLine.applyAsDouble(sample);
+            }
+            // The closing point is stored exactly. For a zero-width segment (repeated x)
+            // this overwrites the shared slot, which yields a vertical step.
+            lut[endIdx] = endMagnitude;
+        }
+        return lut;
+    }
+
+    /// Maps an x value on the unit interval onto the nearest lookup table slot.
+    private static int tableIndex(double offset) {
+        return (int) Math.round(offset * LUT_SIZE);
+    }
+}
diff --git a/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_double/EmpiricalDistributionTest.java b/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_double/EmpiricalDistributionTest.java
new file mode 100644
index 000000000..145c95ba7
--- /dev/null
+++ b/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_double/EmpiricalDistributionTest.java
@@ -0,0 +1,119 @@
+package io.nosqlbench.virtdata.library.basics.shared.from_long.to_double;
+
+import io.nosqlbench.nb.api.errors.BasicError;
+import io.nosqlbench.virtdata.library.basics.shared.from_long.to_long.Hash;
+import io.nosqlbench.virtdata.library.basics.shared.from_long.to_long.InterpolateTest;
+import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+import org.assertj.core.data.Offset;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+class EmpiricalDistributionTest {
+
+    @Test
+    @Disabled("performance intensive")
+    public void testUniform() {
+        EmpiricalDistribution d =
+            new EmpiricalDistribution(0.0d, 0.0d, 1.0d, 1.0d);
+        DescriptiveStatistics data = InterpolateTest.tabulate(new Hash(), d, 1000000000);
+        assertThat(data.getPercentile(0.0001d)).isCloseTo(0.0d, Offset.offset(0.0001));
+        assertThat(data.getPercentile(50.0d)).isCloseTo(0.5d, Offset.offset(0.005));
+        assertThat(data.getPercentile(100.0d)).isCloseTo(1.0d, Offset.offset(0.001));
+    }
+
+    /// Malformed point lists must fail fast with a [BasicError] instead of producing a
+    /// partially filled table.
+    @Test
+    public void testMalformedPointsAreRejected() {
+        // a trailing x value without a matching y
+        assertThatThrownBy(() -> new EmpiricalDistribution(0.0d, 0.0d, 1.0d, 1.0d, 0.5d))
+            .isInstanceOf(BasicError.class);
+        // x values which decrease
+        assertThatThrownBy(
+            () -> new EmpiricalDistribution(0.0d, 0.0d, 0.6d, 0.5d, 0.4d, 0.7d, 1.0d, 1.0d))
+            .isInstanceOf(BasicError.class);
+        // an x value outside of the unit interval
+        assertThatThrownBy(() -> new EmpiricalDistribution(0.0d, 0.0d, 2.0d, 1.0d))
+            .isInstanceOf(BasicError.class);
+        // x values which leave part of the unit interval uncovered
+        assertThatThrownBy(() -> new EmpiricalDistribution(0.2d, 0.0d, 0.8d, 1.0d))
+            .isInstanceOf(BasicError.class);
+    }
+
+    /// Convergence to the expected value at different numbers of samples and LERP
+    /// resolutions, recorded as `@<samples> / <resolution>`:
+    ///
+    /// ```
+    /// @100000 / 100
+    /// p50 = 0.09961336762080965
+    /// p55 = 0.4887600943079539
+    /// p80 = 0.9486573852803234
+    /// @100000 / 1000
+    /// p50 = 0.0996064221289679
+    /// p55 = 0.4887600943079539
+    /// p80 = 0.9497494462965901
+    ///
+    /// @1000000 / 100
+    /// p50 = 0.10105949687725542
+    /// p55 = 0.49758658404616063
+    /// p80 = 0.9486389093179619
+    /// @1000000 / 1000
+    /// p50 = 0.10105949687725548
+    /// p55 = 0.49758658404616074
+    /// p80 = 0.9497305556617565
+    ///
+    /// @10000000 / 100
+    /// p50 = 0.1000117051372746
+    /// p55 = 0.4997387848207568
+    /// p80 = 0.9487722639153554
+    /// @10000000 / 1000
+    /// p50 = 0.10001170513727448
+    /// p55 = 0.4997387848207569
+    /// p80 = 0.9498669032551016
+    ///
+    /// @100000000 / 100
+    /// p50 = 0.0999966957844636
+    /// p55 = 0.5001328046490157
+    /// p80 = 0.9487758571324978
+    /// @100000000 / 1000
+    /// p50 = 0.09999663642729828
+    /// p55 = 0.5001328046490157
+    /// p80 = 0.9498705771180153
+    ///
+    /// @1000000000 / 100
+    /// p50 = 0.09999563860575955
+    /// p55 = 0.5000398035892097
+    /// p80 = 0.9487774978532897
+    /// @1000000000 / 1000
+    /// (no results recorded)
+    /// ```
+    @Test
+    @Disabled("performance intensive")
+    public void testPieceWise() {
+        EmpiricalDistribution d =
+            new EmpiricalDistribution(0.0d, 0.0d, 0.5d, 0.1d, 0.6d, 0.9d, 1.0d, 1.0d);
+        DescriptiveStatistics data = InterpolateTest.tabulate(new Hash(), d, 1000000000);
+        assertThat(data.getPercentile(0.0001d)).isCloseTo(0.0d, Offset.offset(0.01));
+        assertThat(data.getPercentile(25.0d)).isCloseTo(0.05d, Offset.offset(0.01));
+
+        // was 0.101059
+        double p50 = data.getPercentile(50.0d);
+        System.out.println("p50 = " + p50);
+        assertThat(p50).isCloseTo(0.1d, Offset.offset(0.005));
+
+        // was 0.4975865
+        double p55 = data.getPercentile(55.0d);
+        System.out.println("p55 = " + p55);
+        assertThat(p55).isCloseTo(0.5d, Offset.offset(0.1));
+
+        // was 0.948638
+        double p80 = data.getPercentile(80.0d);
+        System.out.println("p80 = " + p80);
+        assertThat(p80).isCloseTo(0.95d, Offset.offset(0.005));
+
+        assertThat(data.getPercentile(100.0d)).isCloseTo(1.0d, Offset.offset(0.001));
+    }
+}