mirror of
https://github.com/nosqlbench/nosqlbench.git
synced 2025-02-25 18:55:28 -06:00
implement empirical distribution
This commit is contained in:
parent
a1fd8c07db
commit
185d427fcd
@ -0,0 +1,75 @@
|
|||||||
|
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_double;
|
||||||
|
|
||||||
|
import io.nosqlbench.nb.api.errors.BasicError;
|
||||||
|
import io.nosqlbench.virtdata.api.annotations.Categories;
|
||||||
|
import io.nosqlbench.virtdata.api.annotations.Category;
|
||||||
|
import io.nosqlbench.virtdata.api.annotations.Example;
|
||||||
|
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
|
||||||
|
|
||||||
|
/// This distribution is an easy-to use and modify distribution which
|
||||||
|
/// is simply based on observed or expected frequencies. If you imagine
|
||||||
|
/// drawing a line across a chart and then being able to use that to
|
||||||
|
/// model frequencies, that is what this function does.
|
||||||
|
///
|
||||||
|
/// Values must be specified as x,y points, alternating. The x points draw a line segment
|
||||||
|
/// from left 0.0 to right 1.0 on the unit interval, and the y points
|
||||||
|
/// plot the magnitude. A LERP table with 1000 fixed points, which provides
|
||||||
|
/// substantial precision for most systems testing purposes.
|
||||||
|
///
|
||||||
|
/// It is valid to have y values repeated, which is another way of saying that part
|
||||||
|
/// of the sampled population will have identical values. x coordinates must be monotonically
|
||||||
|
/// increasing, while y values may be any valid value, even out of order
|
||||||
|
@ThreadSafeMapper
|
||||||
|
@Categories(Category.distributions)
|
||||||
|
public class EmpiricalDistribution extends Interpolate {
|
||||||
|
|
||||||
|
private static int lutSize = 1000;
|
||||||
|
|
||||||
|
@Example({
|
||||||
|
"EmpiricalDistribution(0.0d, 0.0d, 1.0d, 1.0d)",
|
||||||
|
"Create a uniform distribution, " + "from (x,y)=0,0 to (x,y) = 1,1"
|
||||||
|
})
|
||||||
|
@Example({
|
||||||
|
"EmpiricalDistribution(0.0d, 0.0d, 0.333d, 0.1d, 1.0d, 1.0d)",
|
||||||
|
"Create a distribution where 1/3 of values range from 0.0 to 0"
|
||||||
|
+ ".1 and 2/3 range from 0.1 to 1.0"
|
||||||
|
})
|
||||||
|
public EmpiricalDistribution(double... values) {
|
||||||
|
super(genTable(values));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static double[] genTable(double[] values) {
|
||||||
|
if (values.length < 4) {
|
||||||
|
throw new BasicError("You must specify at least 2 x,y points, as in 0.0, 0.0, 1.0, 1"
|
||||||
|
+ ".0, which describes a uniform distribution");
|
||||||
|
}
|
||||||
|
double[] lut = new double[lutSize + 1];
|
||||||
|
double[] offsets = new double[values.length >> 1];
|
||||||
|
double[] magnitudes = new double[values.length >> 1];
|
||||||
|
for (int idx = 0; idx < offsets.length; idx++) {
|
||||||
|
offsets[idx] = values[idx << 1];
|
||||||
|
magnitudes[idx] = values[(idx << 1) + 1];
|
||||||
|
}
|
||||||
|
for (int idx = 0; idx < offsets.length - 1; idx++) {
|
||||||
|
double offsetBase = offsets[idx];
|
||||||
|
int startIdx = (int) (offsetBase * lutSize);
|
||||||
|
double unitFraction = (offsets[idx + 1] - offsetBase);
|
||||||
|
if (unitFraction < 0.0) {
|
||||||
|
throw new BasicError("offsets must be increasing");
|
||||||
|
}
|
||||||
|
int segmentSize = (int) (unitFraction * lutSize);
|
||||||
|
double[] segment = new double[segmentSize + 1];
|
||||||
|
double startMagnitude = magnitudes[idx];
|
||||||
|
double endMagnitude = magnitudes[idx + 1];
|
||||||
|
Interpolate segmentLine = new Interpolate(startMagnitude, endMagnitude);
|
||||||
|
for (int ins = 0; ins < segmentSize; ins++) {
|
||||||
|
double frac = (double) ins / (double) segment.length;
|
||||||
|
frac = frac * (double) Long.MAX_VALUE;
|
||||||
|
segment[ins] = segmentLine.applyAsDouble((long) frac);
|
||||||
|
}
|
||||||
|
segment[segment.length - 1] = endMagnitude;
|
||||||
|
System.arraycopy(segment, 0, lut, startIdx, segment.length);
|
||||||
|
}
|
||||||
|
return lut;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,98 @@
|
|||||||
|
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_double;
|
||||||
|
|
||||||
|
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_long.Hash;
|
||||||
|
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_long.InterpolateTest;
|
||||||
|
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
|
||||||
|
import org.assertj.core.data.Offset;
|
||||||
|
import org.junit.jupiter.api.Disabled;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
class EmpiricalDistributionTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@Disabled("performance intensive")
|
||||||
|
public void testUniform() {
|
||||||
|
EmpiricalDistribution d =
|
||||||
|
new EmpiricalDistribution(0.0d, 0.0d, 1.0d, 1.0d);
|
||||||
|
DescriptiveStatistics data = InterpolateTest.tabulate(new Hash(), d, 1000000000);
|
||||||
|
assertThat(data.getPercentile(0.0001d)).isCloseTo(0.0d, Offset.offset(0.0001));
|
||||||
|
assertThat(data.getPercentile(50.0d)).isCloseTo(0.5d, Offset.offset(0.005));
|
||||||
|
assertThat(data.getPercentile(100.0d)).isCloseTo(1.0d, Offset.offset(0.001));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// convergence to expected value at different number of samples and LERP resolution
|
||||||
|
///
|
||||||
|
/// @100000 / 100
|
||||||
|
/// p50 = 0.09961336762080965
|
||||||
|
/// p55 = 0.4887600943079539
|
||||||
|
/// p80 = 0.9486573852803234
|
||||||
|
/// @100000 / 1000
|
||||||
|
/// p50 = 0.0996064221289679
|
||||||
|
/// p55 = 0.4887600943079539
|
||||||
|
/// p80 = 0.9497494462965901
|
||||||
|
///
|
||||||
|
/// @1000000 / 100
|
||||||
|
/// p50 = 0.10105949687725542
|
||||||
|
/// p55 = 0.49758658404616063
|
||||||
|
/// p80 = 0.9486389093179619
|
||||||
|
/// @1000000 / 1000
|
||||||
|
/// p50 = 0.10105949687725548
|
||||||
|
/// p55 = 0.49758658404616074
|
||||||
|
/// p80 = 0.9497305556617565
|
||||||
|
///
|
||||||
|
/// @10000000 / 100
|
||||||
|
/// p50 = 0.1000117051372746
|
||||||
|
/// p55 = 0.4997387848207568
|
||||||
|
/// p80 = 0.9487722639153554
|
||||||
|
/// @10000000 / 1000
|
||||||
|
/// p50 = 0.10001170513727448
|
||||||
|
/// p55 = 0.4997387848207569
|
||||||
|
/// p80 = 0.9498669032551016
|
||||||
|
///
|
||||||
|
/// @100000000 / 100
|
||||||
|
/// p50 = 0.0999966957844636
|
||||||
|
/// p55 = 0.5001328046490157
|
||||||
|
/// p80 = 0.9487758571324978
|
||||||
|
/// @100000000 / 1000
|
||||||
|
/// p50 = 0.09999663642729828
|
||||||
|
/// p55 = 0.5001328046490157
|
||||||
|
/// p80 = 0.9498705771180153
|
||||||
|
///
|
||||||
|
/// @1000000000 / 100
|
||||||
|
/// p50 = 0.09999563860575955
|
||||||
|
/// p55 = 0.5000398035892097
|
||||||
|
/// p80 = 0.9487774978532897
|
||||||
|
/// @1000000000 / 1000
|
||||||
|
///
|
||||||
|
///
|
||||||
|
@Test
|
||||||
|
@Disabled("performance intensive")
|
||||||
|
public void testPieceWise() {
|
||||||
|
EmpiricalDistribution d =
|
||||||
|
new EmpiricalDistribution(0.0d, 0.0d, 0.5d, 0.1d, 0.6d, 0.9d, 1.0d, 1.0d);
|
||||||
|
DescriptiveStatistics data = InterpolateTest.tabulate(new Hash(), d, 1000000000);
|
||||||
|
assertThat(data.getPercentile(0.0001d)).isCloseTo(0.0d, Offset.offset(0.01));
|
||||||
|
assertThat(data.getPercentile(25.0d)).isCloseTo(0.05d, Offset.offset(0.01));
|
||||||
|
|
||||||
|
// was 0.101059
|
||||||
|
double p50 = data.getPercentile(50.0d);
|
||||||
|
System.out.println("p50 = " + p50);
|
||||||
|
assertThat(p50).isCloseTo(0.1d, Offset.offset(0.005));
|
||||||
|
|
||||||
|
// was 0.4975865
|
||||||
|
double p55 = data.getPercentile(55.0d);
|
||||||
|
System.out.println("p55 = " + p55);
|
||||||
|
assertThat(p55).isCloseTo(0.5d, Offset.offset(0.1));
|
||||||
|
|
||||||
|
// was 0.948638
|
||||||
|
double p80 = data.getPercentile(80.0d);
|
||||||
|
System.out.println("p80 = " + p80);
|
||||||
|
assertThat(p80).isCloseTo(0.95d, Offset.offset(0.005));
|
||||||
|
|
||||||
|
assertThat(data.getPercentile(100.0d)).isCloseTo(1.0d, Offset.offset(0.001));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user