move stat bucket up to core api

This commit is contained in:
Jonathan Shook
2024-07-23 11:15:39 -05:00
parent 1468f21d14
commit 3990b42022
5 changed files with 118 additions and 57 deletions

View File

@@ -0,0 +1,67 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.nb.api.stats;
/**
 * A fixed-capacity ring buffer of primitive doubles.
 *
 * <p>Values are written in arrival order; once the buffer is full, each new
 * value overwrites (and ejects) the oldest one. The capacity is fixed at
 * construction time. Pushing into a ring created with a capacity of zero is
 * not supported and fails with an index error.</p>
 */
public class DoubleRing {

    /** Backing storage; its length is the ring capacity. */
    private final double[] dbuf;
    /** Number of slots currently holding a value, never more than the capacity. */
    private int count;
    /** Slot which the next push will write to. */
    private int idx;

    /**
     * Create an empty ring.
     *
     * @param size the capacity of the ring
     */
    public DoubleRing(int size) {
        this.dbuf = new double[size];
        this.count = 0;
    }

    /**
     * Create a ring which starts out full, holding the provided samples. The
     * first element of {@code samples} is treated as the oldest value and is
     * the first to be ejected.
     *
     * @param samples the initial contents; the array is copied, so later
     *                pushes do not modify the caller's array
     */
    public DoubleRing(double[] samples) {
        // Defensive copy: push() overwrites slots in place and must not
        // scribble over an array the caller still owns.
        this.dbuf = samples.clone();
        this.count = samples.length;
    }

    /**
     * Add a value, evicting the oldest value if the ring is already full.
     *
     * @param value the value to store
     * @return the evicted value, or {@link Double#NaN} if the ring was not
     *     yet full and nothing was evicted
     */
    public double push(double value) {
        boolean full = (count == dbuf.length);
        double ejected = full ? dbuf[idx] : Double.NaN;
        if (!full) {
            count++;
        }
        dbuf[idx] = value;
        idx = (idx + 1) % dbuf.length;
        return ejected;
    }

    /**
     * @return the capacity of the ring
     */
    public int size() {
        return dbuf.length;
    }

    /**
     * @return the number of values currently held
     */
    public int count() {
        return count;
    }

    /**
     * @return the smallest value currently held, or {@link Double#MAX_VALUE}
     *     if the ring is empty
     */
    public double min() {
        double min = Double.MAX_VALUE;
        for (int i = 0; i < count; i++) {
            min = Math.min(min, dbuf[i]);
        }
        return min;
    }

    /**
     * @return the largest value currently held, or {@code -Double.MAX_VALUE}
     *     if the ring is empty
     */
    public double max() {
        // Double.MIN_VALUE is the smallest POSITIVE double, so it cannot be
        // used as the starting point: it would win against every negative
        // or zero sample. The most negative finite double is -MAX_VALUE.
        double max = -Double.MAX_VALUE;
        for (int i = 0; i < count; i++) {
            max = Math.max(max, dbuf[i]);
        }
        return max;
    }
}

View File

@@ -0,0 +1,134 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.nb.api.stats;
import java.util.Objects;
/**
 * This is a relatively efficient statistics bucket which can maintain moving
 * aggregates over a window of samples for count, mean, variance, stddev, sum.
 * This is particularly useful when you know that each update to the data
 * will likely be used in a query.
 *
 * <p>The mean and the sum of squared deviations from the mean are updated
 * incrementally on every {@link #apply(double)}, so reading them does not
 * require a pass over the window. The variance reported is the population
 * variance (divided by the sample count, not count - 1).</p>
 */
public final class StatBucket {

    /** The window of retained samples. */
    final DoubleRing ringbuf;
    /** Running mean of the samples in the window. */
    private double mean;
    /** Running sum of squared deviations from the mean for the window. */
    private double dSquared = 0.0d;

    /**
     * Create an empty bucket with a window of 10 samples.
     */
    public StatBucket() {
        this(10);
    }

    /**
     * Create an empty bucket.
     *
     * @param sampleWindow the number of most recent samples to aggregate over
     */
    public StatBucket(int sampleWindow) {
        this.ringbuf = new DoubleRing(sampleWindow);
    }

    /**
     * Create a bucket whose window is already full, holding the given samples.
     * The window size is the length of the array.
     *
     * @param samples the initial window contents, oldest first
     */
    public StatBucket(double[] samples) {
        this.ringbuf = new DoubleRing(samples);
        // The ring reports samples.length values from the start, so the
        // running aggregates must describe those same values; otherwise
        // mean()/variance() and every later sliding update would be computed
        // against a mean of zero. Seed them with one Welford pass.
        double seededMean = 0.0d;
        double seededDSquared = 0.0d;
        for (int i = 0; i < samples.length; i++) {
            double delta = samples[i] - seededMean;
            seededMean += delta / (i + 1);
            seededDSquared += delta * (samples[i] - seededMean);
        }
        this.mean = seededMean;
        this.dSquared = seededDSquared;
    }

    /**
     * Add a sample to the window and update the running aggregates.
     *
     * @param value the new sample
     * @return this bucket, to allow chained calls
     */
    public StatBucket apply(double value) {
        double popped = ringbuf.push(value);
        int n = ringbuf.count();

        if (n == 1) {
            // Only one sample in the window: it is the mean, with no spread.
            mean = value;
            dSquared = 0.0d;
        } else if (Double.isNaN(popped)) {
            // Window still filling, nothing was evicted: growing-sample update.
            double newMean = mean + ((value - mean) / n);
            dSquared += nonZero((value - newMean) * (value - mean));
            mean = newMean;
        } else {
            // Window full: one sample left and one entered, so adjust both
            // aggregates by the difference between the two.
            double newMean = mean + ((value - popped) / n);
            dSquared += nonZero((value - popped) * (value - newMean + popped - mean));
            mean = newMean;
        }
        return this;
    }

    /**
     * If an increment is too small to be represented as a double it comes out
     * as zero, which is not what we want, so the smallest possible positive
     * double value is used in its place.
     */
    private static double nonZero(double increment) {
        return (increment == 0) ? Double.MIN_VALUE : increment;
    }

    /**
     * @return the population variance of the samples in the window; NaN when
     *     the bucket holds no samples
     */
    public double variance() {
        double variance = dSquared / ringbuf.count();
        // The running sum can end up slightly below zero; report the magnitude.
        return (variance < 0) ? Math.abs(variance) : variance;
    }

    /**
     * @return the square root of {@link #variance()}
     */
    public double stddev() {
        return Math.sqrt(variance());
    }

    /**
     * @return the number of samples currently in the window
     */
    public int count() {
        return ringbuf.count();
    }

    /**
     * @return the running mean of the samples in the window
     */
    public double mean() {
        return mean;
    }

    /**
     * Buckets are equal when they are of the same class, hold the same number
     * of samples and have bit-identical means. The individual samples and the
     * variance are not compared.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) return true;
        if (obj == null || obj.getClass() != this.getClass()) return false;
        var that = (StatBucket) obj;
        return this.ringbuf.count() == that.ringbuf.count() &&
            Double.doubleToLongBits(this.mean) == Double.doubleToLongBits(that.mean);
    }

    @Override
    public int hashCode() {
        return Objects.hash(ringbuf.count(), mean);
    }

    @Override
    public String toString() {
        return "StatBucket[" +
            "count=" + ringbuf.count() + ", " +
            "mean=" + mean + ", " +
            "stddev=" + stddev() + ", " +
            "variance=" + variance() + ']';
    }

    /**
     * @return true once the window holds as many samples as its capacity
     */
    public boolean primed() {
        return this.count() == ringbuf.size();
    }

    /**
     * @return the smallest sample in the window, as reported by the ring buffer
     */
    public double getMin() {
        return ringbuf.min();
    }

    /**
     * @return the largest sample in the window, as reported by the ring buffer
     */
    public double getMax() {
        return ringbuf.max();
    }

    /**
     * @return the same value as {@link #mean()}
     */
    public double getAverage() {
        return this.mean();
    }

    /**
     * @return the same value as {@link #count()}, widened to a double
     */
    public double getCount() {
        return count();
    }

    /**
     * @return the sum of the window, derived as mean times count rather than
     *     accumulated separately
     */
    public double getSum() {
        return this.mean() * this.count();
    }
}

View File

@@ -0,0 +1,70 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.nb.api.stats;
import org.assertj.core.data.Offset;
import org.junit.jupiter.api.Test;
import static org.assertj.core.api.Assertions.assertThat;
/**
 * Exercises the streaming aggregates of {@link StatBucket}: the running mean
 * while the window fills, mean and standard deviation over a repeating
 * sample set, and the stability of the mean across widely varying magnitudes.
 */
class StatBucketTest {

    /** Tolerance used for the mean and stddev comparisons. */
    private static final Offset<Double> TOLERANCE = Offset.offset(0.001d);

    /**
     * With the default window, the mean after each sample must match the
     * plain average of everything applied so far.
     */
    @Test
    public void testStreamingMean() {
        double[] inputs = {5.0d, 10.0d, 15.0d, 20.0d};
        double[] expectedMeans = {5.0d, 7.5d, 10.0d, 12.5d};

        StatBucket bucket = new StatBucket();
        for (int step = 0; step < inputs.length; step++) {
            bucket.apply(inputs[step]);
            assertThat(bucket.mean()).isCloseTo(expectedMeans[step], TOLERANCE);
        }
    }

    /**
     * Cycles a known sample set through a window of the same size and checks
     * mean and stddev each time the cycle wraps around to its first element.
     */
    @Test
    public void testStreamingComputations() {
        double[] samples = new double[]{2, 4, 4, 4, 5, 5, 7, 9};
        StatBucket bucket = new StatBucket(8);

        int totalApplies = samples.length * 10;
        for (int applied = 0; applied < totalApplies; applied++) {
            int position = applied % samples.length;
            bucket.apply(samples[position]);

            boolean wrappedAround = (position == 0) && (applied > 0);
            if (!wrappedAround) {
                continue;
            }
            assertThat(bucket.mean()).isCloseTo(5.0d, TOLERANCE);
            assertThat(bucket.stddev()).isCloseTo(2.0, TOLERANCE);
        }
    }

    /**
     * Feeds runs of twenty consecutive values starting at successive powers
     * of ten. From the eleventh value of each run onward, the window of 11
     * holds only consecutive values, so the mean must sit five below the
     * newest value.
     */
    @Test
    public void testErrorAccumulation1() {
        StatBucket bucket = new StatBucket(11);
        long limit = 10000000000000000L;

        for (long base = 1; base < limit; base *= 10) {
            for (int step = 0; step < 20; step++) {
                long value = base + step;
                bucket.apply(value);
                if (step < 10) {
                    // Window still contains values from the previous base.
                    continue;
                }
                double streamingMean = bucket.mean();
                assertThat(streamingMean).isCloseTo((double) (value - 5), Offset.offset(0.03d));
            }
        }
    }
}