mirror of
https://github.com/nosqlbench/nosqlbench.git
synced 2025-02-25 18:55:28 -06:00
nosqlbench-824 Add function to provide clustered values with monotonic stepping
This commit is contained in:
parent
2ca4320c24
commit
dc74283952
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (c) 2022 nosqlbench
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_long;
|
||||
|
||||
import io.nosqlbench.virtdata.api.annotations.Example;
|
||||
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
|
||||
|
||||
import java.util.function.LongUnaryOperator;
|
||||
|
||||
/**
|
||||
* <P>Compute a value which increases monotonically with respect to the cycle value.
|
||||
* All values for f(X+(m>=0)) will be equal or greater than f(X). In effect, this
|
||||
* means that with a sequence of monotonic inputs, the results will be monotonic as
|
||||
* well as clustered. The values will approximate input/average, but will vary in frequency
|
||||
* around a simple binomial distribution.</P>
|
||||
*
|
||||
* <p>The practical effect of this is to be able to compute a sequence of values
|
||||
* over inputs which can act as foreign keys, but which are effectively ordered.</p>
|
||||
*
|
||||
* <H3>Call for Ideas</H3>
|
||||
* <p>Due to the complexity of generalizing this as a pure function over other distributions,
|
||||
* this is the only function of this type for now. If you are interested in this problem
|
||||
* domain and have some suggestions for how to extend it to other distributions, please
|
||||
* join the project or let us know.</p>
|
||||
*/
|
||||
@ThreadSafeMapper
|
||||
public class TriangularStepFunction implements LongUnaryOperator {
|
||||
|
||||
private final Hash hasher = new Hash();
|
||||
private final long median;
|
||||
private final LongUnaryOperator sizer;
|
||||
|
||||
private final long variance;
|
||||
|
||||
|
||||
@Example({"TriangularStepFunction(100,20)","Create a sequence of values where the average and median is 100, but the range of values is between 90 and 120."})
|
||||
@Example({"TriangularStepFunction(80,10)","Create a sequence of values where the average and median is 80, but the range of values is between 70 and 90."})
|
||||
TriangularStepFunction(long average, long variance) {
|
||||
if (variance < 0 || variance > average) {
|
||||
throw new RuntimeException(
|
||||
"The median must be non-negative, and the variance must be less than the median. " +
|
||||
"You provided median=" + average + ", variance=" + variance + "."
|
||||
);
|
||||
}
|
||||
this.median = average;
|
||||
this.variance = variance;
|
||||
this.sizer = new HashRange(average-variance,average+variance);
|
||||
}
|
||||
|
||||
TriangularStepFunction(long average) {
|
||||
this(average, average/2);
|
||||
// if (maxOffset>=avgsize) {
|
||||
// throw new RuntimeException("max offset " + maxOffset + " has to be less than avg size " + avgsize);
|
||||
// }
|
||||
}
|
||||
|
||||
@Override
|
||||
public long applyAsLong(long operand) {
|
||||
// window number
|
||||
long count = operand / median;
|
||||
// offset within window
|
||||
long offset = operand % median;
|
||||
// base of window
|
||||
long base = operand - offset;
|
||||
// variate up to window size
|
||||
long variance = sizer.applyAsLong(base);
|
||||
// variate offset from start of window
|
||||
long slice = base + variance;
|
||||
// select current or next window
|
||||
long result = ((slice)>operand) ? count : count + 1;
|
||||
return result;
|
||||
}
|
||||
|
||||
public long inlined(long operand) {
|
||||
return (operand < operand - operand % median + sizer.applyAsLong(operand - operand % median)) ? operand / median : operand / median + 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return this.getClass().getSimpleName()+"{median="+median+",variance="+variance+"}";
|
||||
}
|
||||
}
|
@ -0,0 +1,146 @@
|
||||
/*
|
||||
* Copyright (c) 2022 nosqlbench
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_long;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.security.InvalidParameterException;
|
||||
import java.util.Arrays;
|
||||
import java.util.LongSummaryStatistics;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
public class TriangularStepFunctionTest {
|
||||
|
||||
private static final int LABEL=0;
|
||||
private static final int FREQUENCY=1;
|
||||
|
||||
@Test
|
||||
public void testExample1() {
|
||||
TriangularStepFunction e1 = new TriangularStepFunction(100, 20);
|
||||
int[] runLengths = this.rleStatsFor(e1, 0, 10000);
|
||||
System.out.println(Arrays.toString(runLengths));
|
||||
assertThat(IntStream.of(runLengths).min().orElseThrow()).isEqualTo(80L);
|
||||
assertThat(IntStream.of(runLengths).max().orElseThrow()).isEqualTo(120L);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExample2() {
|
||||
TriangularStepFunction e1 = new TriangularStepFunction(80, 10);
|
||||
int[] runLengths = this.rleStatsFor(e1, 0, 10000);
|
||||
System.out.println(Arrays.toString(runLengths));
|
||||
assertThat(IntStream.of(runLengths).min().orElseThrow()).isEqualTo(70L);
|
||||
assertThat(IntStream.of(runLengths).max().orElseThrow()).isEqualTo(90L);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStepSlice() {
|
||||
int avgsize=10;
|
||||
TriangularStepFunction f = new TriangularStepFunction(avgsize);
|
||||
int[] ary = new int[avgsize*2];
|
||||
long current=0L;
|
||||
int count=0;
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
long result = f.applyAsLong(i);
|
||||
if (result==current) {
|
||||
count++;
|
||||
} else {
|
||||
ary[count]++;
|
||||
current=result;
|
||||
count=0;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < ary.length; i++) {
|
||||
System.out.println("bucket " + i + ", count " + ary[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testIncrementalVariance() {
|
||||
TriangularStepFunction f = new TriangularStepFunction(100, 0);
|
||||
assertThat(f.applyAsLong(0L)).isEqualTo(0L);
|
||||
assertThat(f.applyAsLong(1L)).isEqualTo(0L);
|
||||
assertThat(f.applyAsLong(99L)).isEqualTo(0L);
|
||||
assertThat(f.applyAsLong(100L)).isEqualTo(1L);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVariance() {
|
||||
long first=0;
|
||||
TriangularStepFunction f = new TriangularStepFunction(100,1);
|
||||
var rlestats = rleStatsFor(f, 0, 100000);
|
||||
LongSummaryStatistics stats99to101 = statsForRle((int) f.applyAsLong(first),rlestats);
|
||||
assertThat(stats99to101.getMin()).isEqualTo(99L);
|
||||
assertThat(stats99to101.getMax()).isEqualTo(101L);
|
||||
|
||||
int[][] histo = histoFor(rlestats);
|
||||
LongSummaryStatistics histoStats = new LongSummaryStatistics();
|
||||
for (int[] ints : histo) {
|
||||
histoStats.accept(ints[LABEL]);
|
||||
}
|
||||
assertThat(histoStats.getAverage()).isEqualTo(100);
|
||||
}
|
||||
|
||||
private int[] rleStatsFor(TriangularStepFunction f, long firstTrialIncl, long lastTrialExcl) {
|
||||
long firstBucket = f.applyAsLong(firstTrialIncl);
|
||||
long lastBucket = f.applyAsLong(lastTrialExcl);
|
||||
if (firstBucket>Integer.MAX_VALUE||lastBucket>Integer.MAX_VALUE) {
|
||||
throw new InvalidParameterException("can't fit result data into range of ints from long [" + firstBucket + ","+lastBucket+"]");
|
||||
}
|
||||
int base = (int) firstBucket;
|
||||
int[] counts = new int[(((int) lastBucket-(int)firstBucket))+1];
|
||||
for (long trial=firstTrialIncl; trial < lastTrialExcl; trial++) {
|
||||
long result = f.applyAsLong(trial);
|
||||
counts[(int)(result-base)]++;
|
||||
}
|
||||
// remove last partial, as only the front initial partial is compensated
|
||||
counts= Arrays.copyOfRange(counts,0,counts.length-1);
|
||||
return counts;
|
||||
}
|
||||
|
||||
private int[][] histoFor(int[] counts) {
|
||||
var minval = IntStream.of(counts).min().orElseThrow();
|
||||
var maxval = IntStream.of(counts).max().orElseThrow();
|
||||
|
||||
int[][] histo = new int[(maxval-minval)+1][2];
|
||||
for (int i = 0; i <= histo[LABEL].length; i++) {
|
||||
histo[i][LABEL]=i+minval;
|
||||
}
|
||||
|
||||
for (int count : counts) {
|
||||
System.out.println(count);
|
||||
histo[count-minval][FREQUENCY]++;
|
||||
}
|
||||
return histo;
|
||||
}
|
||||
|
||||
private LongSummaryStatistics statsForRle(int base, int[] counts) {
|
||||
LongSummaryStatistics stats = new LongSummaryStatistics();
|
||||
for (int element = 0; element < counts.length; element++) {
|
||||
int count = counts[element];
|
||||
if (count==0) {
|
||||
continue;
|
||||
}
|
||||
stats.accept(count);
|
||||
}
|
||||
return stats;
|
||||
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user