From 9afe376b4f797129013e6f411ad568e59781d568 Mon Sep 17 00:00:00 2001
From: Snyk bot
+ * Run a function on the current cached result in the current thread and replace it
+ * with the result of the function. ChainingOps are one way of invoking
* logic within a cycle. However, they are not intended to stand alone.
- * A CycleFunction must always have an input to work on. This input is
- * provided by a Supplier as optionally implemented by an Op
+ * A ChainingOp must always have an input to work on,
+ * provided by either a {@link CycleOp} or another call to a {@link ChainingOp}. A CycleOp of T is an operation which takes a long input value
+ * and produces a value of type T. It is implemented as
+ * {@link LongFunction} of T. This variant of {@link Op} has the ability to see the cycle
+ * which was previously used to select the op implementation. It also has the ability to emit a value which can be seen by a subsequent operation, if
+ * and only if it is a {@link ChainingOp}.
+ * If you are using the value in this call to select a specific type of behavior, it is very
+ * likely a candidate for factoring into separate op implementations.
+ * The {@link io.nosqlbench.engine.api.activityimpl.OpMapper}
+ * and {@link io.nosqlbench.engine.api.activityimpl.OpDispenser} abstractions are meant to move
+ * op type selection and scheduling to earlier in the activity.
+ * Run an action for the given cycle. The cycle is provided for anecdotal
-// * usage such as logging and debugging. It is valid to use the cycle value in these places,
-// * but you should not use it to determine the logic of what is run. The mechanism
-// * for doing this is provided in {@link io.nosqlbench.engine.api.activityimpl.OpMapper}
-// * and {@link io.nosqlbench.engine.api.activityimpl.OpDispenser} types. Run an action for the given cycle. The cycle
- * value is only to be used for anecdotal presentation. This form is called
- * when there is a chaining operation which will do something with this result. Run an action for the given cycle. This is the root type of any operation which is used in a NoSQLBench
* DriverAdapter. It is a tagging interface for incremental type validation
- * in the NB runtime. You probably don't want to use it directly.
+ * in the NB runtime. You probably don't want to use it directly. Instead, use one of these:
* ChainingOp: f(I) -> O
+ * CycleOp: f(cycle) -> T
+ * Designer Notes
+ *
*
- *
- * either {@link CycleOp} or {@link ChainingOp} (but not both!)
- *
- * In the standard flow of an activity, either of the above interfaces is called
- * so long as an Op implements one of them.
+ *
This is the simplest form of an executable operation in NoSQLBench. + * It is simply an operation that is run for side-effect only.
+ */ public interface RunnableOp extends Op, Runnable { + + /** + * Invoke the operation. This form takes no input and returns no value. If you need to see the value of the current + * cycle, then you can use {@link CycleOp} instead. If you need to + * use a cached result of a previous operation, then you may need to + * use {@link ChainingOp}. + */ + @Override + void run(); } From 72602805939986783b4a0e117dc5beabdaa8179d Mon Sep 17 00:00:00 2001 From: Jonathan ShookCompute a value which increases monotonically with respect to the cycle value. + * All values for f(X+(m>=0)) will be equal or greater than f(X). In effect, this + * means that with a sequence of monotonic inputs, the results will be monotonic as + * well as clustered. The values will approximate input/average, but will vary in frequency + * around a simple binomial distribution.
+ * + *The practical effect of this is to be able to compute a sequence of values + * over inputs which can act as foreign keys, but which are effectively ordered.
+ * + *Due to the complexity of generalizing this as a pure function over other distributions, + * this is the only function of this type for now. If you are interested in this problem + * domain and have some suggestions for how to extend it to other distributions, please + * join the project or let us know.
+ */ +@ThreadSafeMapper +public class TriangularStepFunction implements LongUnaryOperator { + + private final Hash hasher = new Hash(); + private final long median; + private final LongUnaryOperator sizer; + + private final long variance; + + + @Example({"TriangularStepFunction(100,20)","Create a sequence of values where the average and median is 100, but the range of values is between 80 and 120."}) + @Example({"TriangularStepFunction(80,10)","Create a sequence of values where the average and median is 80, but the range of values is between 70 and 90."}) + TriangularStepFunction(long average, long variance) { + if (average <= 0 || variance < 0 || variance > average) { // a zero median would divide by zero in applyAsLong + throw new RuntimeException( + "The median must be positive, and the variance must be non-negative and no greater than the median. " + + "You provided median=" + average + ", variance=" + variance + "." + ); + } + this.median = average; + this.variance = variance; + this.sizer = new HashRange(average-variance,average+variance); + } + + TriangularStepFunction(long average) { + this(average, average/2); +// if (maxOffset>=avgsize) { +// throw new RuntimeException("max offset " + maxOffset + " has to be less than avg size " + avgsize); +// } + } + + @Override + public long applyAsLong(long operand) { + // window number (windows are median-sized) + long count = operand / median; + // offset within window + long offset = operand % median; + // base of window + long base = operand - offset; + // variate up to window size (shadows the variance field; derived from the window base so it is stable per window) + long variance = sizer.applyAsLong(base); + // variate offset from start of window + long slice = base + variance; + // select current or next window + long result = ((slice)>operand) ? count : count + 1; + return result; + } + + public long inlined(long operand) { + return (operand < operand - operand % median + sizer.applyAsLong(operand - operand % median)) ? 
operand / median : operand / median + 1; + } + + @Override + public String toString() { + return this.getClass().getSimpleName()+"{median="+median+",variance="+variance+"}"; + } +} diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/TriangularStepFunctionTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/TriangularStepFunctionTest.java new file mode 100644 index 000000000..4df0c7b90 --- /dev/null +++ b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/TriangularStepFunctionTest.java @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2022 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.nosqlbench.virtdata.library.basics.shared.from_long.to_long; + +import org.junit.jupiter.api.Test; + +import java.security.InvalidParameterException; +import java.util.Arrays; +import java.util.LongSummaryStatistics; +import java.util.stream.IntStream; + +import static org.assertj.core.api.Assertions.assertThat; + +public class TriangularStepFunctionTest { + + private static final int LABEL=0; + private static final int FREQUENCY=1; + + @Test + public void testExample1() { + TriangularStepFunction e1 = new TriangularStepFunction(100, 20); + int[] runLengths = this.rleStatsFor(e1, 0, 10000); + System.out.println(Arrays.toString(runLengths)); + assertThat(IntStream.of(runLengths).min().orElseThrow()).isEqualTo(80L); + assertThat(IntStream.of(runLengths).max().orElseThrow()).isEqualTo(120L); + } + + @Test + public void testExample2() { + TriangularStepFunction e1 = new TriangularStepFunction(80, 10); + int[] runLengths = this.rleStatsFor(e1, 0, 10000); + System.out.println(Arrays.toString(runLengths)); + assertThat(IntStream.of(runLengths).min().orElseThrow()).isEqualTo(70L); + assertThat(IntStream.of(runLengths).max().orElseThrow()).isEqualTo(90L); + } + + @Test + public void testStepSlice() { + int avgsize=10; + TriangularStepFunction f = new TriangularStepFunction(avgsize); + int[] ary = new int[avgsize*2]; + long current=0L; + int count=0; + for (int i = 0; i < 10000; i++) { + long result = f.applyAsLong(i); + if (result==current) { + count++; + } else { + ary[count]++; + current=result; + count=0; + } + } + + for (int i = 0; i < ary.length; i++) { + System.out.println("bucket " + i + ", count " + ary[i]); + } + } + + + @Test + public void testIncrementalVariance() { + TriangularStepFunction f = new TriangularStepFunction(100, 0); + assertThat(f.applyAsLong(0L)).isEqualTo(0L); + assertThat(f.applyAsLong(1L)).isEqualTo(0L); + assertThat(f.applyAsLong(99L)).isEqualTo(0L); + assertThat(f.applyAsLong(100L)).isEqualTo(1L); + } + + @Test + public void 
testVariance() { + long first=0; + TriangularStepFunction f = new TriangularStepFunction(100,1); + var rlestats = rleStatsFor(f, 0, 100000); + LongSummaryStatistics stats99to101 = statsForRle((int) f.applyAsLong(first),rlestats); + assertThat(stats99to101.getMin()).isEqualTo(99L); + assertThat(stats99to101.getMax()).isEqualTo(101L); + + int[][] histo = histoFor(rlestats); + LongSummaryStatistics histoStats = new LongSummaryStatistics(); + for (int[] ints : histo) { + histoStats.accept(ints[LABEL]); + } + assertThat(histoStats.getAverage()).isEqualTo(100); + } + + private int[] rleStatsFor(TriangularStepFunction f, long firstTrialIncl, long lastTrialExcl) { + long firstBucket = f.applyAsLong(firstTrialIncl); + long lastBucket = f.applyAsLong(lastTrialExcl); + if (firstBucket>Integer.MAX_VALUE||lastBucket>Integer.MAX_VALUE) { + throw new InvalidParameterException("can't fit result data into range of ints from long [" + firstBucket + ","+lastBucket+"]"); + } + int base = (int) firstBucket; + int[] counts = new int[(((int) lastBucket-(int)firstBucket))+1]; + for (long trial=firstTrialIncl; trial < lastTrialExcl; trial++) { + long result = f.applyAsLong(trial); + counts[(int)(result-base)]++; + } + // remove last partial, as only the front initial partial is compensated + counts= Arrays.copyOfRange(counts,0,counts.length-1); + return counts; + } + + private int[][] histoFor(int[] counts) { + var minval = IntStream.of(counts).min().orElseThrow(); + var maxval = IntStream.of(counts).max().orElseThrow(); + + int[][] histo = new int[(maxval-minval)+1][2]; + for (int i = 0; i < histo.length; i++) { // label every row; histo[LABEL].length is the column count (2), not the row count + histo[i][LABEL]=i+minval; + } + + for (int count : counts) { + System.out.println(count); + histo[count-minval][FREQUENCY]++; + } + return histo; + } + + private LongSummaryStatistics statsForRle(int base, int[] counts) { + LongSummaryStatistics stats = new LongSummaryStatistics(); + for (int element = 0; element < counts.length; element++) { + int count = 
counts[element]; + if (count==0) { + continue; + } + stats.accept(count); + } + return stats; + + } +} From 97cf9a32cb9429909c9015a61a449679693a284e Mon Sep 17 00:00:00 2001 From: Jonathan Shook