From 1e8d675cc0475334c55d000a3f478896146edb8d Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Fri, 4 Oct 2024 15:19:11 -0500 Subject: [PATCH] nosqlbench-2031 Make modeling occasional 'blip' payloads easy --- .../shared/from_long/to_int/SequenceOf.java | 101 ++++++++++++++++++ .../shared/from_long/to_long/SequenceOf.java | 100 +++++++++++++++++ .../from_long/to_long/SequenceOfTest.java | 62 +++++++++++ .../discrete/int_int/EnumeratedInts.java | 54 ++++++++++ .../discrete/long_int/EnumeratedInts.java | 35 ++++++ .../discrete/int_int/EnumeratedIntsTest.java | 70 ++++++++++++ 6 files changed, 422 insertions(+) create mode 100644 nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_int/SequenceOf.java create mode 100644 nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/SequenceOf.java create mode 100644 nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/SequenceOfTest.java create mode 100644 nb-virtdata/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/int_int/EnumeratedInts.java create mode 100644 nb-virtdata/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/long_int/EnumeratedInts.java create mode 100644 nb-virtdata/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/discrete/int_int/EnumeratedIntsTest.java diff --git a/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_int/SequenceOf.java b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_int/SequenceOf.java new file mode 100644 index 000000000..6fed42f82 --- /dev/null +++ b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_int/SequenceOf.java @@ -0,0 +1,101 @@ +package 
io.nosqlbench.virtdata.library.basics.shared.from_long.to_int;
+
+/*
+ * Copyright (c) 2022 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.Example;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.util.Arrays;
+import java.util.function.LongToIntFunction;
+import java.util.function.LongUnaryOperator;
+
+/**
+ * SequenceOf bindings allow you to specify an order and count of a set of values which will then
+ * be repeated in that order.
+ */
+@ThreadSafeMapper
+@Categories(Category.experimental)
+public class SequenceOf implements LongToIntFunction {
+    private final static Logger logger = LogManager.getLogger(SequenceOf.class);
+
+    final int[] sequence;
+    /**
+     * <p>
+     * This function produces values from a lookup table for direct control of numerical sequences.
+     * The sequence spec is a string containing the sequence values and their occurrences, defaulting to 1 each.
+     * Example: "1:6 2 3 4 5", which means "1 at a relative frequency of 6 and 2, 3, 4, and 5 at a relative frequency
+     * of 1 each". This will yield pattern "1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, ..."
+     * </p>
+     * <p>
+     * Each implementation of {@link SequenceOf} must include a type sigil as the first parameter to disambiguate
+     * it from the others.
+     * </p>
+     *
+     * @param ignored any int value, discarded after signature matching. The example value is thrown away, but is
+     *     necessary for matching the right version of SequenceOf.
+     * @param spec the sequence spec: numbers separated by spaces, semicolons, or commas; must yield at least one value
+     */
+    @Example({"SequenceOf(1,'3:3 2:2 1:1')","Generate sequence 3,3,3,2,2,1"})
+    @Example({"SequenceOf(1,'1000:99 1000000:1')","Generate sequence 1000 (99 times) and then 1000000 (1 time)"})
+    public SequenceOf(int ignored, String spec) {
+        this.sequence = parseSequence(spec);
+        if (this.sequence.length == 0) {
+            throw new IllegalArgumentException("The sequence spec '" + spec + "' does not produce any values.");
+        }
+    }
+
+    /**
+     * Expand a spec such as "1:6 2 3" into the lookup table {1,1,1,1,1,1,2,3}; runs of separators are collapsed.
+     * @param input entries of the form value or value:count, separated by spaces, semicolons, or commas
+     */
+    public static int[] parseSequence(String input) {
+        String[] entries = input.trim().split("[;, ]+");
+        int[][] subarys = new int[entries.length][];
+        int size=0;
+        for (int i = 0; i < entries.length; i++) {
+            String[] parts = entries[i].split(":");
+            int value = Integer.parseInt(parts[0]);
+            int count = (parts.length==1) ? 1 : Integer.parseInt(parts[1]);
+            int[] segment = new int[count];
+            Arrays.fill(segment,value);
+            subarys[i]=segment;
+            size+=segment.length;
+        }
+        if (size>1E6) {
+            logger.warn("The sequence you have specified is very large, which may cause problems. You should consider" +
+                " a different approach for this type of function.");
+        }
+        int[] sequence = new int[size];
+        int offset=0;
+        for (int[] subary : subarys) {
+            System.arraycopy(subary,0,sequence,offset,subary.length);
+            offset+=subary.length;
+        }
+        return sequence;
+    }
+
+    @Override
+    public int applyAsInt(long value) {
+        // Reduce the full long before narrowing, so negative and > Integer.MAX_VALUE inputs still index in range.
+        return sequence[(int) Math.floorMod(value, (long) sequence.length)];
+    }
+}
diff --git a/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/SequenceOf.java b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/SequenceOf.java
new file mode 100644
index 000000000..617b1c50c
--- /dev/null
+++ b/nb-virtdata/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/SequenceOf.java
@@ -0,0 +1,100 @@
+package io.nosqlbench.virtdata.library.basics.shared.from_long.to_long;
+
+/*
+ * Copyright (c) 2022 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.Example;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.util.Arrays;
+import java.util.function.LongUnaryOperator;
+
+/**
+ * SequenceOf bindings allow you to specify an order and count of a set of values which will then
+ * be repeated in that order.
+ */
+@ThreadSafeMapper
+@Categories(Category.experimental)
+public class SequenceOf implements LongUnaryOperator {
+    private final static Logger logger = LogManager.getLogger(SequenceOf.class);
+
+    final long[] sequence;
+    /**
+     * <p>
+     * This function produces values from a lookup table for direct control of numerical sequences.
+     * The sequence spec is a string containing the sequence values and their occurrences, defaulting to 1 each.
+     * Example: "1:6 2 3 4 5", which means "1 at a relative frequency of 6 and 2, 3, 4, and 5 at a relative frequency
+     * of 1 each". This will yield pattern "1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, ..."
+     * </p>
+     * <p>
+     * Each implementation of {@link SequenceOf} must include a type sigil as the first parameter to disambiguate
+     * it from the others.
+     * </p>
+     *
+     * @param ignored any long value, discarded after signature matching. The example value is thrown away, but is
+     *     necessary for matching the right version of SequenceOf.
+     * @param spec the sequence spec: numbers separated by spaces, semicolons, or commas; must yield at least one value
+     */
+    @Example({"SequenceOf(1L,'3:3 2:2 1:1')","Generate sequence 3,3,3,2,2,1"})
+    @Example({"SequenceOf(1L,'1000:99 1000000:1')","Generate sequence 1000 (99 times) and then 1000000 (1 time)"})
+    public SequenceOf(long ignored, String spec) {
+        this.sequence = parseSequence(spec);
+        if (this.sequence.length == 0) {
+            throw new IllegalArgumentException("The sequence spec '" + spec + "' does not produce any values.");
+        }
+    }
+
+    /**
+     * Expand a spec such as "1:6 2 3" into the lookup table {1,1,1,1,1,1,2,3}; runs of separators are collapsed.
+     * @param input entries of the form value or value:count, separated by spaces, semicolons, or commas
+     */
+    public static long[] parseSequence(String input) {
+        String[] entries = input.trim().split("[;, ]+");
+        long[][] subarys = new long[entries.length][];
+        int size=0;
+        for (int i = 0; i < entries.length; i++) {
+            String[] parts = entries[i].split(":");
+            long value = Long.parseLong(parts[0]);
+            int count = (parts.length==1) ? 1 : Integer.parseInt(parts[1]);
+            long[] segment = new long[count];
+            Arrays.fill(segment,value);
+            subarys[i]=segment;
+            size+=segment.length;
+        }
+        if (size>1E6) {
+            logger.warn("The sequence you have specified is very large, which may cause problems. You should consider" +
+                " a different approach for this type of function.");
+        }
+        long[] sequence = new long[size];
+        int offset=0;
+        for (long[] subary : subarys) {
+            System.arraycopy(subary,0,sequence,offset,subary.length);
+            offset+=subary.length;
+        }
+        return sequence;
+    }
+
+    @Override
+    public long applyAsLong(long operand) {
+        // Reduce the full long before narrowing, so negative and > Integer.MAX_VALUE inputs still index in range.
+        return sequence[(int) Math.floorMod(operand, (long) sequence.length)];
+    }
+}
diff --git a/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/SequenceOfTest.java b/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/SequenceOfTest.java
new file mode 100644
index 000000000..0f1b6fb10
--- /dev/null
+++ b/nb-virtdata/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_long/SequenceOfTest.java
@@ -0,0 +1,62 @@
+package io.nosqlbench.virtdata.library.basics.shared.from_long.to_long;
+
+/*
+ * Copyright (c) 2022 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+import org.junit.jupiter.api.Test;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Exercises the long-valued {@link SequenceOf} binding with unweighted and weighted sequence specs.
+ */
+public class SequenceOfTest {
+
+    /**
+     * Ten unweighted entries: the value produced for each of the first ten inputs equals the input.
+     */
+    @Test
+    public void testSequenceSimple() {
+        SequenceOf sequenceOf = new SequenceOf(1L,"0 1 2 3 4 5 6 7 8 9");
+        long[] observed = new long[10];
+        for (int cycle = 0; cycle < observed.length; cycle++) {
+            observed[cycle] = sequenceOf.applyAsLong(cycle);
+        }
+        for (int cycle = 0; cycle < observed.length; cycle++) {
+            assertEquals(cycle, observed[cycle]);
+        }
+    }
+
+    /**
+     * A leading entry weighted 6x: six zeros, then 1 through 4, then the pattern restarts at input 10.
+     */
+    @Test
+    public void testSequenceWeighted() {
+        SequenceOf sequenceOf = new SequenceOf(1L,"0:6 1 2 3 4");
+        int samples = 100;
+        long[] observed = new long[samples];
+        for (int cycle = 0; cycle < samples; cycle++) {
+            observed[cycle] = sequenceOf.applyAsLong(cycle);
+        }
+        long[] expectedPrefix = {0L, 0L, 0L, 0L, 0L, 0L, 1L, 2L, 3L, 4L, 0L};
+        for (int position = 0; position < expectedPrefix.length; position++) {
+            assertThat(observed[position]).isEqualTo(expectedPrefix[position]);
+        }
+    }
+}
diff --git a/nb-virtdata/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/int_int/EnumeratedInts.java b/nb-virtdata/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/int_int/EnumeratedInts.java
new file mode 100644
index 000000000..8609c24a3
--- /dev/null
+++ b/nb-virtdata/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/int_int/EnumeratedInts.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2022-2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.curves4.discrete.int_int;
+
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+import org.apache.commons.math4.legacy.distribution.EnumeratedIntegerDistribution;
+
+/** Discrete curve over enumerated int labels given as "label[:weight] ..." (weight defaults to 1.0), e.g. "0:6 1 2". */
+@ThreadSafeMapper
+@Categories({Category.distributions})
+public class EnumeratedInts extends IntToIntDiscreteCurve {
+    /** Builds the distribution from {@code data}; {@code mods} are passed unchanged to the base curve. */
+    public EnumeratedInts(String data, String... mods) {
+        super(new EnumeratedIntegerDistribution(parseIntLabels(data), parseDoubleWeights(data)), mods);
+    }
+
+    /** Extract the int label of every entry, in spec order; runs of separators are collapsed. */
+    public static int[] parseIntLabels(String input) {
+        String[] entries = input.trim().split("[;, ]+");
+        int[] elements = new int[entries.length];
+        for (int i = 0; i < entries.length; i++) {
+            String[] parts = entries[i].split(":");
+            elements[i] = Integer.parseInt(parts[0]);
+        }
+        return elements;
+    }
+
+    /** Extract the weight of every entry, in spec order, using 1.0 where no ":weight" is given. */
+    public static double[] parseDoubleWeights(String input) {
+        String[] entries = input.trim().split("[;, ]+");
+        double[] weights = new double[entries.length];
+        for (int i = 0; i < entries.length; i++) {
+            String[] parts = entries[i].split(":");
+            weights[i] = parts.length==2 ? Double.parseDouble(parts[1]) : 1.0d;
+        }
+        return weights;
+    }
+}
diff --git a/nb-virtdata/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/long_int/EnumeratedInts.java b/nb-virtdata/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/long_int/EnumeratedInts.java
new file mode 100644
index 000000000..a53b2badd
--- /dev/null
+++ b/nb-virtdata/virtdata-lib-curves4/src/main/java/io/nosqlbench/virtdata/library/curves4/discrete/long_int/EnumeratedInts.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022-2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.curves4.discrete.long_int;
+
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+import io.nosqlbench.virtdata.library.curves4.discrete.int_int.IntToIntDiscreteCurve;
+import io.nosqlbench.virtdata.library.curves4.discrete.int_long.IntToLongDiscreteCurve;
+import org.apache.commons.math4.legacy.distribution.EnumeratedIntegerDistribution;
+
+import static io.nosqlbench.virtdata.library.curves4.discrete.int_int.EnumeratedInts.parseDoubleWeights;
+import static io.nosqlbench.virtdata.library.curves4.discrete.int_int.EnumeratedInts.parseIntLabels;
+/** Enumerated-label discrete curve; the {@code data} spec is parsed by the int_int EnumeratedInts helpers. */
+@ThreadSafeMapper
+@Categories({Category.distributions})
+public class EnumeratedInts extends IntToLongDiscreteCurve { // NOTE(review): package is long_int but the base class is from int_long - confirm intended
+    public EnumeratedInts(String data, String... mods) {
+        super(new EnumeratedIntegerDistribution(parseIntLabels(data), parseDoubleWeights(data)), mods);
+    }
+}
diff --git a/nb-virtdata/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/discrete/int_int/EnumeratedIntsTest.java b/nb-virtdata/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/discrete/int_int/EnumeratedIntsTest.java
new file mode 100644
index 000000000..39e19fd0c
--- /dev/null
+++ b/nb-virtdata/virtdata-lib-curves4/src/test/java/io/nosqlbench/virtdata/library/curves4/discrete/int_int/EnumeratedIntsTest.java
@@ -0,0 +1,70 @@
+package io.nosqlbench.virtdata.library.curves4.discrete.int_int;
+
+/*
+ * Copyright (c) 2022 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.StreamSupport;
+
+import static java.util.Spliterators.spliterator;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.*;
+
+import org.assertj.core.data.Offset;
+import org.junit.jupiter.api.Test;
+public class EnumeratedIntsTest {
+
+    @Test
+    public void EnumeratedIntsToInt() {
+        EnumeratedInts ei = new EnumeratedInts("0 1 2 3 4");
+        double[] counts = new double[5];
+        int samples = 1000;
+        for (int i = 0; i < samples; i++) {
+            int v = ei.applyAsInt(i);
+            assertThat(v).isGreaterThanOrEqualTo(0).isLessThanOrEqualTo(4);
+            counts[v] += 1.0d;
+        }
+        assertRoughlyUniform(counts, samples);
+        StreamSupport.stream(spliterator(counts,0),false).forEach(System.out::println);
+    }
+
+    @Test
+    public void EnumeratedIntsToLong() {
+        io.nosqlbench.virtdata.library.curves4.discrete.long_int.EnumeratedInts ei =
+            new io.nosqlbench.virtdata.library.curves4.discrete.long_int.EnumeratedInts("0 1 2 3 4");
+        double[] counts = new double[5];
+        int samples = 1000;
+        for (int i = 0; i < samples; i++) {
+            long v = ei.applyAsLong(i);
+            assertThat(v).isGreaterThanOrEqualTo(0).isLessThanOrEqualTo(4);
+            counts[(int) v] += 1.0d;
+        }
+        assertRoughlyUniform(counts, samples);
+        StreamSupport.stream(spliterator(counts,0),false).forEach(System.out::println);
+    }
+
+    /**
+     * Every label in the spec carries the default weight, so each bucket should hold about 1/n of the samples.
+     * The 0.05 offset is roughly four standard deviations for 1000 independent draws, to avoid a flaky test.
+     */
+    private static void assertRoughlyUniform(double[] counts, int samples) {
+        for (double count : counts) {
+            assertThat(count / samples).isCloseTo(1.0d / counts.length, Offset.offset(0.05d));
+        }
+    }
+}