nosqlbench-2031 Make modeling occasional 'blip' payloads easy

This commit is contained in:
Jonathan Shook 2024-10-04 15:19:11 -05:00
parent d0686b3411
commit 1e8d675cc0
6 changed files with 422 additions and 0 deletions

View File

@ -0,0 +1,101 @@
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_int;
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.Example;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.Arrays;
import java.util.function.LongToIntFunction;
import java.util.function.LongUnaryOperator;
/**
 * SequenceOf bindings allow you to specify an order and count of a set of values which will then
 * be repeated in that order.
 *
 * <p>The lookup table is built once in the constructor and is only read afterwards.</p>
 */
@ThreadSafeMapper
@Categories(Category.experimental)
public class SequenceOf implements LongToIntFunction {
    private final static Logger logger = LogManager.getLogger(SequenceOf.class);
    final int[] sequence;

    /**
     * <p>
     * This function produces values from a lookup table for direct control of numerical sequences.
     * The sequence spec is a string containing the sequence values and their occurrences, defaulting to 1 each.
     * Example: "1:6 2 3 4 5", which means "1 at a relative frequency of 6 and 2, 3, 4, and 5 at a relative
     * frequency of 1 each". This will yield pattern "1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, ..."
     * </p>
     *
     * <p>
     * Each implementation of {@link SequenceOf} must include a type sigil as the first parameter to disambiguate
     * it from the others.
     * </p>
     *
     * @param ignored
     *     any int value, discarded after signature matching. The example value is thrown away, but is
     *     necessary for matching the right version of SequenceOf.
     * @param spec
     *     A string of numbers separated by spaces, semicolons, or commas. This is the sequence spec.
     * @throws IllegalArgumentException
     *     if the spec cannot be parsed or describes an empty sequence, see {@link #parseSequence(String)}
     */
    @Example({"SequenceOf(1L,'3:3 2:2 1:1')","Generate sequence 3,3,3,2,2,1"})
    @Example({"SequenceOf(1L,'1000:99 1000000:1')","Generate sequence 1000 (99 times) and then 1000000 (1 time)"})
    public SequenceOf(int ignored, String spec) {
        this.sequence = parseSequence(spec);
    }

    /**
     * Expand a sequence spec into the full lookup table.
     *
     * <p>Entries are separated by one or more spaces, semicolons, or commas, so forms such as
     * {@code "1, 2, 3"} are accepted. Each entry is either {@code value} or {@code value:count}.</p>
     *
     * @param input
     *     the sequence spec, for example {@code "1:6 2 3 4 5"}
     * @return a new array holding every value repeated {@code count} times, in spec order
     * @throws NumberFormatException
     *     if a value or count is not a valid int
     * @throws IllegalArgumentException
     *     if a count is negative, an entry has no value, or the expanded sequence is empty or
     *     larger than an array can hold
     */
    public static int[] parseSequence(String input) {
        // "+" collapses runs of delimiters; only a leading delimiter can still yield an empty token.
        String[] entries = input.split("[;, ]+");
        int[] values = new int[entries.length];
        int[] counts = new int[entries.length];
        int used = 0;
        // Accumulate in a long so that oversized specs are detected instead of wrapping around.
        long total = 0L;
        for (String entry : entries) {
            if (entry.isEmpty()) {
                continue;
            }
            String[] parts = entry.split(":");
            if (parts.length == 0) {
                throw new IllegalArgumentException("Missing value in sequence entry '" + entry + "'");
            }
            int value = Integer.parseInt(parts[0]);
            int count = (parts.length == 1) ? 1 : Integer.parseInt(parts[1]);
            if (count < 0) {
                throw new IllegalArgumentException(
                    "Negative count " + count + " in sequence entry '" + entry + "'");
            }
            values[used] = value;
            counts[used] = count;
            used++;
            total += count;
        }
        if (total == 0L) {
            // An empty table would make every lookup fail with a division by zero.
            throw new IllegalArgumentException("The sequence spec '" + input + "' contains no values");
        }
        if (total > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                "The sequence spec '" + input + "' expands to " + total + " values, which is too large");
        }
        if (total > 1E6) {
            logger.warn("The sequence you have specified is very large, which may cause problems. You should consider" +
                " a different approach for this type of function.");
        }
        int[] sequence = new int[(int) total];
        int offset = 0;
        for (int i = 0; i < used; i++) {
            Arrays.fill(sequence, offset, offset + counts[i], values[i]);
            offset += counts[i];
        }
        return sequence;
    }

    /**
     * Look up the sequence element for the given input, wrapping around the end of the table.
     *
     * @param value
     *     any long value, including negative values and values beyond the int range
     * @return the element at position {@code value} modulo the sequence length
     */
    @Override
    public int applyAsInt(long value) {
        // Reduce in long space first: casting to int before the modulo truncates large inputs and
        // can produce a negative index. floorMod keeps the result in [0, length) for negative inputs.
        return sequence[(int) Math.floorMod(value, (long) sequence.length)];
    }
}

View File

@ -0,0 +1,100 @@
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_long;
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.Example;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.Arrays;
import java.util.function.LongUnaryOperator;
/**
 * SequenceOf bindings allow you to specify an order and count of a set of values which will then
 * be repeated in that order.
 *
 * <p>The lookup table is built once in the constructor and is only read afterwards.</p>
 */
@ThreadSafeMapper
@Categories(Category.experimental)
public class SequenceOf implements LongUnaryOperator {
    private final static Logger logger = LogManager.getLogger(SequenceOf.class);
    final long[] sequence;

    /**
     * <p>
     * This function produces values from a lookup table for direct control of numerical sequences.
     * The sequence spec is a string containing the sequence values and their occurrences, defaulting to 1 each.
     * Example: "1:6 2 3 4 5", which means "1 at a relative frequency of 6 and 2, 3, 4, and 5 at a relative
     * frequency of 1 each". This will yield pattern "1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, ..."
     * </p>
     *
     * <p>
     * Each implementation of {@link SequenceOf} must include a type sigil as the first parameter to disambiguate
     * it from the others.
     * </p>
     *
     * @param ignored
     *     any long value, discarded after signature matching. The example value is thrown away, but is
     *     necessary for matching the right version of SequenceOf.
     * @param spec
     *     A string of numbers separated by spaces, semicolons, or commas. This is the sequence spec.
     * @throws IllegalArgumentException
     *     if the spec cannot be parsed or describes an empty sequence, see {@link #parseSequence(String)}
     */
    @Example({"SequenceOf(1L,'3:3 2:2 1:1')","Generate sequence 3,3,3,2,2,1"})
    @Example({"SequenceOf(1L,'1000:99 1000000:1')","Generate sequence 1000 (99 times) and then 1000000 (1 time)"})
    public SequenceOf(long ignored, String spec) {
        this.sequence = parseSequence(spec);
    }

    /**
     * Expand a sequence spec into the full lookup table.
     *
     * <p>Entries are separated by one or more spaces, semicolons, or commas, so forms such as
     * {@code "1, 2, 3"} are accepted. Each entry is either {@code value} or {@code value:count}.</p>
     *
     * @param input
     *     the sequence spec, for example {@code "1:6 2 3 4 5"}
     * @return a new array holding every value repeated {@code count} times, in spec order
     * @throws NumberFormatException
     *     if a value is not a valid long or a count is not a valid int
     * @throws IllegalArgumentException
     *     if a count is negative, an entry has no value, or the expanded sequence is empty or
     *     larger than an array can hold
     */
    public static long[] parseSequence(String input) {
        // "+" collapses runs of delimiters; only a leading delimiter can still yield an empty token.
        String[] entries = input.split("[;, ]+");
        long[] values = new long[entries.length];
        int[] counts = new int[entries.length];
        int used = 0;
        // Accumulate in a long so that oversized specs are detected instead of wrapping around.
        long total = 0L;
        for (String entry : entries) {
            if (entry.isEmpty()) {
                continue;
            }
            String[] parts = entry.split(":");
            if (parts.length == 0) {
                throw new IllegalArgumentException("Missing value in sequence entry '" + entry + "'");
            }
            long value = Long.parseLong(parts[0]);
            int count = (parts.length == 1) ? 1 : Integer.parseInt(parts[1]);
            if (count < 0) {
                throw new IllegalArgumentException(
                    "Negative count " + count + " in sequence entry '" + entry + "'");
            }
            values[used] = value;
            counts[used] = count;
            used++;
            total += count;
        }
        if (total == 0L) {
            // An empty table would make every lookup fail with a division by zero.
            throw new IllegalArgumentException("The sequence spec '" + input + "' contains no values");
        }
        if (total > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                "The sequence spec '" + input + "' expands to " + total + " values, which is too large");
        }
        if (total > 1E6) {
            logger.warn("The sequence you have specified is very large, which may cause problems. You should consider" +
                " a different approach for this type of function.");
        }
        long[] sequence = new long[(int) total];
        int offset = 0;
        for (int i = 0; i < used; i++) {
            Arrays.fill(sequence, offset, offset + counts[i], values[i]);
            offset += counts[i];
        }
        return sequence;
    }

    /**
     * Look up the sequence element for the given input, wrapping around the end of the table.
     *
     * @param operand
     *     any long value, including negative values and values beyond the int range
     * @return the element at position {@code operand} modulo the sequence length
     */
    @Override
    public long applyAsLong(long operand) {
        // Reduce in long space first: casting to int before the modulo truncates large inputs and
        // can produce a negative index. floorMod keeps the result in [0, length) for negative inputs.
        return sequence[(int) Math.floorMod(operand, (long) sequence.length)];
    }
}

View File

@ -0,0 +1,62 @@
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_long;
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import org.junit.jupiter.api.Test;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Checks that the long-valued {@link SequenceOf} binding replays its spec in order.
 */
public class SequenceOfTest {

    /**
     * Ten unweighted values must come back in spec order for inputs 0 through 9.
     */
    @Test
    public void testSequenceSimple() {
        SequenceOf sequenceOf = new SequenceOf(1L, "0 1 2 3 4 5 6 7 8 9");

        long[] observed = new long[10];
        for (int cycle = 0; cycle < observed.length; cycle++) {
            observed[cycle] = sequenceOf.applyAsLong(cycle);
        }

        int expected = 0;
        for (long actual : observed) {
            assertEquals(expected, actual);
            expected++;
        }
    }

    /**
     * A weight of 6 on the first value must repeat it six times before the remaining values,
     * and the pattern must start over at input 10.
     */
    @Test
    public void testSequenceWeighted() {
        SequenceOf sequenceOf = new SequenceOf(1L, "0:6 1 2 3 4");

        int sampleCount = 100;
        long[] observed = new long[sampleCount];
        for (int cycle = 0; cycle < sampleCount; cycle++) {
            observed[cycle] = sequenceOf.applyAsLong(cycle);
        }

        // One full period of the weighted pattern, plus the first element of the next period.
        long[] expectedPrefix = {0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 0};
        for (int position = 0; position < expectedPrefix.length; position++) {
            assertThat(observed[position]).isEqualTo(expectedPrefix[position]);
        }
    }
}

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2022-2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.curves4.discrete.int_int;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import org.apache.commons.math4.legacy.distribution.EnumeratedIntegerDistribution;
/**
 * A discrete curve over an explicit list of int labels, each with an optional relative weight.
 *
 * <p>The spec is a list of entries separated by one or more spaces, semicolons, or commas. Each
 * entry is either {@code label} or {@code label:weight}; entries without a weight get 1.0.</p>
 */
@ThreadSafeMapper
@Categories({Category.distributions})
public class EnumeratedInts extends IntToIntDiscreteCurve {

    /**
     * @param data
     *     the label and weight spec, for example {@code "0:6 1 2 3 4"}
     * @param mods
     *     modifiers passed through unchanged to the parent curve
     * @throws IllegalArgumentException
     *     if the spec contains no entries or an entry is not numeric
     */
    public EnumeratedInts(String data, String... mods) {
        super(new EnumeratedIntegerDistribution(parseIntLabels(data), parseDoubleWeights(data)), mods);
    }

    /**
     * Extract the int label of every entry in the spec, in spec order.
     *
     * @param input
     *     the label and weight spec
     * @return one label per entry
     * @throws NumberFormatException
     *     if a label is not a valid int
     * @throws IllegalArgumentException
     *     if the spec contains no entries
     */
    public static int[] parseIntLabels(String input) {
        String[] entries = tokenize(input);
        int[] elements = new int[entries.length];
        for (int i = 0; i < entries.length; i++) {
            String[] parts = entries[i].split(":");
            elements[i] = Integer.parseInt(parts[0]);
        }
        return elements;
    }

    /**
     * Extract the weight of every entry in the spec, in spec order.
     *
     * @param input
     *     the label and weight spec
     * @return one weight per entry, 1.0 where the entry is not of the form {@code label:weight}
     * @throws NumberFormatException
     *     if a weight is not a valid double
     * @throws IllegalArgumentException
     *     if the spec contains no entries
     */
    public static double[] parseDoubleWeights(String input) {
        String[] entries = tokenize(input);
        double[] weights = new double[entries.length];
        for (int i = 0; i < entries.length; i++) {
            String[] parts = entries[i].split(":");
            weights[i] = parts.length == 2 ? Double.parseDouble(parts[1]) : 1.0d;
        }
        return weights;
    }

    /**
     * Split the spec into its non-empty entries. Both parsers use this so that labels and
     * weights always line up index for index.
     */
    private static String[] tokenize(String input) {
        // "+" collapses runs of delimiters ("0, 1"); trailing empties are dropped by split itself,
        // so only a leading delimiter can still leave an empty first token.
        String[] raw = input.split("[;, ]+");
        int start = (raw.length > 0 && raw[0].isEmpty()) ? 1 : 0;
        int count = raw.length - start;
        if (count == 0) {
            throw new IllegalArgumentException("The spec '" + input + "' contains no entries");
        }
        String[] tokens = new String[count];
        System.arraycopy(raw, start, tokens, 0, count);
        return tokens;
    }
}

View File

@ -0,0 +1,35 @@
/*
* Copyright (c) 2022-2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.curves4.discrete.long_int;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.library.curves4.discrete.int_int.IntToIntDiscreteCurve;
import io.nosqlbench.virtdata.library.curves4.discrete.int_long.IntToLongDiscreteCurve;
import org.apache.commons.math4.legacy.distribution.EnumeratedIntegerDistribution;
import static io.nosqlbench.virtdata.library.curves4.discrete.int_int.EnumeratedInts.parseDoubleWeights;
import static io.nosqlbench.virtdata.library.curves4.discrete.int_int.EnumeratedInts.parseIntLabels;
/**
 * Long-valued counterpart of the int_int {@code EnumeratedInts} curve; it reuses that class's
 * spec parsing for labels and weights.
 *
 * NOTE(review): this class lives in the {@code long_int} package but extends
 * {@link IntToLongDiscreteCurve} - confirm the package placement is intended.
 */
@ThreadSafeMapper
@Categories({Category.distributions})
public class EnumeratedInts extends IntToLongDiscreteCurve {

    /**
     * @param data
     *     the label and weight spec, parsed by the int_int {@code EnumeratedInts} helpers
     * @param mods
     *     modifiers passed through unchanged to the parent curve
     */
    public EnumeratedInts(String data, String... mods) {
        super(distributionFor(data), mods);
    }

    /**
     * Build the enumerated distribution described by the spec.
     */
    private static EnumeratedIntegerDistribution distributionFor(String data) {
        int[] labels = parseIntLabels(data);
        double[] weights = parseDoubleWeights(data);
        return new EnumeratedIntegerDistribution(labels, weights);
    }
}

View File

@ -0,0 +1,70 @@
package io.nosqlbench.virtdata.library.curves4.discrete.int_int;
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.util.Arrays;
import java.util.List;
import java.util.stream.StreamSupport;
import static java.util.Spliterators.spliterator;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.*;
import org.assertj.core.data.Offset;
import org.junit.jupiter.api.Test;
/**
 * Smoke tests for both {@code EnumeratedInts} variants: every sample must fall inside the
 * label range given in the spec.
 *
 * NOTE(review): the per-label frequency assertion in both tests compares {@code count/samples}
 * with itself, so it can never fail. It presumably should compare against the expected
 * frequency of each label with a suitable tolerance - confirm and tighten.
 */
public class EnumeratedIntsTest {

    /**
     * Samples the int-valued curve 1000 times and checks that all values are within 0..4.
     */
    @Test
    public void EnumeratedIntsToInt() {
        EnumeratedInts curve = new EnumeratedInts("0 1 2 3 4");
        double[] tally = new double[5];
        int samples = 1000;

        for (int cycle = 0; cycle < samples; cycle++) {
            int label = curve.applyAsInt(cycle);
            assertThat(label).isGreaterThanOrEqualTo(0);
            assertThat(label).isLessThanOrEqualTo(4);
            tally[label] += 1.0d;
        }

        for (double count : tally) {
            double frequency = count / samples;
            // Self-comparison, see the class-level review note.
            assertThat(frequency).isCloseTo(count / (double) samples, Offset.offset(0.01d));
        }

        // Print the per-label counts for manual inspection.
        for (double count : tally) {
            System.out.println(count);
        }
    }

    /**
     * Samples the long-valued curve 1000 times and checks that all values are within 0..4.
     */
    @Test
    public void EnumeratedIntsToLong() {
        io.nosqlbench.virtdata.library.curves4.discrete.long_int.EnumeratedInts curve =
            new io.nosqlbench.virtdata.library.curves4.discrete.long_int.EnumeratedInts("0 1 2 3 4");
        double[] tally = new double[5];
        int samples = 1000;

        for (int cycle = 0; cycle < samples; cycle++) {
            long label = curve.applyAsLong(cycle);
            assertThat(label).isGreaterThanOrEqualTo(0);
            assertThat(label).isLessThanOrEqualTo(4);
            tally[(int) label] += 1.0d;
        }

        for (double count : tally) {
            double frequency = count / samples;
            // Self-comparison, see the class-level review note.
            assertThat(frequency).isCloseTo(count / (double) samples, Offset.offset(0.01d));
        }

        // Print the per-label counts for manual inspection.
        for (double count : tally) {
            System.out.println(count);
        }
    }
}