import virtdata

This commit is contained in:
Jonathan Shook
2020-02-20 15:37:40 -06:00
parent 0b733bfa1d
commit 62d53ecec6
1570 changed files with 370965 additions and 0 deletions

106
virtdata-lib-random/pom.xml Normal file
View File

@@ -0,0 +1,106 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Build descriptor for the virtdata-lib-random module, packaged as a jar. -->
<modelVersion>4.0.0</modelVersion>
<artifactId>virtdata-lib-random</artifactId>
<packaging>jar</packaging>
<!-- No groupId or version is declared on this project; both are inherited from the sibling virtdata-defaults parent. -->
<parent>
<artifactId>virtdata-defaults</artifactId>
<groupId>io.nosqlbench</groupId>
<version>2.12.16-SNAPSHOT</version>
<relativePath>../virtdata-defaults</relativePath>
</parent>
<name>virtdata-lib-random</name>
<url>http://virtdata.io/</url>
<dependencies>
<!-- Sibling API module, pinned to the same snapshot version as the parent. -->
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>virtdata-api</artifactId>
<version>2.12.16-SNAPSHOT</version>
</dependency>
<!-- commons-lang-version is not defined in this POM; presumably it is supplied by the parent (verify). -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>${commons-lang-version}</version>
</dependency>
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
<version>2.9.9</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
<version>3.6.1</version>
</dependency>
<dependency>
<groupId>com.elega9t</groupId>
<artifactId>number-to-words</artifactId>
<version>1.0.0</version>
</dependency>
<!-- The remaining dependencies are test-scoped only. -->
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.13.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core-java8</artifactId>
<version>1.0.0m1</version>
<scope>test</scope>
</dependency>
</dependencies>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<build>
<resources>
<!-- Treat .md, .yaml and .txt files that live next to the Java sources as resources. -->
<resource>
<directory>src/main/java</directory>
<includes>
<include>**/*.md</include>
<include>**/*.yaml</include>
<include>**/*.txt</include>
</includes>
</resource>
<!-- Declaring a custom resource list replaces the default, so the standard resources directory is listed explicitly. -->
<resource>
<directory>src/main/resources</directory>
<includes>
<include>**</include>
</includes>
</resource>
</resources>
</build>
<profiles>
<!-- Profile that is active by default and declares the shade plugin. -->
<!-- No version, executions or configuration are given here; presumably they come from the parent POM (verify). -->
<profile>
<id>shade</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<artifactId>maven-shade-plugin</artifactId>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

View File

@@ -0,0 +1,19 @@
package io.virtdata.random;
import io.virtdata.core.murmur.Murmur3F;
import java.util.function.LongUnaryOperator;
/**
 * A control function for the concurrent generation tester. It has little
 * practical use beyond exercising otherwise useless generators.
 *
 * <p>The input value is ignored. Each call feeds the current
 * {@link System#nanoTime()} reading to the underlying {@code Murmur3F}
 * instance and returns that instance's value.</p>
 */
public class Murmur3Time implements LongUnaryOperator {

    private Murmur3F murmur3F;

    /**
     * Creates the hash, seeded from the hash code of the constructing thread's name.
     */
    public Murmur3Time() {
        final String threadName = Thread.currentThread().getName();
        this.murmur3F = new Murmur3F(threadName.hashCode());
    }

    /**
     * @param operand ignored
     * @return the hash value after the current nano-time has been fed in
     */
    @Override
    public long applyAsLong(long operand) {
        final long now = System.nanoTime();
        murmur3F.updateLongLE(now);
        return murmur3F.getValue();
    }
}

View File

@@ -0,0 +1,88 @@
/*
*
* Copyright 2015 Jonathan Shook
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.virtdata.random;
import io.virtdata.annotations.DeprecatedFunction;
import io.virtdata.util.VirtDataResources;
import org.apache.commons.math3.distribution.IntegerDistribution;
import org.apache.commons.math3.distribution.UniformIntegerDistribution;
import org.apache.commons.math3.random.MersenneTwister;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import java.nio.CharBuffer;
import java.util.function.LongFunction;
/**
 * Returns a randomly positioned, randomly sized extract of a data file as a String.
 *
 * <p>The extract length is sampled uniformly from {@code [minsize, maxsize]} and the
 * starting offset uniformly from {@code [1, limit - maxsize]}, where {@code limit} is the
 * limit of the loaded character image. The input value passed to {@link #apply(long)}
 * is ignored.</p>
 *
 * <p>File images are cached per file name and shared between instances that name the
 * same file. Earlier revisions kept a single static image for every instance, so a second
 * instance naming a different file silently served data from the first file.</p>
 */
@DeprecatedFunction("random mappers are not deterministic. They will be replaced with hash-based functions.")
public class RandomFileExtractToString implements LongFunction<String> {

    private final static Logger logger = LogManager.getLogger(RandomFileExtractToString.class);

    // One loaded image per file name. Fully qualified so that no new import is required.
    private static final java.util.concurrent.ConcurrentMap<String, CharBuffer> fileDataImages =
        new java.util.concurrent.ConcurrentHashMap<>();

    private final CharBuffer fileDataImage;
    private final int minsize;
    private final int maxsize;
    private final MersenneTwister rng;
    private final IntegerDistribution sizeDistribution;
    private final IntegerDistribution positionDistribution;
    private final String fileName;

    /**
     * Creates an extractor seeded from {@link System#nanoTime()}.
     *
     * @param fileName name of the data file to extract from
     * @param minsize  smallest extract length, inclusive
     * @param maxsize  largest extract length, inclusive
     */
    public RandomFileExtractToString(String fileName, int minsize, int maxsize) {
        this(fileName, minsize, maxsize, System.nanoTime());
    }

    /**
     * Creates an extractor with an explicit seed.
     *
     * @param fileName name of the data file to extract from
     * @param minsize  smallest extract length, inclusive
     * @param maxsize  largest extract length, inclusive
     * @param seed     seed for the random generator that drives both offset and length
     */
    public RandomFileExtractToString(String fileName, int minsize, int maxsize, long seed) {
        this.fileName = fileName;
        this.minsize = minsize;
        this.maxsize = maxsize;
        this.fileDataImage = imageFor(fileName);
        this.rng = new MersenneTwister(seed);
        this.sizeDistribution = new UniformIntegerDistribution(rng, minsize, maxsize);
        // The upper bound leaves room for the longest possible extract before the end of the image.
        this.positionDistribution = new UniformIntegerDistribution(rng, 1, fileDataImage.limit() - maxsize);
    }

    /** Returns the cached image for the named file, loading it on first use. */
    private static CharBuffer imageFor(String fileName) {
        return fileDataImages.computeIfAbsent(
            fileName,
            name -> VirtDataResources.readDataFileToCharBuffer(name)
        );
    }

    /**
     * @param input ignored
     * @return a randomly chosen extract of the file image
     * @throws RuntimeException wrapping any failure raised while taking the extract
     */
    @Override
    public String apply(long input) {
        // Sampling order (position first, then size) is kept so seeded sequences are unchanged.
        int offset = positionDistribution.sample();
        int length = sizeDistribution.sample();
        try {
            return fileDataImage.subSequence(offset, offset + length).toString();
        } catch (Exception e) {
            throw new RuntimeException(
                "Unable to extract " + length + " chars at offset " + offset + " from '" + fileName + "'", e);
        }
    }

    @Override
    public String toString() {
        return getClass().getSimpleName() + ":" + minsize + ":" + maxsize;
    }
}

View File

@@ -0,0 +1,44 @@
package io.virtdata.random;
import io.virtdata.annotations.DeprecatedFunction;
import io.virtdata.util.VirtDataResources;
import org.apache.commons.math3.distribution.IntegerDistribution;
import org.apache.commons.math3.distribution.UniformIntegerDistribution;
import org.apache.commons.math3.random.MersenneTwister;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import java.util.List;
import java.util.function.LongToIntFunction;
/**
 * Picks a random line from a data file and parses it as an int.
 * The input value passed to {@link #applyAsInt(long)} is ignored.
 */
@DeprecatedFunction("random mappers are not deterministic. They will be replaced with hash-based functions.")
public class RandomLineToInt implements LongToIntFunction {

    private final static Logger logger = LogManager.getLogger(RandomLineToInt.class);

    private final List<String> lines;
    private final MersenneTwister rng;
    private final IntegerDistribution itemDistribution;
    private final String filename;

    /**
     * Creates a mapper seeded from {@link System#nanoTime()}.
     *
     * @param filename name of the data file holding one integer per line
     */
    public RandomLineToInt(String filename) {
        this(filename, System.nanoTime());
    }

    /**
     * Creates a mapper with an explicit seed.
     *
     * @param filename name of the data file holding one integer per line
     * @param seed     seed for the random generator that selects lines
     */
    public RandomLineToInt(String filename, long seed) {
        this.filename = filename;
        this.lines = VirtDataResources.readDataFileLines(filename);
        this.rng = new MersenneTwister(seed);
        // NOTE(review): UniformIntegerDistribution bounds are inclusive, so an upper bound of
        // size()-2 never selects the last line and rejects a single-line file. Left unchanged
        // because altering it would change seeded output; confirm whether this is intended.
        this.itemDistribution = new UniformIntegerDistribution(rng, 0, lines.size() - 2);
    }

    @Override
    public String toString() {
        return getClass().getSimpleName() + ":" + filename;
    }

    /**
     * @param value ignored
     * @return the randomly selected line parsed as an int
     * @throws NumberFormatException if the selected line is not a valid int
     */
    @Override
    public int applyAsInt(long value) {
        int itemIdx = itemDistribution.sample();
        // parseInt yields the primitive directly; valueOf would box and immediately unbox.
        return Integer.parseInt(lines.get(itemIdx));
    }
}

View File

@@ -0,0 +1,77 @@
/*
*
* Copyright 2015 Jonathan Shook
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.virtdata.random;
import io.virtdata.annotations.DeprecatedFunction;
import io.virtdata.util.VirtDataResources;
import org.apache.commons.math3.distribution.IntegerDistribution;
import org.apache.commons.math3.distribution.UniformIntegerDistribution;
import org.apache.commons.math3.random.MersenneTwister;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import java.util.ArrayList;
import java.util.List;
import java.util.function.LongFunction;
/**
 * Returns a randomly selected line from a data file.
 * The input value passed to {@link #apply(long)} is ignored.
 *
 * TODO: Redo this as a functional with murmur3F
 */
@DeprecatedFunction("random mappers are not deterministic. They will be replaced with hash-based functions.")
public class RandomLineToString implements LongFunction<String> {

    private final static Logger logger = LogManager.getLogger(RandomLineToString.class);

    private final List<String> lines;
    private final MersenneTwister rng;
    private final IntegerDistribution itemDistribution;
    private final String filename;

    /**
     * Creates a mapper seeded from {@link System#nanoTime()}.
     *
     * @param filename name of the data file to draw lines from
     */
    public RandomLineToString(String filename) {
        this(filename, new MersenneTwister(System.nanoTime()));
    }

    /**
     * Creates a mapper that draws from a caller-supplied generator.
     * All other constructors delegate here so the file is read exactly once.
     *
     * @param filename name of the data file to draw lines from
     * @param rng      random generator used to select lines
     */
    public RandomLineToString(String filename, MersenneTwister rng) {
        this.rng = rng;
        this.filename = filename;
        this.lines = VirtDataResources.readDataFileLines(filename);
        // NOTE(review): UniformIntegerDistribution bounds are inclusive, so size()-2 never selects
        // the last line and rejects a single-line file. Kept as-is to preserve seeded output;
        // confirm whether this is intended.
        this.itemDistribution = new UniformIntegerDistribution(rng, 0, lines.size() - 2);
    }

    /**
     * Creates a mapper with an explicit seed.
     *
     * @param filename name of the data file to draw lines from
     * @param seed     seed for the random generator that selects lines
     */
    public RandomLineToString(String filename, long seed) {
        this(filename, new MersenneTwister(seed));
    }

    @Override
    public String toString() {
        return getClass().getSimpleName() + ":" + filename;
    }

    /**
     * @param operand ignored
     * @return a randomly selected line of the file
     */
    @Override
    public String apply(long operand) {
        int itemIdx = itemDistribution.sample();
        return lines.get(itemIdx);
    }
}

View File

@@ -0,0 +1,60 @@
/*
*
* Copyright 2015 Jonathan Shook
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.virtdata.random;
import io.virtdata.annotations.DeprecatedFunction;
import org.apache.commons.math3.distribution.IntegerDistribution;
import org.apache.commons.math3.distribution.UniformIntegerDistribution;
import org.apache.commons.math3.random.MersenneTwister;
import java.util.HashMap;
import java.util.Map;
import java.util.function.LongFunction;
/**
 * Builds a map whose keys and values are randomly selected lines of a data file.
 *
 * <p>The number of insertions is sampled uniformly from {@code [0, maxSize - 1]}. Because
 * randomly drawn keys may repeat, the resulting map can hold fewer entries than were inserted.
 * The input value passed to {@link #apply(long)} does not influence the result.</p>
 */
@DeprecatedFunction("random mappers are not deterministic. They will be replaced with hash-based functions.")
public class RandomLineToStringMap implements LongFunction<Map<String,String>> {

    private final RandomLineToString lineDataMapper;
    private final IntegerDistribution sizeDistribution;
    private final MersenneTwister rng;

    /**
     * Creates a mapper seeded from {@link System#nanoTime()}.
     *
     * @param paramFile name of the data file to draw keys and values from
     * @param maxSize   exclusive upper bound on the number of insertions
     */
    public RandomLineToStringMap(String paramFile, int maxSize) {
        this(paramFile, maxSize, System.nanoTime());
    }

    /**
     * Creates a mapper with an explicit seed.
     *
     * @param paramFile name of the data file to draw keys and values from
     * @param maxSize   exclusive upper bound on the number of insertions
     * @param seed      seed for the generator that drives both map size and line selection
     */
    public RandomLineToStringMap(String paramFile, int maxSize, long seed) {
        this.rng = new MersenneTwister(seed);
        this.sizeDistribution = new UniformIntegerDistribution(rng, 0, maxSize - 1);
        // Share the seeded generator with the line mapper. Previously the line mapper was always
        // seeded from nanoTime, so a seeded instance still produced unrepeatable content.
        this.lineDataMapper = new RandomLineToString(paramFile, rng);
    }

    /**
     * @param input passed through to the line mapper, which ignores it
     * @return a new map of randomly selected lines
     */
    @Override
    public Map<String, String> apply(long input) {
        int mapSize = sizeDistribution.sample();
        Map<String, String> map = new HashMap<>();
        for (int idx = 0; idx < mapSize; idx++) {
            // Draw the key before the value so the sampling order is explicit.
            String key = lineDataMapper.apply(input);
            String value = lineDataMapper.apply(input);
            map.put(key, value);
        }
        return map;
    }
}

View File

@@ -0,0 +1,55 @@
/*
*
* Copyright 2015 Jonathan Shook
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.virtdata.random;
import io.virtdata.annotations.DeprecatedFunction;
import org.apache.commons.math3.random.MersenneTwister;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import java.util.Map;
import java.util.function.LongFunction;
import java.util.stream.Collectors;
/**
 * Renders a random map of file lines as a single string of {@code key:value;} pairs.
 * The map itself is produced by {@link RandomLineToStringMap}.
 */
@DeprecatedFunction("random mappers are not deterministic. They will be replaced with hash-based functions.")
public class RandomLinesToKeyValueString implements LongFunction<String> {

    private static final Logger logger = LogManager.getLogger(RandomLinesToKeyValueString.class);

    private final RandomLineToStringMap lineDataMapper;

    /**
     * Creates a mapper seeded from {@link System#nanoTime()}.
     *
     * @param paramFile name of the data file to draw keys and values from
     * @param maxSize   size bound handed to the underlying map mapper
     */
    public RandomLinesToKeyValueString(String paramFile, int maxSize) {
        this(paramFile, maxSize, System.nanoTime());
    }

    /**
     * Creates a mapper with an explicit seed.
     *
     * @param paramFile name of the data file to draw keys and values from
     * @param maxsize   size bound handed to the underlying map mapper
     * @param seed      seed forwarded to the underlying map mapper
     */
    public RandomLinesToKeyValueString(String paramFile, int maxsize, long seed) {
        // The seed used to feed only a generator that was never read, so it had no effect.
        // Forward it to the delegate instead.
        lineDataMapper = new RandomLineToStringMap(paramFile, maxsize, seed);
    }

    /**
     * @param input passed through to the underlying map mapper
     * @return the map entries rendered as {@code key:value;} and concatenated without a separator
     */
    @Override
    public String apply(long input) {
        Map<String, String> stringStringMap = lineDataMapper.apply(input);
        return stringStringMap.entrySet().stream()
            .map(es -> es.getKey() + ":" + es.getValue() + ";")
            .collect(Collectors.joining());
    }
}

View File

@@ -0,0 +1,43 @@
/*
*
* Copyright 2015 Jonathan Shook
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.virtdata.random;
import io.virtdata.annotations.DeprecatedFunction;
import org.apache.commons.math3.random.MersenneTwister;
import java.util.function.LongFunction;
/**
 * Returns the decimal string form of a random non-negative long.
 * The input value passed to {@link #apply(long)} is ignored.
 */
@DeprecatedFunction("random mappers are not deterministic. They will be replaced with hash-based functions.")
public class RandomLongToString implements LongFunction<String> {

    private final MersenneTwister theTwister;

    /** Creates a mapper seeded from {@link System#nanoTime()}. */
    public RandomLongToString() {
        this(System.nanoTime());
    }

    /**
     * @param seed seed for the random generator
     */
    public RandomLongToString(long seed) {
        theTwister = new MersenneTwister(seed);
    }

    /**
     * @param input ignored
     * @return a random value in {@code [0, Long.MAX_VALUE]} rendered as a decimal string
     */
    @Override
    public String apply(long input) {
        long raw = theTwister.nextLong();
        // Math.abs(Long.MIN_VALUE) is still negative, so map that single value to zero.
        // Every other draw yields the same result as before.
        long magnitude = (raw == Long.MIN_VALUE) ? 0L : Math.abs(raw);
        return String.valueOf(magnitude);
    }
}

View File

@@ -0,0 +1,42 @@
/*
*
* Copyright 2015 Jonathan Shook
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.virtdata.random;
import io.virtdata.annotations.DeprecatedFunction;
import java.util.function.LongFunction;
/**
 * Returns random extracts of the bundled {@code lorem-ipsum.txt} data file by
 * delegating to {@link RandomFileExtractToString}.
 */
@DeprecatedFunction("random mappers are not deterministic. They will be replaced with hash-based functions.")
public class RandomLoremExtractToString implements LongFunction<String> {

    private final RandomFileExtractToString randomFileExtractMapper;

    /**
     * Creates an extractor seeded from {@link System#nanoTime()}.
     *
     * @param minsize smallest extract length handed to the delegate
     * @param maxsize largest extract length handed to the delegate
     */
    public RandomLoremExtractToString(int minsize, int maxsize) {
        this(minsize, maxsize, System.nanoTime());
    }

    /**
     * Creates an extractor with an explicit seed.
     *
     * @param minsize smallest extract length handed to the delegate
     * @param maxsize largest extract length handed to the delegate
     * @param seed    seed handed to the delegate
     */
    public RandomLoremExtractToString(int minsize, int maxsize, long seed) {
        final String loremFile = "lorem-ipsum.txt";
        this.randomFileExtractMapper = new RandomFileExtractToString(loremFile, minsize, maxsize, seed);
    }

    /**
     * @param input passed through to the delegate
     * @return the extract produced by the delegate
     */
    @Override
    public String apply(long input) {
        final String extract = randomFileExtractMapper.apply(input);
        return extract;
    }
}

View File

@@ -0,0 +1,42 @@
package io.virtdata.random;
import io.virtdata.annotations.DeprecatedFunction;
import io.virtdata.api.DataMapper;
import org.apache.commons.math3.random.MersenneTwister;
/**
 * Returns a random double in the half-open range {@code [min, max)}.
 * The input value passed to {@link #get(long)} is ignored.
 */
@DeprecatedFunction("random mappers are not deterministic. They will be replaced with hash-based functions.")
public class RandomRangedToDouble implements DataMapper<Double> {

    private final MersenneTwister theTwister;
    private final long min;
    private final long max;
    private final long length;

    /**
     * Creates a mapper seeded from {@link System#nanoTime()}.
     *
     * @param min inclusive lower bound
     * @param max exclusive upper bound, must be greater than {@code min}
     */
    public RandomRangedToDouble(long min, long max) {
        this(min, max, System.nanoTime());
    }

    /**
     * Creates a mapper with an explicit seed.
     *
     * @param min  inclusive lower bound
     * @param max  exclusive upper bound, must be greater than {@code min}
     * @param seed seed for the random generator
     * @throws IllegalArgumentException if {@code max <= min}
     */
    public RandomRangedToDouble(long min, long max, long seed) {
        this.theTwister = new MersenneTwister(seed);
        if (max <= min) {
            // The message now matches the check: equal bounds are rejected too.
            throw new IllegalArgumentException("max must be > min, but min=" + min + ", max=" + max);
        }
        this.min = min;
        this.max = max;
        this.length = max - min;
    }

    /**
     * @param input ignored
     * @return a random value in {@code [min, max)}
     */
    @Override
    public Double get(long input) {
        // nextDouble() is in [0,1). The previous "% length" was a no-op for any length >= 1,
        // so results never left [min, min+1). Scale by the range width instead.
        return min + (theTwister.nextDouble() * length);
    }

    @Override
    public String toString() {
        return getClass().getSimpleName() + ":" + min + ":" + max;
    }
}

View File

@@ -0,0 +1,58 @@
/*
*
* Copyright 2015 Jonathan Shook
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.virtdata.random;
import io.virtdata.annotations.DeprecatedFunction;
import org.apache.commons.math3.random.MersenneTwister;
import java.util.function.LongFunction;
/**
 * Returns a random value in the half-open range {@code [min, max)}, narrowed to an Integer.
 * The input value passed to {@link #apply(long)} is ignored.
 *
 * <p>The bounds are longs and the result is cast to {@code int}, so bounds outside the
 * int range produce truncated values.</p>
 */
@DeprecatedFunction("random mappers are not deterministic. They will be replaced with hash-based functions.")
public class RandomRangedToInt implements LongFunction<Integer> {

    private final MersenneTwister theTwister;
    private final long min;
    private final long max;
    private final long length;

    /**
     * Creates a mapper seeded from {@link System#nanoTime()}.
     *
     * @param min inclusive lower bound
     * @param max exclusive upper bound, must be greater than {@code min}
     */
    public RandomRangedToInt(long min, long max) {
        this(min, max, System.nanoTime());
    }

    /**
     * Creates a mapper with an explicit seed.
     *
     * @param min  inclusive lower bound
     * @param max  exclusive upper bound, must be greater than {@code min}
     * @param seed seed for the random generator
     * @throws IllegalArgumentException if {@code max <= min}
     */
    public RandomRangedToInt(long min, long max, long seed) {
        this.theTwister = new MersenneTwister(seed);
        if (max <= min) {
            // The message now matches the check: equal bounds are rejected too.
            throw new IllegalArgumentException("max must be > min, but min=" + min + ", max=" + max);
        }
        this.min = min;
        this.max = max;
        this.length = max - min;
    }

    @Override
    public String toString() {
        return getClass().getSimpleName() + ":" + min + ":" + max;
    }

    /**
     * @param operand ignored
     * @return a random value in {@code [min, max)} cast to int
     */
    @Override
    public Integer apply(long operand) {
        long raw = theTwister.nextLong();
        // Math.abs(Long.MIN_VALUE) is still negative and would push the result below min.
        // Map that single value to zero; every other draw yields the same result as before.
        long value = (raw == Long.MIN_VALUE) ? 0L : Math.abs(raw);
        value %= length;
        value += min;
        return (int) value;
    }
}

View File

@@ -0,0 +1,58 @@
/*
*
* Copyright 2015 Jonathan Shook
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.virtdata.random;
import io.virtdata.annotations.DeprecatedFunction;
import org.apache.commons.math3.random.MersenneTwister;
import java.util.function.LongUnaryOperator;
/**
 * Returns a random long in the half-open range {@code [min, max)}.
 * The input value passed to {@link #applyAsLong(long)} is ignored.
 */
@DeprecatedFunction("random mappers are not deterministic. They will be replaced with hash-based functions.")
public class RandomRangedToLong implements LongUnaryOperator {

    private final MersenneTwister theTwister;
    private final long min;
    private final long max;
    private final long length;

    /**
     * Creates a mapper seeded from {@link System#nanoTime()}.
     *
     * @param min inclusive lower bound
     * @param max exclusive upper bound, must be greater than {@code min}
     */
    public RandomRangedToLong(long min, long max) {
        this(min, max, System.nanoTime());
    }

    /**
     * Creates a mapper with an explicit seed.
     *
     * @param min  inclusive lower bound
     * @param max  exclusive upper bound, must be greater than {@code min}
     * @param seed seed for the random generator
     * @throws IllegalArgumentException if {@code max <= min}
     */
    public RandomRangedToLong(long min, long max, long seed) {
        this.theTwister = new MersenneTwister(seed);
        if (max <= min) {
            // The message now matches the check: equal bounds are rejected too.
            throw new IllegalArgumentException("max must be > min, but min=" + min + ", max=" + max);
        }
        this.min = min;
        this.max = max;
        this.length = max - min;
    }

    /**
     * @param input ignored
     * @return a random value in {@code [min, max)}
     */
    @Override
    public long applyAsLong(long input) {
        long raw = theTwister.nextLong();
        // Math.abs(Long.MIN_VALUE) is still negative and would push the result below min.
        // Map that single value to zero; every other draw yields the same result as before.
        long value = (raw == Long.MIN_VALUE) ? 0L : Math.abs(raw);
        value %= length;
        value += min;
        return value;
    }

    @Override
    public String toString() {
        return getClass().getSimpleName() + ":" + min + ":" + max;
    }
}

View File

@@ -0,0 +1,59 @@
/*
*
* Copyright 2015 Jonathan Shook
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.virtdata.random;
import io.virtdata.annotations.DeprecatedFunction;
import org.apache.commons.math3.random.MersenneTwister;
import java.util.function.LongFunction;
/**
 * Returns the decimal string form of a random long in the half-open range {@code [min, max)}.
 * The input value passed to {@link #apply(long)} is ignored.
 */
@DeprecatedFunction("random mappers are not deterministic. They will be replaced with hash-based functions.")
public class RandomRangedToString implements LongFunction<String> {

    private final MersenneTwister theTwister;
    private final long min;
    private final long max;
    private final long length;

    /**
     * Creates a mapper seeded from {@link System#nanoTime()}.
     *
     * @param min inclusive lower bound
     * @param max exclusive upper bound, must be greater than {@code min}
     */
    public RandomRangedToString(long min, long max) {
        this(min, max, System.nanoTime());
    }

    /**
     * Creates a mapper with an explicit seed.
     *
     * @param min  inclusive lower bound
     * @param max  exclusive upper bound, must be greater than {@code min}
     * @param seed seed for the random generator
     * @throws IllegalArgumentException if {@code max <= min}
     */
    public RandomRangedToString(long min, long max, long seed) {
        this.theTwister = new MersenneTwister(seed);
        if (max <= min) {
            // The message now matches the check: equal bounds are rejected too.
            throw new IllegalArgumentException("max must be > min, but min=" + min + ", max=" + max);
        }
        this.min = min;
        this.max = max;
        this.length = max - min;
    }

    /**
     * @param input ignored
     * @return a random value in {@code [min, max)} rendered as a decimal string
     */
    @Override
    public String apply(long input) {
        long raw = theTwister.nextLong();
        // Math.abs(Long.MIN_VALUE) is still negative and would push the result below min.
        // Map that single value to zero; every other draw yields the same result as before.
        long value = (raw == Long.MIN_VALUE) ? 0L : Math.abs(raw);
        value %= length;
        value += min;
        return String.valueOf(value);
    }

    @Override
    public String toString() {
        return getClass().getSimpleName() + ":" + min + ":" + max;
    }
}

View File

@@ -0,0 +1,50 @@
/*
*
* Copyright 2015 Jonathan Shook
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.virtdata.random;
import io.virtdata.annotations.DeprecatedFunction;
import org.apache.commons.math3.random.MersenneTwister;
import java.nio.ByteBuffer;
import java.util.function.LongFunction;
/**
 * Produces a {@link ByteBuffer} of a fixed length filled with random bytes.
 * The input value passed to {@link #apply(long)} is ignored.
 */
@DeprecatedFunction("random mappers are not deterministic. They will be replaced with hash-based functions.")
public class RandomToByteBuffer implements LongFunction<ByteBuffer> {

    private final MersenneTwister rng;
    private int length;

    /**
     * Creates a mapper seeded from {@link System#nanoTime()}.
     *
     * @param length number of random bytes in each produced buffer
     */
    public RandomToByteBuffer(int length) {
        this(length, System.nanoTime());
    }

    /**
     * Creates a mapper with an explicit seed.
     *
     * @param length number of random bytes in each produced buffer
     * @param seed   seed for the random generator
     */
    public RandomToByteBuffer(int length, long seed) {
        this.length = length;
        this.rng = new MersenneTwister(seed);
    }

    /**
     * @param input ignored
     * @return a buffer wrapping a freshly allocated array of random bytes
     */
    @Override
    public ByteBuffer apply(long input) {
        final byte[] randomBytes = new byte[length];
        rng.nextBytes(randomBytes);
        final ByteBuffer wrapped = ByteBuffer.wrap(randomBytes);
        return wrapped;
    }
}

View File

@@ -0,0 +1,2 @@
io.virtdata.processors.FunctionDocInfoProcessor
io.virtdata.processors.ServiceProcessor