mirror of
https://github.com/nosqlbench/nosqlbench.git
synced 2025-01-26 15:36:33 -06:00
Allow a higher-order-function (HOF) form of HashedFileExtractToString that takes a sizefunc parameter
This commit is contained in:
parent
afebe874a9
commit
7e776d5fcf
@ -21,12 +21,14 @@ package io.nosqlbench.virtdata.library.basics.shared.from_long.to_string;
|
||||
import io.nosqlbench.nb.api.content.NBIO;
|
||||
import io.nosqlbench.virtdata.api.annotations.Example;
|
||||
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
|
||||
import io.nosqlbench.virtdata.api.bindings.VirtDataConversions;
|
||||
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_int.HashRange;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
|
||||
import java.nio.CharBuffer;
|
||||
import java.util.function.LongFunction;
|
||||
import java.util.function.LongToIntFunction;
|
||||
|
||||
/**
|
||||
* Pseudo-randomly extract a section of a text file and return it according to some
|
||||
@ -39,58 +41,44 @@ public class HashedFileExtractToString implements LongFunction<String> {
|
||||
|
||||
private final static Logger logger = LogManager.getLogger(HashedFileExtractToString.class);
|
||||
|
||||
private static CharBuffer fileDataImage = null;
|
||||
private final HashRange sizeRange;
|
||||
private final HashRange positionRange;
|
||||
|
||||
private final int minsize;
|
||||
private final int maxsize;
|
||||
private final String fileName;
|
||||
private final CharBuffer buf;
|
||||
private final LongToIntFunction sizeFunc;
|
||||
private final LongToIntFunction positionRange = new HashRange(0, Integer.MAX_VALUE);
|
||||
private final static ThreadLocal<StringBuilder> tl_sb = ThreadLocal.withInitial(StringBuilder::new);
|
||||
private final String filename;
|
||||
|
||||
/**
 * Create an extractor whose fragment size is hashed into a range built from
 * {@code minsize} and {@code maxsize}. As with every size function, a size that is not
 * smaller than the text image is reduced by modulo when the fragment is extracted.
 *
 * @param filename The file name to be loaded
 * @param minsize  The lower bound passed to the size range
 * @param maxsize  The upper bound passed to the size range
 */
@Example({"HashedFileExtractToString('data/adventures.txt',100,200)", "return a fragment from adventures.txt between 100 and 200 characters long"})
public HashedFileExtractToString(String filename, int minsize, int maxsize) {
    this.filename = filename;
    // Each instance holds its own read-only view of the file image.
    this.buf = NBIO.readCharBuffer(filename).asReadOnlyBuffer();
    this.sizeFunc = new HashRange(minsize, maxsize);
}
|
||||
|
||||
// @Example({"HashedFileExtractToString('data/adventures.txt',100,.1)", "return a fragment between 90 and 110 characters long"})
|
||||
// public HashedFileExtractToString(String fileName, int avgsize, double percentVary) {
|
||||
// this(fileName, avgsize - (int) (percentVary * avgsize), avgsize + (int) (percentVary * avgsize));
|
||||
// }
|
||||
|
||||
private void loadData() {
|
||||
if (fileDataImage == null) {
|
||||
synchronized (HashedFileExtractToString.class) {
|
||||
if (fileDataImage == null) {
|
||||
CharBuffer image = NBIO.readCharBuffer(fileName);
|
||||
fileDataImage = image;
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Provide a size function for the fragment to be extracted. In this form, if the size function specifies a string
|
||||
* size which is larger than the text image, it is truncated via modulo to fall within the text image size.
|
||||
*
|
||||
* @param filename The file name to be loaded
|
||||
* @param sizefunc A function which determines the size of the data to be loaded.
|
||||
*/
|
||||
@Example({"HashedFileExtractToString('data/adventures.txt',Uniform())", "return a fragment from adventures.txt from a random offset, based on the size function provided."})
|
||||
public HashedFileExtractToString(String filename, Object sizefunc) {
|
||||
this.filename = filename;
|
||||
this.buf = NBIO.readCharBuffer(filename).asReadOnlyBuffer();
|
||||
sizeFunc = VirtDataConversions.adaptFunction(sizefunc, LongToIntFunction.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String apply(long input) {
|
||||
|
||||
|
||||
int offset = positionRange.applyAsInt(input);
|
||||
int length = sizeRange.applyAsInt(input);
|
||||
String sub = null;
|
||||
try {
|
||||
sub = fileDataImage.subSequence(offset, offset + length).toString();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return sub;
|
||||
|
||||
int size = sizeFunc.applyAsInt(input) % buf.limit();
|
||||
int pos = positionRange.applyAsInt(input);
|
||||
pos = pos % (buf.limit() - size); // modulo by overrun if >0
|
||||
return buf.subSequence(pos, pos + size).toString();
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return getClass().getSimpleName() + ":" + minsize + ":" + maxsize;
|
||||
return getClass().getSimpleName() + ":" + filename;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -3,6 +3,9 @@ package io.nosqlbench.virtdata.library.basics.tests.long_string;
|
||||
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_string.HashedFileExtractToString;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.IntSummaryStatistics;
|
||||
import java.util.function.LongUnaryOperator;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
public class HashedFileExtractToStringTest {
|
||||
@ -10,12 +13,29 @@ public class HashedFileExtractToStringTest {
|
||||
@Test
|
||||
public void testHashedFileBasic() {
|
||||
HashedFileExtractToString extract =
|
||||
new HashedFileExtractToString("data/lorem_ipsum_full.txt", 3, 3000);
|
||||
new HashedFileExtractToString("data/lorem_ipsum_full.txt", 3, 3000);
|
||||
IntSummaryStatistics iss = new IntSummaryStatistics();
|
||||
for (long cycle = 0; cycle < 50000; cycle++) {
|
||||
String apply = extract.apply(cycle);
|
||||
iss.accept(apply.length());
|
||||
assertThat(apply.length()).isGreaterThanOrEqualTo(3);
|
||||
assertThat(apply.length()).isLessThanOrEqualTo(3000);
|
||||
}
|
||||
|
||||
System.out.println("Loaded examples from data/lorem_ipsum_full.txt:" + iss.toString());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHashedFileFunction() {
|
||||
HashedFileExtractToString extract =
|
||||
new HashedFileExtractToString("data/lorem_ipsum_full.txt", (LongUnaryOperator) ((long f) -> 32734 * f));
|
||||
IntSummaryStatistics iss = new IntSummaryStatistics();
|
||||
|
||||
for (long cycle = 0; cycle < 50000; cycle++) {
|
||||
String apply = extract.apply(cycle);
|
||||
iss.accept(apply.length());
|
||||
}
|
||||
|
||||
System.out.println("Loaded examples from data/lorem_ipsum_full.txt:" + iss.toString());
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user