allow HOF form of HashedFileExtract with sizefunc parameter

This commit is contained in:
Jonathan Shook 2021-02-18 15:55:13 -06:00
parent afebe874a9
commit 7e776d5fcf
2 changed files with 50 additions and 42 deletions

View File

@ -21,12 +21,14 @@ package io.nosqlbench.virtdata.library.basics.shared.from_long.to_string;
import io.nosqlbench.nb.api.content.NBIO;
import io.nosqlbench.virtdata.api.annotations.Example;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.api.bindings.VirtDataConversions;
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_int.HashRange;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import java.nio.CharBuffer;
import java.util.function.LongFunction;
import java.util.function.LongToIntFunction;
/**
* Pseudo-randomly extract a section of a text file and return it according to some
@ -39,58 +41,44 @@ public class HashedFileExtractToString implements LongFunction<String> {
// NOTE(review): this span comes from a rendered diff, so field declarations from before and
// after the change appear together (positionRange is declared twice) — confirm against the real file.
private final static Logger logger = LogManager.getLogger(HashedFileExtractToString.class);
// Static text image filled lazily by loadData(); being static, it is shared by every instance.
private static CharBuffer fileDataImage = null;
// Size and position generators assigned by the constructor that calls loadData().
private final HashRange sizeRange;
private final HashRange positionRange;
// Size bounds and file name as stored by the constructor that calls loadData().
private final int minsize;
private final int maxsize;
private final String fileName;
// Per-instance read-only view of the file contents (see asReadOnlyBuffer() in the constructors).
private final CharBuffer buf;
// Yields the requested fragment size for an input; apply() reduces it modulo buf.limit().
private final LongToIntFunction sizeFunc;
// Yields a raw position for an input; apply() reduces it modulo (buf.limit() - size).
private final LongToIntFunction positionRange = new HashRange(0, Integer.MAX_VALUE);
// Per-thread StringBuilder; not referenced by any code visible in this view.
private final static ThreadLocal<StringBuilder> tl_sb = ThreadLocal.withInitial(StringBuilder::new);
// Name of the source file as passed to the constructors; reported by toString().
private final String filename;
// NOTE(review): two constructor headers (parameter fileName, then parameter filename) appear
// here with a single closing brace; this looks like the before/after sides of the diff shown together.
@Example({"HashedFileExtractToString('data/adventures.txt',100,200)", "return a fragment from adventures.txt between 100 and 200 characters long"})
public HashedFileExtractToString(String fileName, int minsize, int maxsize) {
this.fileName = fileName;
this.minsize = minsize;
this.maxsize = maxsize;
// Fills the static fileDataImage if it has not been loaded yet.
loadData();
this.sizeRange = new HashRange(minsize, maxsize);
// Upper bound is derived from the image length minus maxsize; presumably this keeps
// offset + length inside the image — verify against HashRange's bounds semantics.
this.positionRange = new HashRange(1, (fileDataImage.limit() - maxsize) - 1);
public HashedFileExtractToString(String filename, int minsize, int maxsize) {
this.filename = filename;
// Each instance reads the named file itself and keeps a read-only view of it.
this.buf = NBIO.readCharBuffer(filename).asReadOnlyBuffer();
// Fragment sizes come from a HashRange built from minsize and maxsize.
this.sizeFunc = new HashRange(minsize, maxsize);
}
// @Example({"HashedFileExtractToString('data/adventures.txt',100,.1)", "return a fragment between 90 and 110 characters long"})
// public HashedFileExtractToString(String fileName, int avgsize, double percentVary) {
// this(fileName, avgsize - (int) (percentVary * avgsize), avgsize + (int) (percentVary * avgsize));
// }
// Loads the file named by fileName into the static fileDataImage the first time it is needed.
// The image is only read when fileDataImage is still null, so once any instance has loaded a
// file, later instances reuse that image regardless of their own fileName.
// NOTE(review): the closing brace of this method is not visible in this view.
private void loadData() {
// Unsynchronized null check first, then a second check under the class lock.
// fileDataImage is not declared volatile in the field declarations shown in this view.
if (fileDataImage == null) {
synchronized (HashedFileExtractToString.class) {
if (fileDataImage == null) {
CharBuffer image = NBIO.readCharBuffer(fileName);
fileDataImage = image;
}
}
}
/**
* Provide a size function for the fragment to be extracted. In this form, if the size function specifies a string
* size which is larger than the text image, it is truncated via modulo to fall within the text image size.
*
* @param filename The file name to be loaded
* @param sizefunc A function which determines the size of the data to be loaded.
*/
@Example({"HashedFileExtractToString('data/adventures.txt',Uniform())", "return a fragment from adventures.txt from a random offset, based on the size function provided."})
public HashedFileExtractToString(String filename, Object sizefunc) {
this.filename = filename;
this.buf = NBIO.readCharBuffer(filename).asReadOnlyBuffer();
sizeFunc = VirtDataConversions.adaptFunction(sizefunc, LongToIntFunction.class);
}
/**
 * Extract a pseudo-randomly positioned fragment of the loaded text for the given input.
 *
 * <p>The requested size comes from the size function and is reduced into the range
 * {@code [0, buf.limit())}. The start offset comes from the position function and is
 * reduced so that the fragment always ends inside the buffer.</p>
 *
 * @param input the value fed to both the size function and the position function
 * @return the extracted fragment, always shorter than the loaded text
 * @throws IllegalStateException if the loaded text is empty, since no fragment can be taken
 */
@Override
public String apply(long input) {
    int limit = buf.limit();
    if (limit == 0) {
        // Fail with a clear message instead of an ArithmeticException from modulo by zero.
        throw new IllegalStateException("Unable to extract a fragment from empty file '" + filename + "'");
    }

    // floorMod keeps the size non-negative even when the size function yields a negative
    // value (e.g. after narrowing a long result to int); '%' would keep the negative sign.
    int size = Math.floorMod(sizeFunc.applyAsInt(input), limit);

    // size < limit, so (limit - size) >= 1 and pos + size never exceeds limit - 1.
    int pos = Math.floorMod(positionRange.applyAsInt(input), limit - size);

    return buf.subSequence(pos, pos + size).toString();
}
// Describes this function by its simple class name plus its configuration.
// NOTE(review): two return statements appear here — one reporting minsize/maxsize and one
// reporting filename; this looks like the before/after sides of the diff shown together.
public String toString() {
return getClass().getSimpleName() + ":" + minsize + ":" + maxsize;
return getClass().getSimpleName() + ":" + filename;
}
}

View File

@ -3,6 +3,9 @@ package io.nosqlbench.virtdata.library.basics.tests.long_string;
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_string.HashedFileExtractToString;
import org.junit.Test;
import java.util.IntSummaryStatistics;
import java.util.function.LongUnaryOperator;
import static org.assertj.core.api.Assertions.assertThat;
public class HashedFileExtractToStringTest {
@ -10,12 +13,29 @@ public class HashedFileExtractToStringTest {
// Verifies that, for cycles 0..49999, every fragment extracted with bounds (3, 3000) has a
// length within those bounds, and prints summary statistics of the observed lengths.
@Test
public void testHashedFileBasic() {
HashedFileExtractToString extract =
// NOTE(review): the constructor call is listed twice; presumably a whitespace-only change
// rendered as both sides of the diff.
new HashedFileExtractToString("data/lorem_ipsum_full.txt", 3, 3000);
new HashedFileExtractToString("data/lorem_ipsum_full.txt", 3, 3000);
IntSummaryStatistics iss = new IntSummaryStatistics();
for (long cycle = 0; cycle < 50000; cycle++) {
String apply = extract.apply(cycle);
// Record the length for the summary printed after the loop.
iss.accept(apply.length());
assertThat(apply.length()).isGreaterThanOrEqualTo(3);
assertThat(apply.length()).isLessThanOrEqualTo(3000);
}
System.out.println("Loaded examples from data/lorem_ipsum_full.txt:" + iss.toString());
}
}
/**
 * Exercises the size-function constructor over cycles 0..49999 and prints summary
 * statistics of the resulting fragment lengths.
 */
@Test
public void testHashedFileFunction() {
    // Size grows linearly with the cycle; the extractor reduces it to fit the text image.
    LongUnaryOperator sizing = (long f) -> 32734 * f;
    HashedFileExtractToString extractor =
        new HashedFileExtractToString("data/lorem_ipsum_full.txt", sizing);

    IntSummaryStatistics lengths = new IntSummaryStatistics();
    long cycle = 0;
    while (cycle < 50000) {
        lengths.accept(extractor.apply(cycle).length());
        cycle++;
    }

    System.out.println("Loaded examples from data/lorem_ipsum_full.txt:" + lengths);
}
}