allow HOF form of HashedFileExtract with sizefunc parameter

This commit is contained in:
Jonathan Shook
2021-02-18 15:55:13 -06:00
parent afebe874a9
commit 7e776d5fcf
2 changed files with 50 additions and 42 deletions

View File

@@ -21,12 +21,14 @@ package io.nosqlbench.virtdata.library.basics.shared.from_long.to_string;
import io.nosqlbench.nb.api.content.NBIO; import io.nosqlbench.nb.api.content.NBIO;
import io.nosqlbench.virtdata.api.annotations.Example; import io.nosqlbench.virtdata.api.annotations.Example;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper; import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.api.bindings.VirtDataConversions;
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_int.HashRange; import io.nosqlbench.virtdata.library.basics.shared.from_long.to_int.HashRange;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import java.nio.CharBuffer; import java.nio.CharBuffer;
import java.util.function.LongFunction; import java.util.function.LongFunction;
import java.util.function.LongToIntFunction;
/** /**
* Pseudo-randomly extract a section of a text file and return it according to some * Pseudo-randomly extract a section of a text file and return it according to some
@@ -39,58 +41,44 @@ public class HashedFileExtractToString implements LongFunction<String> {
private final static Logger logger = LogManager.getLogger(HashedFileExtractToString.class); private final static Logger logger = LogManager.getLogger(HashedFileExtractToString.class);
private static CharBuffer fileDataImage = null;
private final HashRange sizeRange;
private final HashRange positionRange;
private final int minsize; private final CharBuffer buf;
private final int maxsize; private final LongToIntFunction sizeFunc;
private final String fileName; private final LongToIntFunction positionRange = new HashRange(0, Integer.MAX_VALUE);
private final static ThreadLocal<StringBuilder> tl_sb = ThreadLocal.withInitial(StringBuilder::new);
private final String filename;
@Example({"HashedFileExtractToString('data/adventures.txt',100,200)", "return a fragment from adventures.txt between 100 and 200 characters long"}) @Example({"HashedFileExtractToString('data/adventures.txt',100,200)", "return a fragment from adventures.txt between 100 and 200 characters long"})
public HashedFileExtractToString(String fileName, int minsize, int maxsize) { public HashedFileExtractToString(String filename, int minsize, int maxsize) {
this.fileName = fileName; this.filename = filename;
this.minsize = minsize; this.buf = NBIO.readCharBuffer(filename).asReadOnlyBuffer();
this.maxsize = maxsize; this.sizeFunc = new HashRange(minsize, maxsize);
loadData();
this.sizeRange = new HashRange(minsize, maxsize);
this.positionRange = new HashRange(1, (fileDataImage.limit() - maxsize) - 1);
} }
// @Example({"HashedFileExtractToString('data/adventures.txt',100,.1)", "return a fragment between 90 and 110 characters long"}) /**
// public HashedFileExtractToString(String fileName, int avgsize, double percentVary) { * Provide a size function for the fragment to be extracted. In this form, if the size function specifies a string
// this(fileName, avgsize - (int) (percentVary * avgsize), avgsize + (int) (percentVary * avgsize)); * size which is larger than the text image, it is truncated via modulo to fall within the text image size.
// } *
* @param filename The file name to be loaded
private void loadData() { * @param sizefunc A function which determines the size of the data to be loaded.
if (fileDataImage == null) { */
synchronized (HashedFileExtractToString.class) { @Example({"HashedFileExtractToString('data/adventures.txt',Uniform())", "return a fragment from adventures.txt from a random offset, based on the size function provided."})
if (fileDataImage == null) { public HashedFileExtractToString(String filename, Object sizefunc) {
CharBuffer image = NBIO.readCharBuffer(fileName); this.filename = filename;
fileDataImage = image; this.buf = NBIO.readCharBuffer(filename).asReadOnlyBuffer();
} sizeFunc = VirtDataConversions.adaptFunction(sizefunc, LongToIntFunction.class);
}
}
} }
@Override @Override
public String apply(long input) { public String apply(long input) {
int size = sizeFunc.applyAsInt(input) % buf.limit();
int pos = positionRange.applyAsInt(input);
int offset = positionRange.applyAsInt(input); pos = pos % (buf.limit() - size); // modulo by overrun if >0
int length = sizeRange.applyAsInt(input); return buf.subSequence(pos, pos + size).toString();
String sub = null;
try {
sub = fileDataImage.subSequence(offset, offset + length).toString();
} catch (Exception e) {
throw new RuntimeException(e);
}
return sub;
} }
public String toString() { public String toString() {
return getClass().getSimpleName() + ":" + minsize + ":" + maxsize; return getClass().getSimpleName() + ":" + filename;
} }
} }

View File

@@ -3,6 +3,9 @@ package io.nosqlbench.virtdata.library.basics.tests.long_string;
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_string.HashedFileExtractToString; import io.nosqlbench.virtdata.library.basics.shared.from_long.to_string.HashedFileExtractToString;
import org.junit.Test; import org.junit.Test;
import java.util.IntSummaryStatistics;
import java.util.function.LongUnaryOperator;
import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThat;
public class HashedFileExtractToStringTest { public class HashedFileExtractToStringTest {
@@ -10,12 +13,29 @@ public class HashedFileExtractToStringTest {
@Test @Test
public void testHashedFileBasic() { public void testHashedFileBasic() {
HashedFileExtractToString extract = HashedFileExtractToString extract =
new HashedFileExtractToString("data/lorem_ipsum_full.txt", 3, 3000); new HashedFileExtractToString("data/lorem_ipsum_full.txt", 3, 3000);
IntSummaryStatistics iss = new IntSummaryStatistics();
for (long cycle = 0; cycle < 50000; cycle++) { for (long cycle = 0; cycle < 50000; cycle++) {
String apply = extract.apply(cycle); String apply = extract.apply(cycle);
iss.accept(apply.length());
assertThat(apply.length()).isGreaterThanOrEqualTo(3); assertThat(apply.length()).isGreaterThanOrEqualTo(3);
assertThat(apply.length()).isLessThanOrEqualTo(3000); assertThat(apply.length()).isLessThanOrEqualTo(3000);
} }
System.out.println("Loaded examples from data/lorem_ipsum_full.txt:" + iss.toString());
} }
}
/**
 * Exercises the constructor form which takes a size function instead of a
 * fixed min/max range. The size function used here grows with the cycle
 * number, so later cycles request sizes which are reduced by the extractor
 * (via modulo against the loaded text size) before a fragment is taken.
 *
 * Beyond checking that no cycle throws, this verifies that every cycle
 * yields a non-null fragment and that the mapping is deterministic: the
 * same input cycle must always produce the same fragment.
 */
@Test
public void testHashedFileFunction() {
    HashedFileExtractToString extract =
        new HashedFileExtractToString("data/lorem_ipsum_full.txt", (LongUnaryOperator) ((long f) -> 32734 * f));
    IntSummaryStatistics iss = new IntSummaryStatistics();
    for (long cycle = 0; cycle < 50000; cycle++) {
        String apply = extract.apply(cycle);
        assertThat(apply).isNotNull();
        // The extractor is a pure function of the cycle; a second call must agree.
        assertThat(extract.apply(cycle)).isEqualTo(apply);
        iss.accept(apply.length());
    }
    // Summary of fragment lengths, printed for manual inspection only.
    System.out.println("Loaded examples from data/lorem_ipsum_full.txt:" + iss.toString());
}
}