virtdata changes for vector branch

This commit is contained in:
Jonathan Shook 2023-07-25 11:49:56 -05:00
parent 4fdd39fff9
commit d10c78150b
50 changed files with 2491 additions and 154 deletions

View File

@ -82,12 +82,6 @@
<groupId>org.apache.commons</groupId>
<artifactId>commons-statistics-distribution</artifactId>
</dependency>
<dependency>
<groupId>com.datastax.oss</groupId>
<artifactId>java-driver-core</artifactId>
<version>4.16.0</version>
<scope>compile</scope>
</dependency>
</dependencies>
</project>

View File

@ -84,8 +84,26 @@ public class ParsedTemplateMap implements LongFunction<Map<String, ?>>, StaticFi
* when rendering the full map with dynamic values.
*/
private final LinkedHashMap<String, Object> protomap = new LinkedHashMap<>();
/**
* Any auxiliary source of values to be applied beyond what is specified directly in the op fields.
* This includes, for example, the activity parameters which are allowed by the config model on
* an adapter. This means that you can specify defaults for an op field outside of the workload/op
* templates simply by providing them on the command line or activity parameters otherwise.
* This is exactly how the required op field `driver` works.
*/
private final List<Map<String, Object>> cfgsources;
private Map<String, Object> specmap;
/**
* This remembers the original template object so that diagnostic and debugging views
* may see the original specifiers, whether they are literals of any type, or a string
* value which is recognized as being or containing some dynamic span, i.e. bind points.
*/
private Map<String, Object> originalTemplateObject;
/**
* The bindings definitions from the raw op template data structure.
*/
private Map<String, String> bindings;
private final String name;
@ -100,7 +118,7 @@ public class ParsedTemplateMap implements LongFunction<Map<String, ?>>, StaticFi
// fields. This seems like the saner and less confusing approach, so implementing
// op field references should be left until it is requested if at all
private void applyTemplateFields(Map<String, Object> map, Map<String, String> bindings) {
this.specmap = map;
this.originalTemplateObject = map;
this.bindings = bindings;
map.forEach((k, v) -> {
if (v instanceof CharSequence charvalue) {
@ -701,9 +719,84 @@ public class ParsedTemplateMap implements LongFunction<Map<String, ?>>, StaticFi
return false;
}
/**
 * Look up the named field as a string template and, if one is found, consume the field
 * by removing it from the original template object.
 *
 * @param field the op field name to take
 * @return the string template for the field, or an empty optional when
 *     {@link #getAsStringTemplate(String)} yields nothing for it
 */
public Optional<ParsedTemplateString> takeAsOptionalStringTemplate(String field) {
    Optional<ParsedTemplateString> template = this.getAsStringTemplate(field);
    // Only a successful lookup consumes the field; a miss leaves the template object untouched.
    template.ifPresent(found -> originalTemplateObject.remove(field));
    return template;
}
/**
 * Take the raw (unparsed) specifier of the given field, removing the field from the
 * dynamic or static layer and from the proto map.
 *
 * <p>For a static field the raw specifier is the static value itself. For a dynamic field
 * it is read from the original template object, which retains the specifier as it was
 * written in the op template.
 * NOTE(review): assumes the original template object holds the specifier for every
 * dynamic field; confirm against {@code applyTemplateFields}.</p>
 *
 * @param field the op field name to take
 * @param <V> the type the caller expects the raw specifier to have (unchecked)
 * @return the raw specifier, or an empty optional if the field is in neither layer or has
 *     no recorded value
 */
@SuppressWarnings("unchecked")
public <V> Optional<V> takeAsOptionalRawSpecifier(String field) {
    if (dynamics.containsKey(field)) {
        // Remove from the layer which actually holds the field; the raw form lives in the
        // original template object, not in the compiled dynamic function.
        dynamics.remove(field);
        protomap.remove(field);
        return (Optional<V>) Optional.ofNullable(originalTemplateObject.get(field));
    }
    if (statics.containsKey(field)) {
        Object value = statics.remove(field);
        protomap.remove(field);
        // ofNullable: a static field mapped to null must not blow up with an NPE.
        return (Optional<V>) Optional.ofNullable(value);
    }
    return Optional.empty();
}
/**
 * Take the value of the specified field from the original op template, removing it from the
 * dynamic, static, and proto layers, or else reference it from the first config source which
 * defines it, without removal. Then, flatten any string, list, or map structure into a map of
 * named elements, and convert every string-like element to a string template.
 *
 * <p>Element names are built as {@code <op name>-verifier-<suffix>}, where the suffix is
 * {@code 0} for a single string, the list position for a list, and the entry key for a map.
 * Elements which are not character sequences are skipped.</p>
 *
 * @param fieldname the field to take the templates from
 * @return A map of templates, or an empty map if the field is not defined or is empty.
 */
public Map<String, ParsedTemplateString> takeAsNamedTemplates(String fieldname) {
    Object entry = originalTemplateObject.get(fieldname);
    if (entry != null) {
        dynamics.remove(fieldname);
        statics.remove(fieldname);
        protomap.remove(fieldname);
    } else {
        // Fall back to the auxiliary config sources; the first one defining the field wins.
        for (Map<String, Object> cfgsource : cfgsources) {
            if (cfgsource.containsKey(fieldname)) {
                entry = cfgsource.get(fieldname);
                break;
            }
        }
    }
    if (entry == null) {
        return Map.of();
    }

    String prefix = this.getName() + "-verifier-";
    Map<String, Object> elements = new LinkedHashMap<>();
    if (entry instanceof CharSequence chars) {
        elements.put(prefix + "0", chars.toString());
    } else if (entry instanceof List<?> list) {
        for (int i = 0; i < list.size(); i++) {
            // Each name must map to its own element, not to the first element of the list.
            elements.put(prefix + i, list.get(i));
        }
    } else if (entry instanceof Map<?, ?> map) {
        map.forEach((k, v) -> elements.put(prefix + k, v));
    }

    Map<String, ParsedTemplateString> parsedStringTemplates = new LinkedHashMap<>();
    elements.forEach((elementName, element) -> {
        if (element instanceof CharSequence text) {
            parsedStringTemplates.put(elementName, new ParsedTemplateString(text.toString(), this.bindings));
        }
    });
    return parsedStringTemplates;
}
public Optional<ParsedTemplateString> getAsStringTemplate(String fieldname) {
if (specmap.containsKey(fieldname)) {
Object fval = specmap.get(fieldname);
if (originalTemplateObject.containsKey(fieldname)) {
Object fval = originalTemplateObject.get(fieldname);
if (fval instanceof CharSequence) {
return Optional.of(new ParsedTemplateString(fval.toString(), this.bindings));
} else {
@ -993,11 +1086,12 @@ public class ParsedTemplateMap implements LongFunction<Map<String, ?>>, StaticFi
.append(k)
.append("->")
.append(
v ==null? specmap.get(k) : v.toString()
v ==null? originalTemplateObject.get(k) : v.toString()
).append("\n");
}
return sb.toString();
}
}

View File

@ -31,5 +31,7 @@ public enum Category {
objects,
periodic,
experimental,
combinitoric,
vectors,
HOF
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 nosqlbench
* Copyright (c) 2022-2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -89,11 +89,12 @@ public class BindingsTemplate {
*
* @param bindPairs A map of named binding specifiers
*/
public void addFieldBindings(Map<String, String> bindPairs) {
public BindingsTemplate addFieldBindings(Map<String, String> bindPairs) {
    // Record each pair as a bind point name with its specifier, in the map's iteration order.
    bindPairs.forEach((bindPointName, specifier) -> {
        this.bindPointNames.add(bindPointName);
        this.specifiers.add(specifier);
    });
    // Returning this instance allows calls to be chained.
    return this;
}
public String getDiagnostics() {

View File

@ -200,7 +200,7 @@ public class VirtDataComposer {
FunctionAssembly assembly = new FunctionAssembly();
boolean isThreadSafe = true;
diagnostics.trace("FUNCTION chain selected: (multi) '" + this.summarize(flattenedFuncs, " - ") + "'");
diagnostics.trace("FUNCTION chain selected: (multi):\n" + this.summarize(flattenedFuncs, " - "));
for (ResolvedFunction resolvedFunction : flattenedFuncs) {
try {
Object functionObject = resolvedFunction.getFunctionObject();

View File

@ -16,8 +16,10 @@
package io.nosqlbench.engine.api.templating;
import io.nosqlbench.virtdata.core.templates.ParsedTemplateString;
import org.junit.jupiter.api.Test;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@ -32,4 +34,28 @@ public class ParsedTemplateMapTest {
assertThat(ptm.getOpFieldNames()).isEqualTo(Set.of("string1"));
}
@Test
public void testTakeAsNamedTemplates() {
    // One op field of each shape which takeAsNamedTemplates flattens: string, list, and map.
    LinkedHashMap<String, Object> opFields = new LinkedHashMap<String, Object>(Map.of(
        "astring", "astring",
        "alist", List.of("listentry1", "listentry2"),
        "amap", Map.of("entry1", "val1", "entry2", "val2")
    ));
    ParsedTemplateMap ptm = new ParsedTemplateMap(
        "test2",
        opFields,
        new LinkedHashMap<>(Map.of()),
        List.of(Map.of())
    );

    // A plain string becomes a single element with index 0.
    Map<String, ParsedTemplateString> fromString = ptm.takeAsNamedTemplates("astring");
    assertThat(fromString).containsKey("test2-verifier-0");

    // A list yields one element per position.
    Map<String, ParsedTemplateString> fromList = ptm.takeAsNamedTemplates("alist");
    assertThat(fromList).containsKeys("test2-verifier-0", "test2-verifier-1");

    // A map yields one element per entry, named by the entry key.
    Map<String, ParsedTemplateString> fromMap = ptm.takeAsNamedTemplates("amap");
    assertThat(fromMap).containsKeys("test2-verifier-entry1", "test2-verifier-entry2");
    // TODO: Get actual testing bindings into this example
}
}

View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2022-2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.testmappers;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.LongFunction;
/**
 * A mapper which ignores its input and always yields the string it was constructed with.
 */
@ThreadSafeMapper
public class TestingStringFunc implements LongFunction<String> {

    /** The fixed value returned for every input. */
    private final String fixedValue;

    /**
     * @param stringValue the string to return from every call to {@link #apply(long)}
     */
    public TestingStringFunc(String stringValue) {
        fixedValue = stringValue;
    }

    /**
     * @param ignored the input value, which has no effect on the result
     * @return the string provided to the constructor
     */
    @Override
    public String apply(long ignored) {
        return fixedValue;
    }
}

View File

@ -43,7 +43,7 @@
<plugin>
<groupId>org.antlr</groupId>
<artifactId>antlr4-maven-plugin</artifactId>
<version>4.12.0</version>
<version>4.13.0</version>
<configuration>
<sourceDirectory>src/main/java/io/nosqlbench/virtdata/lang/grammars</sourceDirectory>
<arguments>

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 nosqlbench
* Copyright (c) 2022-2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -56,10 +56,6 @@ public class ListSizedStepped implements LongFunction<List<Object>> {
}
this.valueFuncs = VirtDataConversions.adaptFunctionList(funcs, LongFunction.class, Object.class);
}
/**
 * Create an instance whose size function yields the same fixed size for every input.
 *
 * @param size the constant value returned by the size function
 * @param funcs the element functions, adapted through
 *     {@code VirtDataConversions.adaptFunctionList} to {@code LongFunction} of {@code Object}
 */
public ListSizedStepped(int size, Object... funcs) {
// The lambda ignores its argument, so the size never varies with the input.
this.sizeFunc = s -> size;
this.valueFuncs = VirtDataConversions.adaptFunctionList(funcs, LongFunction.class, Object.class);
}
@Override
public List<Object> apply(long value) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 nosqlbench
* Copyright (c) 2022-2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -18,6 +18,7 @@ package io.nosqlbench.virtdata.library.basics.shared.from_long.to_long;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.Example;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.murmur.Murmur3F;
@ -35,6 +36,10 @@ import java.util.function.LongUnaryOperator;
@Categories({Category.general, Category.general})
public class Hash implements LongUnaryOperator {
/**
 * Create a hash function. There are no parameters and no work is done at construction.
 */
@Example({"Hash()","Create a hash function that takes a long and returns a positive long value"})
public Hash() {
}
private final transient ThreadLocal<Murmur3F> murmur3f_TL = ThreadLocal.withInitial(Murmur3F::new);
@Override

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 nosqlbench
* Copyright (c) 2022-2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -22,7 +22,7 @@ import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.api.bindings.VirtDataConversions;
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_int.Hash;
import io.nosqlbench.virtdata.library.basics.shared.util.CharsetMapping;
import io.nosqlbench.virtdata.library.basics.shared.util.Combiner;
import java.nio.CharBuffer;
import java.util.function.LongFunction;
@ -143,7 +143,7 @@ public class CharBufImage implements LongFunction<CharBuffer> {
}
private CharBuffer genBuf(String chars, int size, long seed) {
char[] charset = CharsetMapping.rangeFor(chars);
char[] charset = Combiner.rangeFor(chars);
CharBuffer newimage = CharBuffer.allocate(size);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 nosqlbench
* Copyright (c) 2022-2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -20,7 +20,7 @@ import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.Example;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.library.basics.shared.util.CharsetMapping;
import io.nosqlbench.virtdata.library.basics.shared.util.Combiner;
import java.nio.CharBuffer;
import java.util.function.LongFunction;
@ -52,8 +52,8 @@ public class Combinations implements LongFunction<String> {
@Example({"Combinations('0-9A-F;0-9A-F;0-9A-F;0-9A-F;')","two bytes of hexadecimal"})
@Example({"Combinations('A-9')","upper case alphanumeric"})
public Combinations(String spec) {
this.charsets = CharsetMapping.parseSpec(spec);
this.modulo = computeRadixFactors(this.charsets);
this.charsets = Combiner.parseSpec(spec);
this.modulo = Combiner.computeRadixFactors(this.charsets);
}
@Override
@ -69,15 +69,4 @@ public class Combinations implements LongFunction<String> {
return cb.toString();
}
/**
 * Compute the place value of each column for the given charsets, treating the last column
 * as the least significant position.
 *
 * @param charsets the character set for each column, whose length is the radix of that column
 * @return the place value of each column, with 1 for the last column
 * @throws ArithmeticException if the product of the radixes overflows a long
 */
private long[] computeRadixFactors(char[][] charsets) {
    int columns = charsets.length;
    long[] factors = new long[columns];
    long placeValue = 1L;
    int col = columns;
    // Walk from the least significant column to the most significant one.
    while (col-- > 0) {
        factors[col] = placeValue;
        placeValue = Math.multiplyExact(placeValue, charsets[col].length);
    }
    return factors;
}
}

View File

@ -0,0 +1,55 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.repeaters;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.Example;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;
/**
 * Repeat the incoming list into a new list, filling it to the given size.
 * The elements of the input are taken in order and cycled as many times as needed, so
 * an input which is longer than the target size is truncated.
 */
@ThreadSafeMapper
@Categories(Category.experimental)
public class RepeatList implements Function<List, List> {

    /** The number of elements in every list produced by {@link #apply(List)}. */
    private final int size;

    /**
     * Create a list repeater to build up a list from a smaller list.
     * @param size - the total size of the new list
     */
    @Example({"RepeatList(50)","repeat the incoming values into a new List of size 50"})
    public RepeatList(int size) {
        this.size = size;
    }

    /**
     * @param input the list of values to repeat
     * @return a new list of the configured size which cycles through the input values
     * @throws IllegalArgumentException if the input is empty while the configured size is
     *     greater than zero, since there is nothing to fill the list with
     */
    @Override
    public List apply(List input) {
        Object[] values = input.toArray();
        if (size > 0 && values.length == 0) {
            // Without this guard the modulo below fails with an opaque "/ by zero".
            throw new IllegalArgumentException(
                "RepeatList(" + size + ") can not fill a list from an empty input list"
            );
        }
        List<Object> repeated = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
            repeated.add(values[i % values.length]);
        }
        return repeated;
    }
}

View File

@ -1,113 +0,0 @@
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.util;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
public class CharsetMapping {

    /**
     * Parse the spec, yielding an array of character arrays. Each position in the spec delimited
     * by comma or semicolon is represented by an array. Each array is then constructed from
     * {@link #rangeFor(String)}.
     *
     * @param spec A range set specifier
     * @return An array of char arrays
     */
    public static char[][] parseSpec(String spec) {
        String[] ranges = spec.split("[,;]");
        char[][] cs = new char[ranges.length][];
        for (int i = 0; i < ranges.length; i++) {
            cs[i] = rangeFor(ranges[i]);
        }
        return cs;
    }

    /**
     * Parse the range and return the set of characters in an array. Any occurrences of a range
     * specifier like {@code a-z} are expanded into the two characters and every one in between,
     * in ordinal order. Otherwise, the characters are taken as they are presented. The two-character
     * escapes {@code \n} and {@code \r} are first replaced by the newline and carriage return
     * characters. Each range is built and sanity checked by {@link #rangeFor(String, String)} to
     * ensure ordering is valid as well as that the range bounds are in the printable range of
     * ordinal 32 to ordinal 126.
     *
     * @param range a character range specifier like 'a-z' or '1357'
     * @return An array of characters
     */
    public static char[] rangeFor(String range) {
        // String.replace is literal. The regex form replaceAll("\\n", "\n") matches an actual
        // newline and replaces it with itself, so it never unescaped anything.
        range = range.replace("\\n", "\n").replace("\\r", "\r");
        StringBuilder chars = new StringBuilder();
        int pos = 0;
        while (pos < range.length()) {
            if (range.length() > pos + 2 && range.charAt(pos + 1) == '-') {
                List<Character> rangeChars =
                    rangeFor(range.substring(pos, pos + 1), range.substring(pos + 2, pos + 3));
                for (char c : rangeChars) {
                    chars.append(c);
                }
                pos += 3;
            } else {
                chars.append(range.charAt(pos));
                pos += 1;
            }
        }
        return chars.toString().toCharArray();
    }

    /**
     * Create a list of characters from the US ASCII plane based on a start and end character.
     * Only the first character of each argument is used.
     *
     * @param startChar A single ASCII character
     * @param endChar A single ASCII character, must be equal to or come after startChar
     * @return A list of characters in the range
     * @throws RuntimeException if either bound is empty, is not a printable ASCII character,
     *     or if the end comes before the start
     */
    public static List<Character> rangeFor(String startChar, String endChar) {
        // Read the code points directly: encoding to US_ASCII silently maps any non-ASCII
        // character to '?', which would then pass the printable check.
        int start = firstCharOf(startChar);
        int end = firstCharOf(endChar);
        assertPrintable(start);
        assertPrintable(end);
        assertOrder(start, end);
        List<Character> chars = new ArrayList<>(end - start + 1);
        for (int code = start; code <= end; code++) {
            chars.add((char) code);
        }
        return chars;
    }

    /** Return the first character of a range bound, rejecting an empty bound with a clear error. */
    private static int firstCharOf(String bound) {
        if (bound.isEmpty()) {
            throw new IllegalArgumentException("A range bound must contain a character, but it was empty.");
        }
        return bound.charAt(0);
    }

    /** Fail if the end of a range comes before its start. */
    private static void assertOrder(int start, int end) {
        if (end < start) {
            throw new RuntimeException("char '" + (char) end + "' (" + end + ") occurs after '" + (char) start + "' (" + start + "). Are you sure this is the right spec? (reverse the order)");
        }
    }

    /** Fail if the code is outside of the printable ASCII range of 32 to 126 inclusive. */
    private static void assertPrintable(int asciiCode) {
        if (asciiCode > 126 || asciiCode < 32) {
            throw new RuntimeException("ASCII character for code " + asciiCode + " is outside the range of printable characters.");
        }
    }
}

View File

@ -0,0 +1,424 @@
/*
* Copyright (c) 2022-2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.util;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.lang.reflect.Array;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.function.LongFunction;
/**
* <H1>Combiner - a <A href="https://en.wikipedia.org/wiki/Combinatorics">combinatoric</A> toolkit for NoSQLBench</H1>
* <HR></HR>
* <H2>Synopsis</H2>
* <P>Combiner is the core implementation of a combinatoric toolkit which is used
* by other NoSQLBench functions in a more type-specific way. It allows for a common approach
* to encoding unique values across a range of dimensions (which can be non-uniform)
* with an affine mapping between different forms of data.</P>
*
* <HR></HR>
* <H2>Specifier</H2>
* <P>The specifier required by the constructor is a way to specify a range of character sets, each representing both
* the per-value labeling as well as the radix of each position in the associated index, value, or character position.
* Each position is delimited from the others with commas or semicolons. Each position can be either a single printable
* character or a range of characters separated by
* '-'.
* Optionally, you can repeat a position with a multiplier in the form of '*n' where n is any valid number.
* </P>
* <P>Examples:
* <UL>
* <LI>"0-9A-F" - hexadecimal characters, one digit only ; 0123456789ABCDEF</LI>
* <LI>"0-9*12" - characters 0-9 in 12 digits, symbolic of values 000000000000 (0) .. 999999999999</LI>
* <LI>"5;5;5;-;8;6;7;-;5;3;0;9" - 12 digits with one character each, effectively 555-867-5309, a single value</LI>
* <LI>"0-9;*2_=24j36*5*1" - a somewhat random pattern with char sets [0123456789] and [*2_=24j36*5], showing how '*1'
* at the end can be used to escape '*5'</LI>
* </UL>
* The specifier is parsed into a non-uniform radix model, where the characters for each position represent a numerical
* encoding. As such, the cardinalities of each position are multiplied together to determine the total cardinality of
* the specified pattern. Any total cardinality below Long.MAX_VALUE, or 9,223,372,036,854,775,807 is allowed, and any
* combinations which would overflow this value will throw an error.
* </P>
*
* <HR></HR>
* <H2>Value Function</H2>
* <p>
* The function provided in the constructor is used to symbolically map the characters in the encoding string to a value
* of any type. The value function will be called with number of distinct values up the the cardinality of the largest
* position in the radix model. For example, a specifier of `A-Za-z0-9` would provide an input range from 0 to 61
* inclusive to the value function. It is the combination of positions and unique values which provides the overall
* cardinality, although the value function itself is responsible for the relatively lower cardinality elements which
* are combined together to create higher-cardinality value arrays.
* </P>
*
* <HR></HR>
* <H2>Types and Forms</H2>
*
* <P>Each form represents one way of seeing the data for a given cycle:
* <OL>
* <LI><B>ordinal</B> (long) - also known as the cycle, or input. This is an enumeration of all distinct
* combinations.</LI>
* <LI><B>indexes</B> (int[]) - an array of indexes, one for each position in the specifier and thus each element in
* the
* array
* or character in the encoding.</LI>
* <LI><B>encoding</B> (String) - a string which encodes the ordinal and the indexes in a convenient label which is
* unique
* within the range of possible values.</LI>
* <LI><B>(values) array (T[])</B> - An array of the type T which can be provided via a mapping function. This is a
* mapping from the
* indexes through the provided value function.</LI>
* </OL>
* </P>
*
* <HR></HR>
* <H2>Mapping between forms</H2>
*
* <P>The array value can be derived with {@link #apply(long)}, {@link #getArray(int[])} (int[])}, and
* {@link #getArray(String)},
* given ordinal, indexes, or encoding as a starting point, respectively. This all ultimately use the one-way
* function which you provide, thus you can't go from array form to the others.</P>
*
* <P>Mapping between the other three is fairly trivial:</P>
* <UL>
* <LI>You can get indexes from ordinal and encoding with {@link #getIndexes(long)} and
* {@link #getArray(String)}.</LI>
* <LI>You can get encoding from ordinal and indexes with {@link #getEncoding(long)} and
* {@link #getEncoding(int[])}.</LI>
* <LI>You can get ordinal from indexes or encoding with {@link #getOrdinal(int[])} and
* {@link #getOrdinal(String)}.</LI>
* </UL>
* </P>
* <p>
* This makes it easy to derive textual identifiers for specific combinations of elements such as a vector, use them
* for
* cross-checks such as with correctness testing, and represent specific test values in a very convenient form within
* deterministic testing harnesses like NoSQLBench.
*
* @param <T>
* The generic type of the value which is mapped into each array position
*/
@ThreadSafeMapper
@Categories({Category.combinitoric, Category.conversion})
public class Combiner<T> implements LongFunction<T[]> {
/**
* converts an index for a given column position into a value type.
*/
private final LongFunction<T> elementFunction;
/**
* Used for instancing the correct type of array, since arrays can't be reified from generics
*/
private final Class<? extends T> elementClazz;
/**
* The columnar character sequences which represent radix values
*/
private final char[][] charsets;
/**
* The columnar radix factors, cached
*/
private final long[] modulo;
/**
* Columnar indexes from the character to the index values, for reverse mapping
*/
private final int[][] inverted;
/**
* Construct a combiner which can compose unique combinations of array data.
*
* @param spec
* The string specifier, as explained in {@link Combiner} docs.
* @param elementFunction
* The function that indexes into a unique population of T elements
* @param elementClazz
* The component type for the values array which are produced by {@link #apply(long)}
*/
public Combiner(String spec, LongFunction<T> elementFunction, Class<? extends T> elementClazz) {
this.charsets = Combiner.parseSpec(spec);
this.elementFunction = elementFunction;
this.elementClazz = elementClazz;
this.modulo = computeRadixFactors(charsets);
this.inverted = invertedIndexFor(this.charsets);
}
/**
* Parse the spec, yielding an array of character arrays. each position in the spec delimited
* by comma or semicolon is represented by an array. Each array is then constructed from
* {@link #rangeFor(String)}.
*
* @param rangesSpec
* A range set specifier
* @return An array of char arrays
*/
public static char[][] parseSpec(String rangesSpec) {
String[] ranges = rangesSpec.split("[,;]");
List<String> specs = new ArrayList<>();
for (String range : ranges) {
if (range.matches("(.*?)\\*(\\d+)")) {
int rangeAt = range.lastIndexOf('*');
int times = Integer.parseInt(range.substring(rangeAt + 1));
for (int i = 0; i < times; i++) {
specs.add(range.substring(0, rangeAt));
}
} else {
specs.add(range);
}
}
char[][] cs = new char[specs.size()][];
for (int i = 0; i < specs.size(); i++) {
char[] range = rangeFor(specs.get(i));
cs[i] = range;
}
return cs;
}
/**
* Parse the range and return set of characters in an array. Any occurrences of a range specifier
* like {@code a-z} are expanded into the two characters and every on in between, in ordinal order.
* Otherwise, the characters are taken as they are presented. Each range is built and sanity
* checked by {@link #rangeFor} to ensure ordering is valid as well as that the characters are
* all in the printable range of ordinal 32 to ordinal 126.
*
* @param range
* a character range specifier like 'a-z' or '1357'
* @return An array of characters
*/
public static char[] rangeFor(String range) {
range = range.replaceAll("\\n", "\n").replaceAll("\\r", "\r");
List<Character> chars = new ArrayList<>();
int pos = 0;
while (pos < range.length()) {
if (range.length() > pos + 2 && range.charAt(pos + 1) == '-') {
List<Character> rangeChars = rangeFor(range.substring(pos, pos + 1), range.substring(pos + 2, pos + 3));
chars.addAll(rangeChars);
pos += 3;
} else {
chars.add(range.substring(pos, pos + 1).charAt(0));
pos += 1;
}
}
char[] charAry = new char[chars.size()];
for (int i = 0; i < chars.size(); i++) {
charAry[i] = chars.get(i);
}
return charAry;
}
/**
* Create a list of characters from the US ASCII plane based on a start and end character.
*
* @param startChar
* A single ASCII character
* @param endChar
* A single ASCII character, must be equal to or come after startChar
* @return A list of characters in the range
*/
public static List<Character> rangeFor(String startChar, String endChar) {
int start = startChar.getBytes(StandardCharsets.US_ASCII)[0];
int end = endChar.getBytes(StandardCharsets.US_ASCII)[0];
assertPrintable(start);
assertPrintable(end);
assertOrder(start, end);
List<Character> chars = new ArrayList<>();
ByteBuffer bb = ByteBuffer.allocate(1);
for (int i = start; i <= end; i++) {
bb.clear();
bb.put(0, (byte) i);
CharBuffer decoded = StandardCharsets.US_ASCII.decode(bb);
chars.add(decoded.get(0));
}
return chars;
}
private static void assertOrder(int start, int end) {
if (end < start) {
throw new RuntimeException("char '" + (char) end + "' (" + end + ") occurs after '" + (char) start + "' (" + start + "). Are you sure this is the right spec? (reverse the order)");
}
}
private static void assertPrintable(int asciiCode) {
if (asciiCode > 126 || asciiCode < 32) {
throw new RuntimeException("ASCII character for code " + asciiCode + " is outside the range of printable characters.");
}
}
public static int[][] invertedIndexFor(String charsetsSpecifier) {
char[][] chars = parseSpec(charsetsSpecifier);
return invertedIndexFor(chars);
}
public static int[][] invertedIndexFor(char[][] charsetColumns) {
int[][] inverted = new int[charsetColumns.length][];
for (int charsetIdx = 0; charsetIdx < charsetColumns.length; charsetIdx++) {
char[] charsForColumn = charsetColumns[charsetIdx];
inverted[charsetIdx] = indexesByChar(charsForColumn);
}
return inverted;
}
private static int[] indexesByChar(char[] charsForColumn) {
int maxval = Integer.MIN_VALUE;
for (char c : charsForColumn) {
maxval = (int) c > maxval ? (int) c : maxval;
}
int[] idx = new int[maxval + 1];
Arrays.fill(idx, -1);
for (int i = 0; i < charsForColumn.length; i++) {
idx[charsForColumn[i]] = i;
}
return idx;
}
/**
* Return an array of {@link T} elements by indexing into the sequence
* of character sets and their relative cardinality to derive column-specific
* index, and then converting them to the type T through the provided function.
*
* @param value
* the function argument
* @return a T which is identified by the provided value, unique if value is
* less than the maximum number of combinations, but repeated otherwise
*/
@Override
public T[] apply(long value) {
@SuppressWarnings("Unchecked")
T[] ary = (T[]) Array.newInstance(elementClazz, charsets.length);
for (int colIdx = 0; colIdx < charsets.length; colIdx++) {
int valueSelector = (int) ((value / modulo[colIdx]) % Integer.MAX_VALUE);
ary[colIdx] = elementFunction.apply(valueSelector);
value %= modulo[colIdx];
}
return ary;
}
/**
* @param indexes
* indexes derived from {@link #getIndexes(long)}
* @return a T[]
*/
public T[] getArray(int[] indexes) {
T[] ary = (T[]) Array.newInstance(elementClazz, charsets.length);
for (int colIdx = 0; colIdx < indexes.length; colIdx++) {
ary[colIdx] = elementFunction.apply(indexes[colIdx]);
}
return ary;
}
public T[] getArray(String encoding) {
long ordinal = getOrdinal(encoding);
return apply(ordinal);
}
public String getEncoding(long ordinal) {
return getEncoding(getIndexes(ordinal));
}
public String getEncoding(int[] indexes) {
StringBuilder sb = new StringBuilder(charsets.length);
for (int i = 0; i < indexes.length; i++) {
sb.append(charsets[i][indexes[i]]);
}
return sb.toString();
}
/**
* Get the indexes directly which are used by {@link #apply(long)}
*
* @param value
* @return an offset array for each column in the provided charset specifiers
*/
public int[] getIndexes(long value) {
int[] ary = new int[charsets.length];
for (int colIdx = 0; colIdx < charsets.length; colIdx++) {
int valueSelector = (int) ((value / modulo[colIdx]) % Integer.MAX_VALUE);
ary[colIdx] = valueSelector;
value %= modulo[colIdx];
}
return ary;
}
/**
 * Translate an encoding back into per-column indexes via the inverted character lookup.
 *
 * @param encoding
 *     the string encoding for the given ordinal
 * @return the indexes used to select a value from the value function for each position in the output array
 */
public int[] getIndexes(String encoding) {
    final int[] positions = new int[charsets.length];
    final char[] symbols = encoding.toCharArray();
    int col = 0;
    while (col < positions.length) {
        positions[col] = inverted[col][symbols[col]];
        col++;
    }
    return positions;
}
/**
 * Using the provided name, derive the ordinal value which matches it. Each character is
 * mapped to its column index through the inverted lookup and weighted by the column's
 * radix factor.
 *
 * @param name
 *     - the textual name, expressed as an ASCII string
 * @return the long which can be used to construct the matching name or related array.
 */
public long getOrdinal(String name) {
    long total = 0;
    int col = 0;
    for (char symbol : name.toCharArray()) {
        total += (modulo[col] * inverted[col][symbol]);
        col++;
    }
    return total;
}
/**
 * Using the provided column offsets, derive the ordinal value which matches them.
 * Each index is weighted by the cached radix factor of its column and the products
 * are summed.
 *
 * @param indexes
 *     - the indexes used to derive an array of values, or equivalently a name
 * @return the long which can be used to construct the matching name or related array.
 */
public long getOrdinal(int[] indexes) {
    long ordinal = 0;
    for (int i = 0; i < indexes.length; i++) {
        // Plain multiplication only: the radix factors are shared by apply() and
        // getIndexes(), so they must never be written to here (a compound "*=" would
        // permanently corrupt them after the first call).
        ordinal += modulo[i] * indexes[i];
    }
    return ordinal;
}
/**
 * Compute the place value (radix factor) of every column, working from the last column
 * backwards. The last column has factor 1; each earlier column's factor is the product
 * of the sizes of all columns after it.
 *
 * @param charsets
 *     the per-column character sets; only their lengths are used
 * @return one radix factor per column
 * @throws ArithmeticException
 *     if the running product overflows a long
 */
public static long[] computeRadixFactors(char[][] charsets) {
    final long[] factors = new long[charsets.length];
    long placeValue = 1L;
    int col = charsets.length;
    while (col-- > 0) {
        factors[col] = placeValue;
        placeValue = Math.multiplyExact(placeValue, charsets[col].length);
    }
    return factors;
}
}

View File

@ -0,0 +1,182 @@
/*
* Copyright (c) 2022-2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.util;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.Arrays;
import java.util.function.LongFunction;
import java.util.function.LongToDoubleFunction;
/**
 * For comprehensive docs on how this works, please see the javadocs for
 * {@link Combiner}&lt;T&gt;. This class is merely a primitive specialization
 * which produces {@code double[]} values.
 */
@ThreadSafeMapper
@Categories({Category.combinitoric, Category.conversion})
public class DoubleCombiner implements LongFunction<double[]> {

    /**
     * converts an index for a given column position into a double value.
     */
    private final LongToDoubleFunction elementFunction;

    /**
     * The columnar character sequences which represent radix values
     */
    private final char[][] charsets;

    /**
     * The columnar radix factors, cached. These are computed once in the constructor
     * and are only ever read afterwards.
     */
    private final long[] modulo;

    /**
     * Columnar indexes from the character to the index values, for reverse mapping
     */
    private final int[][] inverted;

    /**
     * Construct a combiner which can compose unique combinations of array data.
     *
     * @param spec
     *     The string specifier, as explained in {@link Combiner} docs.
     * @param elementFunction
     *     The function that maps a column index to a double element
     */
    public DoubleCombiner(String spec, LongToDoubleFunction elementFunction) {
        this.charsets = Combiner.parseSpec(spec);
        this.elementFunction = elementFunction;
        this.modulo = computeRadixFactors(charsets);
        this.inverted = Combiner.invertedIndexFor(this.charsets);
    }

    /**
     * @param spec
     *     The string specifier to parse
     * @return the size of the largest character set among the columns of the spec, or 0 if there are none
     */
    protected static long maxRadixDigits(String spec) {
        return Arrays.stream(Combiner.parseSpec(spec)).mapToInt(c -> c.length).max().orElse(0);
    }

    /**
     * Decompose the value into one index per column using the radix factors, and map
     * each index to a double with the element function.
     *
     * @param value
     *     the function argument
     * @return a double[] with one element per column
     */
    @Override
    public double[] apply(long value) {
        double[] ary = new double[charsets.length];
        for (int colIdx = 0; colIdx < charsets.length; colIdx++) {
            int valueSelector = (int) ((value / modulo[colIdx]) % Integer.MAX_VALUE);
            ary[colIdx] = elementFunction.applyAsDouble(valueSelector);
            // carry only the remainder forward to the next column
            value %= modulo[colIdx];
        }
        return ary;
    }

    /**
     * @param indexes
     *     indexes derived from {@link #getIndexes(long)}
     * @return a double[] sized to the number of columns, filled from the provided indexes
     */
    public double[] getArray(int[] indexes) {
        double[] ary = new double[charsets.length];
        for (int colIdx = 0; colIdx < indexes.length; colIdx++) {
            ary[colIdx] = elementFunction.applyAsDouble(indexes[colIdx]);
        }
        return ary;
    }

    /**
     * @param encoding
     *     the string encoding, as accepted by {@link #getOrdinal(String)}
     * @return the result of {@link #apply(long)} for the ordinal of the encoding
     */
    public double[] getArray(String encoding) {
        long ordinal = getOrdinal(encoding);
        return apply(ordinal);
    }

    /**
     * @param ordinal
     *     the ordinal value to encode
     * @return the string encoding built from the indexes of {@link #getIndexes(long)}
     */
    public String getEncoding(long ordinal) {
        return getEncoding(getIndexes(ordinal));
    }

    /**
     * @param indexes
     *     one offset per column, each selecting a character from that column's charset
     * @return the concatenated characters, one per provided index
     */
    public String getEncoding(int[] indexes) {
        StringBuilder sb = new StringBuilder(charsets.length);
        for (int i = 0; i < indexes.length; i++) {
            sb.append(charsets[i][indexes[i]]);
        }
        return sb.toString();
    }

    /**
     * Get the indexes directly which are used by {@link #apply(long)}
     *
     * @param value
     *     the ordinal value to decompose into per-column offsets
     * @return an offset array for each column in the provided charset specifiers
     */
    public int[] getIndexes(long value) {
        int[] ary = new int[charsets.length];
        for (int colIdx = 0; colIdx < charsets.length; colIdx++) {
            int valueSelector = (int) ((value / modulo[colIdx]) % Integer.MAX_VALUE);
            ary[colIdx] = valueSelector;
            value %= modulo[colIdx];
        }
        return ary;
    }

    /**
     * @param encoding
     *     the string encoding for the given ordinal
     * @return the indexes used to select a value from the value function for each position in the output array
     */
    public int[] getIndexes(String encoding) {
        int[] indexes = new int[charsets.length];
        char[] chars = encoding.toCharArray();
        for (int i = 0; i < charsets.length; i++) {
            indexes[i] = inverted[i][chars[i]];
        }
        return indexes;
    }

    /**
     * Using the provided name, derive the ordinal value which matches it.
     *
     * @param name
     *     - the textual name, expressed as an ASCII string
     * @return the long which can be used to construct the matching name or related array.
     */
    public long getOrdinal(String name) {
        char[] chars = name.toCharArray();
        long ordinal = 0;
        for (int i = 0; i < chars.length; i++) {
            ordinal += (modulo[i] * inverted[i][chars[i]]);
        }
        return ordinal;
    }

    /**
     * Using the provided column offsets, derive the ordinal value which matches them.
     *
     * @param indexes
     *     - the indexes used to derive an array of values, or equivalently a name
     * @return the long which can be used to construct the matching name or related array.
     */
    public long getOrdinal(int[] indexes) {
        long ordinal = 0;
        for (int i = 0; i < indexes.length; i++) {
            // Plain multiplication only: the radix factors are shared with apply() and
            // getIndexes(), so writing to them here (e.g. with "*=") would corrupt every
            // later call on this instance.
            ordinal += modulo[i] * indexes[i];
        }
        return ordinal;
    }

    /**
     * Compute the radix factor of every column, from the last column backwards. The last
     * column has factor 1; each earlier column's factor is the product of the sizes of
     * all columns after it.
     *
     * @param charsets
     *     the per-column character sets; only their lengths are used
     * @return one radix factor per column
     * @throws ArithmeticException
     *     if the running product overflows a long
     */
    public static long[] computeRadixFactors(char[][] charsets) {
        long modulo = 1L;
        long[] m = new long[charsets.length];
        for (int i = charsets.length - 1; i >= 0; i--) {
            m[i] = modulo;
            modulo = Math.multiplyExact(modulo, charsets[i].length);
        }
        return m;
    }
}

View File

@ -0,0 +1,53 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.util;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.Arrays;
import java.util.function.Function;
/**
 * Render any object as a String. Arrays (primitive or Object[]) are rendered with
 * {@link Arrays#toString}, everything else, including null, with {@link String#valueOf(Object)}.
 */
@ThreadSafeMapper
@Categories(Category.diagnostics)
public class Stringify implements Function<Object,String> {

    @Override
    public String apply(Object o) {
        // One guard per array type; each returns immediately when it matches.
        if (o instanceof float[] floats) {
            return Arrays.toString(floats);
        }
        if (o instanceof double[] doubles) {
            return Arrays.toString(doubles);
        }
        if (o instanceof long[] longs) {
            return Arrays.toString(longs);
        }
        if (o instanceof int[] ints) {
            return Arrays.toString(ints);
        }
        if (o instanceof Object[] objects) {
            return Arrays.toString(objects);
        }
        if (o instanceof byte[] bytes) {
            return Arrays.toString(bytes);
        }
        if (o instanceof boolean[] booleans) {
            return Arrays.toString(booleans);
        }
        if (o instanceof char[] chars) {
            return Arrays.toString(chars);
        }
        if (o instanceof short[] shorts) {
            return Arrays.toString(shorts);
        }
        // Not an array (or null): fall back to the generic rendering.
        return String.valueOf(o);
    }
}

View File

@ -0,0 +1,56 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.library.basics.shared.vectors.algorithms.CircleAlgorithm;
import java.util.List;
import java.util.function.LongFunction;
/**
 * Produce a vector for each input value by delegating to a {@link CircleAlgorithm}
 * which is loaded by class name. The input is reduced modulo the configured count
 * before it is handed to the algorithm.
 */
@Categories(Category.general)
@ThreadSafeMapper
public class CircleVectors implements LongFunction<List<Object>> {

    private final int circleCount;
    private final CircleAlgorithm algorithm;

    /**
     * @param circleCount
     *     the modulus applied to input values, also passed to the algorithm on each call
     * @param algorithmClass
     *     the fully qualified name of a class implementing {@link CircleAlgorithm},
     *     which must have a no-argument constructor
     * @throws Exception
     *     if the class can not be loaded or instantiated
     * @throws RuntimeException
     *     if the loaded class does not implement {@link CircleAlgorithm}
     */
    public CircleVectors(int circleCount, String algorithmClass) throws Exception {
        this.circleCount = circleCount;
        // Class#newInstance() is deprecated; going through the no-arg constructor is the
        // documented replacement.
        Object algo = Class.forName(algorithmClass).getDeclaredConstructor().newInstance();
        if (!(algo instanceof CircleAlgorithm)) {
            throw new RuntimeException("The class '" + algorithmClass +
                "' does not implement CircleAlgorithm");
        }
        algorithm = (CircleAlgorithm) algo;
    }

    /**
     * @param value
     *     the input ordinal
     * @return the vector produced by the algorithm for {@code value % circleCount}
     */
    @Override
    public List<Object> apply(long value) {
        return algorithm.getVector((value % circleCount), circleCount);
    }

    /**
     * @return the count this function was configured with
     */
    public int getCircleCount() {
        return circleCount;
    }

    /**
     * @return the algorithm instance which was loaded at construction time
     */
    public CircleAlgorithm getAlgorithm() {
        return algorithm;
    }
}

View File

@ -0,0 +1,25 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.algorithms;
import java.util.List;
/**
 * Strategy interface for mapping an ordinal onto a vector, given the total number of
 * positions available.
 */
public interface CircleAlgorithm {
/**
 * @param value the ordinal to map
 * @param circleCount the total count the ordinal is taken from
 * @return the vector for the given ordinal, as a list of components
 */
List<Object> getVector(long value, long circleCount);
/**
 * NOTE(review): the intended meaning and unit of this value are not established by the
 * interface itself - confirm with the implementations before relying on it.
 *
 * @return the minimum vector angle reported by the implementation
 */
double getMinimumVectorAngle();
}

View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.algorithms;
import java.util.List;
/**
 * A {@link CircleAlgorithm} which spreads points over the unit sphere: the y component
 * steps linearly from 1 down to -1 as the ordinal goes from 0 to circleCount-1, and the
 * x/z components rotate by a fixed angle step per ordinal.
 */
public class GoldenAngle implements CircleAlgorithm {

    // NOTE(review): 137.5 looks like the golden angle expressed in degrees, but it is fed
    // to Math.cos/Math.sin below, which take radians - confirm whether this is intended.
    private final static double ANGLE_STEP = 137.5;

    /**
     * @param value the ordinal of the point to produce
     * @param circleCount the total number of points; a value of 1 makes the y divisor zero
     * @return the x, y and z components as Float values
     */
    @Override
    public List<Object> getVector(long value, long circleCount) {
        final double fraction = value / (double) (circleCount - 1);
        final double y = 1 - fraction * 2;
        // radius of the horizontal slice of the unit sphere at height y
        final double sliceRadius = Math.sqrt(1 - y * y);
        final double theta = ANGLE_STEP * value;
        final float xComponent = (float) (Math.cos(theta) * sliceRadius);
        final float zComponent = (float) (Math.sin(theta) * sliceRadius);
        return List.of(xComponent, (float) y, zComponent);
    }

    /**
     * @return always 0 in this implementation
     */
    @Override
    public double getMinimumVectorAngle() {
        return 0;
    }
}

View File

@ -0,0 +1,39 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.algorithms;
import java.util.List;
/**
 * A {@link CircleAlgorithm} which derives a longitude and a latitude from the ordinal
 * and converts them to a point on the unit sphere.
 */
public class LatLonBased implements CircleAlgorithm {

    /**
     * @param value the ordinal of the point to produce
     * @param circleCount the total number of points; a value of 1 makes the latitude
     *     divisor zero, which yields NaN components
     * @return the x, y and z components as Float values
     */
    @Override
    public List<Object> getVector(long value, long circleCount) {
        // longitude sweeps one full turn over circleCount steps
        double longitude = 2 * Math.PI * value / circleCount;
        // the asin argument steps linearly from 1 to -1 over ordinals 0..circleCount-1
        double latitude = Math.asin(1 - 2 * (double) value / (circleCount - 1));
        double x = Math.cos(latitude) * Math.cos(longitude);
        double y = Math.cos(latitude) * Math.sin(longitude);
        double z = Math.sin(latitude);
        return List.of((float)x, (float)y, (float)z);
    }

    /**
     * @return always 0 in this implementation
     */
    @Override
    public double getMinimumVectorAngle() {
        return 0;
    }
}

View File

@ -0,0 +1,56 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
/**
 * Precompute the interior double[] values to use as a LUT. Every value of the wrapped
 * sequence, from index 0 up to its cardinality, is computed once at construction time
 * and served from memory afterwards.
 */
@ThreadSafeMapper
@Categories(Category.experimental)
public class DoubleArrayCache extends VectorSequence {

    private final VectorSequence function;
    private final double[][] cache;

    /**
     * @param function
     *     the sequence to pre-compute
     * @throws RuntimeException
     *     if the cardinality of the sequence is too large to be held in a single array
     */
    public DoubleArrayCache(VectorSequence function) {
        super(function.getCardinality());
        this.function=function;
        // The cache is a Java array, so its size must fit in an int. Checking against any
        // larger bound would let the narrowing cast below wrap to a wrong or negative size.
        if (function.getCardinality()>Integer.MAX_VALUE) {
            throw new RuntimeException("you are trying to pre-compute and cache " + function.getCardinality() + " elements. Too many! Compute instead without caching.");
        }
        int size = (int)function.getCardinality();
        this.cache = new double[size][];
        for (int idx = 0; idx < cache.length; idx++) {
            cache[idx]=function.apply(idx);
        }
    }

    /**
     * @return the dimensions reported by the wrapped sequence
     */
    @Override
    public long getDimensions() {
        return function.getDimensions();
    }

    /**
     * @param value
     *     the input ordinal, which is wrapped onto the cache size
     * @return the cached array for {@code value % cardinality}; the same array instance is
     *     returned on every call for a given slot
     */
    @Override
    public double[] apply(long value) {
        return cache[(int)(value % cache.length)];
    }
}

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
 * Precompute the interior double values to use as a LUT. Every value of the wrapped
 * sequence, from index 0 up to its cardinality, is computed once at construction time
 * and served from memory afterwards.
 */
@ThreadSafeMapper
@Categories(Category.experimental)
public class DoubleCache extends DoubleSequence {

    private final static Logger logger = LogManager.getLogger(DoubleCache.class);
    private final double[] cache;

    /**
     * @param sequence
     *     the sequence to pre-compute
     * @throws IllegalArgumentException
     *     if the cardinality of the sequence is too large to be held in a single array
     */
    public DoubleCache(DoubleSequence sequence) {
        super(sequence.cardinality);
        final long requested = sequence.getCardinality();
        // The cache is a Java array, so its size must fit in an int. Without this check the
        // narrowing cast below would silently wrap to a wrong or negative size.
        if (requested > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("unable to pre-compute and cache " + requested
                + " elements, which is more than a single array can hold.");
        }
        if (sequence.cardinality > 1000000) {
            logger.warn("initializing cache with {} elements, which seems high.", sequence.cardinality);
        }
        this.cache = new double[(int) requested];
        for (int i = 0; i < cache.length; i++) {
            this.cache[i] = sequence.applyAsDouble(i);
        }
    }

    /**
     * @param value
     *     the input ordinal, which is wrapped onto the cache size
     * @return the cached value for {@code value % cardinality}
     */
    @Override
    public double applyAsDouble(long value) {
        return cache[(int)(value%cache.length)];
    }
}

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import java.util.function.LongToDoubleFunction;
/**
 * A DoubleSequence is a map between a set of ordinal values and double values which
 * also declares how many values it has. Ideally, the values are computed with
 * closed-form functions. If not, they should be pre-computed and cached.
 */
public abstract class DoubleSequence implements LongToDoubleFunction {

    /**
     * The cardinality given at construction time, directly readable by subclasses.
     */
    protected final long cardinality;

    /**
     * @param cardinality
     *     the number of values this sequence declares
     */
    public DoubleSequence(long cardinality) {
        this.cardinality = cardinality;
    }

    /**
     * @return the cardinality this sequence was constructed with
     */
    public long getCardinality() {
        return cardinality;
    }
}

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.Function;
/**
 * Left-pad the incoming array with zero-valued doubles so that it is sized up to at least the
 * given size. If it is already at least that size, the same array is passed through as-is.
 */
@ThreadSafeMapper
@Categories({Category.experimental, Category.vectors})
public class DoubleVectorPadLeft implements Function<double[], double[]> {

    private final int size;

    public DoubleVectorPadLeft(int size) {
        this.size = size;
    }

    @Override
    public double[] apply(double[] doubles) {
        if (size <= doubles.length) {
            return doubles;
        }
        // number of leading zero slots in front of the copied input
        final int leading = size - doubles.length;
        final double[] padded = new double[size];
        System.arraycopy(doubles, 0, padded, leading, doubles.length);
        return padded;
    }
}

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.Function;
/**
 * Right-pad the incoming array with zero-valued doubles so that it is sized up to at least the
 * given size. If it is already at least that size, the same array is passed through as-is.
 */
@ThreadSafeMapper
@Categories({Category.experimental, Category.vectors})
public class DoubleVectorPadRight implements Function<double[], double[]> {

    private final int size;

    public DoubleVectorPadRight(int size) {
        this.size = size;
    }

    @Override
    public double[] apply(double[] doubles) {
        if (size <= doubles.length) {
            return doubles;
        }
        // size is strictly larger here, so copyOf keeps every input element and
        // zero-fills the remaining tail.
        return java.util.Arrays.copyOf(doubles, size);
    }
}

View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.Function;
/**
 * Prepend the given number of zero-valued doubles to the incoming array, always
 * returning a new array.
 */
@ThreadSafeMapper
@Categories({Category.experimental,Category.vectors})
public class DoubleVectorPrefix implements Function<double[],double[]> {

    private final int size;

    public DoubleVectorPrefix(int size) {
        this.size = size;
    }

    @Override
    public double[] apply(double[] doubles) {
        final int inputLength = doubles.length;
        final double[] prefixed = new double[size + inputLength];
        // the input lands after the first `size` slots, which keep their default of 0.0
        System.arraycopy(doubles, 0, prefixed, size, inputLength);
        return prefixed;
    }
}

View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.Function;
/**
 * Append the given number of zero-valued doubles to the incoming array, always
 * returning a new array.
 */
@ThreadSafeMapper
@Categories({Category.experimental,Category.vectors})
public class DoubleVectorSuffix implements Function<double[],double[]> {

    private final int size;

    public DoubleVectorSuffix(int size) {
        this.size = size;
    }

    @Override
    public double[] apply(double[] doubles) {
        final int inputLength = doubles.length;
        final double[] suffixed = new double[size + inputLength];
        // the input fills the front; the last `size` slots keep their default of 0.0
        System.arraycopy(doubles, 0, suffixed, 0, inputLength);
        return suffixed;
    }
}

View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.Example;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.library.basics.shared.util.DoubleCombiner;
import java.util.function.LongFunction;
/**
 * This is a version of the NoSQLBench {@link io.nosqlbench.virtdata.library.basics.shared.util.Combiner}
 * which is especially suited to constructing unique sequences of doubles. This can be to create arbitrarily long
 * vectors in double[] form, where each vector corresponds to a specific character encoding. Based on the
 * maximum cardinality of symbol values in each position, a step function on the unit interval is created for you
 * and used as a source of magnitudes.
 * <p>
 * For example, with a combiner spec of "{@code a-yA-Y*1024}", the "{@code a-yA-Y}" part creates a character set
 * mapping for 50 distinct indexed character values with the letter acting as a code, and then the "{@code *1024}"
 * repeats this mapping over 1024 <em>digits</em> of values, which are then concatenated into an array of values as a
 * uniquely encoded vector. In actuality, the internal model is computed separately from the character encoding, so is
 * efficient, although the character encoding can be used to uniquely identify each vector.
 * </p>
 *
 * <p>Note that as with other combiner forms, you can specify a different cardinality for each position, although
 * the automatically computed step function for unit-interval will be based on the largest cardinality. It is not
 * computed separately for each position. Thus, a specifier like "{@code a-z*5;0-9*2}" will only see the last two
 * positions using a fraction of the possible magnitudes, as the a-z element has the most steps at 26 between 0.0 and
 * 1.0.</p>
 */
@ThreadSafeMapper
@Categories({Category.experimental, Category.premade})
public class DoubleVectors extends DoubleCombiner implements LongFunction<double[]> {

    /**
     * Create a radix-mapped vector function based on a spec of character ranges and combinations.
     * The element values are taken from a cached unit-interval step function which is sized by the
     * largest character set in the spec.
     *
     * @param spec - The string specifier for a symbolic cardinality and symbol model that represents the vector values
     */
    // The examples must use this class's own name, since that is the function name they illustrate.
    @Example({"DoubleVectors('0-9*12')","Create a sequence of vectors encoding a 10-valued step function over 12 dimensions"})
    @Example({"DoubleVectors('01*1024')","Create a sequence of vectors encoding a 2-valued step function over 1024 dimensions"})
    @Example({"DoubleVectors('a-yA-Y0-9!@#$%^&*()*512')","Create a sequence of vectors encoding a 70-valued step function over 512 dimensions"})
    public DoubleVectors(String spec) {
        super(spec, new DoubleCache(new UnitSteps(DoubleCombiner.maxRadixDigits(spec))));
    }
}

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.Function;
/**
 * Left-pad the incoming array with zero-valued floats so that it is sized up to at least the
 * given size. If it is already at least that size, the same array is passed through as-is.
 */
@ThreadSafeMapper
@Categories({Category.experimental, Category.vectors})
public class FloatVectorPadLeft implements Function<float[], float[]> {

    private final int size;

    public FloatVectorPadLeft(int size) {
        this.size = size;
    }

    @Override
    public float[] apply(float[] floats) {
        if (size <= floats.length) {
            return floats;
        }
        // number of leading zero slots in front of the copied input
        final int leading = size - floats.length;
        final float[] padded = new float[size];
        System.arraycopy(floats, 0, padded, leading, floats.length);
        return padded;
    }
}

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.Function;
/**
 * Right-pad the incoming array with zero-valued floats so that it is sized up to at least the
 * given size. If it is already at least that size, the same array is passed through as-is.
 */
@ThreadSafeMapper
@Categories({Category.experimental, Category.vectors})
public class FloatVectorPadRight implements Function<float[], float[]> {

    private final int size;

    public FloatVectorPadRight(int size) {
        this.size = size;
    }

    @Override
    public float[] apply(float[] floats) {
        if (size <= floats.length) {
            return floats;
        }
        // size is strictly larger here, so copyOf keeps every input element and
        // zero-fills the remaining tail.
        return java.util.Arrays.copyOf(floats, size);
    }
}

View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.Function;
/**
 * Prepend the given number of zero-valued floats to the incoming array, always
 * returning a new array.
 */
@ThreadSafeMapper
@Categories({Category.experimental,Category.vectors})
public class FloatVectorPrefix implements Function<float[],float[]> {

    private final int size;

    public FloatVectorPrefix(int size) {
        this.size = size;
    }

    @Override
    public float[] apply(float[] floats) {
        final int inputLength = floats.length;
        final float[] prefixed = new float[size + inputLength];
        // the input lands after the first `size` slots, which keep their default of 0.0f
        System.arraycopy(floats, 0, prefixed, size, inputLength);
        return prefixed;
    }
}

View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.Function;
/**
 * Suffix the incoming float[] with {@code size} additional zero-valued elements.
 * The result is always a new array of length {@code size + input.length}; the input
 * array itself is not modified.
 */
@ThreadSafeMapper
@Categories({Category.experimental,Category.vectors})
public class FloatVectorSuffix implements Function<float[],float[]> {

    private final int size;

    /**
     * @param size the number of zero-valued elements to append after the input values
     */
    public FloatVectorSuffix(int size) {
        this.size = size;
    }

    /**
     * @param floats the values which will precede the zero-valued suffix
     * @return a new array holding the input values followed by {@code size} zeros
     */
    @Override
    public float[] apply(float[] floats) {
        float[] newary = new float[size + floats.length];
        // Only the leading values are copied; the trailing slots keep their default 0.0f.
        System.arraycopy(floats, 0, newary, 0, floats.length);
        return newary;
    }
}

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.LongFunction;
/**
 * Float-valued counterpart of {@link DoubleVectors}. So that the float values correspond
 * directly to the double values of that function, every vector is first produced by a
 * wrapped {@link DoubleVectors} instance and then narrowed, component by component, to float.
 */
@ThreadSafeMapper
@Categories(Category.vectors)
public class FloatVectors implements LongFunction<float[]> {

    private final DoubleVectors doubleVectorFunction;

    /**
     * @param spec the spec string, passed unchanged to the wrapped {@link DoubleVectors}
     */
    public FloatVectors(String spec) {
        this.doubleVectorFunction = new DoubleVectors(spec);
    }

    /**
     * @param value the input value handed to the wrapped double vector function
     * @return the double vector for {@code value}, with each component cast to float
     */
    @Override
    public float[] apply(long value) {
        final double[] source = doubleVectorFunction.apply(value);
        final float[] narrowed = new float[source.length];
        int idx = 0;
        for (double component : source) {
            narrowed[idx++] = (float) component;
        }
        return narrowed;
    }
}

View File

@ -0,0 +1,38 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
/**
 * A three-dimensional {@link VectorSequence} which places vectors along a golden-angle
 * spiral: the y component steps linearly from 1 down to -1 as the input goes from 0 to
 * {@code cardinality - 1}, while the angle around the y axis advances by the golden angle
 * per step. Inputs outside of {@code [0, cardinality - 1]} push y outside of [-1, 1], which
 * makes the x and z components NaN.
 */
public class GoldenAngleDoubleVectors extends VectorSequence {

    /**
     * The golden angle in radians, pi * (3 - sqrt(5)), which is roughly 137.5 degrees.
     * It has to be in radians because it is fed to {@link Math#cos(double)} and
     * {@link Math#sin(double)}.
     */
    private final static double GOLDEN_ANGLE_RADIANS = Math.PI * (3.0d - Math.sqrt(5.0d));

    /**
     * @param vectorCount the number of vectors in the sequence
     */
    public GoldenAngleDoubleVectors(long vectorCount) {
        super(vectorCount);
    }

    /**
     * @param value the ordinal of the vector to compute
     * @return a new three-element array {x, y, z}
     */
    @Override
    public double[] apply(long value) {
        // With exactly one vector there is no span to interpolate over; dividing by
        // (cardinality - 1) would be 0/0 and poison every component with NaN.
        long steps = cardinality - 1;
        double span = (steps == 0) ? 1.0d : (double) steps;
        double y = 1 - (value / span) * 2;
        double radius = Math.sqrt(1 - y * y);
        double theta = GOLDEN_ANGLE_RADIANS * value;
        return new double[]{Math.cos(theta) * radius, y, Math.sin(theta) * radius};
    }

    /**
     * @return 3, since every vector produced has an x, y and z component
     */
    @Override
    public long getDimensions() {
        return 3;
    }
}

View File

@ -0,0 +1,77 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.api.bindings.VirtDataConversions;
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.HashRange;
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_long.Hash;
import java.util.function.LongFunction;
import java.util.function.LongToDoubleFunction;
import java.util.function.LongToIntFunction;
/**
 * Build double[] vectors of arbitrary size from hashed values. The input is treated as
 * non-hashed and is hashed once up front so that neighboring inputs do not produce
 * neighboring sequences. Starting from that hashed value, consecutive long values are
 * walked, and each one is mapped through the value function to fill one component.
 */
@Categories({Category.vectors, Category.experimental})
@ThreadSafeMapper
public class HashedDoubleVectors implements LongFunction<double[]> {

    private final LongToIntFunction sizeFunc;
    private final Hash rehasher;
    private final LongToDoubleFunction valueFunc;

    /**
     * Create a double[] generator from a size (or size function) and a long->double function.
     *
     * @param sizer     Either a numeric type which sets a fixed dimension, or a long->int function
     *                  to derive the dimension for each input
     * @param valueFunc A long->double function
     */
    public HashedDoubleVectors(Object sizer, Object valueFunc) {
        this.sizeFunc = asSizeFunction(sizer);
        this.valueFunc = VirtDataConversions.adaptFunction(valueFunc, LongToDoubleFunction.class);
        this.rehasher = new Hash();
    }

    /**
     * Create a generator whose component values come from a {@link HashRange} over the given bounds.
     *
     * @param sizer Either a numeric type which sets a fixed dimension, or a long->int function
     * @param min   the lower bound handed to {@link HashRange}
     * @param max   the upper bound handed to {@link HashRange}
     */
    public HashedDoubleVectors(Object sizer, double min, double max) {
        this(sizer, new HashRange(min, max));
    }

    /**
     * Create a generator whose component values come from a {@link HashRange} over 0.0 and 1.0.
     *
     * @param sizer Either a numeric type which sets a fixed dimension, or a long->int function
     */
    public HashedDoubleVectors(Object sizer) {
        this(sizer, 0.0d, 1.0d);
    }

    /**
     * Turn the sizer argument into a size function: numbers become a constant function,
     * anything else is adapted to a long->int function.
     */
    private static LongToIntFunction asSizeFunction(Object sizer) {
        if (sizer instanceof Number number) {
            final int fixedSize = number.intValue();
            return input -> fixedSize;
        }
        return VirtDataConversions.adaptFunction(sizer, LongToIntFunction.class);
    }

    @Override
    public double[] apply(long value) {
        final double[] vector = new double[sizeFunc.applyAsInt(value)];
        final long seed = rehasher.applyAsLong(value);
        // seed + offset may wrap around; that is harmless since the values are only hashed.
        for (int offset = 0; offset < vector.length; offset++) {
            vector[offset] = valueFunc.applyAsDouble(seed + offset);
        }
        return vector;
    }
}

View File

@ -0,0 +1,77 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.api.bindings.VirtDataConversions;
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.HashRange;
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_long.Hash;
import java.util.function.LongFunction;
import java.util.function.LongToDoubleFunction;
import java.util.function.LongToIntFunction;
/**
 * Build float[] vectors of arbitrary size from hashed values. The input is treated as
 * non-hashed and is hashed once up front so that neighboring inputs do not produce
 * neighboring sequences. Starting from that hashed value, consecutive long values are
 * walked, and each one is mapped through the value function and narrowed to float to
 * fill one component.
 */
@Categories({Category.vectors, Category.experimental})
@ThreadSafeMapper
public class HashedFloatVectors implements LongFunction<float[]> {

    private final LongToIntFunction sizeFunc;
    private final Hash rehasher;
    private final LongToDoubleFunction valueFunc;

    /**
     * Create a float[] generator from a size (or size function) and a long->double function.
     * The double results of the value function are cast to float when the vector is filled.
     *
     * @param sizer     Either a numeric type which sets a fixed dimension, or a long->int function
     *                  to derive the dimension for each input
     * @param valueFunc A long->double function
     */
    public HashedFloatVectors(Object sizer, Object valueFunc) {
        this.sizeFunc = asSizeFunction(sizer);
        this.valueFunc = VirtDataConversions.adaptFunction(valueFunc, LongToDoubleFunction.class);
        this.rehasher = new Hash();
    }

    /**
     * Create a generator whose component values come from a {@link HashRange} over the given bounds.
     *
     * @param sizer Either a numeric type which sets a fixed dimension, or a long->int function
     * @param min   the lower bound handed to {@link HashRange}
     * @param max   the upper bound handed to {@link HashRange}
     */
    public HashedFloatVectors(Object sizer, double min, double max) {
        this(sizer, new HashRange(min, max));
    }

    /**
     * Create a generator whose component values come from a {@link HashRange} over 0.0 and 1.0.
     *
     * @param sizer Either a numeric type which sets a fixed dimension, or a long->int function
     */
    public HashedFloatVectors(Object sizer) {
        this(sizer, 0.0d, 1.0d);
    }

    /**
     * Turn the sizer argument into a size function: numbers become a constant function,
     * anything else is adapted to a long->int function.
     */
    private static LongToIntFunction asSizeFunction(Object sizer) {
        if (sizer instanceof Number number) {
            final int fixedSize = number.intValue();
            return input -> fixedSize;
        }
        return VirtDataConversions.adaptFunction(sizer, LongToIntFunction.class);
    }

    @Override
    public float[] apply(long value) {
        final float[] vector = new float[sizeFunc.applyAsInt(value)];
        final long seed = rehasher.applyAsLong(value);
        // seed + offset may wrap around; that is harmless since the values are only hashed.
        for (int offset = 0; offset < vector.length; offset++) {
            vector[offset] = (float) (valueFunc.applyAsDouble(seed + offset));
        }
        return vector;
    }
}

View File

@ -0,0 +1,42 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.Function;
/**
 * Scale a double[] to unit Euclidean (L2) length by dividing every component by the
 * square root of the sum of squares. The input array is not modified; a new array of
 * the same length is returned.
 *
 * <p>An input whose norm is zero (empty, or all components zero) has no direction to
 * preserve, so an all-zero array of the same length is returned instead of dividing
 * by zero and filling the result with NaN.</p>
 */
@ThreadSafeMapper
@Categories(Category.experimental)
public class NormalizeDoubleVector implements Function<double[],double[]> {

    /**
     * @param doubles the vector to normalize
     * @return a new array holding the normalized components
     */
    @Override
    public double[] apply(double[] doubles) {
        double[] normalized = new double[doubles.length];
        double accumulator = 0.0d;
        for (double component : doubles) {
            accumulator += component * component;
        }
        double scale = Math.sqrt(accumulator);
        if (scale == 0.0d) {
            // 0.0/0.0 is NaN; hand back the zero-filled result rather than NaN components.
            return normalized;
        }
        for (int i = 0; i < doubles.length; i++) {
            normalized[i] = doubles[i] / scale;
        }
        return normalized;
    }
}

View File

@ -0,0 +1,42 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.Function;
/**
 * Scale a float[] to unit Euclidean (L2) length by dividing every component by the
 * square root of the sum of squares. The input array is not modified; a new array of
 * the same length is returned.
 *
 * <p>The sum of squares is carried in double precision, so squaring a large float
 * component cannot overflow to Infinity before it is accumulated.</p>
 *
 * <p>An input whose norm is zero (empty, or all components zero) has no direction to
 * preserve, so an all-zero array of the same length is returned instead of dividing
 * by zero and filling the result with NaN.</p>
 */
@ThreadSafeMapper
@Categories(Category.experimental)
public class NormalizeFloatVector implements Function<float[],float[]> {

    /**
     * @param floats the vector to normalize
     * @return a new array holding the normalized components
     */
    @Override
    public float[] apply(float[] floats) {
        float[] normalized = new float[floats.length];
        double accumulator = 0.0d;
        for (float component : floats) {
            // Widen before multiplying: float*float is evaluated in float and can overflow.
            accumulator += (double) component * component;
        }
        double scale = Math.sqrt(accumulator);
        if (scale == 0.0d) {
            // 0.0/0.0 is NaN; hand back the zero-filled result rather than NaN components.
            return normalized;
        }
        for (int i = 0; i < floats.length; i++) {
            normalized[i] = (float) (floats[i] / scale);
        }
        return normalized;
    }
}

View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.Function;
/**
 * Convert a double[] into a float[] of the same length by casting each component to float.
 */
@ThreadSafeMapper
@Categories({Category.vectors,Category.experimental})
public class ToFloatVector implements Function<double[],float[]> {

    /**
     * @param doubles the values to narrow
     * @return a new float[] holding the narrowed values in the same order
     */
    @Override
    public float[] apply(double[] doubles) {
        final float[] narrowed = new float[doubles.length];
        int idx = 0;
        for (double component : doubles) {
            narrowed[idx++] = (float) component;
        }
        return narrowed;
    }
}

View File

@ -0,0 +1,28 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
/**
 * Maps each input onto a step of the unit interval: the remainder of the input divided by
 * the cardinality, scaled down by the cardinality. For non-negative inputs and a positive
 * cardinality the result is one of {@code 0/c, 1/c, ... (c-1)/c}.
 */
public class UnitSteps extends DoubleSequence {

    /**
     * @param cardinality the number of distinct steps
     */
    public UnitSteps(long cardinality) {
        super(cardinality);
    }

    /**
     * @param value the ordinal to map onto a step
     * @return the step for {@code value}, or NaN when the cardinality is zero
     */
    @Override
    public double applyAsDouble(long value) {
        if (cardinality == 0) {
            // No step is defined for a zero cardinality; keep the NaN result that the
            // floating point form produced rather than failing on integer division.
            return Double.NaN;
        }
        // Take the remainder before widening to double: a long above 2^53 is rounded
        // when cast to double, which would otherwise corrupt the remainder.
        return (double) (value % cardinality) / (double) cardinality;
    }
}

View File

@ -0,0 +1,53 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import java.util.function.LongFunction;
/**
 * Base type for vector sequences: deterministic mappings from ordinal values to
 * vector values. Ideally the vectors are computed with closed-form functions;
 * where that is not possible they should be pre-computed and cached.
 */
public abstract class VectorSequence implements LongFunction<double[]> {

    /** The number of unique vectors in this sequence, as given at construction. */
    protected final long cardinality;

    /**
     * @param cardinality the number of unique vectors in the sequence
     */
    public VectorSequence(long cardinality) {
        this.cardinality = cardinality;
    }

    /**
     * @return the number of components per vector produced.
     */
    public abstract long getDimensions();

    /**
     * @return the number of unique vectors produced.
     */
    public long getCardinality() {
        return cardinality;
    }

    /**
     * Subclasses should return the computed minimum vector angle in radians.
     * This base implementation returns 0.0.
     *
     * @return the minimum vector angle in radians between adjacent vectors by ordinal value
     */
    public double getMinimumVectorAngle() {
        return 0.0d;
    }
}

View File

@ -0,0 +1,35 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.repeaters;
import org.junit.jupiter.api.Test;
import java.util.List;
import static org.assertj.core.api.Assertions.assertThat;
/**
 * Checks that RepeatList cycles through its input until the requested size is reached.
 */
public class RepeatListTest {

    /**
     * A three-element list repeated to seven elements wraps around twice and
     * ends on the first element.
     */
    @Test
    public void testRepeatList() {
        RepeatList repeater = new RepeatList(7);
        List<Double> source = List.of(1.2, 3.4, 5.6);

        List repeated = repeater.apply(source);

        assertThat(repeated).containsExactly(1.2, 3.4, 5.6, 1.2, 3.4, 5.6, 1.2);
    }
}

View File

@ -0,0 +1,93 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.util;
import org.junit.jupiter.api.Test;
import static org.assertj.core.api.Assertions.assertThat;
/**
 * Exercises Combiner spec parsing and the conversions between ordinals, per-position
 * indexes, character encodings and value arrays.
 */
public class CombinerTest {

    /** A single character range expands into one charset. */
    @Test
    public void testCharsetRangingBasic() {
        char[][] parsed = Combiner.parseSpec("a-e");
        char[][] expected = new char[][]{{'a','b','c','d','e'}};
        assertThat(parsed).isEqualTo(expected);
    }

    /** The *N suffix repeats the charset N times. */
    @Test
    public void testCharsetRangingRepeat() {
        char[][] parsed = Combiner.parseSpec("a-c*3");
        char[][] expected = new char[][]{{'a','b','c'},{'a','b','c'},{'a','b','c'}};
        assertThat(parsed).isEqualTo(expected);
    }

    /** The inverted index maps each character back to its position within its charset. */
    @Test
    public void testInvertedIndex() {
        final String spec = "a-c*3";
        char[][] charsets = Combiner.parseSpec(spec);
        int[][] inverted = Combiner.invertedIndexFor(spec);

        assertThat(charsets[0][0]).isEqualTo('a');
        assertThat(charsets[1][1]).isEqualTo('b');
        assertThat(charsets[2][2]).isEqualTo('c');

        assertThat(inverted[0]['a']).isEqualTo(0);
        assertThat(inverted[1]['b']).isEqualTo(1);
        assertThat(inverted[2]['c']).isEqualTo(2);
    }

    /** Ten base-10 positions: ordinal, encoding, indexes and values all agree. */
    @Test
    public void testBasicSequence() {
        final long input = 123456789L;
        final String encoding = "0123456789";

        Combiner<String> stringCombiner = new Combiner<>("0-9*10", String::valueOf, String.class);
        assertThat(stringCombiner.apply(input)).isEqualTo(new String[]{"0","1","2","3","4","5","6","7","8","9"});

        long ordinal = stringCombiner.getOrdinal(encoding);
        assertThat(stringCombiner.getEncoding(ordinal)).isEqualTo(encoding);
        assertThat(ordinal).isEqualTo(input);

        assertThat(stringCombiner.getIndexes(input)).isEqualTo(new int[]{0,1,2,3,4,5,6,7,8,9});
        assertThat(stringCombiner.getOrdinal(new int[]{0,1,2,3,4,5,6,7,8,9})).isEqualTo(input);
    }

    /** Range specs may be a single character, a dashed range, a literal list, or a mix. */
    @Test
    public void testRangeFor() {
        final char[] threeToFive = new char[]{'3','4','5'};

        assertThat(Combiner.rangeFor("3")).isEqualTo(new char[]{'3'});
        assertThat(Combiner.rangeFor("3-5")).isEqualTo(threeToFive);
        assertThat(Combiner.rangeFor("345")).isEqualTo(threeToFive);
        assertThat(Combiner.rangeFor("3-45")).isEqualTo(threeToFive);
    }

    /** Every conversion between input, indexes, encoding and values is consistent. */
    @Test
    public void testChains() {
        Combiner<String> combiner = new Combiner<>("ab*2", String::valueOf, String.class);

        final long input = 3;
        final int[] indexes = new int[]{1,1};
        final String encoding = "bb";
        final String[] values = new String[]{"1","1"};

        // to values
        assertThat(combiner.apply(input)).isEqualTo(values);
        assertThat(combiner.getArray(indexes)).isEqualTo(values);
        assertThat(combiner.getArray(encoding)).isEqualTo(values);

        // to indexes
        assertThat(combiner.getIndexes(input)).isEqualTo(indexes);
        assertThat(combiner.getIndexes(encoding)).isEqualTo(indexes);

        // to ordinal
        assertThat(combiner.getOrdinal(indexes)).isEqualTo(input);
        assertThat(combiner.getOrdinal(encoding)).isEqualTo(input);

        // to encoding
        assertThat(combiner.getEncoding(indexes)).isEqualTo(encoding);
        assertThat(combiner.getEncoding(input)).isEqualTo(encoding);
    }
}

View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.util;
import org.junit.jupiter.api.Test;
import static org.assertj.core.api.Assertions.assertThat;
/**
 * Exercises DoubleCombiner over a base-10 spec, and repeats the Combiner range and
 * conversion-chain checks.
 */
public class DoubleCombinerTest {

    /** Ten base-10 positions: ordinal, encoding, indexes and double values all agree. */
    @Test
    public void testBasicSequence() {
        final long input = 123456789L;
        final String encoding = "0123456789";

        DoubleCombiner dc = new DoubleCombiner("0-9*10", l -> (double)l);
        assertThat(dc.apply(input)).isEqualTo(new double[]{0.0d,1.0d,2.0d,3.0d,4.0d,5.0d,6.0d,7.0d,8.0d,9.0d});

        long ordinal = dc.getOrdinal(encoding);
        assertThat(dc.getEncoding(ordinal)).isEqualTo(encoding);
        assertThat(ordinal).isEqualTo(input);

        assertThat(dc.getIndexes(input)).isEqualTo(new int[]{0,1,2,3,4,5,6,7,8,9});
        assertThat(dc.getOrdinal(new int[]{0,1,2,3,4,5,6,7,8,9})).isEqualTo(input);
    }

    /** Range specs may be a single character, a dashed range, a literal list, or a mix. */
    @Test
    public void testRangeFor() {
        final char[] threeToFive = new char[]{'3','4','5'};

        assertThat(Combiner.rangeFor("3")).isEqualTo(new char[]{'3'});
        assertThat(Combiner.rangeFor("3-5")).isEqualTo(threeToFive);
        assertThat(Combiner.rangeFor("345")).isEqualTo(threeToFive);
        assertThat(Combiner.rangeFor("3-45")).isEqualTo(threeToFive);
    }

    /** Every conversion between input, indexes, encoding and values is consistent. */
    @Test
    public void testChains() {
        Combiner<String> combiner = new Combiner<>("ab*2", String::valueOf, String.class);

        final long input = 3;
        final int[] indexes = new int[]{1,1};
        final String encoding = "bb";
        final String[] values = new String[]{"1","1"};

        // to values
        assertThat(combiner.apply(input)).isEqualTo(values);
        assertThat(combiner.getArray(indexes)).isEqualTo(values);
        assertThat(combiner.getArray(encoding)).isEqualTo(values);

        // to indexes
        assertThat(combiner.getIndexes(input)).isEqualTo(indexes);
        assertThat(combiner.getIndexes(encoding)).isEqualTo(indexes);

        // to ordinal
        assertThat(combiner.getOrdinal(indexes)).isEqualTo(input);
        assertThat(combiner.getOrdinal(encoding)).isEqualTo(input);

        // to encoding
        assertThat(combiner.getEncoding(indexes)).isEqualTo(encoding);
        assertThat(combiner.getEncoding(input)).isEqualTo(encoding);
    }
}

View File

@ -0,0 +1,83 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors;
import org.junit.jupiter.api.Test;
import java.util.List;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Tests CircleVectors construction and the vectors produced with the GoldenAngle and
 * LatLonBased algorithms. All checks use JUnit assertions so that they are evaluated
 * even when the JVM runs without {@code -ea}.
 */
public class CircleVectorsTest {

    private static final String GOLDEN_ANGLE =
        "io.nosqlbench.virtdata.library.basics.shared.vectors.algorithms.GoldenAngle";
    private static final String LAT_LON_BASED =
        "io.nosqlbench.virtdata.library.basics.shared.vectors.algorithms.LatLonBased";

    /** The circle count and the algorithm named at construction are retained. */
    @Test
    public void testCircleVectors() throws Exception {
        CircleVectors circleVectors = new CircleVectors(10, GOLDEN_ANGLE);

        assertTrue(circleVectors.getCircleCount() == 10);
        assertTrue(circleVectors.getAlgorithm() instanceof
            io.nosqlbench.virtdata.library.basics.shared.vectors.algorithms.GoldenAngle);
    }

    /** Naming a class which is not a CircleAlgorithm is rejected with a descriptive message. */
    @Test
    public void whenExceptionThrownForWrongClass() {
        Exception exception = assertThrows(RuntimeException.class,
            () -> new CircleVectors(10, "java.util.Date"));

        String expectedMessage = "The class 'java.util.Date' does not implement CircleAlgorithm";
        String actualMessage = exception.getMessage();

        assertTrue(actualMessage.contains(expectedMessage));
    }

    /** With the GoldenAngle algorithm, input 1000 over 10 circles yields (0, 1, 0). */
    @Test
    public void testGoldenAngle() throws Exception {
        CircleVectors circleVectors = new CircleVectors(10, GOLDEN_ANGLE);

        List<Object> result = circleVectors.apply(1000);

        assertEquals(3, result.size());
        assertEquals(0.0f, result.get(0));
        assertEquals(1.0f, result.get(1));
        assertEquals(0.0f, result.get(2));
    }

    /** With the LatLonBased algorithm, input 1000 over 10 circles yields (~0, 0, 1). */
    @Test
    public void testLatLonBased() throws Exception {
        CircleVectors circleVectors = new CircleVectors(10, LAT_LON_BASED);

        List<Object> result = circleVectors.apply(1000);

        assertEquals(3, result.size());
        assertEquals(6.123234E-17f, result.get(0));
        assertEquals(0.0f, result.get(1));
        assertEquals(1.0f, result.get(2));
    }
}

View File

@ -0,0 +1,52 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import org.junit.jupiter.api.Test;
import static org.assertj.core.api.Assertions.assertThat;
/**
 * Checks the character encoding and the double values which DoubleVectors derives
 * from a base-10, twelve-position spec.
 */
public class CharVectorsTest {

    /**
     * Verify radix mapping to aligned unit-interval step function. This shows the most
     * conceptually direct mapping to a vector: each decimal digit of the input becomes
     * one component, scaled to a tenth of the digit.
     */
    @Test
    public void testBase10CharVectors() {
        DoubleVectors base10 = new DoubleVectors("0-9*12");

        final long zero = 0L;
        assertThat(base10.getEncoding(zero)).isEqualTo("000000000000");
        assertThat(base10.apply(zero))
            .isEqualTo(new double[]{0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d});

        final long ten = 10L;
        assertThat(base10.getEncoding(ten)).isEqualTo("000000000010");
        assertThat(base10.apply(ten))
            .isEqualTo(new double[]{0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.1d, 0.0d});

        final long billion = 1000000000L;
        assertThat(base10.getEncoding(billion)).isEqualTo("001000000000");
        assertThat(base10.apply(billion))
            .isEqualTo(new double[]{0.0d, 0.0d, 0.1d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d, 0.0d});

        final long allNines = 999999999999L;
        assertThat(base10.getEncoding(allNines)).isEqualTo("999999999999");
        assertThat(base10.apply(allNines))
            .isEqualTo(new double[]{0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9});
    }
}

View File

@ -0,0 +1,31 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import org.junit.jupiter.api.Test;
import static org.assertj.core.api.Assertions.assertThat;
/**
 * Checks the vector which DoubleVectors produces for a small input.
 */
public class DoubleVectorTest {

    /** Input 1 over twelve base-10 positions sets only the last component, to 0.1. */
    @Test
    public void testCombinedDoubleVector() {
        DoubleVectors vectors = new DoubleVectors("0-9*12");
        double[] expected = new double[]{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1};
        assertThat(vectors.apply(1L)).isEqualTo(expected);
    }
}

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.primitive;
import org.junit.jupiter.api.Test;
import java.util.Arrays;
import static org.assertj.core.api.Assertions.assertThat;
/**
 * Checks the size, value range and repeatability of vectors from HashedDoubleVectors.
 */
public class HashedDoubleVectorsTest {

    /**
     * A fixed size of 10000 with the default value function (a HashRange over 0.0 and 1.0)
     * must yield 10000 components within those bounds, and the same input must yield the
     * same vector again.
     */
    @Test
    public void testHashedDoubleVectors() {
        HashedDoubleVectors hdv1 = new HashedDoubleVectors(10000);
        double[] doubles = hdv1.apply(1L);

        assertThat(doubles).hasSize(10000);
        for (double component : doubles) {
            assertThat(component).isBetween(0.0d, 1.0d);
        }
        assertThat(hdv1.apply(1L)).isEqualTo(doubles);
    }
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 nosqlbench
* Copyright (c) 2022-2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -29,7 +29,17 @@ public class HashedDoubleRangeTest {
for(long i=1;i<1000;i++) {
assertThat(r.applyAsDouble(i)).isBetween(0.0D,100.0D);
}
}
/**
 * A range which spans zero must keep every hashed value within its bounds.
 */
@Test
public void testNegativeAndPositive() {
    HashRange r = new HashRange(-1.0D, 1.0D);
    for (long i = 1; i < 1000; i++) {
        // No per-value printing here: a failing assertion already reports the offending value.
        assertThat(r.applyAsDouble(i)).isBetween(-1.0d, 1.0d);
    }
}
}