nosqlbench-1295 Add vector generation bindings, initial support

This commit is contained in:
Jonathan Shook 2023-05-25 15:16:11 -05:00
parent a25994cb9d
commit dd1f039e09
4 changed files with 226 additions and 0 deletions

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;
/**
 * Scale a vector of doubles by the reciprocal of its Euclidean length, producing
 * a new list whose components are the inputs divided by the square root of the
 * sum of their squares. The input list is not modified.
 */
@ThreadSafeMapper
@Categories(Category.experimental)
public class NormalizeVector implements Function<List<Double>,List<Double>> {

    /**
     * @param doubles the components of the vector to scale
     * @return a newly allocated list holding each input component divided by the vector length
     */
    @Override
    public List<Double> apply(List<Double> doubles) {
        ArrayList<Double> normalized = new ArrayList<>(doubles.size());

        // First pass: sum of squares, accumulated in list order.
        double sumOfSquares = 0.0d;
        for (double component : doubles) {
            sumOfSquares = sumOfSquares + (component * component);
        }
        final double magnitude = Math.sqrt(sumOfSquares);

        // Second pass: divide every component by the magnitude.
        for (double component : doubles) {
            double scaled = component / magnitude;
            normalized.add(scaled);
        }
        return normalized;
    }
}

View File

@ -0,0 +1,76 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.Example;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.HashedDoubleRange;
import java.util.Arrays;
import java.util.List;
import java.util.function.LongFunction;
import java.util.function.LongToDoubleFunction;
/**
 * Create a vector which consists of a number of uniform vector ranges.
 * Each range is set as [min,max] inclusive by a pair of double values such as 3.0d, 5.0d, ...
 * You may provide an initial integer to set the number of components in the vector.
 * After the initial (optional) size integer, you may provide odd, even pairs of min, max.
 * If a range is not specified for a component which is expected from the size, then it is
 * automatically replaced with a unit interval double variate.
 * If more pairs are given than the explicit size calls for, only the first {@code size}
 * pairs are used.
 */
@ThreadSafeMapper
@Categories(Category.experimental)
public class UniformVectorSizedStepped implements LongFunction<List<Double>> {

    /** Number of components in every generated vector. */
    private final int dim;
    /** One generator per component, indexed by component position. */
    private final LongToDoubleFunction[] funcs;

    /**
     * @param dims an optional leading {@link Integer} component count (recognized only when the
     *             total argument count is odd), followed by zero or more {@code min, max} pairs
     * @throws RuntimeException if, after the optional size is removed, the remaining values
     *                          do not form complete pairs
     */
    @Example({"UniformVectorSizedStepped(3)","create a 3-component vector from unit interval variates"})
    @Example({"UniformVectorSizedStepped(1.0d,100.0d,5.0d,6.0d)","create a 2-component vector from the specified uniform ranges [1.0d,100.0d] and [5.0d,6.0d]"})
    @Example({"UniformVectorSizedStepped(2,3.0d,6.0d)","create a 2-component vector from ranges [3.0d,6.0d] and [0.0d,1.0d]"})
    public UniformVectorSizedStepped(Number... dims) {
        // An odd argument count led by an Integer means "explicit size, then pairs";
        // otherwise the size is implied by the number of pairs.
        if (dims.length>=1 && (dims.length)%2==1 && dims[0] instanceof Integer) {
            this.dim = dims[0].intValue();
            dims = Arrays.copyOfRange(dims,1,dims.length);
        } else {
            dim=dims.length/2;
        }
        if ((dims.length%2)!=0) {
            throw new RuntimeException("Unable to set uniform range as [min,max] for pairs when count is odd. You must provide complete [min, max] value pairs as a,b,c,d,...");
        }

        final int pairCount = dims.length/2;
        this.funcs = new LongToDoubleFunction[dim];
        for (int i = 0; i < dim; i++) {
            if (i < pairCount) {
                // Pair i occupies slots 2i (min) and 2i+1 (max). The max must come from the
                // second slot of the pair, not from min+1.
                final int minIdx = i << 1;
                final double min = dims[minIdx].doubleValue();
                final double max = dims[minIdx + 1].doubleValue();
                funcs[i] = new HashedDoubleRange(min, max);
            } else {
                // No range supplied for this component: fall back to the unit interval.
                funcs[i] = new HashedDoubleRange(0.0d,1.0d);
            }
        }
    }

    /**
     * Build one vector for the given input value.
     *
     * @param value the input value; component {@code idx} is computed from {@code value+idx}
     *              so that components sharing a range do not all receive the same input
     * @return a fixed-size list of {@code dim} components, backed by a fresh array
     */
    @Override
    public List<Double> apply(long value) {
        Double[] vector = new Double[dim];
        for (int idx = 0; idx < vector.length; idx++) {
            vector[idx]=funcs[idx].applyAsDouble(value+idx);
        }
        return Arrays.asList(vector);
    }
}

View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector;
import org.assertj.core.data.Offset;
import org.junit.jupiter.api.Test;
import java.util.List;
import static org.assertj.core.api.Assertions.assertThat;
public class ToNormalizedVectorTest {

    /**
     * Exercises {@link NormalizeVector} on vectors of one, two and three equal components,
     * and on a six-component vector with a single non-zero leading component.
     */
    @Test
    public void testNormalizeBasic() {
        NormalizeVector normalizer = new NormalizeVector();

        // A single component of 1.0 is expected to stay at 1.0.
        List<Double> result = normalizer.apply(List.of(1.0d));
        for (Double component : result) {
            assertThat(component).isCloseTo(1.0d, Offset.offset(0.00001d));
        }

        // Two equal components: each is expected near 1/sqrt(2).
        result = normalizer.apply(List.of(1.0d,1.0d));
        for (Double component : result) {
            assertThat(component).isCloseTo(0.7071, Offset.offset(0.001d));
        }

        // Three equal components: each is expected near 1/sqrt(3).
        result = normalizer.apply(List.of(1.0d,1.0d,1.0d));
        for (Double component : result) {
            assertThat(component).isCloseTo(0.5773, Offset.offset(0.001d));
        }

        // Only the first component is non-zero: it becomes 1.0 and the rest stay at 0.0.
        result = normalizer.apply(List.of(100.0d,0.0d,0.0d,0.0d,0.0d,0.0d));
        assertThat(result.get(0)).isCloseTo(1.0d,Offset.offset(0.00001d));
        for (Double component : result.subList(1, result.size())) {
            assertThat(component).isCloseTo(0.0, Offset.offset(0.0001d));
        }
    }
}

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector;
import org.junit.jupiter.api.Test;
import java.util.List;
import static org.assertj.core.api.Assertions.assertThat;
public class UniformVectorSteppedTest {

    /** With no arguments at all, the generated vector has no components. */
    @Test
    public void testUniformVectorSteppedIsEmpty() {
        UniformVectorSizedStepped noArgs = new UniformVectorSizedStepped();
        List<Double> generated = noArgs.apply(1L);
        assertThat(generated).isEmpty();
    }

    /** A size with no ranges still yields a vector of that many components. */
    @Test
    public void testUniformVectorSteppedHasDefaultFuncs() {
        UniformVectorSizedStepped sizedOnly = new UniformVectorSizedStepped(2);
        List<Double> generated = sizedOnly.apply(1L);
        assertThat(generated).hasSize(2);
    }

    /**
     * A size of four with two explicit ranges: the first two components stay within
     * their configured bounds and the last two stay within the unit interval.
     */
    @Test
    public void testUniformVectorSteppedHasRanges() {
        UniformVectorSizedStepped ranged = new UniformVectorSizedStepped(4,3.0,5.0,7.0,9.0);
        for (long cycle = 0L; cycle < 1000L; cycle++) {
            List<Double> generated = ranged.apply(cycle);
            Double first = generated.get(0);
            Double second = generated.get(1);
            Double third = generated.get(2);
            Double fourth = generated.get(3);
            assertThat(first).isBetween(3.0d,5.0d);
            assertThat(second).isBetween(7.0d,9.0d);
            assertThat(third).isBetween(0.0d,1.0d);
            assertThat(fourth).isBetween(0.0d,1.0d);
        }
    }
}