mirror of
https://github.com/nosqlbench/nosqlbench.git
synced 2025-02-25 18:55:28 -06:00
nosqlbench-1295 Add vector generation bindings, initial support
This commit is contained in:
parent
a25994cb9d
commit
dd1f039e09
@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2023 nosqlbench
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector;
|
||||
|
||||
import io.nosqlbench.virtdata.api.annotations.Categories;
|
||||
import io.nosqlbench.virtdata.api.annotations.Category;
|
||||
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
|
||||
* Normalize a vector of doubles to unit length by dividing each component by the vector's Euclidean (L2) norm.
|
||||
*/
|
||||
@ThreadSafeMapper
|
||||
@Categories(Category.experimental)
|
||||
public class NormalizeVector implements Function<List<Double>,List<Double>> {

    /**
     * Scale the given vector to unit length using its Euclidean (L2) norm.
     *
     * @param doubles the components of the vector to normalize; the list is not modified
     * @return a new list holding each component divided by the vector's length, or an
     *     unscaled copy of the input when the computed length is zero (for example an
     *     empty or all-zero vector)
     */
    @Override
    public List<Double> apply(List<Double> doubles) {
        double sumOfSquares = 0.0d;
        for (double component : doubles) {
            sumOfSquares += component * component;
        }
        double vectorLen = Math.sqrt(sumOfSquares);

        // A zero-length vector has no direction. Dividing by its length would turn
        // every component into NaN (0.0/0.0), so hand back an unscaled copy instead.
        if (vectorLen == 0.0d) {
            return new ArrayList<>(doubles);
        }

        List<Double> unit = new ArrayList<>(doubles.size());
        for (double component : doubles) {
            unit.add(component / vectorLen);
        }
        return unit;
    }
}
|
@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Copyright (c) 2023 nosqlbench
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector;
|
||||
|
||||
import io.nosqlbench.virtdata.api.annotations.Categories;
|
||||
import io.nosqlbench.virtdata.api.annotations.Category;
|
||||
import io.nosqlbench.virtdata.api.annotations.Example;
|
||||
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
|
||||
import io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.HashedDoubleRange;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.function.LongFunction;
|
||||
import java.util.function.LongToDoubleFunction;
|
||||
|
||||
/**
|
||||
* Create a vector which consists of a number of uniform vector ranges.
|
||||
* Each range is set as [min,max] inclusive by a pair of double values such as 3.0d, 5.0d, ...
|
||||
* You may provide an initial integer to set the number of components in the vector.
|
||||
* After the initial (optional) size integer, you may provide odd, even pairs of min, max.
|
||||
* If a range is not specified for a component which is expected from the size, the it is
|
||||
* automatically replaced with a unit interval double variate.
|
||||
*/
|
||||
@ThreadSafeMapper
|
||||
@Categories(Category.experimental)
|
||||
public class UniformVectorSizedStepped implements LongFunction<List<Double>> {
|
||||
|
||||
private final int dim;
|
||||
private final LongToDoubleFunction[] funcs;
|
||||
|
||||
@Example({"UniformVectorSizedStepped(3)","create a 3-component vector from unit interval variates"})
|
||||
@Example({"UniformVectorSizedStepped(1.0d,100.0d,5.0d,6.0d)","create a 2-component vector from the specified uniform ranges [1.0d,100.0d] and [5.0d,6.0d]"})
|
||||
@Example({"UniformVectorSizedStepped(2,3.0d,6.0d)","create a 2-component vector from ranges [3.0d,6.0d] and [0.0d,1.0d]"})
|
||||
public UniformVectorSizedStepped(Number... dims) {
|
||||
if (dims.length>=1 && (dims.length)%2==1 && dims[0] instanceof Integer) {
|
||||
this.dim = dims[0].intValue();
|
||||
dims = Arrays.copyOfRange(dims,1,dims.length);
|
||||
} else {
|
||||
dim=dims.length/2;
|
||||
}
|
||||
if ((dims.length%2)!=0) {
|
||||
throw new RuntimeException("Unable to set uniform range as [min,max] for pairs when count is odd. You must provide complete [min, max] value pairs as a,b,c,d,...");
|
||||
}
|
||||
this.funcs = new LongToDoubleFunction[dim];
|
||||
for (int i = 0; i < dim; i++) {
|
||||
if (i<dims.length/2) {
|
||||
funcs[i]=new HashedDoubleRange(dims[i<<1].doubleValue(),dims[i<<1].doubleValue()+1);
|
||||
} else {
|
||||
funcs[i]=new HashedDoubleRange(0.0d,1.0d);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Double> apply(long value) {
|
||||
Double[] vector = new Double[dim];
|
||||
for (int idx = 0; idx < vector.length; idx++) {
|
||||
vector[idx]=funcs[idx].applyAsDouble(value+idx);
|
||||
}
|
||||
return Arrays.asList(vector);
|
||||
}
|
||||
}
|
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2023 nosqlbench
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector;
|
||||
|
||||
import org.assertj.core.data.Offset;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
public class ToNormalizedVectorTest {
|
||||
|
||||
@Test
|
||||
public void testNormalizeBasic() {
|
||||
NormalizeVector normalize = new NormalizeVector();
|
||||
List<Double> normalized = normalize.apply(List.of(1.0d));
|
||||
for (int i = 0; i < normalized.size(); i++) {
|
||||
assertThat(normalized.get(i)).isCloseTo(1.0d, Offset.offset(0.00001d));
|
||||
}
|
||||
normalized = normalize.apply(List.of(1.0d,1.0d));
|
||||
for (int i = 0; i < normalized.size(); i++) {
|
||||
assertThat(normalized.get(i)).isCloseTo(0.7071, Offset.offset(0.001d));
|
||||
}
|
||||
normalized = normalize.apply(List.of(1.0d,1.0d,1.0d));
|
||||
for (int i = 0; i < normalized.size(); i++) {
|
||||
assertThat(normalized.get(i)).isCloseTo(0.5773, Offset.offset(0.001d));
|
||||
}
|
||||
normalized = normalize.apply(List.of(100.0d,0.0d,0.0d,0.0d,0.0d,0.0d));
|
||||
assertThat(normalized.get(0)).isCloseTo(1.0d,Offset.offset(0.00001d));
|
||||
for (int i = 1; i < normalized.size(); i++) {
|
||||
assertThat(normalized.get(i)).isCloseTo(0.0, Offset.offset(0.0001d));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2023 nosqlbench
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
public class UniformVectorSteppedTest {
|
||||
|
||||
|
||||
@Test
|
||||
public void testUniformVectorSteppedIsEmpty() {
|
||||
UniformVectorSizedStepped f1 = new UniformVectorSizedStepped();
|
||||
List<Double> empty = f1.apply(1L);
|
||||
assertThat(empty).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUniformVectorSteppedHasDefaultFuncs() {
|
||||
UniformVectorSizedStepped f2 = new UniformVectorSizedStepped(2);
|
||||
List<Double> twoUniform = f2.apply(1L);
|
||||
assertThat(twoUniform).hasSize(2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUniformVectorSteppedHasRanges() {
|
||||
UniformVectorSizedStepped f3 = new UniformVectorSizedStepped(4,3.0,5.0,7.0,9.0);
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
List<Double> v4 = f3.apply(i);
|
||||
assertThat(v4.get(0)).isBetween(3.0d,5.0d);
|
||||
assertThat(v4.get(1)).isBetween(7.0d,9.0d);
|
||||
assertThat(v4.get(2)).isBetween(0.0d,1.0d);
|
||||
assertThat(v4.get(3)).isBetween(0.0d,1.0d);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user