From 8497665af34d8df3fa5b1fe8427e9099055f5820 Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Thu, 25 May 2023 15:16:11 -0500 Subject: [PATCH] nosqlbench-1295 Add vector generation bindings, initial support --- .../from_long/to_vector/NormalizeVector.java | 46 +++++++++++ .../to_vector/UniformVectorSizedStepped.java | 76 +++++++++++++++++++ .../to_vector/ToNormalizedVectorTest.java | 50 ++++++++++++ .../to_vector/UniformVectorSteppedTest.java | 54 +++++++++++++ 4 files changed, 226 insertions(+) create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/NormalizeVector.java create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/UniformVectorSizedStepped.java create mode 100644 virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/ToNormalizedVectorTest.java create mode 100644 virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/UniformVectorSteppedTest.java diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/NormalizeVector.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/NormalizeVector.java new file mode 100644 index 000000000..479386478 --- /dev/null +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/NormalizeVector.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector; + +import io.nosqlbench.virtdata.api.annotations.Categories; +import io.nosqlbench.virtdata.api.annotations.Category; +import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Function; + +/** + * Normalize a vector. + */ +@ThreadSafeMapper +@Categories(Category.experimental) +public class NormalizeVector implements Function,List> { + @Override + public List apply(List doubles) { + ArrayList unit = new ArrayList<>(doubles.size()); + double accumulator = 0.0d; + for (Double scalar : doubles) { + accumulator+=scalar*scalar; + } + double vectorLen = Math.sqrt(accumulator); + for (double scalarComponent : doubles) { + unit.add(scalarComponent/vectorLen); + } + return unit; + } +} diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/UniformVectorSizedStepped.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/UniformVectorSizedStepped.java new file mode 100644 index 000000000..b9e9de8c8 --- /dev/null +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/UniformVectorSizedStepped.java @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector; + +import io.nosqlbench.virtdata.api.annotations.Categories; +import io.nosqlbench.virtdata.api.annotations.Category; +import io.nosqlbench.virtdata.api.annotations.Example; +import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper; +import io.nosqlbench.virtdata.library.basics.shared.from_long.to_double.HashedDoubleRange; + +import java.util.Arrays; +import java.util.List; +import java.util.function.LongFunction; +import java.util.function.LongToDoubleFunction; + +/** + * Create a vector which consists of a number of uniform vector ranges. + * Each range is set as [min,max] inclusive by a pair of double values such as 3.0d, 5.0d, ... + * You may provide an initial integer to set the number of components in the vector. + * After the initial (optional) size integer, you may provide odd, even pairs of min, max. + * If a range is not specified for a component which is expected from the size, then it is + * automatically replaced with a unit interval double variate. 
+ */ +@ThreadSafeMapper +@Categories(Category.experimental) +public class UniformVectorSizedStepped implements LongFunction> { + + private final int dim; + private final LongToDoubleFunction[] funcs; + + @Example({"UniformVectorSizedStepped(3)","create a 3-component vector from unit interval variates"}) + @Example({"UniformVectorSizedStepped(1.0d,100.0d,5.0d,6.0d)","create a 2-component vector from the specified uniform ranges [1.0d,100.0d] and [5.0d,6.0d]"}) + @Example({"UniformVectorSizedStepped(2,3.0d,6.0d)","create a 2-component vector from ranges [3.0d,6.0d] and [0.0d,1.0d]"}) + public UniformVectorSizedStepped(Number... dims) { + if (dims.length>=1 && (dims.length)%2==1 && dims[0] instanceof Integer) { + this.dim = dims[0].intValue(); + dims = Arrays.copyOfRange(dims,1,dims.length); + } else { + dim=dims.length/2; + } + if ((dims.length%2)!=0) { + throw new RuntimeException("Unable to set uniform range as [min,max] for pairs when count is odd. You must provide complete [min, max] value pairs as a,b,c,d,..."); + } + this.funcs = new LongToDoubleFunction[dim]; + for (int i = 0; i < dim; i++) { + if (i apply(long value) { + Double[] vector = new Double[dim]; + for (int idx = 0; idx < vector.length; idx++) { + vector[idx]=funcs[idx].applyAsDouble(value+idx); + } + return Arrays.asList(vector); + } +} diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/ToNormalizedVectorTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/ToNormalizedVectorTest.java new file mode 100644 index 000000000..53a93700e --- /dev/null +++ b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/ToNormalizedVectorTest.java @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector; + +import org.assertj.core.data.Offset; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +public class ToNormalizedVectorTest { + + @Test + public void testNormalizeBasic() { + NormalizeVector normalize = new NormalizeVector(); + List normalized = normalize.apply(List.of(1.0d)); + for (int i = 0; i < normalized.size(); i++) { + assertThat(normalized.get(i)).isCloseTo(1.0d, Offset.offset(0.00001d)); + } + normalized = normalize.apply(List.of(1.0d,1.0d)); + for (int i = 0; i < normalized.size(); i++) { + assertThat(normalized.get(i)).isCloseTo(0.7071, Offset.offset(0.001d)); + } + normalized = normalize.apply(List.of(1.0d,1.0d,1.0d)); + for (int i = 0; i < normalized.size(); i++) { + assertThat(normalized.get(i)).isCloseTo(0.5773, Offset.offset(0.001d)); + } + normalized = normalize.apply(List.of(100.0d,0.0d,0.0d,0.0d,0.0d,0.0d)); + assertThat(normalized.get(0)).isCloseTo(1.0d,Offset.offset(0.00001d)); + for (int i = 1; i < normalized.size(); i++) { + assertThat(normalized.get(i)).isCloseTo(0.0, Offset.offset(0.0001d)); + } + } + +} diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/UniformVectorSteppedTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/UniformVectorSteppedTest.java new file mode 100644 index 000000000..c73f30857 --- /dev/null +++ 
b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/UniformVectorSteppedTest.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector; + +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +public class UniformVectorSteppedTest { + + + @Test + public void testUniformVectorSteppedIsEmpty() { + UniformVectorSizedStepped f1 = new UniformVectorSizedStepped(); + List empty = f1.apply(1L); + assertThat(empty).isEmpty(); + } + + @Test + public void testUniformVectorSteppedHasDefaultFuncs() { + UniformVectorSizedStepped f2 = new UniformVectorSizedStepped(2); + List twoUniform = f2.apply(1L); + assertThat(twoUniform).hasSize(2); + } + + @Test + public void testUniformVectorSteppedHasRanges() { + UniformVectorSizedStepped f3 = new UniformVectorSizedStepped(4,3.0,5.0,7.0,9.0); + for (int i = 0; i < 1000; i++) { + List v4 = f3.apply(i); + assertThat(v4.get(0)).isBetween(3.0d,5.0d); + assertThat(v4.get(1)).isBetween(7.0d,9.0d); + assertThat(v4.get(2)).isBetween(0.0d,1.0d); + assertThat(v4.get(3)).isBetween(0.0d,1.0d); + } + } + +}