diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/conversions/from_long/ToDouble.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/conversions/from_long/ToDouble.java index df955632a..351a520df 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/conversions/from_long/ToDouble.java +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/conversions/from_long/ToDouble.java @@ -39,7 +39,7 @@ public class ToDouble implements LongToDoubleFunction { private final LongToDoubleFunction func; - ToDouble(Object func) { + public ToDouble(Object func) { if (func instanceof Number number) { final double aDouble = number.doubleValue(); this.func = l -> aDouble; diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java new file mode 100644 index 000000000..bfdbf9d7c --- /dev/null +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
+
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+
+import java.util.function.IntFunction;
+
+/**
+ * Compute the indices of the neighbors of a given v using DNN mapping.
+ * To avoid ambiguity on equidistant neighbors, odd neighborhood sizes are preferred.
+ *
+ * <p>Neighbors are ranked by distance from the query index. At equal distance the lower
+ * index is emitted first, i.e. the order is {@code v, v-1, v+1, v-2, v+2, ...}.</p>
+ */
+@ThreadSafeMapper
+@Categories(Category.experimental)
+public class DNN_euclidean_neighbors implements IntFunction<int[]> {
+
+    // D is accepted for signature symmetry with the vector functions; apply() does not read it.
+    private final int D;
+    private final int N;
+    private final int k;
+
+    /**
+     * @param k
+     *     The size of neighborhood
+     * @param N
+     *     The number of total vectors, necessary for boundary conditions of defined vector
+     * @param D
+     *     Number of dimensions in each vector
+     * @throws IllegalArgumentException
+     *     if N is less than 1, or if k is negative or larger than N. A neighborhood larger than
+     *     the population cannot be filled with valid indices, and it would make the zero-boundary
+     *     and N-boundary fills overlap.
+     */
+    public DNN_euclidean_neighbors(int k, int N, int D) {
+        if (N < 1) {
+            throw new IllegalArgumentException("N must be at least 1, but was " + N);
+        }
+        if (k < 0 || k > N) {
+            throw new IllegalArgumentException(
+                "k must be between 0 and N (" + N + ") inclusive, but was " + k);
+        }
+        this.D = D;
+        this.N = N;
+        this.k = k;
+    }
+
+    /**
+     * <p>Compute neighbor indices with a (hopefully) fast implementation. There are surely some simplifications to be
+     * made in the functions below, but even in the current form it avoids a significant number of branches.</p>
+     *
+     * <p>This code is not as simple as it could be. It was built more for speed than simplicity since it will be a hot
+     * spot for testing. The unit tests for this are essential.</p>
+     *
+     * <p>The method is thus:</p>
+     * <ol>
+     * <li>Determine the sections of the neighborhood which aren't subject to boundary conditions,
+     * starting at the central vector (the index of the query vector).</li>
+     * <li>Layer these in rank order using closed-form index functions.</li>
+     * <li>Layer in any zero-boundary values which were deferred from above.</li>
+     * <li>Layer in any N-boundary values which were deferred from above.</li>
+     * </ol>
+     *
+     * <p>The boundary conditions for zero and N are mutually exclusive (the constructor enforces
+     * {@code k <= N}, which is what guarantees this). Even though there is some amount of
+     * ranging and bookkeeping in this approach, it should make the general case more stable, especially
+     * when there are many dimensions and many neighbors.</p>
+     *
+     * @param value
+     *     the function argument, or the index of the query vector for the DNN addressing scheme;
+     *     values outside of {@code [0, N-1]} are clamped into that range
+     * @return A ranked neighborhood of vector indices, using the DNN addressing scheme
+     */
+    @Override
+    public int[] apply(int value) {
+        final int center = Math.min(Math.max(0, value), N - 1);
+        final int[] indices = new int[k];
+
+        // How many ranked neighbors fit symmetrically before the zero edge / the N edge is hit.
+        // Widen to long before doubling: the int form overflows once N exceeds 2^30.
+        final long leftBoundary = ((long) center << 1) + 1;
+        final long rightBoundary = ((long) (N - (center + 1)) << 1) + 1;
+        final int insideNeighbors = (int) Math.min(k, Math.min(leftBoundary, rightBoundary));
+
+        for (int i = 0; i < insideNeighbors; i++) {
+            // Leave this here as an explainer, please
+            final int rank = i + 1;
+            final int sign = ((rank & 1) << 1) - 1; // this gives us -1 or +1 depending on odd or even, and is inverted
+            final int offset = rank >> 1;           // half rounded down, shifted by 1
+            indices[i] = center + (sign * offset);
+        }
+
+        // TODO: Evaluate optimization from Dave2Wave for reducing additions
+        if (leftBoundary < k) {
+            // The zero edge was reached first: the remaining ranks continue upward, and because
+            // indices 0..insideNeighbors-1 are already taken, the next index equals the slot number.
+            for (int slot = insideNeighbors; slot < k; slot++) {
+                indices[slot] = slot;
+            }
+        } else if (rightBoundary < k) {
+            // The N edge was reached first: mirror image of the case above, counting down from N-1.
+            for (int slot = insideNeighbors; slot < k; slot++) {
+                indices[slot] = (N - 1) - slot;
+            }
+        }
+        return indices;
+    }
+}
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java
new file mode 100644
index 000000000..30f6ad5b9
--- /dev/null
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
+
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+
+import java.util.function.LongFunction;
+
+/**
+ * Generate the vector for an ordinal in the DNN euclidean addressing scheme. Component
+ * {@code idx} of the vector for ordinal {@code value} is {@code value + (idx * scale)},
+ * narrowed to {@code float}.
+ *
+ * <p>Ordinals outside of {@code [0, N)} are rejected; {@link DNN_euclidean_v_wrap} is the
+ * variant which wraps them instead.</p>
+ */
+@ThreadSafeMapper
+@Categories(Category.experimental)
+public class DNN_euclidean_v implements LongFunction<float[]> {
+
+    private final int D;
+    private final long N;
+    private final double scale;
+
+    /**
+     * Create a vector function with a component spacing (scale) of 1.0.
+     *
+     * @param D
+     *     Number of dimensions in each vector
+     * @param N
+     *     The number of total vectors; valid ordinals are {@code 0} to {@code N-1}
+     */
+    public DNN_euclidean_v(int D, long N) {
+        this(D, N, 1.0d);
+    }
+
+    /**
+     * @param D
+     *     Number of dimensions in each vector
+     * @param N
+     *     The number of total vectors; valid ordinals are {@code 0} to {@code N-1}
+     * @param scale
+     *     The distance between successive components of one vector
+     * @throws IllegalArgumentException
+     *     if D is negative
+     */
+    public DNN_euclidean_v(int D, long N, double scale) {
+        if (D < 0) {
+            // fail at construction instead of with NegativeArraySizeException on every call
+            throw new IllegalArgumentException("D must not be negative, but was " + D);
+        }
+        this.D = D;
+        this.N = N;
+        this.scale = scale;
+    }
+
+    /**
+     * @param value
+     *     the ordinal of the vector to generate
+     * @return a new array of D components, starting at {@code value} and spaced by scale
+     * @throws RuntimeException
+     *     if the ordinal is negative or not smaller than the population N
+     */
+    @Override
+    public float[] apply(long value) {
+        if (value < 0 || value >= N) {
+            throw new RuntimeException("You can't generate a vector for ordinal " + value + " when your population is " + this.N);
+        }
+        float[] vector = new float[D];
+        for (int idx = 0; idx < vector.length; idx++) {
+            vector[idx] = (float) (value + (idx * scale));
+        }
+        return vector;
+    }
+}
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_series.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_series.java
new file mode 100644
index 000000000..5194bc0f8
--- /dev/null
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_series.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
+
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+
+import java.util.Arrays;
+import java.util.function.LongFunction;
+
+/**
+ * Generate k consecutive DNN euclidean vectors in one call. Row {@code i} of the result holds
+ * the components {@code value+i, value+i+1, ..., value+i+dimensions-1}, i.e. the vector for
+ * ordinal {@code value+i} with a component spacing of 1.
+ */
+@ThreadSafeMapper
+@Categories(Category.experimental)
+public class DNN_euclidean_v_series implements LongFunction<float[][]> {
+
+    private final int dimensions;
+    private final long population;
+    private final int k;
+
+    /**
+     * @param dimensions
+     *     Number of dimensions in each vector
+     * @param population
+     *     The number of total vectors; a series must end at or before this ordinal
+     * @param k
+     *     The number of consecutive vectors returned per call
+     * @throws IllegalArgumentException
+     *     if dimensions or k is negative
+     */
+    public DNN_euclidean_v_series(int dimensions, long population, int k) {
+        if (dimensions < 0) {
+            throw new IllegalArgumentException("dimensions must not be negative, but was " + dimensions);
+        }
+        if (k < 0) {
+            throw new IllegalArgumentException("k must not be negative, but was " + k);
+        }
+        this.dimensions = dimensions;
+        this.population = population;
+        this.k = k;
+    }
+
+    /**
+     * @param value
+     *     the ordinal of the first vector in the series
+     * @return k rows of {@code dimensions} components each, for ordinals value .. value+k-1
+     * @throws RuntimeException
+     *     if the ordinal is negative or the series would extend past the population
+     */
+    @Override
+    public float[][] apply(long value) {
+        // "value > population - k" is the overflow-safe spelling of "value + k > population"
+        if (value < 0 || value > population - k) {
+            throw new RuntimeException("You can't generate a vector for ordinal " + value + " when your population is " + this.population);
+        }
+        // All rows are windows over one shared ramp of values, each shifted by one position.
+        int capacity = dimensions + k;
+        float[] image = new float[capacity];
+        for (int imgidx = 0; imgidx < capacity; imgidx++) {
+            image[imgidx] = imgidx + value;
+        }
+        // Rows are assigned below, so only the outer array is allocated here.
+        float[][] vectorSeq = new float[k][];
+        for (int i = 0; i < vectorSeq.length; i++) {
+            vectorSeq[i] = Arrays.copyOfRange(image, i, i + dimensions);
+        }
+        return vectorSeq;
+    }
+}
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java
b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java
new file mode 100644
index 000000000..3cd8fb911
--- /dev/null
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
+
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+
+import java.util.function.LongFunction;
+
+/**
+ * Generate the vector for an ordinal in the DNN euclidean addressing scheme, wrapping the
+ * ordinal into {@code [0, N)} first. Component {@code idx} of the result is
+ * {@code wrapped + (idx * scale)}, narrowed to {@code float}.
+ */
+@ThreadSafeMapper
+@Categories(Category.experimental)
+public class DNN_euclidean_v_wrap implements LongFunction<float[]> {
+
+    private final int D;
+    private final long N;
+    private final double scale;
+
+    /**
+     * @param D
+     *     Number of dimensions in each vector
+     * @param N
+     *     The number of total vectors; ordinals are wrapped modulo this value
+     * @param scale
+     *     The distance between successive components of one vector
+     * @throws IllegalArgumentException
+     *     if D is negative or N is less than 1
+     */
+    public DNN_euclidean_v_wrap(int D, long N, double scale) {
+        if (D < 0) {
+            throw new IllegalArgumentException("D must not be negative, but was " + D);
+        }
+        if (N < 1) {
+            // a zero population would otherwise surface as ArithmeticException on every call
+            throw new IllegalArgumentException("N must be at least 1, but was " + N);
+        }
+        this.D = D;
+        this.N = N;
+        this.scale = scale;
+    }
+
+    /**
+     * Create a wrapping vector function with a component spacing (scale) of 1.0.
+     *
+     * @param D
+     *     Number of dimensions in each vector
+     * @param N
+     *     The number of total vectors; ordinals are wrapped modulo this value
+     */
+    public DNN_euclidean_v_wrap(int D, long N) {
+        this(D, N, 1.0d);
+    }
+
+    /**
+     * @param value
+     *     any ordinal; it is reduced into {@code [0, N)} before the vector is built
+     * @return a new array of D components, starting at the wrapped ordinal and spaced by scale
+     */
+    @Override
+    public float[] apply(long value) {
+        // floorMod, not %: the remainder operator keeps the sign of a negative ordinal,
+        // which would leave it outside of [0, N) instead of wrapping it.
+        final long ordinal = Math.floorMod(value, N);
+        float[] vector = new float[D];
+        for (int idx = 0; idx < vector.length; idx++) {
+            vector[idx] = (float) (ordinal + (idx * scale));
+        }
+        return vector;
+    }
+}
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/package-info.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/package-info.java
new file mode 100644
index 000000000..d72a81862
--- /dev/null
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/package-info.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This is an experimental package based on the DNN or "Das/Direct Nearest Neighbor" method.
+ */
+package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java
new file mode 100644
index 000000000..1a0e75084
--- /dev/null
+++ b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
+
+import org.junit.jupiter.api.Test;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Pins the exact ranked output of DNN_euclidean_neighbors for small populations.
+ * Constructor arguments are (k, N, D); each expected array lists the query index first,
+ * followed by its neighbors in rank order.
+ */
+class DNNEuclideanNeighborsTest {
+
+    /**
+     * Odd k: interior queries (2..5) get a symmetric neighborhood, while queries at either
+     * edge (0, 1 and 6) are completed from the boundary inward.
+     */
+    @Test
+    public void test_DNN_K3_N7_D5() {
+        DNN_euclidean_neighbors idxF = new DNN_euclidean_neighbors(3, 7, 5);
+        assertThat(idxF.apply(0)).isEqualTo(new int[]{0,1,2});
+        assertThat(idxF.apply(1)).isEqualTo(new int[]{1,0,2});
+        assertThat(idxF.apply(2)).isEqualTo(new int[]{2,1,3});
+        assertThat(idxF.apply(3)).isEqualTo(new int[]{3,2,4});
+        assertThat(idxF.apply(4)).isEqualTo(new int[]{4,3,5});
+        assertThat(idxF.apply(5)).isEqualTo(new int[]{5,4,6});
+        assertThat(idxF.apply(6)).isEqualTo(new int[]{6,5,4});
+    }
+
+    /**
+     * Even k: the unpaired last neighbor is taken from the lower side (e.g. 2 -> 2,1,3,0)
+     * unless the zero boundary has already been reached.
+     */
+    @Test
+    public void test_DNN_k4_n7_d5() {
+        DNN_euclidean_neighbors idxF = new DNN_euclidean_neighbors(4, 7, 5);
+        assertThat(idxF.apply(0)).isEqualTo(new int[]{0,1,2,3});
+        assertThat(idxF.apply(1)).isEqualTo(new int[]{1,0,2,3});
+        assertThat(idxF.apply(2)).isEqualTo(new int[]{2,1,3,0});
+        assertThat(idxF.apply(3)).isEqualTo(new int[]{3,2,4,1});
+        assertThat(idxF.apply(4)).isEqualTo(new int[]{4,3,5,2});
+        assertThat(idxF.apply(5)).isEqualTo(new int[]{5,4,6,3});
+        assertThat(idxF.apply(6)).isEqualTo(new int[]{6,5,4,3});
+    }
+
+    /** The last index of an even-sized population: all neighbors lie below the query. */
+    @Test
+    public void test_DNN_k6_n100_d10() {
+        DNN_euclidean_neighbors idxF = new DNN_euclidean_neighbors(6, 100, 10);
+        assertThat(idxF.apply(99)).isEqualTo(new int[]{99,98,97,96,95,94});
+    }
+
+    /**
+     * Approaching the upper boundary of an odd-sized population. The query 101 is outside of
+     * the population and is clamped to N-1 = 100, so it yields the same result as 100.
+     */
+    @Test
+    public void test_DNN_K6_N101_D10() {
+        DNN_euclidean_neighbors idxF = new DNN_euclidean_neighbors(6, 101, 10);
+        assertThat(idxF.apply(101)).isEqualTo(new int[]{100,99,98,97,96,95});
+        assertThat(idxF.apply(100)).isEqualTo(new int[]{100,99,98,97,96,95});
+        assertThat(idxF.apply(99)).isEqualTo(new int[]{99,98,100,97,96,95});
+        assertThat(idxF.apply(98)).isEqualTo(new int[]{98,97,99,96,100,95});
+    }
+
+}
diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java
new file mode 100644
index 000000000..d35d2cd03
--- /dev/null
+++ b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
+
+import org.junit.jupiter.api.Test;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Covers the three DNN euclidean vector generators: the bounded form (DNN_euclidean_v),
+ * the wrapping form (DNN_euclidean_v_wrap) and the k-at-a-time form (DNN_euclidean_v_series).
+ */
+class DNNEuclideanVTest {
+
+    /**
+     * Arguments are (D, N). With the default scale of 1.0, ordinal 3 yields the components
+     * 3..7; ordinal 7 equals the population size and must be rejected.
+     */
+    @Test
+    public void testBasicVectors() {
+        DNN_euclidean_v vf = new DNN_euclidean_v(5, 7);
+        assertThat(vf.apply(3L)).isEqualTo(new float[]{3f,4f,5f,6f,7f});
+        assertThrows(RuntimeException.class, () -> vf.apply(7));
+    }
+
+    /**
+     * Arguments are (D, N, scale). Component idx is value + idx * scale, so a scale of 3.0
+     * spaces the components three apart, starting at the ordinal itself.
+     */
+    @Test
+    public void testBasicVectorsScaled() {
+        DNN_euclidean_v vf = new DNN_euclidean_v(5, 7, 3.0);
+        assertThat(vf.apply(3L)).isEqualTo(new float[]{3f,6f,9f,12f,15f});
+        assertThrows(RuntimeException.class, () -> vf.apply(7));
+    }
+
+
+    /**
+     * The wrapping form reduces the ordinal modulo N instead of rejecting it, so ordinal 7
+     * with a population of 7 produces the same vector as ordinal 0.
+     */
+    @Test
+    public void testWrappingVectors() {
+        DNN_euclidean_v_wrap vf = new DNN_euclidean_v_wrap(5, 7);
+        assertThat(vf.apply(3L)).isEqualTo(new float[]{3f,4f,5f,6f,7f});
+        assertThat(vf.apply(0L)).isEqualTo(new float[]{0f,1f,2f,3f,4f});
+        assertThat(vf.apply(7L)).isEqualTo(new float[]{0f,1f,2f,3f,4f});
+    }
+
+    /**
+     * Arguments are (dimensions, population, k). One call returns k = 2 consecutive vectors
+     * starting at ordinal 7; a series starting at ordinal 10 would run past the population
+     * of 10 and must be rejected.
+     */
+    @Test
+    public void testContiguousVectors() {
+        DNN_euclidean_v_series vf = new DNN_euclidean_v_series(4,10,2);
+        assertThat(vf.apply(7L)).isEqualTo(
+            new float[][] {
+                {7f,8f,9f,10f},
+                {8f,9f,10f,11f}
+            }
+        );
+
+        assertThrows(RuntimeException.class, () -> vf.apply(10));
+
+    }
+
+}