From bcdcb7c71070c90f48953d50cf6e1dd0fb2d933d Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Thu, 7 Dec 2023 20:59:07 -0600 Subject: [PATCH 01/11] expose ToDouble ctor --- .../library/basics/shared/conversions/from_long/ToDouble.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/conversions/from_long/ToDouble.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/conversions/from_long/ToDouble.java index df955632a..351a520df 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/conversions/from_long/ToDouble.java +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/conversions/from_long/ToDouble.java @@ -39,7 +39,7 @@ public class ToDouble implements LongToDoubleFunction { private final LongToDoubleFunction func; - ToDouble(Object func) { + public ToDouble(Object func) { if (func instanceof Number number) { final double aDouble = number.doubleValue(); this.func = l -> aDouble; From f925ab21ae6e8fcb6ab5fa348780f8017d5066bf Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Thu, 7 Dec 2023 21:06:40 -0600 Subject: [PATCH 02/11] add DNN starter functions for euclidean distance --- .../vectors/dnn/DNN_euclidean_neighbors.java | 96 +++++++++++++++++++ .../shared/vectors/dnn/DNN_euclidean_v.java | 48 ++++++++++ .../vectors/dnn/DNN_euclidean_v_series.java | 57 +++++++++++ .../vectors/dnn/DNN_euclidean_v_wrap.java | 46 +++++++++ .../dnn/DNNEuclideanNeighborsTest.java | 62 ++++++++++++ .../shared/vectors/dnn/DNNEuclideanVTest.java | 55 +++++++++++ 6 files changed, 364 insertions(+) create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java create mode 100644 
virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_series.java create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java create mode 100644 virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java create mode 100644 virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java new file mode 100644 index 000000000..e2ddb1d97 --- /dev/null +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn; + +import java.util.function.IntFunction; + +/** + * Compute the indices of the neighbors of a given v using DNN mapping. + * To avoid ambiguity on equidistant neighbors, odd neighborhood sizes are preferred. 
+ */ +public class DNN_euclidean_neighbors implements IntFunction { + + private final int D; + private final int N; + private final int k; + + /** + * @param k + * The size of neighborhood + * @param N + * The number of total vectors, necessary for boundary conditions of defined vector + * @param D + * Number of dimensions in each vector + */ + public DNN_euclidean_neighbors(int k, int N, int D) { + this.D = D; + this.N = N; + this.k = k; + } + + /** + *

Compute neighbor indices with a (hopefully) fast implementation. There are surely some simplifications to be + * made in the functions below, but even in the current for it avoids a significant number of branches.

+ * + *

This code is not as simple as it could be. It was built more for speed than simplicity since it will be a hot + * spot for testing. The unit tests for this are essential.

+ * + *

The method is thus: + *

    + *
  1. Determine the sections of the neighborhood which aren't subject to boundary conditions, + * starting at the central vector (the index of the query vector).
  2. + *
  3. Layer these in rank order using closed-form index functions.
  4. + *
  5. Layer in any zero-boundary values which were deferred from above.
  6. + *
  7. Layer in an N-boundary values which were deferred above.
  8. + *
+ *

+ * + *

The boundary conditions for zero and N are mutually exclusive. Even though there is some amount of + * ranging and bookkeeping in this approach, it should make the general case more stable, especially + * when there are many dimensions and many neighbors. + *

+ * + * @param value + * the function argument, or the index of the query vector for the DNN addressing scheme + * @return A ranked neighborhood of vector indices, using the DNN addressing scheme + */ + @Override + public int[] apply(int value) { + int[] indices = new int[k]; + + int leftBoundary = (value << 1) + 1; + int rightBoundary = ((N - (value + 1)) << 1) + 1; + int unbounded = Math.min(k, Math.min(leftBoundary, rightBoundary)); + for (int i = 0; i < unbounded; i++) { + // Leave this here as an explainer, please + // int sign = ((((i + 1) & 1) << 1) - 1); + // int offset = ((i + 1)>>1); + // offset *= sign; + // int v = value + (((((i + 1) & 1) << 1) - 1) * ((i + 1) >> 1)); + indices[i] = value + (((((i + 1) & 1) << 1) - 1) * ((i + 1) >> 1)); + } + int leftFill = Math.max(0, k - leftBoundary); + for (int i = 0; i < leftFill; i++) { + indices[unbounded + i] = unbounded + i; + } + int rightFill = Math.max(0, k - rightBoundary); + for (int i = 0; i < rightFill; i++) { + indices[unbounded + i] = (N - 1) - (unbounded + i); + } + return indices; + } +} diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java new file mode 100644 index 000000000..1288672e1 --- /dev/null +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn; + +import io.nosqlbench.virtdata.api.annotations.Categories; +import io.nosqlbench.virtdata.api.annotations.Category; +import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper; + +import java.util.function.LongFunction; + +@ThreadSafeMapper +@Categories(Category.experimental) +public class DNN_euclidean_v implements LongFunction { + + private final int D; + private final long N; + + public DNN_euclidean_v(int D, long N) { + this.D = D; + this.N = N; + } + + @Override + public float[] apply(long value) { + if (value>= N) { + throw new RuntimeException("You can't generate a vector for ordinal " + value + " when your population is " + this.N); + } + float[] vector = new float[D]; + for (int idx = 0; idx < vector.length; idx++) { + vector[idx]= (float)idx+value; + } + return vector; + } +} diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_series.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_series.java new file mode 100644 index 000000000..5194bc0f8 --- /dev/null +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_series.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn; + +import io.nosqlbench.virtdata.api.annotations.Categories; +import io.nosqlbench.virtdata.api.annotations.Category; +import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper; + +import java.util.Arrays; +import java.util.function.LongFunction; + +@ThreadSafeMapper +@Categories(Category.experimental) +public class DNN_euclidean_v_series implements LongFunction { + + private final int dimensions; + private final long population; + private final int k; + + public DNN_euclidean_v_series(int dimensions, long population, int k) { + this.dimensions = dimensions; + this.population = population; + this.k = k; + } + + @Override + public float[][] apply(long value) { + long nextInterval = value + k; + if (nextInterval > population) { + throw new RuntimeException("You can't generate a vector for ordinal " + value + " when your population is " + this.population); + } + int capacity = dimensions + k; + float[] image = new float[capacity]; + for (int imgidx = 0; imgidx < capacity; imgidx++) { + image[imgidx]=imgidx+value; + } + float[][] vectorSeq = new float[k][dimensions]; + for (int i = 0; i < vectorSeq.length; i++) { + vectorSeq[i]=Arrays.copyOfRange(image,i,i+dimensions); + } + return vectorSeq; + } +} diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java new file mode 100644 index 
000000000..7fe670ed1 --- /dev/null +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn; + +import io.nosqlbench.virtdata.api.annotations.Categories; +import io.nosqlbench.virtdata.api.annotations.Category; +import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper; + +import java.util.function.LongFunction; + +@ThreadSafeMapper +@Categories(Category.experimental) +public class DNN_euclidean_v_wrap implements LongFunction { + + private final int dimensions; + private final long population; + + public DNN_euclidean_v_wrap(int dimensions, long population) { + this.dimensions = dimensions; + this.population = population; + } + + @Override + public float[] apply(long value) { + value = value % population; + float[] vector = new float[dimensions]; + for (int idx = 0; idx < vector.length; idx++) { + vector[idx]= (float)idx+value; + } + return vector; + } +} diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java new file mode 100644 index 000000000..6f31ec982 --- /dev/null +++ 
b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +class DNNEuclideanNeighborsTest { + + @Test + public void test_DNN_k3_p7_d5() { + DNN_euclidean_neighbors idxF = new DNN_euclidean_neighbors(3, 7, 5); + assertThat(idxF.apply(0)).isEqualTo(new int[]{0,1,2}); + assertThat(idxF.apply(1)).isEqualTo(new int[]{1,0,2}); + assertThat(idxF.apply(2)).isEqualTo(new int[]{2,1,3}); + assertThat(idxF.apply(3)).isEqualTo(new int[]{3,2,4}); + assertThat(idxF.apply(4)).isEqualTo(new int[]{4,3,5}); + assertThat(idxF.apply(5)).isEqualTo(new int[]{5,4,6}); + assertThat(idxF.apply(6)).isEqualTo(new int[]{6,5,4}); + } + + @Test + public void test_DNN_k4_n7_d5() { + DNN_euclidean_neighbors idxF = new DNN_euclidean_neighbors(4, 7, 5); + assertThat(idxF.apply(0)).isEqualTo(new int[]{0,1,2,3}); + assertThat(idxF.apply(1)).isEqualTo(new int[]{1,0,2,3}); + assertThat(idxF.apply(2)).isEqualTo(new int[]{2,1,3,0}); + assertThat(idxF.apply(3)).isEqualTo(new int[]{3,2,4,1}); + assertThat(idxF.apply(4)).isEqualTo(new int[]{4,3,5,2}); + assertThat(idxF.apply(5)).isEqualTo(new int[]{5,4,6,3}); + assertThat(idxF.apply(6)).isEqualTo(new 
int[]{6,5,4,3}); + } + + @Test + public void test_DNN_k6_n100_d10() { + DNN_euclidean_neighbors idxF = new DNN_euclidean_neighbors(6, 100, 10); + assertThat(idxF.apply(99)).isEqualTo(new int[]{99,98,97,96,95,94}); + } + + @Test + public void test_DNN_k6_n101_d10() { + DNN_euclidean_neighbors idxF = new DNN_euclidean_neighbors(6, 101, 10); + assertThat(idxF.apply(100)).isEqualTo(new int[]{100,99,98,97,96,95}); + assertThat(idxF.apply(99)).isEqualTo(new int[]{99,98,100,97,96,95}); + } + +} diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java new file mode 100644 index 000000000..e62ac0120 --- /dev/null +++ b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.*; + +class DNNEuclideanVTest { + + @Test + public void testBasicVectors() { + DNN_euclidean_v vf = new DNN_euclidean_v(5, 7); + assertThat(vf.apply(3L)).isEqualTo(new float[]{3f,4f,5f,6f,7f}); + assertThrows(RuntimeException.class, () -> vf.apply(7)); + } + + @Test + public void testWrappingVectors() { + DNN_euclidean_v_wrap vf = new DNN_euclidean_v_wrap(5, 7); + assertThat(vf.apply(3L)).isEqualTo(new float[]{3f,4f,5f,6f,7f}); + assertThat(vf.apply(0L)).isEqualTo(new float[]{0f,1f,2f,3f,4f}); + assertThat(vf.apply(7L)).isEqualTo(new float[]{0f,1f,2f,3f,4f}); + } + + @Test + public void testContiguousVectors() { + DNN_euclidean_v_series vf = new DNN_euclidean_v_series(4,10,2); + assertThat(vf.apply(7L)).isEqualTo( + new float[][] { + {7f,8f,9f,10f}, + {8f,9f,10f,11f} + } + ); + + assertThrows(RuntimeException.class, () -> vf.apply(10)); + + } + +} From 20c39350bf88e811786c5db70e04216bf5ee8733 Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Thu, 7 Dec 2023 20:59:18 -0600 Subject: [PATCH 03/11] add package info for provenance --- .../shared/vectors/dnn/package-info.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/package-info.java diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/package-info.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/package-info.java new file mode 100644 index 000000000..d72a81862 --- /dev/null +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache 
License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This is an experimental package based on the DNN or "Das/Direct Nearest Neighbor" method. + */ +package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn; From 8294ff320f8800a5aabfb6367adebea9728d7d59 Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Thu, 7 Dec 2023 21:18:17 -0600 Subject: [PATCH 04/11] minor doc updates --- .../basics/shared/vectors/dnn/DNN_euclidean_neighbors.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java index e2ddb1d97..34918614a 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java @@ -55,7 +55,7 @@ public class DNN_euclidean_neighbors implements IntFunction { * starting at the central vector (the index of the query vector). *
  • Layer these in rank order using closed-form index functions.
  • *
  • Layer in any zero-boundary values which were deferred from above.
  • - *
  • Layer in an N-boundary values which were deferred above.
  • + *
  • Layer in any N-boundary values which were deferred from above.
  • * *

    * @@ -77,8 +77,8 @@ public class DNN_euclidean_neighbors implements IntFunction { int unbounded = Math.min(k, Math.min(leftBoundary, rightBoundary)); for (int i = 0; i < unbounded; i++) { // Leave this here as an explainer, please - // int sign = ((((i + 1) & 1) << 1) - 1); - // int offset = ((i + 1)>>1); + // int sign = ((((i + 1) & 1) << 1) - 1); // this gives us -1 or +1 depending on odd or even, and is inverted + // int offset = ((i + 1)>>1); // half rounded down, shifted biased by 1 // offset *= sign; // int v = value + (((((i + 1) & 1) << 1) - 1) * ((i + 1) >> 1)); indices[i] = value + (((((i + 1) & 1) << 1) - 1) * ((i + 1) >> 1)); From 19a624418b6dde75e3bbff61b32472228ad5a289 Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Thu, 7 Dec 2023 21:21:12 -0600 Subject: [PATCH 05/11] ninja phrasing fix --- .../basics/shared/vectors/dnn/DNN_euclidean_neighbors.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java index 34918614a..5b3f6c946 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java @@ -78,7 +78,7 @@ public class DNN_euclidean_neighbors implements IntFunction { for (int i = 0; i < unbounded; i++) { // Leave this here as an explainer, please // int sign = ((((i + 1) & 1) << 1) - 1); // this gives us -1 or +1 depending on odd or even, and is inverted - // int offset = ((i + 1)>>1); // half rounded down, shifted biased by 1 + // int offset = ((i + 1)>>1); // half rounded down, shifted by 1 // offset *= sign; // int v = value + (((((i + 1) & 1) << 1) - 1) * ((i + 1) >> 1)); indices[i] = value + 
(((((i + 1) & 1) << 1) - 1) * ((i + 1) >> 1)); From a3476059110b8ec49630db5ab65a60ee51cb0073 Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Thu, 7 Dec 2023 21:23:06 -0600 Subject: [PATCH 06/11] ninja phrasing fix --- .../basics/shared/vectors/dnn/DNN_euclidean_neighbors.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java index 5b3f6c946..7a3803d68 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java @@ -44,7 +44,7 @@ public class DNN_euclidean_neighbors implements IntFunction { /** *

    Compute neighbor indices with a (hopefully) fast implementation. There are surely some simplifications to be - * made in the functions below, but even in the current for it avoids a significant number of branches.

    + * made in the functions below, but even in the current form it avoids a significant number of branches.

    * *

    This code is not as simple as it could be. It was built more for speed than simplicity since it will be a hot * spot for testing. The unit tests for this are essential.

    From 83812ef8d3a066bd57e6fd267830d24254b0b592 Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Thu, 7 Dec 2023 21:23:50 -0600 Subject: [PATCH 07/11] add missing annotations --- .../basics/shared/vectors/dnn/DNN_euclidean_neighbors.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java index 7a3803d68..cf9f86243 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java @@ -16,12 +16,18 @@ package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn; +import io.nosqlbench.virtdata.api.annotations.Categories; +import io.nosqlbench.virtdata.api.annotations.Category; +import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper; + import java.util.function.IntFunction; /** * Compute the indices of the neighbors of a given v using DNN mapping. * To avoid ambiguity on equidistant neighbors, odd neighborhood sizes are preferred. 
*/ +@ThreadSafeMapper +@Categories(Category.experimental) public class DNN_euclidean_neighbors implements IntFunction { private final int D; From 2db38fb0419701357fe8ddd03b40ae8e5211255d Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Fri, 8 Dec 2023 11:29:39 -0600 Subject: [PATCH 08/11] added optional scale factor to v --- .../basics/shared/vectors/dnn/DNN_euclidean_v.java | 8 +++++++- .../basics/shared/vectors/dnn/DNNEuclideanVTest.java | 8 ++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java index 1288672e1..4ca94cd58 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java @@ -28,10 +28,16 @@ public class DNN_euclidean_v implements LongFunction { private final int D; private final long N; + private final double scale; public DNN_euclidean_v(int D, long N) { + this(D,N,1.0d); + } + + public DNN_euclidean_v(int D, long N, double scale) { this.D = D; this.N = N; + this.scale = scale; } @Override @@ -41,7 +47,7 @@ public class DNN_euclidean_v implements LongFunction { } float[] vector = new float[D]; for (int idx = 0; idx < vector.length; idx++) { - vector[idx]= (float)idx+value; + vector[idx]= (float)((idx+value)*scale); } return vector; } diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java index e62ac0120..b49b1edc9 100644 --- a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java +++ 
b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java @@ -30,6 +30,14 @@ class DNNEuclideanVTest { assertThrows(RuntimeException.class, () -> vf.apply(7)); } + @Test + public void testBasicVectorsScaled() { + DNN_euclidean_v vf = new DNN_euclidean_v(5, 7, 3.0); + assertThat(vf.apply(3L)).isEqualTo(new float[]{9f,12f,15f,18f,21f}); + assertThrows(RuntimeException.class, () -> vf.apply(7)); + } + + @Test public void testWrappingVectors() { DNN_euclidean_v_wrap vf = new DNN_euclidean_v_wrap(5, 7); From 799ff4846e3c9c9417c01be9671050424981088a Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Fri, 8 Dec 2023 11:57:55 -0600 Subject: [PATCH 09/11] added scale to v_wrap --- .../vectors/dnn/DNN_euclidean_v_wrap.java | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java index 7fe670ed1..c52b69297 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java @@ -26,20 +26,26 @@ import java.util.function.LongFunction; @Categories(Category.experimental) public class DNN_euclidean_v_wrap implements LongFunction { - private final int dimensions; - private final long population; + private final int D; + private final long N; + private final double scale; - public DNN_euclidean_v_wrap(int dimensions, long population) { - this.dimensions = dimensions; - this.population = population; + public DNN_euclidean_v_wrap(int D, long N, double scale) { + this.D = D; + this.N = N; + this.scale = scale; + } + + public DNN_euclidean_v_wrap(int D, long N) { + this(D,N,1.0d); } 
@Override public float[] apply(long value) { - value = value % population; - float[] vector = new float[dimensions]; + value = value % N; + float[] vector = new float[D]; for (int idx = 0; idx < vector.length; idx++) { - vector[idx]= (float)idx+value; + vector[idx]= (float)((idx+value)*scale); } return vector; } From 852f61ef6122ea0362b66fbffbe38b300ed864b7 Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Fri, 8 Dec 2023 12:41:15 -0600 Subject: [PATCH 10/11] adjusted scale factors for DNN vectors --- .../library/basics/shared/vectors/dnn/DNN_euclidean_v.java | 2 +- .../library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java | 2 +- .../library/basics/shared/vectors/dnn/DNNEuclideanVTest.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java index 4ca94cd58..30f6ad5b9 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java @@ -47,7 +47,7 @@ public class DNN_euclidean_v implements LongFunction { } float[] vector = new float[D]; for (int idx = 0; idx < vector.length; idx++) { - vector[idx]= (float)((idx+value)*scale); + vector[idx]= (float)(value+(idx*scale)); } return vector; } diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java index c52b69297..3cd8fb911 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java +++ 
b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java @@ -45,7 +45,7 @@ public class DNN_euclidean_v_wrap implements LongFunction { value = value % N; float[] vector = new float[D]; for (int idx = 0; idx < vector.length; idx++) { - vector[idx]= (float)((idx+value)*scale); + vector[idx]= (float)(value+(idx*scale)); } return vector; } diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java index b49b1edc9..d35d2cd03 100644 --- a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java +++ b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java @@ -33,7 +33,7 @@ class DNNEuclideanVTest { @Test public void testBasicVectorsScaled() { DNN_euclidean_v vf = new DNN_euclidean_v(5, 7, 3.0); - assertThat(vf.apply(3L)).isEqualTo(new float[]{9f,12f,15f,18f,21f}); + assertThat(vf.apply(3L)).isEqualTo(new float[]{3f,6f,9f,12f,15f}); assertThrows(RuntimeException.class, () -> vf.apply(7)); } From 1b2c9271d60048241691987fafc96fb2af8592aa Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Fri, 8 Dec 2023 14:04:49 -0600 Subject: [PATCH 11/11] add outside range corrections to neighborhood binding --- .../shared/vectors/dnn/DNN_euclidean_neighbors.java | 10 ++++++---- .../shared/vectors/dnn/DNNEuclideanNeighborsTest.java | 6 ++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java index cf9f86243..bfdbf9d7c 100644 --- 
a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java @@ -76,12 +76,13 @@ public class DNN_euclidean_neighbors implements IntFunction { */ @Override public int[] apply(int value) { + value = Math.min(Math.max(0,value),N-1); int[] indices = new int[k]; int leftBoundary = (value << 1) + 1; int rightBoundary = ((N - (value + 1)) << 1) + 1; - int unbounded = Math.min(k, Math.min(leftBoundary, rightBoundary)); - for (int i = 0; i < unbounded; i++) { + int insideNeighbors = Math.min(k, Math.min(leftBoundary, rightBoundary)); + for (int i = 0; i < insideNeighbors; i++) { // Leave this here as an explainer, please // int sign = ((((i + 1) & 1) << 1) - 1); // this gives us -1 or +1 depending on odd or even, and is inverted // int offset = ((i + 1)>>1); // half rounded down, shifted by 1 @@ -90,12 +91,13 @@ public class DNN_euclidean_neighbors implements IntFunction { indices[i] = value + (((((i + 1) & 1) << 1) - 1) * ((i + 1) >> 1)); } int leftFill = Math.max(0, k - leftBoundary); + // TODO: Evaluate optimization from Dave2Wave for reducing additions for (int i = 0; i < leftFill; i++) { - indices[unbounded + i] = unbounded + i; + indices[insideNeighbors + i] = insideNeighbors + i; } int rightFill = Math.max(0, k - rightBoundary); for (int i = 0; i < rightFill; i++) { - indices[unbounded + i] = (N - 1) - (unbounded + i); + indices[insideNeighbors + i] = (N - 1) - (insideNeighbors + i); } return indices; } diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java index 6f31ec982..1a0e75084 100644 --- 
a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java +++ b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java @@ -23,7 +23,7 @@ import static org.assertj.core.api.Assertions.assertThat; class DNNEuclideanNeighborsTest { @Test - public void test_DNN_k3_p7_d5() { + public void test_DNN_K3_N7_D5() { DNN_euclidean_neighbors idxF = new DNN_euclidean_neighbors(3, 7, 5); assertThat(idxF.apply(0)).isEqualTo(new int[]{0,1,2}); assertThat(idxF.apply(1)).isEqualTo(new int[]{1,0,2}); @@ -53,10 +53,12 @@ class DNNEuclideanNeighborsTest { } @Test - public void test_DNN_k6_n101_d10() { + public void test_DNN_K6_N101_D10() { DNN_euclidean_neighbors idxF = new DNN_euclidean_neighbors(6, 101, 10); + assertThat(idxF.apply(101)).isEqualTo(new int[]{100,99,98,97,96,95}); assertThat(idxF.apply(100)).isEqualTo(new int[]{100,99,98,97,96,95}); assertThat(idxF.apply(99)).isEqualTo(new int[]{99,98,100,97,96,95}); + assertThat(idxF.apply(98)).isEqualTo(new int[]{98,97,99,96,100,95}); } }