From bcdcb7c71070c90f48953d50cf6e1dd0fb2d933d Mon Sep 17 00:00:00 2001
From: Jonathan Shook
Date: Thu, 7 Dec 2023 20:59:07 -0600
Subject: [PATCH 01/11] expose ToDouble ctor
---
.../library/basics/shared/conversions/from_long/ToDouble.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/conversions/from_long/ToDouble.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/conversions/from_long/ToDouble.java
index df955632a..351a520df 100644
--- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/conversions/from_long/ToDouble.java
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/conversions/from_long/ToDouble.java
@@ -39,7 +39,7 @@ public class ToDouble implements LongToDoubleFunction {
private final LongToDoubleFunction func;
- ToDouble(Object func) {
+ public ToDouble(Object func) {
if (func instanceof Number number) {
final double aDouble = number.doubleValue();
this.func = l -> aDouble;
From f925ab21ae6e8fcb6ab5fa348780f8017d5066bf Mon Sep 17 00:00:00 2001
From: Jonathan Shook
Date: Thu, 7 Dec 2023 21:06:40 -0600
Subject: [PATCH 02/11] add DNN starter functions for euclidean distance
---
.../vectors/dnn/DNN_euclidean_neighbors.java | 96 +++++++++++++++++++
.../shared/vectors/dnn/DNN_euclidean_v.java | 48 ++++++++++
.../vectors/dnn/DNN_euclidean_v_series.java | 57 +++++++++++
.../vectors/dnn/DNN_euclidean_v_wrap.java | 46 +++++++++
.../dnn/DNNEuclideanNeighborsTest.java | 62 ++++++++++++
.../shared/vectors/dnn/DNNEuclideanVTest.java | 55 +++++++++++
6 files changed, 364 insertions(+)
create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java
create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_series.java
create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java
create mode 100644 virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java
create mode 100644 virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
new file mode 100644
index 000000000..e2ddb1d97
--- /dev/null
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
+
+import java.util.function.IntFunction;
+
+/**
+ * Compute the indices of the neighbors of a given v using DNN mapping.
+ * To avoid ambiguity on equidistant neighbors, odd neighborhood sizes are preferred.
+ */
+public class DNN_euclidean_neighbors implements IntFunction {
+
+ private final int D;
+ private final int N;
+ private final int k;
+
+ /**
+ * @param k
+ * The size of the neighborhood
+ * @param N
+ * The total number of vectors, needed to apply boundary conditions at the upper end of the index range
+ * @param D
+ * Number of dimensions in each vector
+ */
+ public DNN_euclidean_neighbors(int k, int N, int D) {
+ this.D = D;
+ this.N = N;
+ this.k = k;
+ }
+
+ /**
+ * Compute neighbor indices with a (hopefully) fast implementation. There are surely some simplifications to be
+ * made in the functions below, but even in the current for it avoids a significant number of branches.
+ *
+ * This code is not as simple as it could be. It was built more for speed than simplicity since it will be a hot
+ * spot for testing. The unit tests for this are essential.
+ *
+ * The method is thus:
+ *
+ * - Determine the sections of the neighborhood which aren't subject to boundary conditions,
+ * starting at the central vector (the index of the query vector).
+ * - Layer these in rank order using closed-form index functions.
+ * - Layer in any zero-boundary values which were deferred from above.
+ * - Layer in an N-boundary values which were deferred above.
+ *
+ *
+ *
+ * The boundary conditions for zero and N are mutually exclusive (provided k <= N). Even though there is some amount of
+ * ranging and bookkeeping in this approach, it should make the general case more stable, especially
+ * when there are many dimensions and many neighbors.
+ *
+ *
+ * @param value
+ * the function argument, or the index of the query vector for the DNN addressing scheme
+ * @return A ranked neighborhood of vector indices, using the DNN addressing scheme
+ */
+ @Override
+ public int[] apply(int value) {
+ int[] indices = new int[k];
+
+ int leftBoundary = (value << 1) + 1;
+ int rightBoundary = ((N - (value + 1)) << 1) + 1;
+ int unbounded = Math.min(k, Math.min(leftBoundary, rightBoundary));
+ for (int i = 0; i < unbounded; i++) {
+ // Leave this here as an explainer, please
+ // int sign = ((((i + 1) & 1) << 1) - 1);
+ // int offset = ((i + 1)>>1);
+ // offset *= sign;
+ // int v = value + (((((i + 1) & 1) << 1) - 1) * ((i + 1) >> 1));
+ indices[i] = value + (((((i + 1) & 1) << 1) - 1) * ((i + 1) >> 1));
+ }
+ int leftFill = Math.max(0, k - leftBoundary);
+ for (int i = 0; i < leftFill; i++) {
+ indices[unbounded + i] = unbounded + i;
+ }
+ int rightFill = Math.max(0, k - rightBoundary);
+ for (int i = 0; i < rightFill; i++) {
+ indices[unbounded + i] = (N - 1) - (unbounded + i);
+ }
+ return indices;
+ }
+}
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java
new file mode 100644
index 000000000..1288672e1
--- /dev/null
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
+
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+
+import java.util.function.LongFunction;
+
+@ThreadSafeMapper
+@Categories(Category.experimental)
+public class DNN_euclidean_v implements LongFunction {
+
+ private final int D;
+ private final long N;
+
+ public DNN_euclidean_v(int D, long N) {
+ this.D = D;
+ this.N = N;
+ }
+
+ @Override
+ public float[] apply(long value) {
+ if (value>= N) {
+ throw new RuntimeException("You can't generate a vector for ordinal " + value + " when your population is " + this.N);
+ }
+ float[] vector = new float[D];
+ for (int idx = 0; idx < vector.length; idx++) {
+ vector[idx]= (float)idx+value;
+ }
+ return vector;
+ }
+}
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_series.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_series.java
new file mode 100644
index 000000000..5194bc0f8
--- /dev/null
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_series.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
+
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+
+import java.util.Arrays;
+import java.util.function.LongFunction;
+
+@ThreadSafeMapper
+@Categories(Category.experimental)
+public class DNN_euclidean_v_series implements LongFunction {
+
+ private final int dimensions;
+ private final long population;
+ private final int k;
+
+ public DNN_euclidean_v_series(int dimensions, long population, int k) {
+ this.dimensions = dimensions;
+ this.population = population;
+ this.k = k;
+ }
+
+ @Override
+ public float[][] apply(long value) {
+ long nextInterval = value + k;
+ if (nextInterval > population) {
+ throw new RuntimeException("You can't generate a vector for ordinal " + value + " when your population is " + this.population);
+ }
+ int capacity = dimensions + k;
+ float[] image = new float[capacity];
+ for (int imgidx = 0; imgidx < capacity; imgidx++) {
+ image[imgidx]=imgidx+value;
+ }
+ float[][] vectorSeq = new float[k][dimensions];
+ for (int i = 0; i < vectorSeq.length; i++) {
+ vectorSeq[i]=Arrays.copyOfRange(image,i,i+dimensions);
+ }
+ return vectorSeq;
+ }
+}
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java
new file mode 100644
index 000000000..7fe670ed1
--- /dev/null
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
+
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+
+import java.util.function.LongFunction;
+
+@ThreadSafeMapper
+@Categories(Category.experimental)
+public class DNN_euclidean_v_wrap implements LongFunction {
+
+ private final int dimensions;
+ private final long population;
+
+ public DNN_euclidean_v_wrap(int dimensions, long population) {
+ this.dimensions = dimensions;
+ this.population = population;
+ }
+
+ @Override
+ public float[] apply(long value) {
+ value = value % population;
+ float[] vector = new float[dimensions];
+ for (int idx = 0; idx < vector.length; idx++) {
+ vector[idx]= (float)idx+value;
+ }
+ return vector;
+ }
+}
diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java
new file mode 100644
index 000000000..6f31ec982
--- /dev/null
+++ b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
+
+import org.junit.jupiter.api.Test;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+class DNNEuclideanNeighborsTest {
+
+ @Test
+ public void test_DNN_k3_p7_d5() {
+ DNN_euclidean_neighbors idxF = new DNN_euclidean_neighbors(3, 7, 5);
+ assertThat(idxF.apply(0)).isEqualTo(new int[]{0,1,2});
+ assertThat(idxF.apply(1)).isEqualTo(new int[]{1,0,2});
+ assertThat(idxF.apply(2)).isEqualTo(new int[]{2,1,3});
+ assertThat(idxF.apply(3)).isEqualTo(new int[]{3,2,4});
+ assertThat(idxF.apply(4)).isEqualTo(new int[]{4,3,5});
+ assertThat(idxF.apply(5)).isEqualTo(new int[]{5,4,6});
+ assertThat(idxF.apply(6)).isEqualTo(new int[]{6,5,4});
+ }
+
+ @Test
+ public void test_DNN_k4_n7_d5() {
+ DNN_euclidean_neighbors idxF = new DNN_euclidean_neighbors(4, 7, 5);
+ assertThat(idxF.apply(0)).isEqualTo(new int[]{0,1,2,3});
+ assertThat(idxF.apply(1)).isEqualTo(new int[]{1,0,2,3});
+ assertThat(idxF.apply(2)).isEqualTo(new int[]{2,1,3,0});
+ assertThat(idxF.apply(3)).isEqualTo(new int[]{3,2,4,1});
+ assertThat(idxF.apply(4)).isEqualTo(new int[]{4,3,5,2});
+ assertThat(idxF.apply(5)).isEqualTo(new int[]{5,4,6,3});
+ assertThat(idxF.apply(6)).isEqualTo(new int[]{6,5,4,3});
+ }
+
+ @Test
+ public void test_DNN_k6_n100_d10() {
+ DNN_euclidean_neighbors idxF = new DNN_euclidean_neighbors(6, 100, 10);
+ assertThat(idxF.apply(99)).isEqualTo(new int[]{99,98,97,96,95,94});
+ }
+
+ @Test
+ public void test_DNN_k6_n101_d10() {
+ DNN_euclidean_neighbors idxF = new DNN_euclidean_neighbors(6, 101, 10);
+ assertThat(idxF.apply(100)).isEqualTo(new int[]{100,99,98,97,96,95});
+ assertThat(idxF.apply(99)).isEqualTo(new int[]{99,98,100,97,96,95});
+ }
+
+}
diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java
new file mode 100644
index 000000000..e62ac0120
--- /dev/null
+++ b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
+
+import org.junit.jupiter.api.Test;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.*;
+
+class DNNEuclideanVTest {
+
+ @Test
+ public void testBasicVectors() {
+ DNN_euclidean_v vf = new DNN_euclidean_v(5, 7);
+ assertThat(vf.apply(3L)).isEqualTo(new float[]{3f,4f,5f,6f,7f});
+ assertThrows(RuntimeException.class, () -> vf.apply(7));
+ }
+
+ @Test
+ public void testWrappingVectors() {
+ DNN_euclidean_v_wrap vf = new DNN_euclidean_v_wrap(5, 7);
+ assertThat(vf.apply(3L)).isEqualTo(new float[]{3f,4f,5f,6f,7f});
+ assertThat(vf.apply(0L)).isEqualTo(new float[]{0f,1f,2f,3f,4f});
+ assertThat(vf.apply(7L)).isEqualTo(new float[]{0f,1f,2f,3f,4f});
+ }
+
+ @Test
+ public void testContiguousVectors() {
+ DNN_euclidean_v_series vf = new DNN_euclidean_v_series(4,10,2);
+ assertThat(vf.apply(7L)).isEqualTo(
+ new float[][] {
+ {7f,8f,9f,10f},
+ {8f,9f,10f,11f}
+ }
+ );
+
+ assertThrows(RuntimeException.class, () -> vf.apply(10));
+
+ }
+
+}
From 20c39350bf88e811786c5db70e04216bf5ee8733 Mon Sep 17 00:00:00 2001
From: Jonathan Shook
Date: Thu, 7 Dec 2023 20:59:18 -0600
Subject: [PATCH 03/11] add package info for provenance
---
.../shared/vectors/dnn/package-info.java | 20 +++++++++++++++++++
1 file changed, 20 insertions(+)
create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/package-info.java
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/package-info.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/package-info.java
new file mode 100644
index 000000000..d72a81862
--- /dev/null
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/package-info.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2023 nosqlbench
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This is an experimental package based on the DNN or "Das/Direct Nearest Neighbor" method.
+ */
+package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
From 8294ff320f8800a5aabfb6367adebea9728d7d59 Mon Sep 17 00:00:00 2001
From: Jonathan Shook
Date: Thu, 7 Dec 2023 21:18:17 -0600
Subject: [PATCH 04/11] minor doc updates
---
.../basics/shared/vectors/dnn/DNN_euclidean_neighbors.java | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
index e2ddb1d97..34918614a 100644
--- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
@@ -55,7 +55,7 @@ public class DNN_euclidean_neighbors implements IntFunction {
* starting at the central vector (the index of the query vector).
* Layer these in rank order using closed-form index functions.
* Layer in any zero-boundary values which were deferred from above.
- * Layer in an N-boundary values which were deferred above.
+ * Layer in any N-boundary values which were deferred from above.
*
*
*
@@ -77,8 +77,8 @@ public class DNN_euclidean_neighbors implements IntFunction {
int unbounded = Math.min(k, Math.min(leftBoundary, rightBoundary));
for (int i = 0; i < unbounded; i++) {
// Leave this here as an explainer, please
- // int sign = ((((i + 1) & 1) << 1) - 1);
- // int offset = ((i + 1)>>1);
+ // int sign = ((((i + 1) & 1) << 1) - 1); // this gives us -1 or +1 depending on odd or even, and is inverted
+ // int offset = ((i + 1)>>1); // half rounded down, shifted biased by 1
// offset *= sign;
// int v = value + (((((i + 1) & 1) << 1) - 1) * ((i + 1) >> 1));
indices[i] = value + (((((i + 1) & 1) << 1) - 1) * ((i + 1) >> 1));
From 19a624418b6dde75e3bbff61b32472228ad5a289 Mon Sep 17 00:00:00 2001
From: Jonathan Shook
Date: Thu, 7 Dec 2023 21:21:12 -0600
Subject: [PATCH 05/11] ninja phrasing fix
---
.../basics/shared/vectors/dnn/DNN_euclidean_neighbors.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
index 34918614a..5b3f6c946 100644
--- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
@@ -78,7 +78,7 @@ public class DNN_euclidean_neighbors implements IntFunction {
for (int i = 0; i < unbounded; i++) {
// Leave this here as an explainer, please
// int sign = ((((i + 1) & 1) << 1) - 1); // this gives us -1 or +1 depending on odd or even, and is inverted
- // int offset = ((i + 1)>>1); // half rounded down, shifted biased by 1
+ // int offset = ((i + 1)>>1); // half rounded down, shifted by 1
// offset *= sign;
// int v = value + (((((i + 1) & 1) << 1) - 1) * ((i + 1) >> 1));
indices[i] = value + (((((i + 1) & 1) << 1) - 1) * ((i + 1) >> 1));
From a3476059110b8ec49630db5ab65a60ee51cb0073 Mon Sep 17 00:00:00 2001
From: Jonathan Shook
Date: Thu, 7 Dec 2023 21:23:06 -0600
Subject: [PATCH 06/11] ninja phrasing fix
---
.../basics/shared/vectors/dnn/DNN_euclidean_neighbors.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
index 5b3f6c946..7a3803d68 100644
--- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
@@ -44,7 +44,7 @@ public class DNN_euclidean_neighbors implements IntFunction {
/**
* Compute neighbor indices with a (hopefully) fast implementation. There are surely some simplifications to be
- * made in the functions below, but even in the current for it avoids a significant number of branches.
+ * made in the functions below, but even in the current form it avoids a significant number of branches.
*
* This code is not as simple as it could be. It was built more for speed than simplicity since it will be a hot
* spot for testing. The unit tests for this are essential.
From 83812ef8d3a066bd57e6fd267830d24254b0b592 Mon Sep 17 00:00:00 2001
From: Jonathan Shook
Date: Thu, 7 Dec 2023 21:23:50 -0600
Subject: [PATCH 07/11] add missing annotations
---
.../basics/shared/vectors/dnn/DNN_euclidean_neighbors.java | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
index 7a3803d68..cf9f86243 100644
--- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
@@ -16,12 +16,18 @@
package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
+import io.nosqlbench.virtdata.api.annotations.Categories;
+import io.nosqlbench.virtdata.api.annotations.Category;
+import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
+
import java.util.function.IntFunction;
/**
* Compute the indices of the neighbors of a given v using DNN mapping.
* To avoid ambiguity on equidistant neighbors, odd neighborhood sizes are preferred.
*/
+@ThreadSafeMapper
+@Categories(Category.experimental)
public class DNN_euclidean_neighbors implements IntFunction {
private final int D;
From 2db38fb0419701357fe8ddd03b40ae8e5211255d Mon Sep 17 00:00:00 2001
From: Jonathan Shook
Date: Fri, 8 Dec 2023 11:29:39 -0600
Subject: [PATCH 08/11] added optional scale factor to v
---
.../basics/shared/vectors/dnn/DNN_euclidean_v.java | 8 +++++++-
.../basics/shared/vectors/dnn/DNNEuclideanVTest.java | 8 ++++++++
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java
index 1288672e1..4ca94cd58 100644
--- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java
@@ -28,10 +28,16 @@ public class DNN_euclidean_v implements LongFunction {
private final int D;
private final long N;
+ private final double scale;
public DNN_euclidean_v(int D, long N) {
+ this(D,N,1.0d);
+ }
+
+ public DNN_euclidean_v(int D, long N, double scale) {
this.D = D;
this.N = N;
+ this.scale = scale;
}
@Override
@@ -41,7 +47,7 @@ public class DNN_euclidean_v implements LongFunction {
}
float[] vector = new float[D];
for (int idx = 0; idx < vector.length; idx++) {
- vector[idx]= (float)idx+value;
+ vector[idx]= (float)((idx+value)*scale);
}
return vector;
}
diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java
index e62ac0120..b49b1edc9 100644
--- a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java
+++ b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java
@@ -30,6 +30,14 @@ class DNNEuclideanVTest {
assertThrows(RuntimeException.class, () -> vf.apply(7));
}
+ @Test
+ public void testBasicVectorsScaled() {
+ DNN_euclidean_v vf = new DNN_euclidean_v(5, 7, 3.0);
+ assertThat(vf.apply(3L)).isEqualTo(new float[]{9f,12f,15f,18f,21f});
+ assertThrows(RuntimeException.class, () -> vf.apply(7));
+ }
+
+
@Test
public void testWrappingVectors() {
DNN_euclidean_v_wrap vf = new DNN_euclidean_v_wrap(5, 7);
From 799ff4846e3c9c9417c01be9671050424981088a Mon Sep 17 00:00:00 2001
From: Jonathan Shook
Date: Fri, 8 Dec 2023 11:57:55 -0600
Subject: [PATCH 09/11] added scale to v_wrap
---
.../vectors/dnn/DNN_euclidean_v_wrap.java | 22 ++++++++++++-------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java
index 7fe670ed1..c52b69297 100644
--- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java
@@ -26,20 +26,26 @@ import java.util.function.LongFunction;
@Categories(Category.experimental)
public class DNN_euclidean_v_wrap implements LongFunction {
- private final int dimensions;
- private final long population;
+ private final int D;
+ private final long N;
+ private final double scale;
- public DNN_euclidean_v_wrap(int dimensions, long population) {
- this.dimensions = dimensions;
- this.population = population;
+ public DNN_euclidean_v_wrap(int D, long N, double scale) {
+ this.D = D;
+ this.N = N;
+ this.scale = scale;
+ }
+
+ public DNN_euclidean_v_wrap(int D, long N) {
+ this(D,N,1.0d);
}
@Override
public float[] apply(long value) {
- value = value % population;
- float[] vector = new float[dimensions];
+ value = value % N;
+ float[] vector = new float[D];
for (int idx = 0; idx < vector.length; idx++) {
- vector[idx]= (float)idx+value;
+ vector[idx]= (float)((idx+value)*scale);
}
return vector;
}
From 852f61ef6122ea0362b66fbffbe38b300ed864b7 Mon Sep 17 00:00:00 2001
From: Jonathan Shook
Date: Fri, 8 Dec 2023 12:41:15 -0600
Subject: [PATCH 10/11] adjusted scale factors for DNN vectors
---
.../library/basics/shared/vectors/dnn/DNN_euclidean_v.java | 2 +-
.../library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java | 2 +-
.../library/basics/shared/vectors/dnn/DNNEuclideanVTest.java | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java
index 4ca94cd58..30f6ad5b9 100644
--- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v.java
@@ -47,7 +47,7 @@ public class DNN_euclidean_v implements LongFunction {
}
float[] vector = new float[D];
for (int idx = 0; idx < vector.length; idx++) {
- vector[idx]= (float)((idx+value)*scale);
+ vector[idx]= (float)(value+(idx*scale));
}
return vector;
}
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java
index c52b69297..3cd8fb911 100644
--- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_v_wrap.java
@@ -45,7 +45,7 @@ public class DNN_euclidean_v_wrap implements LongFunction {
value = value % N;
float[] vector = new float[D];
for (int idx = 0; idx < vector.length; idx++) {
- vector[idx]= (float)((idx+value)*scale);
+ vector[idx]= (float)(value+(idx*scale));
}
return vector;
}
diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java
index b49b1edc9..d35d2cd03 100644
--- a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java
+++ b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanVTest.java
@@ -33,7 +33,7 @@ class DNNEuclideanVTest {
@Test
public void testBasicVectorsScaled() {
DNN_euclidean_v vf = new DNN_euclidean_v(5, 7, 3.0);
- assertThat(vf.apply(3L)).isEqualTo(new float[]{9f,12f,15f,18f,21f});
+ assertThat(vf.apply(3L)).isEqualTo(new float[]{3f,6f,9f,12f,15f});
assertThrows(RuntimeException.class, () -> vf.apply(7));
}
From 1b2c9271d60048241691987fafc96fb2af8592aa Mon Sep 17 00:00:00 2001
From: Jonathan Shook
Date: Fri, 8 Dec 2023 14:04:49 -0600
Subject: [PATCH 11/11] add outside range corrections to neighborhood binding
---
.../shared/vectors/dnn/DNN_euclidean_neighbors.java | 10 ++++++----
.../shared/vectors/dnn/DNNEuclideanNeighborsTest.java | 6 ++++--
2 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
index cf9f86243..bfdbf9d7c 100644
--- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
+++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNN_euclidean_neighbors.java
@@ -76,12 +76,13 @@ public class DNN_euclidean_neighbors implements IntFunction {
*/
@Override
public int[] apply(int value) {
+ value = Math.min(Math.max(0,value),N-1);
int[] indices = new int[k];
int leftBoundary = (value << 1) + 1;
int rightBoundary = ((N - (value + 1)) << 1) + 1;
- int unbounded = Math.min(k, Math.min(leftBoundary, rightBoundary));
- for (int i = 0; i < unbounded; i++) {
+ int insideNeighbors = Math.min(k, Math.min(leftBoundary, rightBoundary));
+ for (int i = 0; i < insideNeighbors; i++) {
// Leave this here as an explainer, please
// int sign = ((((i + 1) & 1) << 1) - 1); // this gives us -1 or +1 depending on odd or even, and is inverted
// int offset = ((i + 1)>>1); // half rounded down, shifted by 1
@@ -90,12 +91,13 @@ public class DNN_euclidean_neighbors implements IntFunction {
indices[i] = value + (((((i + 1) & 1) << 1) - 1) * ((i + 1) >> 1));
}
int leftFill = Math.max(0, k - leftBoundary);
+ // TODO: Evaluate optimization from Dave2Wave for reducing additions
for (int i = 0; i < leftFill; i++) {
- indices[unbounded + i] = unbounded + i;
+ indices[insideNeighbors + i] = insideNeighbors + i;
}
int rightFill = Math.max(0, k - rightBoundary);
for (int i = 0; i < rightFill; i++) {
- indices[unbounded + i] = (N - 1) - (unbounded + i);
+ indices[insideNeighbors + i] = (N - 1) - (insideNeighbors + i);
}
return indices;
}
diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java
index 6f31ec982..1a0e75084 100644
--- a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java
+++ b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/vectors/dnn/DNNEuclideanNeighborsTest.java
@@ -23,7 +23,7 @@ import static org.assertj.core.api.Assertions.assertThat;
class DNNEuclideanNeighborsTest {
@Test
- public void test_DNN_k3_p7_d5() {
+ public void test_DNN_K3_N7_D5() {
DNN_euclidean_neighbors idxF = new DNN_euclidean_neighbors(3, 7, 5);
assertThat(idxF.apply(0)).isEqualTo(new int[]{0,1,2});
assertThat(idxF.apply(1)).isEqualTo(new int[]{1,0,2});
@@ -53,10 +53,12 @@ class DNNEuclideanNeighborsTest {
}
@Test
- public void test_DNN_k6_n101_d10() {
+ public void test_DNN_K6_N101_D10() {
DNN_euclidean_neighbors idxF = new DNN_euclidean_neighbors(6, 101, 10);
+ assertThat(idxF.apply(101)).isEqualTo(new int[]{100,99,98,97,96,95});
assertThat(idxF.apply(100)).isEqualTo(new int[]{100,99,98,97,96,95});
assertThat(idxF.apply(99)).isEqualTo(new int[]{99,98,100,97,96,95});
+ assertThat(idxF.apply(98)).isEqualTo(new int[]{98,97,99,96,100,95});
}
}