Merge branch 'main' into mwolters/reset_cmd

This commit is contained in:
Mark Wolters 2024-02-01 11:21:35 -04:00
commit b883ec6e69
4 changed files with 168 additions and 10 deletions

View File

@ -315,7 +315,7 @@
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
<version>2.12.5</version>
<version>2.12.6</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>

View File

@ -0,0 +1,94 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import java.util.function.IntFunction;
/**
 * Compute the indices of the neighbors of a given vector using the DNN angular1 mapping.
 *
 * <p>The N training vectors are grouped into {@code modulus} clusters by residue class of
 * their index. For a query index, the whole cluster sharing its residue class is returned,
 * so the result holds {@code N / modulus} or {@code N / modulus + 1} entries rather than
 * exactly {@code k}. To avoid ambiguity on equidistant neighbors, odd neighborhood sizes
 * are preferred.
 */
@ThreadSafeMapper
@Categories(Category.experimental)
public class DNN_angular1_neighbors implements IntFunction<int[]> {

    private final int N;
    // Retained from construction; only used there to validate the cluster size.
    private final int k;
    private final int modulus;

    /**
     * @param k
     *     The size of neighborhood
     * @param N
     *     The number of total vectors, necessary for boundary conditions of defined vector
     * @param modulus
     *     The modulus used during training of angular1 data; this corresponds to how periodically we cycle back
     *     to vectors with the same angle (hence have angular distance zero between them)
     * @throws IllegalArgumentException
     *     if {@code modulus} is not positive, or if {@code k * modulus} exceeds {@code N}
     */
    public DNN_angular1_neighbors(int k, int N, int modulus) {
        if (modulus <= 0) {
            throw new IllegalArgumentException(
                String.format(
                    "Invalid parameters: modulus=%d. modulus is required to be positive.",
                    modulus
                )
            );
        }
        // need to ensure each of the modulus clusters has size >= k, so that top-k nearest neighbors don't
        // spill to another cluster with non-zero angle.
        // The product is widened to long: in int arithmetic it can wrap around and let
        // oversized k/modulus combinations slip past this check.
        if ((long) k * modulus > N) {
            throw new IllegalArgumentException(
                String.format(
                    "Invalid parameters: N=%d, k=%d, modulus=%d. Vectors in a cluster = N / modulus >= k.",
                    N, k, modulus
                )
            );
        }
        this.N = N;
        this.k = k;
        this.modulus = modulus;
    }

    /**
     * @param value
     *     the function argument, or the index of the query vector for the DNN addressing scheme
     * @return every index in {@code [0, N)} that is congruent to {@code value} modulo
     *     {@code modulus}, in ascending order
     */
    @Override
    public int[] apply(int value) {
        // we created modulus clusters of our N vectors, of size N/modulus or N/modulus + 1
        // (the latter case when modulus does not evenly divide N, and we get remainder)
        int div = N / modulus;
        int mod = N % modulus;
        // floorMod keeps the residue in [0, modulus) even for negative inputs, where the
        // plain % operator would yield a negative residue and thus negative indices.
        int cycleResidueClass = Math.floorMod(value, modulus);
        // handle case of extra neighbor in the same cluster
        if (cycleResidueClass < mod) {
            div += 1;
        }
        int[] indices = new int[div];
        int currIdx = cycleResidueClass;
        for (int i = 0; i < div; i++) {
            indices[i] = currIdx;
            currIdx += modulus;
        }
        return indices;
    }
}

View File

@ -0,0 +1,66 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
import org.junit.jupiter.api.Test;
import static org.assertj.core.api.Assertions.assertThat;
class DNNAngular1NeighborsTest {

    /**
     * With N=30 and modulus=5 every residue class holds exactly N / modulus = 6 indices.
     */
    @Test
    public void test_DNN_modulus_divides_training_population() {
        final int k = 3;
        final int N = 30;
        final int modulus = 5;
        DNN_angular1_neighbors neighborsOf = new DNN_angular1_neighbors(k, N, modulus);

        // NOTE: we get more than k neighbors (N / modulus, precisely), due to not arbitrarily breaking ties
        int[][] expectedByResidue = {
            {0, 5, 10, 15, 20, 25},
            {1, 6, 11, 16, 21, 26},
            {2, 7, 12, 17, 22, 27},
            {3, 8, 13, 18, 23, 28},
            {4, 9, 14, 19, 24, 29},
        };
        for (int residue = 0; residue < expectedByResidue.length; residue++) {
            assertThat(neighborsOf.apply(residue)).isEqualTo(expectedByResidue[residue]);
        }

        // queries far past the residue range must map back onto the same clusters
        int query = 1000;
        while (query < 1000 + modulus) {
            int[] wrapped = neighborsOf.apply(query);
            assertThat(wrapped).isEqualTo(neighborsOf.apply(query % modulus));
            query++;
        }
    }

    /**
     * With N=30 and modulus=7 the first N % modulus = 2 residue classes hold one extra index.
     */
    @Test
    public void test_DNN_modulus_does_not_divide_training_population() {
        final int k = 3;
        final int N = 30;
        final int modulus = 7;
        DNN_angular1_neighbors neighborsOf = new DNN_angular1_neighbors(k, N, modulus);

        // residue classes < N % modulus get an extra neighbor in their cluster
        int[][] expectedByResidue = {
            {0, 7, 14, 21, 28},
            {1, 8, 15, 22, 29},
            {2, 9, 16, 23},
            {3, 10, 17, 24},
            {4, 11, 18, 25},
            {5, 12, 19, 26},
            {6, 13, 20, 27},
        };
        for (int residue = 0; residue < expectedByResidue.length; residue++) {
            assertThat(neighborsOf.apply(residue)).isEqualTo(expectedByResidue[residue]);
        }

        // queries far past the residue range must map back onto the same clusters
        int query = 1000;
        while (query < 1000 + modulus) {
            int[] wrapped = neighborsOf.apply(query);
            assertThat(wrapped).isEqualTo(neighborsOf.apply(query % modulus));
            query++;
        }
    }
}

View File

@ -46,25 +46,23 @@ class DNNAngular1VTest {
public void testBasicAngularVectors() {
// Generates 100 vectors from DNN_angular1_v(10, 100, M) and compares them pair-wise by
// cosine similarity, relating a similarity of ~1.0 to index congruence modulo M.
// NOTE(review): this span looks like a diff hunk shown with its removed and added lines
// interleaved (two `for (int vidx ...` headers, `same[]` bookkeeping next to inline
// assertions, unbalanced braces) — confirm against the merged file before relying on it.
int M = 7;
DNN_angular1_v vf = new DNN_angular1_v(10, 100, M);
// populate 100 training cycles of DNN angular
float[][] vectors = new float[100][];
for (int i = 0; i < 100; i++) {
vectors[i] = vf.apply(i);
}
// presumably the pre-change bookkeeping: same[i] records an earlier index whose vector matched i
int[] same = new int[100];
Arrays.fill(same, -1);
for (int vidx = 0; vidx < same.length; vidx++) {
// pair-wise check of non-scaled cosine similarity between training vectors
for (int vidx = 0; vidx < vectors.length; vidx++) {
// compare_to runs up to and including vidx, so each vector is also compared with itself
for (int compare_to = 0; compare_to <= vidx; compare_to++) {
double similarity = cosine_similarity(vectors[vidx], vectors[compare_to]);
// two of the generated vectors have angle 0 between them if and only if indexes are congruent % M
// similarity within 1e-8 of 1.0 is treated as "same direction"
if (Math.abs(similarity - 1.0d) < 0.00000001d) {
same[vidx] = compare_to;
break;
assertThat(vidx % M).isEqualTo(compare_to % M);
} else {
assertThat(vidx % M).isNotEqualTo(compare_to % M);
}
}
}
// starts at M: indices below M have no earlier index in the same residue class
for (int sameas = M; sameas < same.length; sameas++) {
// System.out.println("idx:" + sameas + ", same[sameas] -> " + same[sameas] + " sameas%7=" + sameas % M);
assertThat(same[sameas] % M).isEqualTo(sameas % M);
}
}
private double cosine_similarity(float[] a, float[] b) {