mirror of
https://github.com/nosqlbench/nosqlbench.git
synced 2025-01-11 00:12:04 -06:00
Merge branch 'main' into mwolters/reset_cmd
This commit is contained in:
commit
b883ec6e69
@ -315,7 +315,7 @@
|
||||
<dependency>
|
||||
<groupId>joda-time</groupId>
|
||||
<artifactId>joda-time</artifactId>
|
||||
<version>2.12.5</version>
|
||||
<version>2.12.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
|
@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright (c) 2023 nosqlbench
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
|
||||
|
||||
import io.nosqlbench.virtdata.api.annotations.Categories;
|
||||
import io.nosqlbench.virtdata.api.annotations.Category;
|
||||
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
|
||||
|
||||
import java.util.function.IntFunction;
|
||||
|
||||
/**
|
||||
* Compute the neighbor indices for a given query vector index using DNN mapping.
|
||||
* To avoid ambiguity on equidistant neighbors, odd neighborhood sizes are preferred.
|
||||
*/
|
||||
@ThreadSafeMapper
|
||||
@Categories(Category.experimental)
|
||||
public class DNN_angular1_neighbors implements IntFunction<int[]> {

    /** Total number of vectors in the population. */
    private final int N;
    /** Requested neighborhood size; only consulted when validating the constructor arguments. */
    private final int k;
    /** Period of the angular1 data; indices congruent modulo this value form one cluster. */
    private final int modulus;

    /**
     * @param k
     *     The size of neighborhood
     * @param N
     *     The number of total vectors, necessary for boundary conditions of defined vector
     * @param modulus
     *     The modulus used during training of angular1 data; this corresponds to how periodically we cycle back
     *     to vectors with the same angle (hence have angular distance zero between them)
     * @throws IllegalArgumentException
     *     if modulus is not positive, or if k * modulus exceeds N
     */
    public DNN_angular1_neighbors(int k, int N, int modulus) {
        if (modulus <= 0) {
            throw new IllegalArgumentException(
                String.format(
                    "Invalid parameters: modulus=%d. modulus is required to be positive.",
                    modulus
                )
            );
        }
        // need to ensure each of the modulus clusters has size >= k, so that top-k nearest neighbors don't
        // spill to another cluster with non-zero angle.
        // The product is widened to long: as an int it can wrap around (e.g. 65536 * 65536 == 0) and
        // silently accept parameters that violate the constraint.
        if ((long) k * modulus > N) {
            throw new IllegalArgumentException(
                String.format(
                    "Invalid parameters: N=%d, k=%d, modulus=%d. Vectors in a cluster = N / modulus >= k.",
                    N, k, modulus
                )
            );
        }
        this.N = N;
        this.k = k;
        this.modulus = modulus;
    }

    /**
     * Returns every index in [0, N) that is congruent to {@code value} modulo the configured modulus,
     * in ascending order. The whole cluster is returned, so the result may hold more than k entries.
     *
     * @param value
     *     the function argument, or the index of the query vector for the DNN addressing scheme
     * @return A ranked neighborhood of vector indices, using the DNN addressing scheme
     */
    @Override
    public int[] apply(int value) {
        // we created modulus clusters of our N vectors, of size N/modulus or N/modulus + 1
        // (the latter case when modulus does not evenly divide N, and we get remainder)
        int clusterSize = N / modulus;
        int remainder = N % modulus;
        // floorMod keeps the residue in [0, modulus) even for a negative value, so the generated
        // indices can never be negative; for non-negative values it is identical to value % modulus
        int cycleResidueClass = Math.floorMod(value, modulus);
        // handle case of extra neighbor in the same cluster
        if (cycleResidueClass < remainder) {
            clusterSize += 1;
        }
        int[] indices = new int[clusterSize];
        int currIdx = cycleResidueClass;
        for (int i = 0; i < clusterSize; i++) {
            indices[i] = currIdx;
            currIdx += modulus;
        }
        return indices;
    }
}
|
@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2023 nosqlbench
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.nosqlbench.virtdata.library.basics.shared.vectors.dnn;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
class DNNAngular1NeighborsTest {
|
||||
|
||||
@Test
|
||||
public void test_DNN_modulus_divides_training_population() {
|
||||
int k = 3;
|
||||
int N = 30;
|
||||
int modulus = 5;
|
||||
DNN_angular1_neighbors idxF = new DNN_angular1_neighbors(k, N, modulus);
|
||||
|
||||
// NOTE: we get more than k neighbors (N / modulus, precisely), due to not arbitrarily breaking ties
|
||||
assertThat(idxF.apply(0)).isEqualTo(new int[]{0,5,10,15,20,25});
|
||||
assertThat(idxF.apply(1)).isEqualTo(new int[]{1,6,11,16,21,26});
|
||||
assertThat(idxF.apply(2)).isEqualTo(new int[]{2,7,12,17,22,27});
|
||||
assertThat(idxF.apply(3)).isEqualTo(new int[]{3,8,13,18,23,28});
|
||||
assertThat(idxF.apply(4)).isEqualTo(new int[]{4,9,14,19,24,29});
|
||||
|
||||
// verify we cycle back neighbors
|
||||
for (int i = 1000; i < 1000 + modulus; i++) {
|
||||
assertThat(idxF.apply(i)).isEqualTo(idxF.apply(i % modulus));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_DNN_modulus_does_not_divide_training_population() {
|
||||
int k = 3;
|
||||
int N = 30;
|
||||
int modulus = 7;
|
||||
DNN_angular1_neighbors idxF = new DNN_angular1_neighbors(k, N, modulus);
|
||||
|
||||
// residue classes < N % modulus get an extra neighbor in their cluster
|
||||
assertThat(idxF.apply(0)).isEqualTo(new int[]{0,7,14,21,28});
|
||||
assertThat(idxF.apply(1)).isEqualTo(new int[]{1,8,15,22,29});
|
||||
assertThat(idxF.apply(2)).isEqualTo(new int[]{2,9,16,23});
|
||||
assertThat(idxF.apply(3)).isEqualTo(new int[]{3,10,17,24});
|
||||
assertThat(idxF.apply(4)).isEqualTo(new int[]{4,11,18,25});
|
||||
assertThat(idxF.apply(5)).isEqualTo(new int[]{5,12,19,26});
|
||||
assertThat(idxF.apply(6)).isEqualTo(new int[]{6,13,20,27});
|
||||
|
||||
// verify we cycle back neighbors
|
||||
for (int i = 1000; i < 1000 + modulus; i++) {
|
||||
assertThat(idxF.apply(i)).isEqualTo(idxF.apply(i % modulus));
|
||||
}
|
||||
}
|
||||
}
|
@ -46,25 +46,23 @@ class DNNAngular1VTest {
|
||||
public void testBasicAngularVectors() {
|
||||
int M = 7;
|
||||
DNN_angular1_v vf = new DNN_angular1_v(10, 100, M);
|
||||
// populate 100 training cycles of DNN angular
|
||||
float[][] vectors = new float[100][];
|
||||
for (int i = 0; i < 100; i++) {
|
||||
vectors[i] = vf.apply(i);
|
||||
}
|
||||
int[] same = new int[100];
|
||||
Arrays.fill(same, -1);
|
||||
for (int vidx = 0; vidx < same.length; vidx++) {
|
||||
// pair-wise check of non-scaled cosine similarity between training vectors
|
||||
for (int vidx = 0; vidx < vectors.length; vidx++) {
|
||||
for (int compare_to = 0; compare_to <= vidx; compare_to++) {
|
||||
double similarity = cosine_similarity(vectors[vidx], vectors[compare_to]);
|
||||
// two of the generated vectors have angle 0 between them if and only if indexes are congruent % M
|
||||
if (Math.abs(similarity - 1.0d) < 0.00000001d) {
|
||||
same[vidx] = compare_to;
|
||||
break;
|
||||
assertThat(vidx % M).isEqualTo(compare_to % M);
|
||||
} else {
|
||||
assertThat(vidx % M).isNotEqualTo(compare_to % M);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int sameas = M; sameas < same.length; sameas++) {
|
||||
// System.out.println("idx:" + sameas + ", same[sameas] -> " + same[sameas] + " sameas%7=" + sameas % M);
|
||||
assertThat(same[sameas] % M).isEqualTo(sameas % M);
|
||||
}
|
||||
}
|
||||
|
||||
private double cosine_similarity(float[] a, float[] b) {
|
||||
|
Loading…
Reference in New Issue
Block a user