Merge branch 'main' of github.com:nosqlbench/nosqlbench

Jonathan Shook 2023-08-16 15:58:28 -05:00
commit da23c66622
33 changed files with 1525 additions and 2 deletions

hdf-loader/pom.xml Normal file

@ -0,0 +1,101 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Copyright (c) 2023 nosqlbench
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
~
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>hdf-loader</artifactId>
<packaging>jar</packaging>
<parent>
<artifactId>mvn-defaults</artifactId>
<groupId>io.nosqlbench</groupId>
<version>${revision}</version>
<relativePath>../mvn-defaults</relativePath>
</parent>
<name>${project.artifactId}</name>
<properties>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>org.snakeyaml</groupId>
<artifactId>snakeyaml-engine</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>org.yaml</groupId>
<artifactId>snakeyaml</artifactId>
<version>2.0</version>
</dependency>
<dependency>
<groupId>com.datastax.oss</groupId>
<artifactId>java-driver-core</artifactId>
<version>4.16.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>2.15.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.deeplearning4j/deeplearning4j-core -->
<dependency>
<groupId>org.deeplearning4j</groupId>
<artifactId>deeplearning4j-core</artifactId>
<version>1.0.0-M2.1</version>
</dependency>
<dependency>
<groupId>org.nd4j</groupId>
<artifactId>nd4j-native</artifactId>
<version>1.0.0-M2.1</version>
</dependency>
<dependency>
<groupId>org.deeplearning4j</groupId>
<artifactId>deeplearning4j-nlp</artifactId>
<version>1.0.0-M2.1</version>
</dependency>
<dependency>
<groupId>io.jhdf</groupId>
<artifactId>jhdf</artifactId>
<version>0.6.10</version>
</dependency>
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>nb-api</artifactId>
<version>5.17.3-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
</dependencies>
</project>

hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java Normal file

@ -0,0 +1,84 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.loader.hdf;
import io.nosqlbench.loader.hdf.config.LoaderConfig;
import io.nosqlbench.loader.hdf.readers.Hdf5Reader;
import io.nosqlbench.loader.hdf.readers.HdfReader;
import io.nosqlbench.loader.hdf.writers.AstraVectorWriter;
import io.nosqlbench.loader.hdf.writers.FileVectorWriter;
import io.nosqlbench.loader.hdf.writers.NoopVectorWriter;
import io.nosqlbench.loader.hdf.writers.VectorWriter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
public class HdfLoader {
private static final Logger logger = LogManager.getLogger(HdfLoader.class);
public static final String FILEWRITER = "filewriter";
public static final String ASTRA = "astra";
public static final String NOOP = "noop";
public static final String HDF5 = "hdf5";
public static final String HDF4 = "hdf4";
public static void main(String[] args) {
if (args.length == 0) {
System.out.println("Usage: hdf-loader <config file>");
System.exit(1);
}
try {
LoaderConfig config = new LoaderConfig(args[0]);
logger.info("Starting loader with config: " + config);
HdfReader reader = null;
VectorWriter writer = null;
String format = config.getFormat();
switch (format.toLowerCase()) {
case HDF4 -> {
logger.info("HDF4 format not yet supported");
System.exit(1);
}
case HDF5 -> {
logger.info("HDF5 format selected");
reader = new Hdf5Reader(config);
}
default -> {
logger.info("Unknown format: " + format);
System.exit(1);
}
}
String writerType = config.getWriter();
logger.info("Using writer type: " + writerType);
switch (writerType.toLowerCase()) {
case FILEWRITER -> writer = new FileVectorWriter(config);
case ASTRA -> writer = new AstraVectorWriter(config);
case NOOP -> writer = new NoopVectorWriter();
default -> {
logger.info("Unknown writer type: " + writerType);
System.exit(1);
}
}
reader.setWriter(writer);
logger.info("Starting main read loop");
reader.read();
} catch (Exception e) {
logger.error(e);
System.exit(1);
}
}
}

hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java Normal file

@ -0,0 +1,85 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.loader.hdf.config;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.yaml.snakeyaml.Yaml;
import java.io.FileReader;
import java.io.IOException;
import java.util.List;
import java.util.Map;
public class LoaderConfig {
private static final Logger logger = LogManager.getLogger(LoaderConfig.class);
private static final Yaml yaml = new Yaml();
private final Map<String, Object> configMap;
public LoaderConfig(String filePath) throws IOException {
try (FileReader fileReader = new FileReader(filePath)) {
configMap = yaml.load(fileReader);
}
for (Map.Entry<String, Object> entry : configMap.entrySet()) {
logger.debug(entry.getKey() + " : " + entry.getValue());
}
}
public Object getRawValue(String key) {
return configMap.get(key);
}
public String getStringValue(String key) {
return configMap.get(key).toString();
}
public List<String> getDatasets() {
return (List<String>) configMap.get("datasets");
}
public String getFormat() {
return (String) configMap.getOrDefault("format", "HD5");
}
public Map<String,String> getAstra() {
return (Map<String,String>) configMap.get("astra");
}
public String getEmbedding() {
return (String) configMap.getOrDefault("embedding", "Deeplearning4j");
}
public String getWriter() {
return (String) configMap.getOrDefault("writer", "filewriter");
}
public String getSourceFile() {
return (String) configMap.get("sourceFile");
}
public String getTargetFile() {
return (String) configMap.getOrDefault("targetFile", "./vectors.txt");
}
public int getThreads() {
return (int) configMap.getOrDefault("threads", 5);
}
public int getQueueSize() {
return (int) configMap.getOrDefault("queueSize", 1000);
}
}
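For orientation, a minimal sketch of how this class is consumed (the config path is an assumption; a real sample config appears later in this commit):

LoaderConfig config = new LoaderConfig("conf/hdf-loader.yaml"); // hypothetical path
String format = config.getFormat();           // "HDF5" unless overridden
List<String> datasets = config.getDatasets(); // e.g. [all]
int threads = config.getThreads();            // defaults to 5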

hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/DoubleEmbeddingGenerator.java Normal file

@ -0,0 +1,63 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.loader.hdf.embedding;
public class DoubleEmbeddingGenerator implements EmbeddingGenerator {
@Override
public float[][] generateEmbeddingFrom(Object o, int[] dims) {
return switch (dims.length) {
case 1 -> new float[][]{convertToFloat((double[]) o)};
case 2 -> convertToFloats((double[][]) o);
case 3 -> flatten(o, dims);
default -> throw new RuntimeException("unsupported embedding dimensionality: " + dims.length);
};
}
private float[][] convertToFloats(double[][] o) {
float[][] floats = new float[o.length][];
for (int i = 0; i < o.length; i++) {
floats[i] = convertToFloat(o[i]);
}
return floats;
}
public float[] convertToFloat(double[] doubleArray) {
if (doubleArray == null) {
return null;
}
float[] floatArray = new float[doubleArray.length];
for (int i = 0; i < doubleArray.length; i++) {
floatArray[i] = (float) doubleArray[i];
}
return floatArray;
}
private float[][] flatten(Object o, int[] dims) {
double[][][] arr = (double[][][]) o;
float[][] flat = new float[dims[0]][dims[1] * dims[2]];
for (int i = 0; i < dims[0]; i++) {
for (int j = 0; j < dims[1]; j++) {
for (int k = 0; k < dims[2]; k++) {
flat[i][j * dims[2] + k] = (float)arr[i][j][k];
}
}
}
return flat;
}
}
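To make the flatten path concrete, a small illustration using a hypothetical 2x2x3 input; each of the two output rows holds the six values of one 2x3 slice:

double[][][] in = {
    {{1, 2, 3}, {4, 5, 6}},
    {{7, 8, 9}, {10, 11, 12}}
};
float[][] out = new DoubleEmbeddingGenerator().generateEmbeddingFrom(in, new int[]{2, 2, 3});
// out[0] == {1, 2, 3, 4, 5, 6}; out[1] == {7, 8, 9, 10, 11, 12}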

hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGenerator.java Normal file

@ -0,0 +1,22 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.loader.hdf.embedding;
public interface EmbeddingGenerator {
public float[][] generateEmbeddingFrom(Object o, int[] dims);
}

hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java Normal file

@ -0,0 +1,57 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.loader.hdf.embedding;
import java.util.HashMap;
import java.util.Map;
public class EmbeddingGeneratorFactory {
private static final Map<String,EmbeddingGenerator> generators = new HashMap<>();
public static EmbeddingGenerator getGenerator(String type) {
String typeLower = type.equalsIgnoreCase("short") ? "int" : type.toLowerCase();
if (typeLower.equals("integer")) typeLower = "int";
switch (typeLower) {
case "string" -> {
if (!generators.containsKey(type)) {
generators.put(type, new StringEmbeddingGenerator());
}
return generators.get(type);
}
case "float" -> {
if (!generators.containsKey(type)) {
generators.put(type, new FloatEmbeddingGenerator());
}
return generators.get(type);
}
case "double" -> {
if (!generators.containsKey(type)) {
generators.put(type, new DoubleEmbeddingGenerator());
}
return generators.get(type);
}
case "int" -> {
if (!generators.containsKey(type)) {
generators.put(type, new IntEmbeddingGenerator());
}
return generators.get(type);
}
default -> throw new RuntimeException("Unknown embedding type: " + type);
}
}
}
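Usage sketch: the factory memoizes one generator per type string, so repeated lookups return the same instance:

EmbeddingGenerator gen = EmbeddingGeneratorFactory.getGenerator("double");
EmbeddingGenerator same = EmbeddingGeneratorFactory.getGenerator("double");
assert gen == same; // cached in the generators map
float[][] v = gen.generateEmbeddingFrom(new double[][]{{0.5, 1.5}}, new int[]{1, 2});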

hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/FloatEmbeddingGenerator.java Normal file

@ -0,0 +1,44 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.loader.hdf.embedding;
public class FloatEmbeddingGenerator implements EmbeddingGenerator {
@Override
public float[][] generateEmbeddingFrom(Object o, int[] dims) {
return switch (dims.length) {
case 1 -> new float[][]{(float[]) o};
case 2 -> (float[][]) o;
case 3 -> flatten(o, dims);
default -> throw new RuntimeException("unsupported embedding dimensionality: " + dims.length);
};
}
private float[][] flatten(Object o, int[] dims) {
float[][][] arr = (float[][][]) o;
float[][] flat = new float[dims[0]][dims[1] * dims[2]];
for (int i = 0; i < dims[0]; i++) {
for (int j = 0; j < dims[1]; j++) {
for (int k = 0; k < dims[2]; k++) {
flat[i][j * dims[2] + k] = arr[i][j][k];
}
}
}
return flat;
}
}

hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/IntEmbeddingGenerator.java Normal file

@ -0,0 +1,60 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.loader.hdf.embedding;
public class IntEmbeddingGenerator implements EmbeddingGenerator {
@Override
public float[][] generateEmbeddingFrom(Object o, int[] dims) {
switch (dims.length) {
case 1 -> {
float[] arr = new float[dims[0]];
for (int i = 0; i < dims[0]; i++) {
arr[i] = ((int[]) o)[i];
}
return new float[][]{arr};
}
case 2 -> {
float[][] arr = new float[dims[0]][dims[1]];
for (int i = 0; i < dims[0]; i++) {
for (int j = 0; j < dims[1]; j++) {
arr[i][j] = ((int[][]) o)[i][j];
}
}
return arr;
}
case 3 -> {
return flatten(o, dims);
}
default ->
throw new RuntimeException("unsupported embedding dimensionality: " + dims.length);
}
}
private float[][] flatten(Object o, int[] dims) {
int[][][] arr = (int[][][]) o;
float[][] flat = new float[dims[0]][dims[1] * dims[2]];
for (int i = 0; i < dims[0]; i++) {
for (int j = 0; j < dims[1]; j++) {
for (int k = 0; k < dims[2]; k++) {
flat[i][j * dims[2] + k] = arr[i][j][k];
}
}
}
return flat;
}
}

hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java Normal file

@ -0,0 +1,57 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.loader.hdf.embedding;
import org.deeplearning4j.models.word2vec.Word2Vec;
import org.deeplearning4j.text.sentenceiterator.BasicLineIterator;
import org.deeplearning4j.text.sentenceiterator.CollectionSentenceIterator;
import org.deeplearning4j.text.sentenceiterator.SentenceIterator;
import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
import java.util.Arrays;
public class StringEmbeddingGenerator implements EmbeddingGenerator {
private final TokenizerFactory tokenizerFactory = new DefaultTokenizerFactory();
@Override
public float[][] generateEmbeddingFrom(Object o, int[] dims) {
switch (dims.length) {
case 1 -> {
return generateWordEmbeddings((String[]) o);
}
default -> throw new RuntimeException("unsupported embedding dimensionality: " + dims.length);
}
}
private float[][] generateWordEmbeddings(String[] text) {
SentenceIterator iter = new CollectionSentenceIterator(Arrays.asList(text));
/*Word2Vec vec = new Word2Vec.Builder()
.minWordFrequency(1)
.iterations(1)
.layerSize(targetDims)
.seed(42)
.windowSize(5)
.iterate(iter)
.tokenizerFactory(tokenizerFactory)
.build();
*/
// TODO: not yet implemented; see the commented-out Word2Vec builder above
return null;
}
}
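A hedged sketch of how the commented-out builder above might be completed, following the usual deeplearning4j word2vec flow; the layer size of 25 stands in for the undefined targetDims and is purely an assumption:

Word2Vec vec = new Word2Vec.Builder()
    .minWordFrequency(1)
    .iterations(1)
    .layerSize(25) // assumed embedding width; targetDims is not defined in this class
    .seed(42)
    .windowSize(5)
    .iterate(iter)
    .tokenizerFactory(tokenizerFactory)
    .build();
vec.fit(); // trains over the sentence iterator
double[] embedding = vec.getWordVector(text[0]); // per-word vector lookup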

hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java Normal file

@ -0,0 +1,148 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.loader.hdf.readers;
import io.jhdf.HdfFile;
import io.jhdf.api.Dataset;
import io.jhdf.api.Group;
import io.jhdf.api.Node;
import io.nosqlbench.loader.hdf.config.LoaderConfig;
import io.nosqlbench.loader.hdf.embedding.EmbeddingGenerator;
import io.nosqlbench.loader.hdf.writers.VectorWriter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import static io.nosqlbench.loader.hdf.embedding.EmbeddingGeneratorFactory.getGenerator;
public class Hdf5Reader implements HdfReader {
private static final Logger logger = LogManager.getLogger(Hdf5Reader.class);
public static final String ALL = "all";
private VectorWriter writer;
private final LoaderConfig config;
private final ExecutorService executorService;
private final LinkedBlockingQueue<float[]> queue;
private List<String> datasets;
private final float[] SHUTDOWN = new float[0];
public Hdf5Reader(LoaderConfig config) {
this.config = config;
executorService = Executors.newCachedThreadPool();
queue = new LinkedBlockingQueue<>(config.getQueueSize());
}
@Override
public void setWriter(VectorWriter writer) {
this.writer = writer;
writer.setQueue(queue);
}
public void extractDatasets(Group parent) {
Map<String, Node> nodes = parent.getChildren();
for (String key : nodes.keySet()) {
Node node = nodes.get(key);
if (node instanceof Dataset) {
datasets.add(node.getPath());
}
else if (node.isGroup()) {
extractDatasets((Group) node);
}
}
}
@Override
public void read() {
HdfFile hdfFile = new HdfFile(Paths.get(config.getSourceFile()));
datasets = config.getDatasets();
if (datasets.get(0).equalsIgnoreCase(ALL)) {
extractDatasets(hdfFile);
}
List<Future<?>> futures = new ArrayList<>();
executorService.submit(writer);
for (String ds : datasets) {
if (ds.equalsIgnoreCase(ALL)) {
continue;
}
Future<?> future = executorService.submit(() -> {
logger.info("Processing dataset: " + ds);
Dataset dataset = hdfFile.getDatasetByPath(ds);
int[] dims = dataset.getDimensions();
String type = dataset.getJavaType().getSimpleName().toLowerCase();
EmbeddingGenerator generator = getGenerator(type);
Object data;
if (dataset.getSizeInBytes() > Integer.MAX_VALUE) {
logger.info("slicing large dataset: " + ds);
// TODO: For now this will be implemented to handle numeric types with
// 2 dimensions where the 1st dimension is the number of vectors and the 2nd
// dimension is the number of dimensions in the vector.
long[] sliceOffset = new long[dims.length];
int[] sliceDimensions = new int[dims.length];
sliceDimensions[1] = dims[1];
int noOfSlices = (int) (dataset.getSizeInBytes() / Integer.MAX_VALUE) + 1;
int sliceSize = dims[0] / noOfSlices;
for (int i = 0; i < noOfSlices; i++) {
sliceOffset[0] = (long) i * sliceSize;
sliceDimensions[0] = sliceSize;
data = dataset.getData(sliceOffset, sliceDimensions);
float[][] vectors = generator.generateEmbeddingFrom(data, sliceDimensions); // use the slice's dimensions, not the full dataset's
for (float[] vector : vectors) {
try {
queue.put(vector);
} catch (InterruptedException e) {
logger.error(e.getMessage(), e);
}
}
}
} else {
data = dataset.getData();
float[][] vectors = generator.generateEmbeddingFrom(data, dims);
for (float[] vector : vectors) {
try {
queue.put(vector);
} catch (InterruptedException e) {
logger.error(e.getMessage(), e);
}
}
}
});
futures.add(future);
}
for (Future<?> future : futures) {
try {
future.get();
} catch (Exception e) {
logger.error(e.getMessage(), e);
}
}
hdfFile.close();
writer.shutdown();
try {
queue.put(SHUTDOWN);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
executorService.shutdown();
}
}
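To illustrate the slicing arithmetic above, assume a hypothetical 50,000,000 x 100 float dataset (20 GB, more than a single jhdf getData() call can return in one Java array):

long sizeInBytes = 50_000_000L * 100 * Float.BYTES;           // 20,000,000,000
int noOfSlices = (int) (sizeInBytes / Integer.MAX_VALUE) + 1; // 9 + 1 = 10 slices
int sliceSize = 50_000_000 / noOfSlices;                      // 5,000,000 rows per read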

hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReader.java Normal file

@ -0,0 +1,26 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.loader.hdf.readers;
import io.nosqlbench.loader.hdf.writers.VectorWriter;
public interface HdfReader {
void setWriter(VectorWriter writer);
void read();
}

hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AbstractVectorWriter.java Normal file

@ -0,0 +1,47 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.loader.hdf.writers;
import java.util.concurrent.LinkedBlockingQueue;
public abstract class AbstractVectorWriter implements VectorWriter {
protected LinkedBlockingQueue<float[]> queue;
protected volatile boolean shutdown = false; // written by shutdown() from another thread
public void setQueue(LinkedBlockingQueue<float[]> queue) {
this.queue = queue;
}
@Override
public void run() {
while (!shutdown || !queue.isEmpty()) {
try {
float[] vector = queue.take();
if (vector.length==0) {
break;
}
writeVector(vector);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
}
protected abstract void writeVector(float[] vector);
}
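The contract for subclasses is small: drain vectors via writeVector and flip the shutdown flag. A minimal hypothetical implementation:

public class StdoutVectorWriter extends AbstractVectorWriter {
    @Override
    protected void writeVector(float[] vector) {
        System.out.println(java.util.Arrays.toString(vector));
    }
    @Override
    public void shutdown() {
        shutdown = true;
    }
}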

hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java Normal file

@ -0,0 +1,69 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.loader.hdf.writers;
import com.datastax.oss.driver.api.core.CqlSession;
import com.datastax.oss.driver.api.core.cql.PreparedStatement;
import com.datastax.oss.driver.api.core.data.CqlVector;
import io.nosqlbench.loader.hdf.config.LoaderConfig;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.nio.file.Paths;
import java.util.Map;
public class AstraVectorWriter extends AbstractVectorWriter {
private static final Logger logger = LogManager.getLogger(AstraVectorWriter.class);
private final CqlSession session;
PreparedStatement insert_vector;
public AstraVectorWriter(LoaderConfig config) {
Map<String,String> astraParams = config.getAstra();
session = CqlSession.builder()
.withCloudSecureConnectBundle(Paths.get(astraParams.get("scb")))
.withAuthCredentials(astraParams.get("clientId"), astraParams.get("clientSecret"))
.withKeyspace(astraParams.get("keyspace"))
.build();
logger.info("Astra session initialized");
insert_vector = session.prepare(astraParams.get("query"));
}
//TODO: this is insanely slow. Needs work on threading/batching
@Override
protected void writeVector(float[] vector) {
Float[] vector2 = new Float[vector.length];
for (int i = 0; i < vector.length; i++) {
vector2[i] = vector[i];
}
CqlVector.Builder vectorBuilder = CqlVector.builder();
vectorBuilder.add(vector2);
session.execute(insert_vector.bind(getPartitionValue(vector), vectorBuilder.build()));
}
private String getPartitionValue(float[] vector) {
float sum = 0;
for (float f : vector) {
sum += f;
}
return String.valueOf(sum);
}
@Override
public void shutdown() {
shutdown = true;
}
}
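One possible direction for the threading TODO above, sketched under the assumption that bounded concurrent writes (rather than true batching) are acceptable; the semaphore limit of 128 and the toCqlVector helper are hypothetical:

private final java.util.concurrent.Semaphore inFlight = new java.util.concurrent.Semaphore(128);

protected void writeVectorAsync(float[] vector) throws InterruptedException {
    inFlight.acquire(); // cap the number of outstanding requests
    session.executeAsync(insert_vector.bind(getPartitionValue(vector), toCqlVector(vector)))
        .whenComplete((rs, err) -> {
            if (err != null) logger.error(err.getMessage(), err);
            inFlight.release();
        });
}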

hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java Normal file

@ -0,0 +1,57 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.loader.hdf.writers;
import io.nosqlbench.loader.hdf.config.LoaderConfig;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.*;
public class FileVectorWriter extends AbstractVectorWriter {
private static final Logger logger = LogManager.getLogger(FileVectorWriter.class);
private final BufferedWriter targetFile;
public FileVectorWriter(LoaderConfig config) throws IOException {
String targetFileName = config.getTargetFile();
targetFile = new BufferedWriter(new FileWriter(targetFileName));
logger.info("Writing to file: " + targetFileName);
}
@Override
protected void writeVector(float[] vector) {
try {
targetFile.write("[");
for (int i = 0; i < vector.length; i++) {
targetFile.write(String.valueOf(vector[i]));
if (i < vector.length - 1) {
targetFile.write(",");
}
}
targetFile.write("]");
targetFile.write("\n");
targetFile.flush();
} catch (IOException e) {
logger.error(e.getMessage(), e);
}
}
@Override
public void shutdown() {
shutdown = true;
}
}

hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/NoopVectorWriter.java Normal file

@ -0,0 +1,36 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.loader.hdf.writers;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
public class NoopVectorWriter extends AbstractVectorWriter {
private static final Logger logger = LogManager.getLogger(NoopVectorWriter.class);
@Override
protected void writeVector(float[] vector) {
//No-op
logger.debug(vector);
}
@Override
public void shutdown() {
shutdown = true;
}
}

hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriter.java Normal file

@ -0,0 +1,26 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.loader.hdf.writers;
import java.util.concurrent.LinkedBlockingQueue;
public interface VectorWriter extends Runnable {
void setQueue(LinkedBlockingQueue<float[]> queue);
void shutdown();
}


@ -0,0 +1,13 @@
format: HDF5
sourceFile: /home/mwolters138/Downloads/h5ex_t_float.h5 #/home/mwolters138/Documents/hdf5/datasets/pass/glove-25-angular.hdf5
datasets:
- all
embedding: word2vec
writer: filewriter
astra:
scb: /home/mwolters138/Dev/testing/secure-connect-vector-correctness.zip
clientId: IvpdaZejwNuvWeupsIkWTHeL
clientSecret: .bxut2-OQL,dWunZeQbjZC0vMHd88UWXKS.xT,nl95zQC0B0xU9FzSWK3HSUGO11o_7pr7wG7+EMaZqegkKlr4fZ54__furPMtWPGiPp,2cZ1q15vrWwc9_-AcgeCbuf
keyspace: baselines128dot
query: INSERT INTO vectors25(key, value) VALUES (?,?)
targetFile: /home/mwolters138/vectors.txt


@ -326,7 +326,7 @@
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-s3</artifactId>
<version>1.12.507</version>
<version>1.12.513</version>
</dependency>
<dependency>
<groupId>com.elega9t</groupId>
@ -341,7 +341,7 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.12.0</version>
<version>3.13.0</version>
</dependency>
<dependency>
<groupId>com.squareup</groupId>


@ -67,6 +67,7 @@
<module.adapter-kafka>adapter-kafka</module.adapter-kafka>
<module.adapter-kafka>adapter-amqp</module.adapter-kafka>
<module.adapter-jdbc>adapter-jdbc</module.adapter-jdbc>
<module.hdf-loader>hdf-loader</module.hdf-loader>
<!-- VIRTDATA MODULES -->
<module.virtdata-api>virtdata-api</module.virtdata-api>
@ -76,6 +77,7 @@
<module.virtdata-lib-random>virtdata-lib-random</module.virtdata-lib-random>
<module.virtdata-lib-curves4>virtdata-lib-curves4</module.virtdata-lib-curves4>
<module.virtdata-lib-realer>virtdata-lib-realer</module.virtdata-lib-realer>
<module.virtdata-lib-realer>virtdata-lib-hdf5</module.virtdata-lib-realer>
<module.virtdata-userlibs>virtdata-userlibs</module.virtdata-userlibs>
</properties>
<modules>
@ -114,6 +116,7 @@
<module>adapter-amqp</module>
<module>adapter-jdbc</module>
<module>adapter-pinecone</module>
<module>hdf-loader</module>
<!-- VIRTDATA MODULES -->
<module>virtdata-api</module>
@ -123,6 +126,7 @@
<module>virtdata-lib-random</module>
<module>virtdata-lib-curves4</module>
<module>virtdata-lib-realer</module>
<module>virtdata-lib-hdf5</module>
<module>virtdata-userlibs</module>
<!-- Documentation -->


@ -82,6 +82,7 @@
<version>5.1.1</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>

virtdata-lib-hdf5/pom.xml Normal file

@ -0,0 +1,62 @@
<!--
~ Copyright (c) 2023 nosqlbench
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>mvn-defaults</artifactId>
<groupId>io.nosqlbench</groupId>
<version>${revision}</version>
<relativePath>../mvn-defaults</relativePath>
</parent>
<artifactId>virtdata-lib-hdf5</artifactId>
<packaging>jar</packaging>
<name>virtdata-lib-hdf5</name>
<url>http://nosqlbench.io/</url>
<description>With inspiration from other libraries</description>
<dependencies>
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>virtdata-lib-basics</artifactId>
<version>${revision}</version>
</dependency>
<dependency>
<groupId>io.jhdf</groupId>
<artifactId>jhdf</artifactId>
<version>0.6.10</version>
</dependency>
</dependencies>
<build>
<testResources>
<testResource>
<directory>src/test/resources</directory>
<excludes>
<exclude>h5ex_t_float.h5</exclude>
</excludes>
<filtering>true</filtering>
</testResource>
</testResources>
</build>
</project>

virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/AbstractHdfFileToVector.java Normal file

@ -0,0 +1,48 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.virtdata.library.hdf5.from_long;
import io.jhdf.HdfFile;
import io.jhdf.api.Dataset;
import io.nosqlbench.api.content.NBIO;
import java.nio.file.Paths;
public abstract class AbstractHdfFileToVector {
protected final HdfFile hdfFile;
protected final Dataset dataset;
protected final int[] dims;
public AbstractHdfFileToVector(String filename, String datasetName) {
//hdfFile = new HdfFile(NBIO.all().search(filename).first().get().asPath());
hdfFile = new HdfFile(Paths.get(filename));
//TODO: implement a function to get the dataset by name only without needing the full path
dataset = hdfFile.getDatasetByPath(datasetName);
dims = dataset.getDimensions();
}
protected Object getDataFrom(long l) {
long[] sliceOffset = new long[dims.length];
sliceOffset[0] = (l % dims[0]);
int[] sliceDimensions = new int[dims.length];
sliceDimensions[0] = 1;
// Do we want to give the option of reducing vector dimensions here?
sliceDimensions[1] = dims[1];
return dataset.getData(sliceOffset, sliceDimensions);
}
}
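Worked example of getDataFrom: for a hypothetical dataset with dims {1000, 25} and input l = 2042, the offset wraps to row 42 and jhdf reads back a single 1 x 25 slab without touching the rest of the file:

// l = 2042, dims = {1000, 25}
// sliceOffset     = {2042 % 1000, 0} = {42, 0}
// sliceDimensions = {1, 25}
Object row = dataset.getData(new long[]{42, 0}, new int[]{1, 25}); // float[1][25] for a float dataset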

virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_array/HdfFileToVectorArray.java Normal file

@ -0,0 +1,54 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_array;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.library.hdf5.from_long.AbstractHdfFileToVector;
import io.nosqlbench.virtdata.library.hdf5.helpers.EmbeddingGenerator;
import io.nosqlbench.virtdata.library.hdf5.helpers.EmbeddingGeneratorFactory;
import java.util.function.LongFunction;
/**
* This function reads a vector dataset from an HDF5 file. The dataset itself is not
* read into memory, only the metadata (the "dataset" Java Object). The lambda function
* reads a single vector from the dataset, based on the long input value. As currently
* written this class will only work for datasets with 2 dimensions where the 1st dimension
* specifies the number of vectors and the 2nd dimension specifies the number of elements in
each vector. Only datatypes short, int, float, and double are supported at this time.
* <p>
* This implementation is specific to returning an array of floats
*/
@ThreadSafeMapper
@Categories(Category.experimental)
public class HdfFileToVectorArray extends AbstractHdfFileToVector implements LongFunction<float[]> {
private final EmbeddingGenerator embeddingGenerator;
public HdfFileToVectorArray(String filename, String datasetName) {
super(filename, datasetName);
embeddingGenerator = EmbeddingGeneratorFactory.getGenerator(dataset.getJavaType().getSimpleName().toLowerCase());
}
@Override
public float[] apply(long l) {
Object data = getDataFrom(l);
return embeddingGenerator.generateArrayEmbeddingFrom(data, dims);
}
}
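A usage sketch against the test fixture shipped later in this commit (file path and dataset name are taken from the test below):

LongFunction<float[]> f = new HdfFileToVectorArray("src/test/resources/h5ex_t_float.h5", "/DS1");
float[] v = f.apply(3L); // reads only the fourth vector; inputs wrap modulo the first dimension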

virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_list/HdfFileToVectorList.java Normal file

@ -0,0 +1,56 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_list;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.library.hdf5.from_long.AbstractHdfFileToVector;
import io.nosqlbench.virtdata.library.hdf5.helpers.EmbeddingGenerator;
import io.nosqlbench.virtdata.library.hdf5.helpers.EmbeddingGeneratorFactory;
import java.util.List;
import java.util.function.LongFunction;
/**
* This function reads a vector dataset from an HDF5 file. The dataset itself is not
* read into memory, only the metadata (the "dataset" Java Object). The lambda function
* reads a single vector from the dataset, based on the long input value. As currently
* written this class will only work for datasets with 2 dimensions where the 1st dimension
* specifies the number of vectors and the 2nd dimension specifies the number of elements in
each vector. Only datatypes short, int, float, and double are supported at this time.
* <p>
* This implementation is specific to returning a List of Floats, so as to work with the
* normalization functions e.g. NormalizeListVector and its variants.
*/
@ThreadSafeMapper
@Categories(Category.experimental)
public class HdfFileToVectorList extends AbstractHdfFileToVector implements LongFunction<List<Float>> {
private final EmbeddingGenerator embeddingGenerator;
public HdfFileToVectorList(String filename, String datasetName) {
super(filename, datasetName);
embeddingGenerator = EmbeddingGeneratorFactory.getGenerator(dataset.getJavaType().getSimpleName().toLowerCase());
}
@Override
public List<Float> apply(long l) {
Object data = getDataFrom(l);
return embeddingGenerator.generateListEmbeddingFrom(data, dims);
}
}

virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/DoubleEmbeddingGenerator.java Normal file

@ -0,0 +1,45 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.virtdata.library.hdf5.helpers;
import java.util.List;
public class DoubleEmbeddingGenerator implements EmbeddingGenerator {
@Override
public List<Float> generateListEmbeddingFrom(Object o, int[] dims) {
// in this case o will always be double[1][x]
double[] vector = ((double[][]) o)[0];
Float[] vector2 = new Float[vector.length];
for (int i = 0; i < vector.length; i++) {
vector2[i] = (float) vector[i];
}
return List.of(vector2);
}
@Override
public float[] generateArrayEmbeddingFrom(Object o, int[] dims) {
double[] vector = ((double[][]) o)[0];
float[] vector2 = new float[vector.length];
for (int i = 0; i < vector.length; i++) {
vector2[i] = (float) vector[i];
}
return vector2;
}
}

virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/EmbeddingGenerator.java Normal file

@ -0,0 +1,26 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.virtdata.library.hdf5.helpers;
import java.util.List;
public interface EmbeddingGenerator {
List<Float> generateListEmbeddingFrom(Object o, int[] dims);
float[] generateArrayEmbeddingFrom(Object o, int[] dims);
}

virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/EmbeddingGeneratorFactory.java Normal file

@ -0,0 +1,50 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.virtdata.library.hdf5.helpers;
import java.util.HashMap;
import java.util.Map;
public class EmbeddingGeneratorFactory {
private static final Map<String, EmbeddingGenerator> generators = new HashMap<>();
public static EmbeddingGenerator getGenerator(String type) {
String typeLower = type.equalsIgnoreCase("short") ? "int" : type.toLowerCase();
switch (typeLower) {
case "float" -> {
if (!generators.containsKey(type)) {
generators.put(type, new FloatEmbeddingGenerator());
}
return generators.get(type);
}
case "int" -> {
if (!generators.containsKey(type)) {
generators.put(type, new IntEmbeddingGenerator());
}
return generators.get(type);
}
case "double" -> {
if (!generators.containsKey(type)) {
generators.put(type, new DoubleEmbeddingGenerator());
}
return generators.get(type);
}
default -> throw new RuntimeException("Unknown embedding type: " + type);
}
}
}

virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/FloatEmbeddingGenerator.java Normal file

@ -0,0 +1,40 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.virtdata.library.hdf5.helpers;
import java.util.List;
public class FloatEmbeddingGenerator implements EmbeddingGenerator {
@Override
public List<Float> generateListEmbeddingFrom(Object o, int[] dims) {
// in this case o will always be float[1][x]
float[] vector = ((float[][]) o)[0];
Float[] vector2 = new Float[vector.length];
for (int i = 0; i < vector.length; i++) {
vector2[i] = vector[i];
}
return List.of(vector2);
}
@Override
public float[] generateArrayEmbeddingFrom(Object o, int[] dims) {
return ((float[][]) o)[0];
}
}

virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/IntEmbeddingGenerator.java Normal file

@ -0,0 +1,43 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.virtdata.library.hdf5.helpers;
import java.util.List;
public class IntEmbeddingGenerator implements EmbeddingGenerator {
@Override
public List<Float> generateListEmbeddingFrom(Object o, int[] dims) {
// in this case o will always be int[1][x]
int[] vector = ((int[][]) o)[0];
Float[] vector2 = new Float[vector.length];
for (int i = 0; i < vector.length; i++) {
vector2[i] = (float) vector[i];
}
return List.of(vector2);
}
@Override
public float[] generateArrayEmbeddingFrom(Object o, int[] dims) {
int[] vector = ((int[][]) o)[0];
float[] vector2 = new float[vector.length];
for (int i = 0; i < vector.length; i++) {
vector2[i] = (float) vector[i];
}
return vector2;
}
}

virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_array/HdfFileToArrayTest.java Normal file

@ -0,0 +1,45 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_array;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class HdfFileToArrayTest {
@Test
public void testHdfFileToVector() {
final float[][] results = new float[][]{
{0.0f,1.0f,2.0f,3.0f,4.0f,5.0f,6.0f},
{2.0f,1.6666666f,2.4f,3.2857144f,4.2222223f,5.181818f,6.1538463f},
{4.0f,2.3333333f,2.8f,3.5714285f,4.4444447f,5.3636365f,6.3076925f},
{6.0f,3.0f,3.2f,3.857143f,4.6666665f,5.5454545f,6.4615383f}
};
HdfFileToVectorArray hdfFileToVector = new HdfFileToVectorArray(
"src/test/resources/h5ex_t_float.h5",
"/DS1");
float[] read;
for (int i = 0; i < 4; i++) {
read = hdfFileToVector.apply(i);
for (int j = 0; j < 7; j++) {
assertEquals(results[i][j], read[j], 0.0f); // plain assert is a no-op unless -ea is set
}
}
}
}

virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_list/HdfFileToVectorTest.java Normal file

@ -0,0 +1,47 @@
/*
* Copyright (c) 2023 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_list;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.util.List;
public class HdfFileToVectorTest {
@Test
public void testHdfFileToVector() {
final float[][] results = new float[][]{
{0.0f,1.0f,2.0f,3.0f,4.0f,5.0f,6.0f},
{2.0f,1.6666666f,2.4f,3.2857144f,4.2222223f,5.181818f,6.1538463f},
{4.0f,2.3333333f,2.8f,3.5714285f,4.4444447f,5.3636365f,6.3076925f},
{6.0f,3.0f,3.2f,3.857143f,4.6666665f,5.5454545f,6.4615383f}
};
HdfFileToVectorList hdfFileToVector = new HdfFileToVectorList(
"src/test/resources/h5ex_t_float.h5",
"/DS1");
List<Float> read;
for (int i = 0; i < 4; i++) {
read = hdfFileToVector.apply(i);
for (int j = 0; j < 7; j++) {
assertEquals(results[i][j], read.get(j), 0.0f); // plain assert is a no-op unless -ea is set
}
}
}
}

virtdata-lib-hdf5/src/test/resources/h5ex_t_float.h5 Binary file not shown.


@ -66,6 +66,13 @@
<artifactId>virtdata-lib-curves4</artifactId>
<version>${revision}</version>
</dependency>
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>virtdata-lib-hdf5</artifactId>
<version>${revision}</version>
</dependency>
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>docsys</artifactId>