diff --git a/hdf-loader/pom.xml b/hdf-loader/pom.xml
deleted file mode 100644
index 3c4f3e463..000000000
--- a/hdf-loader/pom.xml
+++ /dev/null
@@ -1,94 +0,0 @@
-
-
-
-
- 4.0.0
- hdf-loader
-
- jar
-
-
- mvn-defaults
- io.nosqlbench
- ${revision}
- ../mvn-defaults
-
-
- ${project.artifactId}
-
-
-
-
- org.snakeyaml
- snakeyaml-engine
- 2.6
-
-
- org.yaml
- snakeyaml
- 2.0
-
-
-
- com.datastax.oss
- java-driver-core
- 4.16.0
-
-
-
-
- com.fasterxml.jackson.core
- jackson-core
- 2.15.2
-
-
-
-
- org.deeplearning4j
- deeplearning4j-core
- 1.0.0-M2.1
-
-
-
- org.nd4j
- nd4j-native
- 1.0.0-M2.1
-
-
-
- org.deeplearning4j
- deeplearning4j-nlp
- 1.0.0-M2.1
-
-
-
- io.jhdf
- jhdf
- 0.6.10
-
-
- io.nosqlbench
- nb-api
- 5.17.3-SNAPSHOT
- compile
-
-
-
-
-
diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java
deleted file mode 100644
index 1a6ba6fa6..000000000
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2023 nosqlbench
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.nosqlbench.loader.hdf;
-
-import io.nosqlbench.loader.hdf.config.LoaderConfig;
-import io.nosqlbench.loader.hdf.readers.Hdf5Reader;
-import io.nosqlbench.loader.hdf.readers.HdfReader;
-import io.nosqlbench.loader.hdf.writers.AstraVectorWriter;
-import io.nosqlbench.loader.hdf.writers.FileVectorWriter;
-import io.nosqlbench.loader.hdf.writers.NoopVectorWriter;
-import io.nosqlbench.loader.hdf.writers.VectorWriter;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-
-public class HdfLoader {
- private static final Logger logger = LogManager.getLogger(HdfLoader.class);
- public static final String FILEWRITER = "filewriter";
- public static final String ASTRA = "astra";
- public static final String NOOP = "noop";
- public static final String HDF5 = "hdf5";
- public static final String HDF4 = "hdf4";
-
- public static void main (String[] args) {
- if (args.length == 0) {
- System.out.println("Usage: hdf-loader ");
- System.exit(1);
- }
- try {
- LoaderConfig config = new LoaderConfig(args[0]);
- logger.info("Starting loader with config: " + config);
- HdfReader reader = null;
- VectorWriter writer = null;
-
- String format = config.getFormat();
- switch (format.toLowerCase()) {
- case HDF4 -> {
- logger.info("HDF4 format not yet supported");
- System.exit(1);
- }
- case HDF5 -> {
- logger.info("HDF5 format selected");
- reader = new Hdf5Reader(config);
- }
- default -> {
- logger.info("Unknown format: " + format);
- System.exit(1);
- }
- }
-
- String writerType = config.getWriter();
- logger.info("Using writer type: " + writerType);
- switch (writerType.toLowerCase()) {
- case FILEWRITER -> writer = new FileVectorWriter(config);
- case ASTRA -> writer = new AstraVectorWriter(config);
- case NOOP -> writer = new NoopVectorWriter();
- default -> {
- logger.info("Unknown writer type: " + writerType);
- System.exit(1);
- }
- }
- reader.setWriter(writer);
- logger.info("Starting main read loop");
- reader.read();
- } catch (Exception e) {
- logger.error(e);
- System.exit(1);
- }
- }
-}
diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java
deleted file mode 100644
index f8c02137f..000000000
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2023 nosqlbench
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.nosqlbench.loader.hdf.config;
-
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-import org.yaml.snakeyaml.Yaml;
-
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
-public class LoaderConfig {
- private static final Logger logger = LogManager.getLogger(LoaderConfig.class);
- private static final Yaml yaml = new Yaml();
- private final Map configMap;
-
- public LoaderConfig(String filePath) throws IOException {
- FileReader fileReader = new FileReader(filePath);
- configMap = yaml.load(fileReader);
- for (Map.Entry entry : configMap.entrySet()) {
- logger.debug(entry.getKey() + " : " + entry.getValue());
- }
- }
-
- public Object getRawValue(String key) {
- return configMap.get(key);
- }
-
- public String getStringValue(String key) {
- return configMap.get(key).toString();
- }
-
- public List getDatasets() {
- return (List) configMap.get("datasets");
- }
-
- public String getFormat() {
- return (String) configMap.getOrDefault("format", "HD5");
- }
-
- public Map getAstra() {
- return (Map) configMap.get("astra");
- }
-
- public String getEmbedding() {
- return (String) configMap.getOrDefault("embedding", "Deeplearning4j");
- }
-
- public String getWriter() {
- return (String) configMap.getOrDefault("writer", "filewriter");
- }
-
- public String getSourceFile() {
- return (String) configMap.get("sourceFile");
- }
-
- public String getTargetFile() {
- return (String) configMap.getOrDefault("targetFile", "./vectors.txt");
- }
-
- public int getThreads() {
- return (int) configMap.getOrDefault("threads", 5);
- }
-
- public int getQueueSize() {
- return (int) configMap.getOrDefault("queueSize", 1000);
- }
-}
diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/DoubleEmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/DoubleEmbeddingGenerator.java
deleted file mode 100644
index 07b96dfea..000000000
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/DoubleEmbeddingGenerator.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2023 nosqlbench
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.nosqlbench.loader.hdf.embedding;
-
-public class DoubleEmbeddingGenerator implements EmbeddingGenerator {
-
- @Override
- public float[][] generateEmbeddingFrom(Object o, int[] dims) {
- return switch (dims.length) {
- case 1 -> new float[][]{convertToFloat((double[]) o)};
- case 2 -> convertToFloats((double[][]) o);
- case 3 -> flatten(o, dims);
- default -> throw new RuntimeException("unsupported embedding dimensionality: " + dims.length);
- };
- }
-
- private float[][] convertToFloats(double[][] o) {
- float[][] floats = new float[o.length][];
- for (int i = 0; i < o.length; i++) {
- floats[i] = convertToFloat(o[i]);
- }
- return floats;
- }
-
- public float[] convertToFloat(double[] doubleArray) {
- if (doubleArray == null) {
- return null;
- }
- float[] floatArray = new float[doubleArray.length];
- for (int i = 0; i < doubleArray.length; i++) {
- floatArray[i] = (float) doubleArray[i];
- }
- return floatArray;
- }
-
- private float[][] flatten(Object o, int[] dims) {
- double[][][] arr = (double[][][]) o;
- float[][] flat = new float[dims[0]][dims[1] * dims[2]];
- for (int i = 0; i < dims[0]; i++) {
- for (int j = 0; j < dims[1]; j++) {
- for (int k = 0; k < dims[2]; k++) {
- flat[i][j * dims[2] + k] = (float)arr[i][j][k];
- }
- }
- }
- return flat;
- }
-}
diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGenerator.java
deleted file mode 100644
index 22fcad5ed..000000000
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGenerator.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (c) 2023 nosqlbench
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.nosqlbench.loader.hdf.embedding;
-
-public interface EmbeddingGenerator {
- float[][] generateEmbeddingFrom(Object o, int[] dims);
-}
diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java
deleted file mode 100644
index a7b677e65..000000000
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2023 nosqlbench
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.nosqlbench.loader.hdf.embedding;
-
-import java.util.HashMap;
-import java.util.Map;
-
-public class EmbeddingGeneratorFactory {
- private static final Map generators = new HashMap<>();
-
- public static EmbeddingGenerator getGenerator(String type) {
- String typeLower = type.equalsIgnoreCase("short") ? "int" : type.toLowerCase();
- if (typeLower.equals("integer")) typeLower = "int";
- switch (typeLower) {
- case "string" -> {
- if (!generators.containsKey(type)) {
- generators.put(type, new StringEmbeddingGenerator());
- }
- return generators.get(type);
- }
- case "float" -> {
- if (!generators.containsKey(type)) {
- generators.put(type, new FloatEmbeddingGenerator());
- }
- return generators.get(type);
- }
- case "double" -> {
- if (!generators.containsKey(type)) {
- generators.put(type, new DoubleEmbeddingGenerator());
- }
- return generators.get(type);
- }
- case "int" -> {
- if (!generators.containsKey(type)) {
- generators.put(type, new IntEmbeddingGenerator());
- }
- return generators.get(type);
- }
- default -> throw new RuntimeException("Unknown embedding type: " + type);
- }
- }
-}
diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/FloatEmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/FloatEmbeddingGenerator.java
deleted file mode 100644
index 9245e53f5..000000000
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/FloatEmbeddingGenerator.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2023 nosqlbench
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.nosqlbench.loader.hdf.embedding;
-
-public class FloatEmbeddingGenerator implements EmbeddingGenerator {
-
- @Override
- public float[][] generateEmbeddingFrom(Object o, int[] dims) {
- return switch (dims.length) {
- case 1 -> new float[][]{(float[]) o};
- case 2 -> (float[][]) o;
- case 3 -> flatten(o, dims);
- default -> throw new RuntimeException("unsupported embedding dimensionality: " + dims.length);
- };
- }
-
- private float[][] flatten(Object o, int[] dims) {
- float[][][] arr = (float[][][]) o;
- float[][] flat = new float[dims[0]][dims[1] * dims[2]];
- for (int i = 0; i < dims[0]; i++) {
- for (int j = 0; j < dims[1]; j++) {
- if (dims[2] >= 0) System.arraycopy(arr[i][j], 0, flat[i], j * dims[2] + 0, dims[2]);
- }
- }
- return flat;
- }
-}
diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/IntEmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/IntEmbeddingGenerator.java
deleted file mode 100644
index c4f0c1988..000000000
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/IntEmbeddingGenerator.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2023 nosqlbench
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.nosqlbench.loader.hdf.embedding;
-
-public class IntEmbeddingGenerator implements EmbeddingGenerator {
- @Override
- public float[][] generateEmbeddingFrom(Object o, int[] dims) {
- switch (dims.length) {
- case 1 -> {
- float[] arr = new float[dims[0]];
- for (int i = 0; i < dims[0]; i++) {
- arr[i] = ((int[]) o)[i];
- }
- return new float[][]{arr};
- }
- case 2 -> {
- float[][] arr = new float[dims[0]][dims[1]];
- for (int i = 0; i < dims[0]; i++) {
- for (int j = 0; j < dims[1]; j++) {
- arr[i][j] = ((int[][]) o)[i][j];
- }
- }
- return arr;
- }
- case 3 -> {
- return flatten(o, dims);
- }
- default ->
- throw new RuntimeException("unsupported embedding dimensionality: " + dims.length);
- }
- }
-
- private float[][] flatten(Object o, int[] dims) {
- int[][][] arr = (int[][][]) o;
- float[][] flat = new float[dims[0]][dims[1] * dims[2]];
- for (int i = 0; i < dims[0]; i++) {
- for (int j = 0; j < dims[1]; j++) {
- for (int k = 0; k < dims[2]; k++) {
- flat[i][j * dims[2] + k] = arr[i][j][k];
- }
- }
- }
- return flat;
- }
-}
diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java
deleted file mode 100644
index 01ffb9af4..000000000
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2023 nosqlbench
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.nosqlbench.loader.hdf.embedding;
-
-import org.deeplearning4j.models.word2vec.Word2Vec;
-import org.deeplearning4j.text.sentenceiterator.BasicLineIterator;
-import org.deeplearning4j.text.sentenceiterator.CollectionSentenceIterator;
-import org.deeplearning4j.text.sentenceiterator.SentenceIterator;
-import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory;
-import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
-
-import java.util.Arrays;
-import java.util.Collections;
-
-public class StringEmbeddingGenerator implements EmbeddingGenerator {
- private final TokenizerFactory tokenizerFactory= new DefaultTokenizerFactory();
-
- @Override
- public float[][] generateEmbeddingFrom(Object o, int[] dims) {
- switch (dims.length) {
- case 1 -> {
- return generateWordEmbeddings((String[]) o);
- }
- default -> throw new RuntimeException("unsupported embedding dimensionality: " + dims.length);
- }
-
- }
-
- private float[][] generateWordEmbeddings(String[] text) {
- SentenceIterator iter = new CollectionSentenceIterator(Collections.singletonList(text));
- /*Word2Vec vec = new Word2Vec.Builder()
- .minWordFrequency(1)
- .iterations(1)
- .layerSize(targetDims)
- .seed(42)
- .windowSize(5)
- .iterate(iter)
- .tokenizerFactory(tokenizerFactory)
- .build();
-*/
- return null;
- }
-}
diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java
deleted file mode 100644
index af3810202..000000000
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (c) 2023 nosqlbench
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.nosqlbench.loader.hdf.readers;
-
-import io.jhdf.HdfFile;
-import io.jhdf.api.Dataset;
-import io.jhdf.api.Group;
-import io.jhdf.api.Node;
-import io.nosqlbench.loader.hdf.config.LoaderConfig;
-import io.nosqlbench.loader.hdf.embedding.EmbeddingGenerator;
-import io.nosqlbench.loader.hdf.writers.VectorWriter;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingQueue;
-
-import static io.nosqlbench.loader.hdf.embedding.EmbeddingGeneratorFactory.getGenerator;
-
-public class Hdf5Reader implements HdfReader {
- private static final Logger logger = LogManager.getLogger(Hdf5Reader.class);
- public static final String ALL = "all";
- private VectorWriter writer;
- private final LoaderConfig config;
- private final ExecutorService executorService;
- private final LinkedBlockingQueue queue;
- private List datasets;
- private final float[] SHUTDOWN = new float[0];
- public Hdf5Reader(LoaderConfig config) {
- this.config = config;
- executorService = Executors.newCachedThreadPool();
- queue = new LinkedBlockingQueue<>(config.getQueueSize());
- }
-
- @Override
- public void setWriter(VectorWriter writer) {
- this.writer = writer;
- writer.setQueue(queue);
- }
-
- public void extractDatasets(Group parent) {
- Map nodes = parent.getChildren();
- for (String key : nodes.keySet()) {
- Node node = nodes.get(key);
- if (node instanceof Dataset) {
- datasets.add(node.getPath());
- }
- else if (node.isGroup()) {
- extractDatasets((Group) node);
- }
- }
- }
-
- @Override
- public void read() {
- HdfFile hdfFile = new HdfFile(Paths.get(config.getSourceFile()));
- datasets = config.getDatasets();
- if (datasets.get(0).equalsIgnoreCase(ALL)) {
- extractDatasets(hdfFile);
- }
- List> futures = new ArrayList<>();
- executorService.submit(writer);
- for (String ds : datasets) {
- if (ds.equalsIgnoreCase(ALL)) {
- continue;
- }
- Future> future = executorService.submit(() -> {
- logger.info("Processing dataset: " + ds);
- Dataset dataset = hdfFile.getDatasetByPath(ds);
- int[] dims = dataset.getDimensions();
- String type = dataset.getJavaType().getSimpleName().toLowerCase();
- EmbeddingGenerator generator = getGenerator(type);
- Object data;
- if (dataset.getSizeInBytes() > Integer.MAX_VALUE) {
- logger.info("slicing large dataset: " + ds);
- // TODO: For now this will be implemented to handle numeric types with
- // 2 dimensions where the 1st dimension is the number of vectors and the 2nd
- // dimension is the number of dimensions in the vector.
- long[] sliceOffset = new long[dims.length];
- int[] sliceDimensions = new int[dims.length];
- sliceDimensions[1] = dims[1];
- int noOfSlices = (int) (dataset.getSizeInBytes() / Integer.MAX_VALUE) + 1;
- int sliceSize = dims[0] / noOfSlices;
- for (int i = 0; i < noOfSlices; i++) {
- sliceOffset[0] = (long) i * sliceSize;
- sliceDimensions[0] = sliceSize;
- data = dataset.getData(sliceOffset, sliceDimensions);
- float[][] vectors = generator.generateEmbeddingFrom(data, dims);
- for (float[] vector : vectors) {
- try {
- queue.put(vector);
- } catch (InterruptedException e) {
- logger.error(e.getMessage(), e);
- }
- }
- }
- } else {
- data = dataset.getData();
- float[][] vectors = generator.generateEmbeddingFrom(data, dims);
- for (float[] vector : vectors) {
- try {
- queue.put(vector);
- } catch (InterruptedException e) {
- logger.error(e.getMessage(), e);
- }
- }
- }
- });
- futures.add(future);
- }
- for (Future> future : futures) {
- try {
- future.get();
- } catch (Exception e) {
- logger.error(e.getMessage(), e);
- }
- }
- hdfFile.close();
- writer.shutdown();
- try {
- queue.put(SHUTDOWN);
- } catch (InterruptedException e) {
- throw new RuntimeException(e);
- }
- executorService.shutdown();
- }
-}
diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReader.java
deleted file mode 100644
index f9304e6c9..000000000
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReader.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2023 nosqlbench
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.nosqlbench.loader.hdf.readers;
-
-import io.nosqlbench.loader.hdf.writers.VectorWriter;
-
-public interface HdfReader {
- void setWriter(VectorWriter writer);
-
- void read();
-}
diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AbstractVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AbstractVectorWriter.java
deleted file mode 100644
index 4c1c070e3..000000000
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AbstractVectorWriter.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2023 nosqlbench
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.nosqlbench.loader.hdf.writers;
-
-import java.util.concurrent.LinkedBlockingQueue;
-
-public abstract class AbstractVectorWriter implements VectorWriter {
- protected LinkedBlockingQueue queue;
- protected boolean shutdown = false;
-
- public void setQueue(LinkedBlockingQueue queue) {
- this.queue = queue;
- }
-
- @Override
- public void run() {
- while (!shutdown || !queue.isEmpty()) {
- try {
- float[] vector = queue.take();
- if (vector.length==0) {
- break;
- }
- writeVector(vector);
- } catch (InterruptedException e) {
- throw new RuntimeException(e);
- }
- }
- }
-
- protected abstract void writeVector(float[] vector);
-
-}
diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java
deleted file mode 100644
index 29bbf6191..000000000
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2023 nosqlbench
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.nosqlbench.loader.hdf.writers;
-
-import com.datastax.oss.driver.api.core.CqlSession;
-import com.datastax.oss.driver.api.core.cql.PreparedStatement;
-import com.datastax.oss.driver.api.core.data.CqlVector;
-import io.nosqlbench.loader.hdf.config.LoaderConfig;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-
-import java.nio.file.Paths;
-import java.util.Map;
-
-public class AstraVectorWriter extends AbstractVectorWriter {
- private static final Logger logger = LogManager.getLogger(AstraVectorWriter.class);
- private final CqlSession session;
- PreparedStatement insert_vector;
-
- public AstraVectorWriter(LoaderConfig config) {
- Map astraParams = config.getAstra();
- session = CqlSession.builder()
- .withCloudSecureConnectBundle(Paths.get(astraParams.get("scb")))
- .withAuthCredentials(astraParams.get("clientId"), astraParams.get("clientSecret"))
- .withKeyspace(astraParams.get("keyspace"))
- .build();
- logger.info("Astra session initialized");
- insert_vector = session.prepare(astraParams.get("query"));
- }
-//TODO: this is insanely slow. Needs work on threading/batching
- @Override
- protected void writeVector(float[] vector) {
- Float[] vector2 = new Float[vector.length];
- for (int i = 0; i < vector.length; i++) {
- vector2[i] = vector[i];
- }
- CqlVector.Builder vectorBuilder = CqlVector.builder();
- vectorBuilder.add(vector2);
- session.execute(insert_vector.bind(getPartitionValue(vector), vectorBuilder.build()));
- }
-
- private String getPartitionValue(float[] vector) {
- float sum = 0;
- for (float f : vector) {
- sum += f;
- }
- return String.valueOf(sum);
- }
-
- @Override
- public void shutdown() {
- shutdown = true;
- }
-}
diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java
deleted file mode 100644
index 710b419d3..000000000
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2023 nosqlbench
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.nosqlbench.loader.hdf.writers;
-
-import io.nosqlbench.loader.hdf.config.LoaderConfig;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-
-import java.io.*;
-
-public class FileVectorWriter extends AbstractVectorWriter {
- private static final Logger logger = LogManager.getLogger(FileVectorWriter.class);
- private final BufferedWriter targetFile;
- public FileVectorWriter(LoaderConfig config) throws IOException {
- String targetFileName = config.getTargetFile();
- targetFile = new BufferedWriter(new FileWriter(targetFileName));
- logger.info("Writing to file: " + targetFileName);
- }
-
- @Override
- protected void writeVector(float[] vector) {
- try {
- targetFile.write("[");
- for (int i = 0; i < vector.length; i++) {
- targetFile.write(String.valueOf(vector[i]));
- if (i < vector.length - 1) {
- targetFile.write(",");
- }
- }
- targetFile.write("]");
- targetFile.write("\n");
- targetFile.flush();
- } catch (IOException e) {
- logger.error(e.getMessage(), e);
- }
- }
-
- @Override
- public void shutdown() {
- shutdown = true;
- }
-}
diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/NoopVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/NoopVectorWriter.java
deleted file mode 100644
index 51788ac4f..000000000
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/NoopVectorWriter.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2023 nosqlbench
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.nosqlbench.loader.hdf.writers;
-
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-
-public class NoopVectorWriter extends AbstractVectorWriter {
- private static final Logger logger = LogManager.getLogger(NoopVectorWriter.class);
-
- @Override
- protected void writeVector(float[] vector) {
- //No-op
- logger.debug(vector);
- }
-
- @Override
- public void shutdown() {
- shutdown = true;
- }
-}
diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriter.java
deleted file mode 100644
index 7e1da2edb..000000000
--- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriter.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2023 nosqlbench
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.nosqlbench.loader.hdf.writers;
-
-import java.util.concurrent.LinkedBlockingQueue;
-
-public interface VectorWriter extends Runnable {
- void setQueue(LinkedBlockingQueue queue);
-
- void shutdown();
-}
diff --git a/hdf-loader/src/main/resources/config.yaml b/hdf-loader/src/main/resources/config.yaml
deleted file mode 100644
index 4a117f73b..000000000
--- a/hdf-loader/src/main/resources/config.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-format: HDF5
-sourceFile: <>
-datasets:
- - all
-embedding: word2vec
-writer: filewriter
-astra:
- scb: <>
- clientId: <>
- clientSecret: <>
- keyspace: <>
- query: INSERT INTO vectors25(key, value) VALUES (?,?)
-targetFile: <>
-
diff --git a/pom.xml b/pom.xml
index c1aee8050..35ff05c37 100644
--- a/pom.xml
+++ b/pom.xml
@@ -66,8 +66,6 @@
adapter-amqp
adapter-jdbc
-
-
virtdata-api
virtdata-lang
@@ -114,7 +112,6 @@
adapter-jdbc
adapter-pgvector
adapter-pinecone
-
virtdata-api