From 04bab325db124b1402ee083dbc5b26ebe4f26649 Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Tue, 25 Jul 2023 21:19:02 -0500 Subject: [PATCH 01/52] fix: upgrade com.amazonaws:aws-java-sdk-dynamodb from 1.12.500 to 1.12.501 (#1417) Snyk has created this PR to upgrade com.amazonaws:aws-java-sdk-dynamodb from 1.12.500 to 1.12.501. See this package in Maven Repository: https://mvnrepository.com/artifact/com.amazonaws/aws-java-sdk-dynamodb/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/db3dfb82-467b-4263-94f8-28f933540a6d?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot --- adapter-dynamodb/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adapter-dynamodb/pom.xml b/adapter-dynamodb/pom.xml index f796250e5..0c710d880 100644 --- a/adapter-dynamodb/pom.xml +++ b/adapter-dynamodb/pom.xml @@ -43,7 +43,7 @@ com.amazonaws aws-java-sdk-dynamodb - 1.12.500 + 1.12.501 From 80a23ffedc42c992f591dd3e3134291d6a331b1e Mon Sep 17 00:00:00 2001 From: Jeff Banks Date: Wed, 26 Jul 2023 08:22:57 -0500 Subject: [PATCH 02/52] fix: upgrade com.github.oshi:oshi-core-java11 from 6.4.3 to 6.4.4 (#1419) Snyk has created this PR to upgrade com.github.oshi:oshi-core-java11 from 6.4.3 to 6.4.4. See this package in Maven Repository: https://mvnrepository.com/artifact/com.github.oshi/oshi-core-java11/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/024e32ec-7f80-485c-b7bf-f69d45f933ce?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 377bbf1c2..3557097ac 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -316,7 +316,7 @@ com.github.oshi oshi-core-java11 - 6.4.3 + 6.4.4 com.google.code.gson From b186ea5ce06af5afa9906192b1c7d44fa6619399 Mon Sep 17 00:00:00 2001 From: Jeff Banks Date: Wed, 26 Jul 2023 08:54:23 -0500 Subject: [PATCH 03/52] fix: upgrade com.amazonaws:aws-java-sdk-s3 from 1.12.498 to 1.12.501 (#1418) Snyk has created this PR to upgrade com.amazonaws:aws-java-sdk-s3 from 1.12.498 to 1.12.501. See this package in Maven Repository: https://mvnrepository.com/artifact/com.amazonaws/aws-java-sdk-s3/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/024e32ec-7f80-485c-b7bf-f69d45f933ce?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 3557097ac..609ae08ae 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -326,7 +326,7 @@ com.amazonaws aws-java-sdk-s3 - 1.12.498 + 1.12.501 com.elega9t From 654267b524cc552a8898ea6f93ed4bb44a8c76bd Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Thu, 27 Jul 2023 14:59:01 -0400 Subject: [PATCH 04/52] initial commit --- hdf-loader/pom.xml | 63 ++++++++++++++ .../io/nosqlbench/loader/hdf/HdfLoader.java | 82 +++++++++++++++++++ .../loader/hdf/config/LoaderConfig.java | 77 +++++++++++++++++ .../loader/hdf/readers/Hdf5Reader.java | 70 ++++++++++++++++ .../loader/hdf/readers/HdfReader.java | 27 ++++++ .../loader/hdf/readers/HdfReaders.java | 23 ++++++ .../loader/hdf/writers/AstraVectorWriter.java | 25 ++++++ .../loader/hdf/writers/FileVectorWriter.java | 25 ++++++ .../loader/hdf/writers/VectorWriter.java | 21 +++++ .../loader/hdf/writers/VectorWriters.java | 23 ++++++ hdf-loader/src/main/resources/config.yaml | 15 ++++ 11 files changed, 451 insertions(+) create mode 100644 hdf-loader/pom.xml create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReader.java create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReaders.java create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriter.java create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriters.java create mode 100644 hdf-loader/src/main/resources/config.yaml diff --git a/hdf-loader/pom.xml b/hdf-loader/pom.xml new file mode 100644 index 000000000..9917f6091 --- /dev/null +++ b/hdf-loader/pom.xml @@ -0,0 +1,63 @@ + + + + + 4.0.0 + hdf-loader + + jar + + + mvn-defaults + io.nosqlbench + ${revision} + ../mvn-defaults + + + ${project.artifactId} + + + 17 + 17 + UTF-8 + + + + + + org.snakeyaml + snakeyaml-engine + 2.6 + + + org.yaml + snakeyaml + 2.0 + + + + org.hdfgroup + hdf-java + 2.6.1 + + + + + diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java new file mode 100644 index 000000000..51114adda --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf; + +import io.nosqlbench.loader.hdf.config.LoaderConfig; +import io.nosqlbench.loader.hdf.readers.HdfReaders; +import io.nosqlbench.loader.hdf.readers.Hdf5Reader; +import io.nosqlbench.loader.hdf.readers.HdfReader; +import io.nosqlbench.loader.hdf.writers.AstraVectorWriter; +import io.nosqlbench.loader.hdf.writers.FileVectorWriter; +import io.nosqlbench.loader.hdf.writers.VectorWriter; +import io.nosqlbench.loader.hdf.writers.VectorWriters; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.IOException; + +public class HdfLoader { + private static final Logger logger = LogManager.getLogger(HdfLoader.class); + + public static void main (String[] args) { + if (args.length == 0) { + System.out.println("Usage: hdf-loader "); + System.exit(1); + } + try { + LoaderConfig config = new LoaderConfig(args[0]); + HdfReader reader = null; + VectorWriter writer = null; + + String format = config.getFormat(); + switch (HdfReaders.valueOf(format)) { + case HDF4 -> { + logger.info("HDF4 format not yet supported"); + System.exit(1); + } + case HDF5 -> { + reader = new Hdf5Reader(config); + } + default -> { + logger.info("Unknown format: " + format); + System.exit(1); + } + } + + String writerType = config.getWriter(); + switch (VectorWriters.valueOf(writerType)) { + case filewriter -> { + writer = new FileVectorWriter(config); + } + case astra -> { + writer = new AstraVectorWriter(config); + } + default -> { + logger.info("Unknown writer type: " + writerType); + System.exit(1); + } + } + reader.setWriter(writer); + reader.read(); + } catch (Exception e) { + logger.error(e); + System.exit(1); + } + + } +} diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java new file mode 100644 index 000000000..5816cf925 --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf.config; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.yaml.snakeyaml.Yaml; + +import java.io.FileReader; +import java.io.IOException; +import java.util.List; +import java.util.Map; + +public class LoaderConfig { + private static final Logger logger = LogManager.getLogger(LoaderConfig.class); + private static final Yaml yaml = new Yaml(); + private final Map configMap; + + public LoaderConfig(String filePath) throws IOException { + FileReader fileReader = new FileReader(filePath); + configMap = yaml.load(fileReader); + for (Map.Entry entry : configMap.entrySet()) { + logger.debug(entry.getKey() + " : " + entry.getValue()); + } + } + + public Object getRawValue(String key) { + return configMap.get(key); + } + + public String getStringValue(String key) { + return configMap.get(key).toString(); + } + + public List> getDatasets() { + return (List>) configMap.get("datasets"); + } + + public String getFormat() { + return (String) configMap.getOrDefault("format", "HD5"); + } + + public Map getAstra() { + return (Map) configMap.get("astra"); + } + + public String getEmbedding() { + return (String) configMap.getOrDefault("embedding", "Deeplearning4j"); + } + + public String getWriter() { + return (String) configMap.getOrDefault("writer", "filewriter"); + } + + public String getSourceFile() { + return (String) configMap.get("sourceFile"); + } + + public int getThreads() { + return (int) configMap.getOrDefault("threads", 1); + } +} diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java new file mode 100644 index 000000000..1adcdfc3a --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf.readers; + +import io.nosqlbench.loader.hdf.config.LoaderConfig; +import io.nosqlbench.loader.hdf.writers.VectorWriter; + +import ncsa.hdf.hdf5lib.H5; +import ncsa.hdf.hdf5lib.HDF5Constants; +import ncsa.hdf.hdf5lib.exceptions.HDF5LibraryException; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +public class Hdf5Reader implements HdfReader { + private static final Logger logger = LogManager.getLogger(Hdf5Reader.class); + private VectorWriter writer; + private final LoaderConfig config; + private final ExecutorService executorService; + public Hdf5Reader(LoaderConfig config) { + this.config = config; + executorService = Executors.newFixedThreadPool(config.getThreads()); + } + + @Override + public void setWriter(VectorWriter writer) { + this.writer = writer; + } + + @Override + public void read() throws HDF5LibraryException { + String sourceFile = config.getSourceFile(); + int fileId = H5.H5Fopen(sourceFile, HDF5Constants.H5F_ACC_RDONLY, HDF5Constants.H5P_DEFAULT); + for (Map dataset : config.getDatasets()) { + executorService.submit(() -> { + // Your lambda code that runs in a separate thread for each object + logger.info("Processing dataset: " + dataset.get("name")); + try { + int datasetId = H5.H5Dopen(fileId, dataset.get("name")); + // Get the dataspace of the dataset + int dataspaceId = H5.H5Dget_space(datasetId); + + // Get the number of dimensions in the dataspace + int numDimensions = H5.H5Sget_simple_extent_ndims(dataspaceId); + long[] dims = new long[numDimensions]; + } catch (HDF5LibraryException e) { + logger.error(e); + } + }); + } + } +} diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReader.java new file mode 100644 index 000000000..48c9b5a49 --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReader.java @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf.readers; + +import io.nosqlbench.loader.hdf.writers.VectorWriter; +import ncsa.hdf.hdf5lib.exceptions.HDF5LibraryException; + +public interface HdfReader { + void setWriter(VectorWriter writer); + + void read() throws HDF5LibraryException; +} diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReaders.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReaders.java new file mode 100644 index 000000000..59aa29ebf --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReaders.java @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf.readers; + +public enum HdfReaders { + HDF4, + HDF5 +} diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java new file mode 100644 index 000000000..6c1d46ade --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf.writers; + +import io.nosqlbench.loader.hdf.config.LoaderConfig; + +public class AstraVectorWriter implements VectorWriter { + public AstraVectorWriter(LoaderConfig config) { + } +} diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java new file mode 100644 index 000000000..f8038ea60 --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf.writers; + +import io.nosqlbench.loader.hdf.config.LoaderConfig; + +public class FileVectorWriter implements VectorWriter { + public FileVectorWriter(LoaderConfig config) { + } +} diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriter.java new file mode 100644 index 000000000..ad4d1c542 --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriter.java @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf.writers; + +public interface VectorWriter { +} diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriters.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriters.java new file mode 100644 index 000000000..8aafb7043 --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriters.java @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf.writers; + +public enum VectorWriters { + astra, + filewriter +} diff --git a/hdf-loader/src/main/resources/config.yaml b/hdf-loader/src/main/resources/config.yaml new file mode 100644 index 000000000..c43385d35 --- /dev/null +++ b/hdf-loader/src/main/resources/config.yaml @@ -0,0 +1,15 @@ +format: HD5 +sourceFile: /home/username/data.h5 +datasets: + - name: dataset1 + type: string + - name: dataset2 + type: int +embedding: word2vec +writer: [astra, file] +astra: + database: test + keyspace: test + table: test + scb: /home/username/scb +file: /home/username/data From db56e8e145542c36ad6e5f8f3a93c49b6b807ae6 Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Fri, 28 Jul 2023 08:09:07 -0500 Subject: [PATCH 05/52] fix: upgrade com.amazonaws:aws-java-sdk-s3 from 1.12.501 to 1.12.502 (#1420) Snyk has created this PR to upgrade com.amazonaws:aws-java-sdk-s3 from 1.12.501 to 1.12.502. See this package in Maven Repository: https://mvnrepository.com/artifact/com.amazonaws/aws-java-sdk-s3/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/024e32ec-7f80-485c-b7bf-f69d45f933ce?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 609ae08ae..eb6d1517d 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -326,7 +326,7 @@ com.amazonaws aws-java-sdk-s3 - 1.12.501 + 1.12.502 com.elega9t From e5d7c7fa07271b2f4df829da2b09522efb4e010b Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Tue, 1 Aug 2023 09:24:48 -0400 Subject: [PATCH 06/52] filling out reader functionality --- hdf-loader/pom.xml | 6 +++++ .../io/nosqlbench/loader/hdf/HdfLoader.java | 8 +++--- .../loader/hdf/config/LoaderConfig.java | 4 +++ .../loader/hdf/readers/Hdf5Reader.java | 27 +++++++++++++++++-- .../{HdfReaders.java => HdfReaderTypes.java} | 2 +- .../hdf/writers/AbstractVectorWriter.java | 24 +++++++++++++++++ .../loader/hdf/writers/AstraVectorWriter.java | 3 ++- ...torWriters.java => VectorWriterTypes.java} | 2 +- pom.xml | 2 ++ 9 files changed, 69 insertions(+), 9 deletions(-) rename hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/{HdfReaders.java => HdfReaderTypes.java} (95%) create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AbstractVectorWriter.java rename hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/{VectorWriters.java => VectorWriterTypes.java} (95%) diff --git a/hdf-loader/pom.xml b/hdf-loader/pom.xml index 9917f6091..ddc0aaf99 100644 --- a/hdf-loader/pom.xml +++ b/hdf-loader/pom.xml @@ -58,6 +58,12 @@ 2.6.1 + + com.datastax.oss + java-driver-core + 4.16.0 + + diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java index 51114adda..ef253c6bd 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java @@ -18,13 +18,13 @@ package io.nosqlbench.loader.hdf; import io.nosqlbench.loader.hdf.config.LoaderConfig; -import io.nosqlbench.loader.hdf.readers.HdfReaders; +import io.nosqlbench.loader.hdf.readers.HdfReaderTypes; import io.nosqlbench.loader.hdf.readers.Hdf5Reader; import io.nosqlbench.loader.hdf.readers.HdfReader; import io.nosqlbench.loader.hdf.writers.AstraVectorWriter; import io.nosqlbench.loader.hdf.writers.FileVectorWriter; import io.nosqlbench.loader.hdf.writers.VectorWriter; -import io.nosqlbench.loader.hdf.writers.VectorWriters; +import io.nosqlbench.loader.hdf.writers.VectorWriterTypes; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -44,7 +44,7 @@ public class HdfLoader { VectorWriter writer = null; String format = config.getFormat(); - switch (HdfReaders.valueOf(format)) { + switch (HdfReaderTypes.valueOf(format)) { case HDF4 -> { logger.info("HDF4 format not yet supported"); System.exit(1); @@ -59,7 +59,7 @@ public class HdfLoader { } String writerType = config.getWriter(); - switch (VectorWriters.valueOf(writerType)) { + switch (VectorWriterTypes.valueOf(writerType)) { case filewriter -> { writer = new FileVectorWriter(config); } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java index 5816cf925..3b706faff 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java @@ -74,4 +74,8 @@ public class LoaderConfig { public int getThreads() { return (int) configMap.getOrDefault("threads", 1); } + + public int getQueueSize() { + return (int) configMap.getOrDefault("queueSize", 1000); + } } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java index 1adcdfc3a..3a89f107b 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java @@ -22,6 +22,7 @@ import io.nosqlbench.loader.hdf.writers.VectorWriter; import ncsa.hdf.hdf5lib.H5; import ncsa.hdf.hdf5lib.HDF5Constants; +import ncsa.hdf.hdf5lib.exceptions.HDF5Exception; import ncsa.hdf.hdf5lib.exceptions.HDF5LibraryException; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -29,15 +30,18 @@ import org.apache.logging.log4j.Logger; import java.util.Map; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; public class Hdf5Reader implements HdfReader { private static final Logger logger = LogManager.getLogger(Hdf5Reader.class); private VectorWriter writer; private final LoaderConfig config; private final ExecutorService executorService; + private final LinkedBlockingQueue queue; public Hdf5Reader(LoaderConfig config) { this.config = config; executorService = Executors.newFixedThreadPool(config.getThreads()); + queue = new LinkedBlockingQueue<>(config.getQueueSize()); } @Override @@ -57,12 +61,31 @@ public class Hdf5Reader implements HdfReader { int datasetId = H5.H5Dopen(fileId, dataset.get("name")); // Get the dataspace of the dataset int dataspaceId = H5.H5Dget_space(datasetId); - // Get the number of dimensions in the dataspace int numDimensions = H5.H5Sget_simple_extent_ndims(dataspaceId); + float[] vector = new float[numDimensions]; long[] dims = new long[numDimensions]; - } catch (HDF5LibraryException e) { + // Get the datatype of the dataset + int datatypeId = H5.H5Dget_type(datasetId); + // Get the size of each dimension + H5.H5Sget_simple_extent_dims(dataspaceId, dims, null); + + // Read the data from the dataset + double[] data = new double[(int) dims[0]]; + H5.H5Dread(datasetId, datatypeId, HDF5Constants.H5S_ALL, HDF5Constants.H5S_ALL, + HDF5Constants.H5P_DEFAULT, data); + + // Close the dataspace, datatype, and dataset + H5.H5Sclose(dataspaceId); + H5.H5Tclose(datatypeId); + H5.H5Dclose(datasetId); + + + queue.put(vector); + } catch (HDF5Exception e) { logger.error(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); } }); } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReaders.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReaderTypes.java similarity index 95% rename from hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReaders.java rename to hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReaderTypes.java index 59aa29ebf..3d4676045 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReaders.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReaderTypes.java @@ -17,7 +17,7 @@ package io.nosqlbench.loader.hdf.readers; -public enum HdfReaders { +public enum HdfReaderTypes { HDF4, HDF5 } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AbstractVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AbstractVectorWriter.java new file mode 100644 index 000000000..15d3a6207 --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AbstractVectorWriter.java @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf.writers; + +import java.util.concurrent.LinkedBlockingQueue; + +public abstract class AbstractVectorWriter implements VectorWriter { + protected LinkedBlockingQueue queue; +} diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java index 6c1d46ade..13f99fa82 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java @@ -18,8 +18,9 @@ package io.nosqlbench.loader.hdf.writers; import io.nosqlbench.loader.hdf.config.LoaderConfig; +import com.datastax.oss.driver.api.core.data.CqlVector; -public class AstraVectorWriter implements VectorWriter { +public class AstraVectorWriter extends AbstractVectorWriter { public AstraVectorWriter(LoaderConfig config) { } } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriters.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriterTypes.java similarity index 95% rename from hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriters.java rename to hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriterTypes.java index 8aafb7043..a066722d7 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriters.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriterTypes.java @@ -17,7 +17,7 @@ package io.nosqlbench.loader.hdf.writers; -public enum VectorWriters { +public enum VectorWriterTypes { astra, filewriter } diff --git a/pom.xml b/pom.xml index a79b86a73..29573c851 100644 --- a/pom.xml +++ b/pom.xml @@ -67,6 +67,7 @@ adapter-kafka adapter-amqp adapter-jdbc + hdf-loader virtdata-api @@ -114,6 +115,7 @@ adapter-amqp adapter-jdbc adapter-pinecone + hdf-loader virtdata-api From 29fe0e455bc4fb25446134743974f89441112432 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Tue, 1 Aug 2023 09:59:18 -0400 Subject: [PATCH 07/52] removing unused classes --- .../io/nosqlbench/loader/hdf/HdfLoader.java | 20 +++++++--------- .../loader/hdf/readers/Hdf5Reader.java | 5 ++++ .../loader/hdf/readers/HdfReaderTypes.java | 23 ------------------- .../loader/hdf/writers/VectorWriterTypes.java | 23 ------------------- hdf-loader/src/main/resources/config.yaml | 6 ++--- 5 files changed, 16 insertions(+), 61 deletions(-) delete mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReaderTypes.java delete mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriterTypes.java diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java index ef253c6bd..6850b3f85 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java @@ -18,20 +18,20 @@ package io.nosqlbench.loader.hdf; import io.nosqlbench.loader.hdf.config.LoaderConfig; -import io.nosqlbench.loader.hdf.readers.HdfReaderTypes; import io.nosqlbench.loader.hdf.readers.Hdf5Reader; import io.nosqlbench.loader.hdf.readers.HdfReader; import io.nosqlbench.loader.hdf.writers.AstraVectorWriter; import io.nosqlbench.loader.hdf.writers.FileVectorWriter; import io.nosqlbench.loader.hdf.writers.VectorWriter; -import io.nosqlbench.loader.hdf.writers.VectorWriterTypes; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import java.io.IOException; - public class HdfLoader { private static final Logger logger = LogManager.getLogger(HdfLoader.class); + public static final String FILEWRITER = "filewriter"; + public static final String ASTRA = "astra"; + public static final String HDF5 = "hdf5"; + public static final String HDF4 = "hdf4"; public static void main (String[] args) { if (args.length == 0) { @@ -44,7 +44,7 @@ public class HdfLoader { VectorWriter writer = null; String format = config.getFormat(); - switch (HdfReaderTypes.valueOf(format)) { + switch (format.toLowerCase()) { case HDF4 -> { logger.info("HDF4 format not yet supported"); System.exit(1); @@ -59,13 +59,9 @@ public class HdfLoader { } String writerType = config.getWriter(); - switch (VectorWriterTypes.valueOf(writerType)) { - case filewriter -> { - writer = new FileVectorWriter(config); - } - case astra -> { - writer = new AstraVectorWriter(config); - } + switch (writerType.toLowerCase()) { + case FILEWRITER -> writer = new FileVectorWriter(config); + case ASTRA -> writer = new AstraVectorWriter(config); default -> { logger.info("Unknown writer type: " + writerType); System.exit(1); diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java index 3a89f107b..657fd7b0b 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java @@ -80,6 +80,11 @@ public class Hdf5Reader implements HdfReader { H5.H5Tclose(datatypeId); H5.H5Dclose(datasetId); + // Now you have the data, and you can convert it into vector embeddings + //INDArray dataArray = Nd4j.create(data); + //WordVectors wordVectors = new WordVectorsImpl(); + //wordVectors.setLookupTable(dataArray); + //WordVectorSerializer.writeWordVectors(wordVectors, "vector_embeddings.txt"); queue.put(vector); } catch (HDF5Exception e) { diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReaderTypes.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReaderTypes.java deleted file mode 100644 index 3d4676045..000000000 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReaderTypes.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2023 nosqlbench - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package io.nosqlbench.loader.hdf.readers; - -public enum HdfReaderTypes { - HDF4, - HDF5 -} diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriterTypes.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriterTypes.java deleted file mode 100644 index a066722d7..000000000 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriterTypes.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2023 nosqlbench - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package io.nosqlbench.loader.hdf.writers; - -public enum VectorWriterTypes { - astra, - filewriter -} diff --git a/hdf-loader/src/main/resources/config.yaml b/hdf-loader/src/main/resources/config.yaml index c43385d35..11f1be3e5 100644 --- a/hdf-loader/src/main/resources/config.yaml +++ b/hdf-loader/src/main/resources/config.yaml @@ -1,12 +1,12 @@ -format: HD5 -sourceFile: /home/username/data.h5 +format: HDF5 +sourceFile: /home/mwolters138/Downloads/embeddings.h5 datasets: - name: dataset1 type: string - name: dataset2 type: int embedding: word2vec -writer: [astra, file] +writer: filewriter astra: database: test keyspace: test From 8ff93cf71741deee5559637afaefbed51e68b7bd Mon Sep 17 00:00:00 2001 From: Jeff Banks Date: Tue, 1 Aug 2023 09:59:20 -0500 Subject: [PATCH 08/52] fix: upgrade com.amazonaws:aws-java-sdk-s3 from 1.12.502 to 1.12.503 (#1421) Snyk has created this PR to upgrade com.amazonaws:aws-java-sdk-s3 from 1.12.502 to 1.12.503. See this package in Maven Repository: https://mvnrepository.com/artifact/com.amazonaws/aws-java-sdk-s3/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/024e32ec-7f80-485c-b7bf-f69d45f933ce?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index eb6d1517d..9cd0ea7de 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -326,7 +326,7 @@ com.amazonaws aws-java-sdk-s3 - 1.12.502 + 1.12.503 com.elega9t From f265a39f02391db7d01e4d07c19b8de69976c399 Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Tue, 1 Aug 2023 09:59:45 -0500 Subject: [PATCH 09/52] fix: upgrade com.amazonaws:aws-java-sdk-dynamodb from 1.12.501 to 1.12.504 (#1422) Snyk has created this PR to upgrade com.amazonaws:aws-java-sdk-dynamodb from 1.12.501 to 1.12.504. See this package in Maven Repository: https://mvnrepository.com/artifact/com.amazonaws/aws-java-sdk-dynamodb/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/db3dfb82-467b-4263-94f8-28f933540a6d?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot --- adapter-dynamodb/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adapter-dynamodb/pom.xml b/adapter-dynamodb/pom.xml index 0c710d880..69e83c510 100644 --- a/adapter-dynamodb/pom.xml +++ b/adapter-dynamodb/pom.xml @@ -43,7 +43,7 @@ com.amazonaws aws-java-sdk-dynamodb - 1.12.501 + 1.12.504 From 81f04737660e9b9b075b77c9cef7ff1d41a50c40 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Tue, 1 Aug 2023 11:17:04 -0400 Subject: [PATCH 10/52] adding in functionality for all datasets in a file --- .../loader/hdf/config/LoaderConfig.java | 4 +-- .../loader/hdf/readers/Hdf5Reader.java | 32 +++++++++++++++++-- hdf-loader/src/main/resources/config.yaml | 7 ++-- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java index 3b706faff..ac1c05a76 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java @@ -47,8 +47,8 @@ public class LoaderConfig { return configMap.get(key).toString(); } - public List> getDatasets() { - return (List>) configMap.get("datasets"); + public List getDatasets() { + return (List) configMap.get("datasets"); } public String getFormat() { diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java index 657fd7b0b..a02575a79 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java @@ -27,6 +27,7 @@ import ncsa.hdf.hdf5lib.exceptions.HDF5LibraryException; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.util.List; import java.util.Map; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -34,6 +35,7 @@ import java.util.concurrent.LinkedBlockingQueue; public class Hdf5Reader implements HdfReader { private static final Logger logger = LogManager.getLogger(Hdf5Reader.class); + public static final String ALL = "all"; private VectorWriter writer; private final LoaderConfig config; private final ExecutorService executorService; @@ -53,12 +55,36 @@ public class Hdf5Reader implements HdfReader { public void read() throws HDF5LibraryException { String sourceFile = config.getSourceFile(); int fileId = H5.H5Fopen(sourceFile, HDF5Constants.H5F_ACC_RDONLY, HDF5Constants.H5P_DEFAULT); - for (Map dataset : config.getDatasets()) { + List datasets = config.getDatasets(); + if (datasets.get(0).equalsIgnoreCase(ALL)) { + try { + int numObjects = H5.H5Fget_obj_count(fileId, HDF5Constants.H5F_OBJ_ALL); + String[] objNames = new String[numObjects]; + int[] objTypes = new int[numObjects]; + long[] refArray = new long[numObjects]; + //H5.H5Fget_obj_ids(fileId, HDF5Constants.H5F_OBJ_ALL, numObjects, objNames, objTypes); + H5.H5Gget_obj_info_all(fileId, null, objNames, objTypes, refArray); + + for (int i = 0; i < numObjects; i++) { + String objName = objNames[i]; + int objType = objTypes[i]; + if (objType == HDF5Constants.H5G_DATASET) { + datasets.add(objName); + } + } + } catch (HDF5Exception e) { + logger.error("Error getting all datasets from file: " + sourceFile, e); + } + } + for (String dataset : config.getDatasets()) { + if (dataset.equalsIgnoreCase(ALL)) { + continue; + } executorService.submit(() -> { // Your lambda code that runs in a separate thread for each object - logger.info("Processing dataset: " + dataset.get("name")); + logger.info("Processing dataset: " + dataset); try { - int datasetId = H5.H5Dopen(fileId, dataset.get("name")); + int datasetId = H5.H5Dopen(fileId, dataset); // Get the dataspace of the dataset int dataspaceId = H5.H5Dget_space(datasetId); // Get the number of dimensions in the dataspace diff --git a/hdf-loader/src/main/resources/config.yaml b/hdf-loader/src/main/resources/config.yaml index 11f1be3e5..16869b8c6 100644 --- a/hdf-loader/src/main/resources/config.yaml +++ b/hdf-loader/src/main/resources/config.yaml @@ -1,10 +1,7 @@ format: HDF5 -sourceFile: /home/mwolters138/Downloads/embeddings.h5 +sourceFile: /home/mwolters138/Downloads/NEONDSTowerTemperatureData.hdf5 datasets: - - name: dataset1 - type: string - - name: dataset2 - type: int + - all embedding: word2vec writer: filewriter astra: From 1d1f1543d5a789556431b7687fbb46363c07576e Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Wed, 2 Aug 2023 16:43:33 -0500 Subject: [PATCH 11/52] fix: upgrade com.github.docker-java:docker-java-transport-okhttp from 3.3.1 to 3.3.2 (#1423) Snyk has created this PR to upgrade com.github.docker-java:docker-java-transport-okhttp from 3.3.1 to 3.3.2. See this package in Maven Repository: https://mvnrepository.com/artifact/com.github.docker-java/docker-java-transport-okhttp/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/d8f400a4-82ed-4d21-b83c-a584d8df78a4?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot Co-authored-by: Jeff Banks --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 9cd0ea7de..567d45e62 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -293,7 +293,7 @@ com.github.docker-java docker-java-transport-okhttp - 3.3.1 + 3.3.2 org.slf4j From 4354f8ec9db4ab07d6f66af2ad31e77a70d98bd3 Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Wed, 2 Aug 2023 16:43:50 -0500 Subject: [PATCH 12/52] fix: upgrade com.github.docker-java:docker-java-api from 3.3.1 to 3.3.2 (#1424) Snyk has created this PR to upgrade com.github.docker-java:docker-java-api from 3.3.1 to 3.3.2. See this package in Maven Repository: https://mvnrepository.com/artifact/com.github.docker-java/docker-java-api/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/d8f400a4-82ed-4d21-b83c-a584d8df78a4?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 567d45e62..e6f3efbf7 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -231,7 +231,7 @@ com.github.docker-java docker-java-api - 3.3.1 + 3.3.2 org.slf4j From 27b759c0f69d82e9cddb5e2c8a777f822a0c0a31 Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Wed, 2 Aug 2023 16:44:07 -0500 Subject: [PATCH 13/52] fix: upgrade com.github.docker-java:docker-java from 3.3.1 to 3.3.2 (#1425) Snyk has created this PR to upgrade com.github.docker-java:docker-java from 3.3.1 to 3.3.2. See this package in Maven Repository: https://mvnrepository.com/artifact/com.github.docker-java/docker-java/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/d8f400a4-82ed-4d21-b83c-a584d8df78a4?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index e6f3efbf7..476a567cf 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -304,7 +304,7 @@ com.github.docker-java docker-java - 3.3.1 + 3.3.2 org.slf4j From c5d46e1fbbc48bae98bab34a1441a541f9e74f2f Mon Sep 17 00:00:00 2001 From: Jeff Banks Date: Wed, 2 Aug 2023 16:44:47 -0500 Subject: [PATCH 14/52] fix: upgrade com.amazonaws:aws-java-sdk-s3 from 1.12.503 to 1.12.505 (#1426) Snyk has created this PR to upgrade com.amazonaws:aws-java-sdk-s3 from 1.12.503 to 1.12.505. See this package in Maven Repository: https://mvnrepository.com/artifact/com.amazonaws/aws-java-sdk-s3/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/024e32ec-7f80-485c-b7bf-f69d45f933ce?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 476a567cf..6f8d0c06a 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -326,7 +326,7 @@ com.amazonaws aws-java-sdk-s3 - 1.12.503 + 1.12.505 com.elega9t From 2cce01afa303f1cc9a0d8383d18cf8925ca3e092 Mon Sep 17 00:00:00 2001 From: Jeff Banks Date: Fri, 4 Aug 2023 07:54:12 -0500 Subject: [PATCH 15/52] fix: upgrade org.mongodb:mongodb-driver-sync from 4.10.1 to 4.10.2 (#1427) Snyk has created this PR to upgrade org.mongodb:mongodb-driver-sync from 4.10.1 to 4.10.2. See this package in Maven Repository: https://mvnrepository.com/artifact/org.mongodb/mongodb-driver-sync/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/cb68505f-64d0-4ebc-accd-d7d9ac15c3c2?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot --- adapter-mongodb/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adapter-mongodb/pom.xml b/adapter-mongodb/pom.xml index 25b523b89..8692fa642 100644 --- a/adapter-mongodb/pom.xml +++ b/adapter-mongodb/pom.xml @@ -42,7 +42,7 @@ org.mongodb mongodb-driver-sync - 4.10.1 + 4.10.2 From 5f66063086b849cd5960322379b2ae7ed7b10403 Mon Sep 17 00:00:00 2001 From: Jeff Banks Date: Fri, 4 Aug 2023 07:54:41 -0500 Subject: [PATCH 16/52] fix: upgrade com.amazonaws:aws-java-sdk-dynamodb from 1.12.504 to 1.12.505 (#1428) Snyk has created this PR to upgrade com.amazonaws:aws-java-sdk-dynamodb from 1.12.504 to 1.12.505. See this package in Maven Repository: https://mvnrepository.com/artifact/com.amazonaws/aws-java-sdk-dynamodb/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/db3dfb82-467b-4263-94f8-28f933540a6d?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot --- adapter-dynamodb/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adapter-dynamodb/pom.xml b/adapter-dynamodb/pom.xml index 69e83c510..d2e8732aa 100644 --- a/adapter-dynamodb/pom.xml +++ b/adapter-dynamodb/pom.xml @@ -43,7 +43,7 @@ com.amazonaws aws-java-sdk-dynamodb - 1.12.504 + 1.12.505 From a78ef4b2a3738b112334298f310052f6673b39ce Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Fri, 4 Aug 2023 09:02:59 -0400 Subject: [PATCH 17/52] removing non-functional library reference --- hdf-loader/pom.xml | 11 +- .../io/nosqlbench/loader/hdf/HdfLoader.java | 4 +- .../loader/hdf/config/LoaderConfig.java | 4 + .../loader/hdf/readers/Hdf5Reader.java | 102 +++++++----------- .../loader/hdf/readers/HdfReader.java | 3 +- .../hdf/writers/AbstractVectorWriter.java | 22 ++++ .../loader/hdf/writers/AstraVectorWriter.java | 6 +- .../loader/hdf/writers/FileVectorWriter.java | 29 ++++- .../loader/hdf/writers/VectorWriter.java | 5 +- hdf-loader/src/main/resources/config.yaml | 2 +- 10 files changed, 106 insertions(+), 82 deletions(-) diff --git a/hdf-loader/pom.xml b/hdf-loader/pom.xml index ddc0aaf99..97d590947 100644 --- a/hdf-loader/pom.xml +++ b/hdf-loader/pom.xml @@ -52,18 +52,17 @@ 2.0 - - org.hdfgroup - hdf-java - 2.6.1 - - com.datastax.oss java-driver-core 4.16.0 + + io.jhdf + jhdf + 0.6.10 + diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java index 6850b3f85..3e0e8000a 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java @@ -49,9 +49,7 @@ public class HdfLoader { logger.info("HDF4 format not yet supported"); System.exit(1); } - case HDF5 -> { - reader = new Hdf5Reader(config); - } + case HDF5 -> reader = new Hdf5Reader(config); default -> { logger.info("Unknown format: " + format); System.exit(1); diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java index ac1c05a76..c2e7645c5 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java @@ -71,6 +71,10 @@ public class LoaderConfig { return (String) configMap.get("sourceFile"); } + public String getTargetFile() { + return (String) configMap.getOrDefault("targetFile", "./vectors.txt"); + } + public int getThreads() { return (int) configMap.getOrDefault("threads", 1); } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java index a02575a79..9b8da5bd3 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java @@ -17,16 +17,17 @@ package io.nosqlbench.loader.hdf.readers; +import io.jhdf.HdfFile; +import io.jhdf.api.Dataset; +import io.jhdf.api.Group; +import io.jhdf.api.Node; +import io.jhdf.object.datatype.DataType; import io.nosqlbench.loader.hdf.config.LoaderConfig; import io.nosqlbench.loader.hdf.writers.VectorWriter; - -import ncsa.hdf.hdf5lib.H5; -import ncsa.hdf.hdf5lib.HDF5Constants; -import ncsa.hdf.hdf5lib.exceptions.HDF5Exception; -import ncsa.hdf.hdf5lib.exceptions.HDF5LibraryException; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.nio.file.Paths; import java.util.List; import java.util.Map; import java.util.concurrent.ExecutorService; @@ -40,6 +41,7 @@ public class Hdf5Reader implements HdfReader { private final LoaderConfig config; private final ExecutorService executorService; private final LinkedBlockingQueue queue; + private List datasets; public Hdf5Reader(LoaderConfig config) { this.config = config; executorService = Executors.newFixedThreadPool(config.getThreads()); @@ -49,76 +51,44 @@ public class Hdf5Reader implements HdfReader { @Override public void setWriter(VectorWriter writer) { this.writer = writer; + writer.setQueue(queue); + Thread t = new Thread(writer); + t.start(); + } + + public void extractDatasets(Group parent) { + Map nodes = parent.getChildren(); + for (String key : nodes.keySet()) { + Node node = nodes.get(key); + if (node instanceof Dataset) { + datasets.add(((Dataset)node).getPath()); + } else if (node.isGroup()) { + extractDatasets((Group) node); + } + } } @Override - public void read() throws HDF5LibraryException { - String sourceFile = config.getSourceFile(); - int fileId = H5.H5Fopen(sourceFile, HDF5Constants.H5F_ACC_RDONLY, HDF5Constants.H5P_DEFAULT); - List datasets = config.getDatasets(); + public void read() { + HdfFile hdfFile = new HdfFile(Paths.get(config.getSourceFile())); + datasets = config.getDatasets(); if (datasets.get(0).equalsIgnoreCase(ALL)) { - try { - int numObjects = H5.H5Fget_obj_count(fileId, HDF5Constants.H5F_OBJ_ALL); - String[] objNames = new String[numObjects]; - int[] objTypes = new int[numObjects]; - long[] refArray = new long[numObjects]; - //H5.H5Fget_obj_ids(fileId, HDF5Constants.H5F_OBJ_ALL, numObjects, objNames, objTypes); - H5.H5Gget_obj_info_all(fileId, null, objNames, objTypes, refArray); - - for (int i = 0; i < numObjects; i++) { - String objName = objNames[i]; - int objType = objTypes[i]; - if (objType == HDF5Constants.H5G_DATASET) { - datasets.add(objName); - } - } - } catch (HDF5Exception e) { - logger.error("Error getting all datasets from file: " + sourceFile, e); - } + extractDatasets(hdfFile); } - for (String dataset : config.getDatasets()) { - if (dataset.equalsIgnoreCase(ALL)) { + for (String ds : datasets) { + if (ds.equalsIgnoreCase(ALL)) { continue; } - executorService.submit(() -> { - // Your lambda code that runs in a separate thread for each object - logger.info("Processing dataset: " + dataset); - try { - int datasetId = H5.H5Dopen(fileId, dataset); - // Get the dataspace of the dataset - int dataspaceId = H5.H5Dget_space(datasetId); - // Get the number of dimensions in the dataspace - int numDimensions = H5.H5Sget_simple_extent_ndims(dataspaceId); - float[] vector = new float[numDimensions]; - long[] dims = new long[numDimensions]; - // Get the datatype of the dataset - int datatypeId = H5.H5Dget_type(datasetId); - // Get the size of each dimension - H5.H5Sget_simple_extent_dims(dataspaceId, dims, null); + //executorService.submit(() -> { + logger.info("Processing dataset: " + ds); + Dataset dataset = hdfFile.getDatasetByPath(ds); + DataType dataType = dataset.getDataType(); + long l = dataset.getSize(); + int[] dims = dataset.getDimensions(); - // Read the data from the dataset - double[] data = new double[(int) dims[0]]; - H5.H5Dread(datasetId, datatypeId, HDF5Constants.H5S_ALL, HDF5Constants.H5S_ALL, - HDF5Constants.H5P_DEFAULT, data); + //queue.put(vector); - // Close the dataspace, datatype, and dataset - H5.H5Sclose(dataspaceId); - H5.H5Tclose(datatypeId); - H5.H5Dclose(datasetId); - - // Now you have the data, and you can convert it into vector embeddings - //INDArray dataArray = Nd4j.create(data); - //WordVectors wordVectors = new WordVectorsImpl(); - //wordVectors.setLookupTable(dataArray); - //WordVectorSerializer.writeWordVectors(wordVectors, "vector_embeddings.txt"); - - queue.put(vector); - } catch (HDF5Exception e) { - logger.error(e); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - }); + // }); } } } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReader.java index 48c9b5a49..8dfd58bb8 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReader.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/HdfReader.java @@ -18,10 +18,9 @@ package io.nosqlbench.loader.hdf.readers; import io.nosqlbench.loader.hdf.writers.VectorWriter; -import ncsa.hdf.hdf5lib.exceptions.HDF5LibraryException; public interface HdfReader { void setWriter(VectorWriter writer); - void read() throws HDF5LibraryException; + void read(); } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AbstractVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AbstractVectorWriter.java index 15d3a6207..b8f9cdd4f 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AbstractVectorWriter.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AbstractVectorWriter.java @@ -21,4 +21,26 @@ import java.util.concurrent.LinkedBlockingQueue; public abstract class AbstractVectorWriter implements VectorWriter { protected LinkedBlockingQueue queue; + + public void setQueue(LinkedBlockingQueue queue) { + this.queue = queue; + } + + @Override + public void run() { + while (true) { + try { + float[] vector = queue.take(); + if (vector.length==0) { + break; + } + writeVector(vector); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } + + protected abstract void writeVector(float[] vector); + } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java index 13f99fa82..c7a36cfef 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java @@ -18,9 +18,13 @@ package io.nosqlbench.loader.hdf.writers; import io.nosqlbench.loader.hdf.config.LoaderConfig; -import com.datastax.oss.driver.api.core.data.CqlVector; public class AstraVectorWriter extends AbstractVectorWriter { public AstraVectorWriter(LoaderConfig config) { } + + @Override + protected void writeVector(float[] vector) { + + } } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java index f8038ea60..3e8c2b7bd 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java @@ -18,8 +18,33 @@ package io.nosqlbench.loader.hdf.writers; import io.nosqlbench.loader.hdf.config.LoaderConfig; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; -public class FileVectorWriter implements VectorWriter { - public FileVectorWriter(LoaderConfig config) { +import java.io.*; + +public class FileVectorWriter extends AbstractVectorWriter { + private static final Logger logger = LogManager.getLogger(FileVectorWriter.class); + private final BufferedWriter targetFile; + public FileVectorWriter(LoaderConfig config) throws IOException { + String targetFileName = config.getTargetFile(); + targetFile = new BufferedWriter(new FileWriter(targetFileName)); + } + + @Override + protected void writeVector(float[] vector) { + try { + targetFile.write("["); + for (int i = 0; i < vector.length; i++) { + targetFile.write(String.valueOf(vector[i])); + if (i < vector.length - 1) { + targetFile.write(","); + } + } + targetFile.write("]"); + targetFile.write("\n"); + } catch (IOException e) { + logger.error(e.getMessage(), e); + } } } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriter.java index ad4d1c542..e840ccf3c 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriter.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriter.java @@ -17,5 +17,8 @@ package io.nosqlbench.loader.hdf.writers; -public interface VectorWriter { +import java.util.concurrent.LinkedBlockingQueue; + +public interface VectorWriter extends Runnable { + void setQueue(LinkedBlockingQueue queue); } diff --git a/hdf-loader/src/main/resources/config.yaml b/hdf-loader/src/main/resources/config.yaml index 16869b8c6..9208569ea 100644 --- a/hdf-loader/src/main/resources/config.yaml +++ b/hdf-loader/src/main/resources/config.yaml @@ -1,5 +1,5 @@ format: HDF5 -sourceFile: /home/mwolters138/Downloads/NEONDSTowerTemperatureData.hdf5 +sourceFile: /home/mwolters138/Downloads/NEONDSImagingSpectrometerData.h5 #h5ex_t_arrayatt.h5 datasets: - all embedding: word2vec From 52debee3f642ee66da3155a08a13568fcf51f6fb Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Fri, 4 Aug 2023 16:11:47 -0500 Subject: [PATCH 18/52] fix: upgrade org.xerial.snappy:snappy-java from 1.1.10.1 to 1.1.10.2 (#1431) Snyk has created this PR to upgrade org.xerial.snappy:snappy-java from 1.1.10.1 to 1.1.10.2. See this package in Maven Repository: https://mvnrepository.com/artifact/org.xerial.snappy/snappy-java/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/73081f87-c1b7-4c91-b407-1cb299565642?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot --- mvn-defaults/pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 6f8d0c06a..6236bfd08 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -184,7 +184,7 @@ org.xerial.snappy snappy-java - 1.1.10.1 + 1.1.10.2 com.datastax.oss @@ -199,7 +199,7 @@ org.xerial.snappy snappy-java - 1.1.10.1 + 1.1.10.2 com.esri.geometry From dd0f942a27a0d3cfc26292b1e634b868a72401ed Mon Sep 17 00:00:00 2001 From: Jeff Banks Date: Fri, 4 Aug 2023 16:12:29 -0500 Subject: [PATCH 19/52] fix: adapter-pulsar/pom.xml to reduce vulnerabilities (#1432) The following vulnerabilities are fixed with an upgrade: - https://snyk.io/vuln/SNYK-JAVA-COMGOOGLEGUAVA-5710356 Co-authored-by: snyk-bot --- adapter-pulsar/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adapter-pulsar/pom.xml b/adapter-pulsar/pom.xml index fd4ba41f1..1b5f8e91d 100644 --- a/adapter-pulsar/pom.xml +++ b/adapter-pulsar/pom.xml @@ -34,7 +34,7 @@ - 3.0.0 + 3.0.1 From bd4c02a6c7585be15a4bae28b5723ebd3315094b Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Fri, 4 Aug 2023 16:12:46 -0500 Subject: [PATCH 20/52] fix: upgrade com.datastax.oss:java-driver-query-builder from 4.16.0 to 4.17.0 (#1430) Snyk has created this PR to upgrade com.datastax.oss:java-driver-query-builder from 4.16.0 to 4.17.0. See this package in Maven Repository: https://mvnrepository.com/artifact/com.datastax.oss/java-driver-query-builder/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/73081f87-c1b7-4c91-b407-1cb299565642?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 6236bfd08..fa043bfa2 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -189,7 +189,7 @@ com.datastax.oss java-driver-query-builder - 4.16.0 + 4.17.0 org.snakeyaml From 9af0abbd04e9f388bc34d9bcde8f71870fbc1b28 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Mon, 7 Aug 2023 14:15:43 -0400 Subject: [PATCH 21/52] working on writers --- hdf-loader/pom.xml | 7 +++ .../hdf/embedding/EmbeddingGenerator.java | 22 ++++++++++ .../embedding/EmbeddingGeneratorFactory.java | 43 +++++++++++++++++++ .../embedding/FloatEmbeddingGenerator.java | 26 +++++++++++ .../embedding/StringEmbeddingGenerator.java | 26 +++++++++++ .../loader/hdf/readers/Hdf5Reader.java | 22 ++++++++-- .../hdf/writers/AbstractVectorWriter.java | 3 +- .../loader/hdf/writers/AstraVectorWriter.java | 28 ++++++++++++ .../loader/hdf/writers/FileVectorWriter.java | 5 +++ .../loader/hdf/writers/VectorWriter.java | 2 + hdf-loader/src/main/resources/config.yaml | 15 ++++--- 11 files changed, 188 insertions(+), 11 deletions(-) create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGenerator.java create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/FloatEmbeddingGenerator.java create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java diff --git a/hdf-loader/pom.xml b/hdf-loader/pom.xml index 97d590947..1031a1f00 100644 --- a/hdf-loader/pom.xml +++ b/hdf-loader/pom.xml @@ -63,6 +63,13 @@ jhdf 0.6.10 + + io.nosqlbench + nb-api + 5.17.3-SNAPSHOT + compile + + diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGenerator.java new file mode 100644 index 000000000..643d66e40 --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGenerator.java @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf.embedding; + +public interface EmbeddingGenerator { + public float[][] generateEmbeddingFrom(Object o); +} diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java new file mode 100644 index 000000000..afcef452e --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf.embedding; + +import java.util.HashMap; +import java.util.Map; + +public class EmbeddingGeneratorFactory { + private static final Map generators = new HashMap<>(); + + public static EmbeddingGenerator getGenerator(String type) { + switch (type) { + case "string" -> { + if (!generators.containsKey(type)) { + generators.put(type, new StringEmbeddingGenerator()); + } + return generators.get(type); + } + case "float" -> { + if (!generators.containsKey(type)) { + generators.put(type, new FloatEmbeddingGenerator()); + } + return generators.get(type); + } + default -> throw new RuntimeException("Unknown embedding type: " + type); + } + } +} diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/FloatEmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/FloatEmbeddingGenerator.java new file mode 100644 index 000000000..f5d6dcbca --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/FloatEmbeddingGenerator.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf.embedding; + +public class FloatEmbeddingGenerator implements EmbeddingGenerator { + + @Override + public float[][] generateEmbeddingFrom(Object o) { + return (float[][]) o; + } +} diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java new file mode 100644 index 000000000..54a8d6f59 --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf.embedding; + +public class StringEmbeddingGenerator implements EmbeddingGenerator { + + @Override + public float[][] generateEmbeddingFrom(Object o) { + return null; + } +} diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java index 9b8da5bd3..893f02dc5 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java @@ -23,6 +23,8 @@ import io.jhdf.api.Group; import io.jhdf.api.Node; import io.jhdf.object.datatype.DataType; import io.nosqlbench.loader.hdf.config.LoaderConfig; +import io.nosqlbench.loader.hdf.embedding.EmbeddingGenerator; +import io.nosqlbench.loader.hdf.embedding.EmbeddingGeneratorFactory; import io.nosqlbench.loader.hdf.writers.VectorWriter; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -34,6 +36,8 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; +import static io.nosqlbench.loader.hdf.embedding.EmbeddingGeneratorFactory.*; + public class Hdf5Reader implements HdfReader { private static final Logger logger = LogManager.getLogger(Hdf5Reader.class); public static final String ALL = "all"; @@ -83,12 +87,24 @@ public class Hdf5Reader implements HdfReader { logger.info("Processing dataset: " + ds); Dataset dataset = hdfFile.getDatasetByPath(ds); DataType dataType = dataset.getDataType(); - long l = dataset.getSize(); - int[] dims = dataset.getDimensions(); - //queue.put(vector); + int[] dims = dataset.getDimensions(); + Object data = dataset.getData(); + + String type = dataset.getJavaType().getSimpleName(); + EmbeddingGenerator generator = getGenerator(dataset.getJavaType().getSimpleName()); + float[][] vectors = generator.generateEmbeddingFrom(data); + for (int i = 0; i < dims[0]; i++) { + try { + queue.put(vectors[i]); + } catch (InterruptedException e) { + logger.error(e.getMessage(), e); + } + } // }); } + hdfFile.close(); + writer.shutdown(); } } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AbstractVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AbstractVectorWriter.java index b8f9cdd4f..c01801e46 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AbstractVectorWriter.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AbstractVectorWriter.java @@ -21,6 +21,7 @@ import java.util.concurrent.LinkedBlockingQueue; public abstract class AbstractVectorWriter implements VectorWriter { protected LinkedBlockingQueue queue; + protected boolean shutdown = false; public void setQueue(LinkedBlockingQueue queue) { this.queue = queue; @@ -28,7 +29,7 @@ public abstract class AbstractVectorWriter implements VectorWriter { @Override public void run() { - while (true) { + while (!shutdown || !queue.isEmpty()) { try { float[] vector = queue.take(); if (vector.length==0) { diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java index c7a36cfef..9a44a654a 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java @@ -17,14 +17,42 @@ package io.nosqlbench.loader.hdf.writers; +import com.datastax.oss.driver.api.core.CqlSession; +import com.datastax.oss.driver.api.core.cql.PreparedStatement; import io.nosqlbench.loader.hdf.config.LoaderConfig; +import java.nio.file.Paths; +import java.util.Map; + public class AstraVectorWriter extends AbstractVectorWriter { + private CqlSession session; + PreparedStatement insert_vector; + public AstraVectorWriter(LoaderConfig config) { + Map astraParams = config.getAstra(); + session = CqlSession.builder() + .withCloudSecureConnectBundle(Paths.get(astraParams.get("scb"))) + .withAuthCredentials(astraParams.get("clientId"), astraParams.get("clientSecret")) + .withKeyspace(astraParams.get("keyspace")) + .build(); + insert_vector = session.prepare(astraParams.get("query")); } @Override protected void writeVector(float[] vector) { + session.execute(insert_vector.bind(getPartitionValue(vector), vector)); + } + private String getPartitionValue(float[] vector) { + float sum = 0; + for (float f : vector) { + sum += f; + } + return String.valueOf(sum); + } + + @Override + public void shutdown() { + shutdown = true; } } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java index 3e8c2b7bd..2c6cb89a4 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java @@ -47,4 +47,9 @@ public class FileVectorWriter extends AbstractVectorWriter { logger.error(e.getMessage(), e); } } + + @Override + public void shutdown() { + shutdown = true; + } } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriter.java index e840ccf3c..31f8993d3 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriter.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/VectorWriter.java @@ -21,4 +21,6 @@ import java.util.concurrent.LinkedBlockingQueue; public interface VectorWriter extends Runnable { void setQueue(LinkedBlockingQueue queue); + + void shutdown(); } diff --git a/hdf-loader/src/main/resources/config.yaml b/hdf-loader/src/main/resources/config.yaml index 9208569ea..6948dddf8 100644 --- a/hdf-loader/src/main/resources/config.yaml +++ b/hdf-loader/src/main/resources/config.yaml @@ -1,12 +1,13 @@ format: HDF5 -sourceFile: /home/mwolters138/Downloads/NEONDSImagingSpectrometerData.h5 #h5ex_t_arrayatt.h5 +sourceFile: /home/mwolters138/Downloads/embeddings.h5 #NEONDSImagingSpectrometerData.h5 #h5ex_t_arrayatt.h5 datasets: - all embedding: word2vec -writer: filewriter +writer: astra #filewriter astra: - database: test - keyspace: test - table: test - scb: /home/username/scb -file: /home/username/data + scb: /home/mwolters138/Dev/testing/secure-connect-vector-correctness.zip + clientId: IvpdaZejwNuvWeupsIkWTHeL + clientSecret: .bxut2-OQL,dWunZeQbjZC0vMHd88UWXKS.xT,nl95zQC0B0xU9FzSWK3HSUGO11o_7pr7wG7+EMaZqegkKlr4fZ54__furPMtWPGiPp,2cZ1q15vrWwc9_-AcgeCbuf + keyspace: baselines768dot + query: INSERT INTO vectors(key, value) VALUES (?,?) +targetFile: /home/mwolters138/vectors.txt From c7a051e036bdad41652a2eb1a5a668b5fc713f90 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Mon, 7 Aug 2023 16:39:53 -0400 Subject: [PATCH 22/52] updated jackson-core for missing class --- hdf-loader/pom.xml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hdf-loader/pom.xml b/hdf-loader/pom.xml index 1031a1f00..1cb25d919 100644 --- a/hdf-loader/pom.xml +++ b/hdf-loader/pom.xml @@ -58,6 +58,14 @@ 4.16.0 + + + com.fasterxml.jackson.core + jackson-core + 2.15.2 + + + io.jhdf jhdf From 5fe19f2487a50d769dd66fd82729f84765472484 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Mon, 7 Aug 2023 17:23:01 -0400 Subject: [PATCH 23/52] starting work on string embeddings --- hdf-loader/pom.xml | 12 ++++++++++++ .../hdf/embedding/StringEmbeddingGenerator.java | 3 +++ .../loader/hdf/writers/AstraVectorWriter.java | 9 ++++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/hdf-loader/pom.xml b/hdf-loader/pom.xml index 1cb25d919..930cc6720 100644 --- a/hdf-loader/pom.xml +++ b/hdf-loader/pom.xml @@ -65,6 +65,18 @@ 2.15.2 + + + org.deeplearning4j + deeplearning4j-core + 1.0.0-M2.1 + + + + org.nd4j + nd4j-native + 1.0.0-M2.1 + io.jhdf diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java index 54a8d6f59..cfa4f94c8 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java @@ -17,6 +17,9 @@ package io.nosqlbench.loader.hdf.embedding; +//import org.deeplearning4j.models.word2vec.Word2Vec; +//import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory; + public class StringEmbeddingGenerator implements EmbeddingGenerator { @Override diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java index 9a44a654a..a272ad1ef 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java @@ -19,6 +19,7 @@ package io.nosqlbench.loader.hdf.writers; import com.datastax.oss.driver.api.core.CqlSession; import com.datastax.oss.driver.api.core.cql.PreparedStatement; +import com.datastax.oss.driver.api.core.data.CqlVector; import io.nosqlbench.loader.hdf.config.LoaderConfig; import java.nio.file.Paths; @@ -40,7 +41,13 @@ public class AstraVectorWriter extends AbstractVectorWriter { @Override protected void writeVector(float[] vector) { - session.execute(insert_vector.bind(getPartitionValue(vector), vector)); + Float[] vector2 = new Float[vector.length]; + for (int i = 0; i < vector.length; i++) { + vector2[i] = vector[i]; + } + CqlVector.Builder vectorBuilder = CqlVector.builder(); + vectorBuilder.add((Object[]) vector2); + session.execute(insert_vector.bind(getPartitionValue(vector), vectorBuilder.build())); } private String getPartitionValue(float[] vector) { From 4953aada47e3443f8e77949517588f9b650b5dd4 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Tue, 8 Aug 2023 09:44:05 -0400 Subject: [PATCH 24/52] added threading and support for 3 dim arrays --- .../loader/hdf/config/LoaderConfig.java | 2 +- .../hdf/embedding/EmbeddingGenerator.java | 2 +- .../embedding/EmbeddingGeneratorFactory.java | 8 ++- .../embedding/FloatEmbeddingGenerator.java | 24 +++++++- .../embedding/ShortEmbeddingGenerator.java | 60 +++++++++++++++++++ .../embedding/StringEmbeddingGenerator.java | 2 +- .../loader/hdf/readers/Hdf5Reader.java | 23 ++++--- hdf-loader/src/main/resources/config.yaml | 4 +- 8 files changed, 110 insertions(+), 15 deletions(-) create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/ShortEmbeddingGenerator.java diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java index c2e7645c5..9b5d25840 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/config/LoaderConfig.java @@ -76,7 +76,7 @@ public class LoaderConfig { } public int getThreads() { - return (int) configMap.getOrDefault("threads", 1); + return (int) configMap.getOrDefault("threads", 5); } public int getQueueSize() { diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGenerator.java index 643d66e40..66d7e1ad4 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGenerator.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGenerator.java @@ -18,5 +18,5 @@ package io.nosqlbench.loader.hdf.embedding; public interface EmbeddingGenerator { - public float[][] generateEmbeddingFrom(Object o); + public float[][] generateEmbeddingFrom(Object o, int[] dims); } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java index afcef452e..44a12d8dc 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java @@ -24,7 +24,7 @@ public class EmbeddingGeneratorFactory { private static final Map generators = new HashMap<>(); public static EmbeddingGenerator getGenerator(String type) { - switch (type) { + switch (type.toLowerCase()) { case "string" -> { if (!generators.containsKey(type)) { generators.put(type, new StringEmbeddingGenerator()); @@ -37,6 +37,12 @@ public class EmbeddingGeneratorFactory { } return generators.get(type); } + case "short" -> { + if (!generators.containsKey(type)) { + generators.put(type, new ShortEmbeddingGenerator()); + } + return generators.get(type); + } default -> throw new RuntimeException("Unknown embedding type: " + type); } } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/FloatEmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/FloatEmbeddingGenerator.java index f5d6dcbca..a6614eac6 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/FloatEmbeddingGenerator.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/FloatEmbeddingGenerator.java @@ -20,7 +20,27 @@ package io.nosqlbench.loader.hdf.embedding; public class FloatEmbeddingGenerator implements EmbeddingGenerator { @Override - public float[][] generateEmbeddingFrom(Object o) { - return (float[][]) o; + public float[][] generateEmbeddingFrom(Object o, int[] dims) { + switch (dims.length) { + case 1: + return new float[][]{new float[]{(float) o}}; + case 2: return (float[][]) o; + case 3: return flatten(o, dims); + default: + throw new RuntimeException("unsupported embedding dimensionality: " + dims.length); + } + } + + private float[][] flatten(Object o, int[] dims) { + float[][][] arr = (float[][][]) o; + float[][] flat = new float[dims[0]][dims[1] * dims[2]]; + for (int i = 0; i < dims[0]; i++) { + for (int j = 0; j < dims[1]; j++) { + for (int k = 0; k < dims[2]; k++) { + flat[i][j * dims[2] + k] = arr[i][j][k]; + } + } + } + return flat; } } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/ShortEmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/ShortEmbeddingGenerator.java new file mode 100644 index 000000000..16775cef8 --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/ShortEmbeddingGenerator.java @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf.embedding; + +public class ShortEmbeddingGenerator implements EmbeddingGenerator { + @Override + public float[][] generateEmbeddingFrom(Object o, int[] dims) { + switch (dims.length) { + case 1 -> { + float[] arr = new float[dims[0]]; + for (int i = 0; i < dims[0]; i++) { + arr[i] = ((short[]) o)[i]; + } + return new float[][]{arr}; + } + case 2 -> { + float[][] arr = new float[dims[0]][dims[1]]; + for (int i = 0; i < dims[0]; i++) { + for (int j = 0; j < dims[1]; j++) { + arr[i][j] = ((short[][]) o)[i][j]; + } + } + return arr; + } + case 3 -> { + return flatten(o, dims); + } + default -> + throw new RuntimeException("unsupported embedding dimensionality: " + dims.length); + } + } + + private float[][] flatten(Object o, int[] dims) { + short[][][] arr = (short[][][]) o; + float[][] flat = new float[dims[0]][dims[1] * dims[2]]; + for (int i = 0; i < dims[0]; i++) { + for (int j = 0; j < dims[1]; j++) { + for (int k = 0; k < dims[2]; k++) { + flat[i][j * dims[2] + k] = arr[i][j][k]; + } + } + } + return flat; + } +} diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java index cfa4f94c8..8626a5699 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java @@ -23,7 +23,7 @@ package io.nosqlbench.loader.hdf.embedding; public class StringEmbeddingGenerator implements EmbeddingGenerator { @Override - public float[][] generateEmbeddingFrom(Object o) { + public float[][] generateEmbeddingFrom(Object o, int[] dims) { return null; } } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java index 893f02dc5..126611bfa 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java @@ -24,19 +24,20 @@ import io.jhdf.api.Node; import io.jhdf.object.datatype.DataType; import io.nosqlbench.loader.hdf.config.LoaderConfig; import io.nosqlbench.loader.hdf.embedding.EmbeddingGenerator; -import io.nosqlbench.loader.hdf.embedding.EmbeddingGeneratorFactory; import io.nosqlbench.loader.hdf.writers.VectorWriter; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.LinkedBlockingQueue; -import static io.nosqlbench.loader.hdf.embedding.EmbeddingGeneratorFactory.*; +import static io.nosqlbench.loader.hdf.embedding.EmbeddingGeneratorFactory.getGenerator; public class Hdf5Reader implements HdfReader { private static final Logger logger = LogManager.getLogger(Hdf5Reader.class); @@ -48,7 +49,7 @@ public class Hdf5Reader implements HdfReader { private List datasets; public Hdf5Reader(LoaderConfig config) { this.config = config; - executorService = Executors.newFixedThreadPool(config.getThreads()); + executorService = Executors.newCachedThreadPool(); queue = new LinkedBlockingQueue<>(config.getQueueSize()); } @@ -79,11 +80,12 @@ public class Hdf5Reader implements HdfReader { if (datasets.get(0).equalsIgnoreCase(ALL)) { extractDatasets(hdfFile); } + List> futures = new ArrayList<>(); for (String ds : datasets) { if (ds.equalsIgnoreCase(ALL)) { continue; } - //executorService.submit(() -> { + Future future = executorService.submit(() -> { logger.info("Processing dataset: " + ds); Dataset dataset = hdfFile.getDatasetByPath(ds); DataType dataType = dataset.getDataType(); @@ -93,7 +95,7 @@ public class Hdf5Reader implements HdfReader { String type = dataset.getJavaType().getSimpleName(); EmbeddingGenerator generator = getGenerator(dataset.getJavaType().getSimpleName()); - float[][] vectors = generator.generateEmbeddingFrom(data); + float[][] vectors = generator.generateEmbeddingFrom(data, dims); for (int i = 0; i < dims[0]; i++) { try { queue.put(vectors[i]); @@ -101,8 +103,15 @@ public class Hdf5Reader implements HdfReader { logger.error(e.getMessage(), e); } } - - // }); + }); + futures.add(future); + } + for (Future future : futures) { + try { + future.get(); + } catch (Exception e) { + logger.error(e.getMessage(), e); + } } hdfFile.close(); writer.shutdown(); diff --git a/hdf-loader/src/main/resources/config.yaml b/hdf-loader/src/main/resources/config.yaml index 6948dddf8..f6d0ae928 100644 --- a/hdf-loader/src/main/resources/config.yaml +++ b/hdf-loader/src/main/resources/config.yaml @@ -1,9 +1,9 @@ format: HDF5 -sourceFile: /home/mwolters138/Downloads/embeddings.h5 #NEONDSImagingSpectrometerData.h5 #h5ex_t_arrayatt.h5 +sourceFile: /home/mwolters138/Downloads/NEONDSImagingSpectrometerData.h5 #h5ex_t_arrayatt.h5 datasets: - all embedding: word2vec -writer: astra #filewriter +writer: filewriter astra: scb: /home/mwolters138/Dev/testing/secure-connect-vector-correctness.zip clientId: IvpdaZejwNuvWeupsIkWTHeL From 0f3ec7f0f0a9b11a306a378be4f8b708a1c39fdc Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Tue, 8 Aug 2023 10:01:28 -0400 Subject: [PATCH 25/52] minor updates to stub out string processing --- .../loader/hdf/embedding/StringEmbeddingGenerator.java | 2 +- .../java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java index 8626a5699..8b8a72c98 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java @@ -24,6 +24,6 @@ public class StringEmbeddingGenerator implements EmbeddingGenerator { @Override public float[][] generateEmbeddingFrom(Object o, int[] dims) { - return null; + return new float[][]{{0.0f, 1.0f},{1.0f, 0.0f}}; //TODO } } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java index 2c6cb89a4..78e327d8e 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java @@ -43,6 +43,7 @@ public class FileVectorWriter extends AbstractVectorWriter { } targetFile.write("]"); targetFile.write("\n"); + targetFile.flush(); } catch (IOException e) { logger.error(e.getMessage(), e); } From 35ff25aa99430ccfd30bebea3fe6dfd216c1d267 Mon Sep 17 00:00:00 2001 From: Jeff Banks Date: Tue, 8 Aug 2023 10:13:23 -0500 Subject: [PATCH 26/52] fix: upgrade com.amazonaws:aws-java-sdk-s3 from 1.12.505 to 1.12.506 (#1433) Snyk has created this PR to upgrade com.amazonaws:aws-java-sdk-s3 from 1.12.505 to 1.12.506. See this package in Maven Repository: https://mvnrepository.com/artifact/com.amazonaws/aws-java-sdk-s3/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/024e32ec-7f80-485c-b7bf-f69d45f933ce?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index fa043bfa2..8bcf43b15 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -326,7 +326,7 @@ com.amazonaws aws-java-sdk-s3 - 1.12.505 + 1.12.506 com.elega9t From d700979b742248725d4e7d1e2025656dd149895d Mon Sep 17 00:00:00 2001 From: Jeff Banks Date: Tue, 8 Aug 2023 10:13:40 -0500 Subject: [PATCH 27/52] fix: upgrade org.apache.kafka:kafka-clients from 3.5.0 to 3.5.1 (#1434) Snyk has created this PR to upgrade org.apache.kafka:kafka-clients from 3.5.0 to 3.5.1. See this package in Maven Repository: https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/bb64938c-e68b-4c38-9e2f-d9d8336c07ee?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot --- adapter-kafka/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adapter-kafka/pom.xml b/adapter-kafka/pom.xml index 552ea0131..ffb5db38f 100644 --- a/adapter-kafka/pom.xml +++ b/adapter-kafka/pom.xml @@ -34,7 +34,7 @@ - 3.5.0 + 3.5.1 From e5d9f66b5400ba767e36ae53f11406f87c23ca69 Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Tue, 8 Aug 2023 10:14:01 -0500 Subject: [PATCH 28/52] fix: upgrade com.amazonaws:aws-java-sdk-dynamodb from 1.12.505 to 1.12.506 (#1435) Snyk has created this PR to upgrade com.amazonaws:aws-java-sdk-dynamodb from 1.12.505 to 1.12.506. See this package in Maven Repository: https://mvnrepository.com/artifact/com.amazonaws/aws-java-sdk-dynamodb/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/db3dfb82-467b-4263-94f8-28f933540a6d?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot --- adapter-dynamodb/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adapter-dynamodb/pom.xml b/adapter-dynamodb/pom.xml index d2e8732aa..5243a1348 100644 --- a/adapter-dynamodb/pom.xml +++ b/adapter-dynamodb/pom.xml @@ -43,7 +43,7 @@ com.amazonaws aws-java-sdk-dynamodb - 1.12.505 + 1.12.506 From 1f53f03a5099fea7ca73473aa4c77c515d776b17 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Wed, 9 Aug 2023 10:29:58 -0400 Subject: [PATCH 29/52] cleanup and added support for shorts as ints --- hdf-loader/pom.xml | 6 ++++ .../io/nosqlbench/loader/hdf/HdfLoader.java | 1 - .../embedding/EmbeddingGeneratorFactory.java | 7 ++-- ...erator.java => IntEmbeddingGenerator.java} | 8 ++--- .../embedding/StringEmbeddingGenerator.java | 34 +++++++++++++++++-- .../loader/hdf/readers/Hdf5Reader.java | 7 +--- hdf-loader/src/main/resources/config.yaml | 2 +- 7 files changed, 47 insertions(+), 18 deletions(-) rename hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/{ShortEmbeddingGenerator.java => IntEmbeddingGenerator.java} (89%) diff --git a/hdf-loader/pom.xml b/hdf-loader/pom.xml index 930cc6720..d5ce8a0f8 100644 --- a/hdf-loader/pom.xml +++ b/hdf-loader/pom.xml @@ -78,6 +78,12 @@ 1.0.0-M2.1 + + org.deeplearning4j + deeplearning4j-nlp + 1.0.0-M2.1 + + io.jhdf jhdf diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java index 3e0e8000a..ac0962cd8 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java @@ -71,6 +71,5 @@ public class HdfLoader { logger.error(e); System.exit(1); } - } } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java index 44a12d8dc..266655232 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java @@ -24,7 +24,8 @@ public class EmbeddingGeneratorFactory { private static final Map generators = new HashMap<>(); public static EmbeddingGenerator getGenerator(String type) { - switch (type.toLowerCase()) { + String typeLower = type.equalsIgnoreCase("short") ? "int" : type.toLowerCase(); + switch (typeLower) { case "string" -> { if (!generators.containsKey(type)) { generators.put(type, new StringEmbeddingGenerator()); @@ -37,9 +38,9 @@ public class EmbeddingGeneratorFactory { } return generators.get(type); } - case "short" -> { + case "int" -> { if (!generators.containsKey(type)) { - generators.put(type, new ShortEmbeddingGenerator()); + generators.put(type, new IntEmbeddingGenerator()); } return generators.get(type); } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/ShortEmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/IntEmbeddingGenerator.java similarity index 89% rename from hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/ShortEmbeddingGenerator.java rename to hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/IntEmbeddingGenerator.java index 16775cef8..e5de526ad 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/ShortEmbeddingGenerator.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/IntEmbeddingGenerator.java @@ -17,14 +17,14 @@ package io.nosqlbench.loader.hdf.embedding; -public class ShortEmbeddingGenerator implements EmbeddingGenerator { +public class IntEmbeddingGenerator implements EmbeddingGenerator { @Override public float[][] generateEmbeddingFrom(Object o, int[] dims) { switch (dims.length) { case 1 -> { float[] arr = new float[dims[0]]; for (int i = 0; i < dims[0]; i++) { - arr[i] = ((short[]) o)[i]; + arr[i] = ((int[]) o)[i]; } return new float[][]{arr}; } @@ -32,7 +32,7 @@ public class ShortEmbeddingGenerator implements EmbeddingGenerator { float[][] arr = new float[dims[0]][dims[1]]; for (int i = 0; i < dims[0]; i++) { for (int j = 0; j < dims[1]; j++) { - arr[i][j] = ((short[][]) o)[i][j]; + arr[i][j] = ((int[][]) o)[i][j]; } } return arr; @@ -46,7 +46,7 @@ public class ShortEmbeddingGenerator implements EmbeddingGenerator { } private float[][] flatten(Object o, int[] dims) { - short[][][] arr = (short[][][]) o; + int[][][] arr = (int[][][]) o; float[][] flat = new float[dims[0]][dims[1] * dims[2]]; for (int i = 0; i < dims[0]; i++) { for (int j = 0; j < dims[1]; j++) { diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java index 8b8a72c98..f72845cf6 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/StringEmbeddingGenerator.java @@ -17,13 +17,41 @@ package io.nosqlbench.loader.hdf.embedding; -//import org.deeplearning4j.models.word2vec.Word2Vec; -//import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory; +import org.deeplearning4j.models.word2vec.Word2Vec; +import org.deeplearning4j.text.sentenceiterator.BasicLineIterator; +import org.deeplearning4j.text.sentenceiterator.CollectionSentenceIterator; +import org.deeplearning4j.text.sentenceiterator.SentenceIterator; +import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory; +import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory; + +import java.util.Arrays; public class StringEmbeddingGenerator implements EmbeddingGenerator { + private TokenizerFactory tokenizerFactory= new DefaultTokenizerFactory(); @Override public float[][] generateEmbeddingFrom(Object o, int[] dims) { - return new float[][]{{0.0f, 1.0f},{1.0f, 0.0f}}; //TODO + switch (dims.length) { + case 1 -> { + return generateWordEmbeddings((String[]) o); + } + default -> throw new RuntimeException("unsupported embedding dimensionality: " + dims.length); + } + } + + private float[][] generateWordEmbeddings(String[] text) { + SentenceIterator iter = new CollectionSentenceIterator(Arrays.asList(text)); + /*Word2Vec vec = new Word2Vec.Builder() + .minWordFrequency(1) + .iterations(1) + .layerSize(targetDims) + .seed(42) + .windowSize(5) + .iterate(iter) + .tokenizerFactory(tokenizerFactory) + .build(); +*/ + return null; + } } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java index 126611bfa..010de23d8 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java @@ -88,14 +88,9 @@ public class Hdf5Reader implements HdfReader { Future future = executorService.submit(() -> { logger.info("Processing dataset: " + ds); Dataset dataset = hdfFile.getDatasetByPath(ds); - DataType dataType = dataset.getDataType(); - int[] dims = dataset.getDimensions(); - Object data = dataset.getData(); - - String type = dataset.getJavaType().getSimpleName(); EmbeddingGenerator generator = getGenerator(dataset.getJavaType().getSimpleName()); - float[][] vectors = generator.generateEmbeddingFrom(data, dims); + float[][] vectors = generator.generateEmbeddingFrom(dataset.getData(), dims); for (int i = 0; i < dims[0]; i++) { try { queue.put(vectors[i]); diff --git a/hdf-loader/src/main/resources/config.yaml b/hdf-loader/src/main/resources/config.yaml index f6d0ae928..e854cae52 100644 --- a/hdf-loader/src/main/resources/config.yaml +++ b/hdf-loader/src/main/resources/config.yaml @@ -1,5 +1,5 @@ format: HDF5 -sourceFile: /home/mwolters138/Downloads/NEONDSImagingSpectrometerData.h5 #h5ex_t_arrayatt.h5 +sourceFile: /home/mwolters138/Documents/hdf5/datasets/deep-image-96-angular.hdf5 datasets: - all embedding: word2vec From ba6f1db2d7585b9f80ceb7dfc4bf5d7f7b452651 Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Wed, 9 Aug 2023 18:12:03 +0000 Subject: [PATCH 30/52] fix: upgrade org.graalvm.js:js-scriptengine from 22.3.2 to 22.3.3 Snyk has created this PR to upgrade org.graalvm.js:js-scriptengine from 22.3.2 to 22.3.3. See this package in Maven Repository: https://mvnrepository.com/artifact/org.graalvm.js/js-scriptengine/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/6a966a50-08ee-405a-ae9a-1cfab95ff2c5?utm_source=github&utm_medium=referral&page=upgrade-pr --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 8bcf43b15..2a2318c5e 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -408,7 +408,7 @@ org.graalvm.js js-scriptengine - 22.3.2 + 22.3.3 org.graalvm.tools From 0c2522b0f54112581e0b7b7b82443eb4e46c76b2 Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Wed, 9 Aug 2023 18:12:07 +0000 Subject: [PATCH 31/52] fix: upgrade org.graalvm.tools:profiler from 22.3.2 to 22.3.3 Snyk has created this PR to upgrade org.graalvm.tools:profiler from 22.3.2 to 22.3.3. See this package in Maven Repository: https://mvnrepository.com/artifact/org.graalvm.tools/profiler/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/6a966a50-08ee-405a-ae9a-1cfab95ff2c5?utm_source=github&utm_medium=referral&page=upgrade-pr --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 8bcf43b15..165336860 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -413,7 +413,7 @@ org.graalvm.tools profiler - 22.3.2 + 22.3.3 runtime From a40681bc067a1a0f7594e5d4bf828645d4177a54 Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Wed, 9 Aug 2023 19:44:14 +0000 Subject: [PATCH 32/52] fix: upgrade com.amazonaws:aws-java-sdk-s3 from 1.12.506 to 1.12.507 Snyk has created this PR to upgrade com.amazonaws:aws-java-sdk-s3 from 1.12.506 to 1.12.507. See this package in Maven Repository: https://mvnrepository.com/artifact/com.amazonaws/aws-java-sdk-s3/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/024e32ec-7f80-485c-b7bf-f69d45f933ce?utm_source=github&utm_medium=referral&page=upgrade-pr --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 8bcf43b15..03bda85cc 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -326,7 +326,7 @@ com.amazonaws aws-java-sdk-s3 - 1.12.506 + 1.12.507 com.elega9t From 570f96ddb5aa32de92ab9926ff4b4cf41aa53d28 Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Wed, 9 Aug 2023 19:44:18 +0000 Subject: [PATCH 33/52] fix: upgrade org.graalvm.sdk:graal-sdk from 22.3.2 to 22.3.3 Snyk has created this PR to upgrade org.graalvm.sdk:graal-sdk from 22.3.2 to 22.3.3. See this package in Maven Repository: https://mvnrepository.com/artifact/org.graalvm.sdk/graal-sdk/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/024e32ec-7f80-485c-b7bf-f69d45f933ce?utm_source=github&utm_medium=referral&page=upgrade-pr --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 8bcf43b15..8b231f158 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -397,7 +397,7 @@ org.graalvm.sdk graal-sdk - 22.3.2 + 22.3.3 org.graalvm.js From 1a4af7789b5b843695be94df979fdb0207285758 Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Wed, 9 Aug 2023 19:56:00 +0000 Subject: [PATCH 34/52] fix: upgrade com.amazonaws:aws-java-sdk-dynamodb from 1.12.506 to 1.12.507 Snyk has created this PR to upgrade com.amazonaws:aws-java-sdk-dynamodb from 1.12.506 to 1.12.507. See this package in Maven Repository: https://mvnrepository.com/artifact/com.amazonaws/aws-java-sdk-dynamodb/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/db3dfb82-467b-4263-94f8-28f933540a6d?utm_source=github&utm_medium=referral&page=upgrade-pr --- adapter-dynamodb/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adapter-dynamodb/pom.xml b/adapter-dynamodb/pom.xml index 5243a1348..b46909ea0 100644 --- a/adapter-dynamodb/pom.xml +++ b/adapter-dynamodb/pom.xml @@ -43,7 +43,7 @@ com.amazonaws aws-java-sdk-dynamodb - 1.12.506 + 1.12.507 From 54471402c3521df2ef914442ff1915d03e2e29c2 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Wed, 9 Aug 2023 16:57:21 -0400 Subject: [PATCH 35/52] added slicing for large datasets --- .../io/nosqlbench/loader/hdf/HdfLoader.java | 3 ++ .../loader/hdf/readers/Hdf5Reader.java | 53 ++++++++++++++----- .../loader/hdf/writers/NoopVectorWriter.java | 33 ++++++++++++ hdf-loader/src/main/resources/config.yaml | 4 +- 4 files changed, 79 insertions(+), 14 deletions(-) create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/NoopVectorWriter.java diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java index ac0962cd8..d4c05a85d 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java @@ -22,6 +22,7 @@ import io.nosqlbench.loader.hdf.readers.Hdf5Reader; import io.nosqlbench.loader.hdf.readers.HdfReader; import io.nosqlbench.loader.hdf.writers.AstraVectorWriter; import io.nosqlbench.loader.hdf.writers.FileVectorWriter; +import io.nosqlbench.loader.hdf.writers.NoopVectorWriter; import io.nosqlbench.loader.hdf.writers.VectorWriter; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -30,6 +31,7 @@ public class HdfLoader { private static final Logger logger = LogManager.getLogger(HdfLoader.class); public static final String FILEWRITER = "filewriter"; public static final String ASTRA = "astra"; + public static final String NOOP = "noop"; public static final String HDF5 = "hdf5"; public static final String HDF4 = "hdf4"; @@ -60,6 +62,7 @@ public class HdfLoader { switch (writerType.toLowerCase()) { case FILEWRITER -> writer = new FileVectorWriter(config); case ASTRA -> writer = new AstraVectorWriter(config); + case NOOP -> writer = new NoopVectorWriter(); default -> { logger.info("Unknown writer type: " + writerType); System.exit(1); diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java index 010de23d8..bbca714c0 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java @@ -21,7 +21,6 @@ import io.jhdf.HdfFile; import io.jhdf.api.Dataset; import io.jhdf.api.Group; import io.jhdf.api.Node; -import io.jhdf.object.datatype.DataType; import io.nosqlbench.loader.hdf.config.LoaderConfig; import io.nosqlbench.loader.hdf.embedding.EmbeddingGenerator; import io.nosqlbench.loader.hdf.writers.VectorWriter; @@ -57,8 +56,6 @@ public class Hdf5Reader implements HdfReader { public void setWriter(VectorWriter writer) { this.writer = writer; writer.setQueue(queue); - Thread t = new Thread(writer); - t.start(); } public void extractDatasets(Group parent) { @@ -66,8 +63,9 @@ public class Hdf5Reader implements HdfReader { for (String key : nodes.keySet()) { Node node = nodes.get(key); if (node instanceof Dataset) { - datasets.add(((Dataset)node).getPath()); - } else if (node.isGroup()) { + datasets.add(node.getPath()); + } + else if (node.isGroup()) { extractDatasets((Group) node); } } @@ -81,6 +79,7 @@ public class Hdf5Reader implements HdfReader { extractDatasets(hdfFile); } List> futures = new ArrayList<>(); + Future writerFuture = executorService.submit(writer); for (String ds : datasets) { if (ds.equalsIgnoreCase(ALL)) { continue; @@ -89,13 +88,42 @@ public class Hdf5Reader implements HdfReader { logger.info("Processing dataset: " + ds); Dataset dataset = hdfFile.getDatasetByPath(ds); int[] dims = dataset.getDimensions(); - EmbeddingGenerator generator = getGenerator(dataset.getJavaType().getSimpleName()); - float[][] vectors = generator.generateEmbeddingFrom(dataset.getData(), dims); - for (int i = 0; i < dims[0]; i++) { - try { - queue.put(vectors[i]); - } catch (InterruptedException e) { - logger.error(e.getMessage(), e); + String type = dataset.getJavaType().getSimpleName().toLowerCase(); + EmbeddingGenerator generator = getGenerator(type); + Object data; + if (dataset.getSizeInBytes() > Integer.MAX_VALUE) { + // TODO: For now this will be implemented to handle numeric types with + // 2 dimensions where the 1st dimension is the number of vectors and the 2nd + // dimension is the number of dimensions in the vector. + long[] sliceOffset = new long[dims.length]; + int[] sliceDimensions = new int[dims.length]; + sliceDimensions[1] = dims[1]; + int noOfSlices = (int) (dataset.getSizeInBytes() / Integer.MAX_VALUE) + 1; + int sliceSize = dims[0] / noOfSlices; + for (int i = 0; i < noOfSlices; i++) { + sliceOffset[0] = (long) i * sliceSize; + sliceDimensions[0] = sliceSize; + data = dataset.getData(sliceOffset, sliceDimensions); + float[][] vectors = generator.generateEmbeddingFrom(data, dims); + for (float[] vector : vectors) { + try { + queue.put(vector); + } catch (InterruptedException e) { + logger.error(e.getMessage(), e); + } + } + } + } else { + data = dataset.getData(); + float[][] vectors = generator.generateEmbeddingFrom(data, dims); + int i = 1; + for (float[] vector : vectors) { + i++; + try { + queue.put(vector); + } catch (InterruptedException e) { + logger.error(e.getMessage(), e); + } } } }); @@ -110,5 +138,6 @@ public class Hdf5Reader implements HdfReader { } hdfFile.close(); writer.shutdown(); + executorService.shutdown(); } } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/NoopVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/NoopVectorWriter.java new file mode 100644 index 000000000..afb0789f4 --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/NoopVectorWriter.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf.writers; + +import java.util.concurrent.LinkedBlockingQueue; + +public class NoopVectorWriter extends AbstractVectorWriter { + + @Override + protected void writeVector(float[] vector) { + //No-op + } + + @Override + public void shutdown() { + shutdown = true; + } +} diff --git a/hdf-loader/src/main/resources/config.yaml b/hdf-loader/src/main/resources/config.yaml index e854cae52..c8e2e1ea5 100644 --- a/hdf-loader/src/main/resources/config.yaml +++ b/hdf-loader/src/main/resources/config.yaml @@ -1,9 +1,9 @@ format: HDF5 -sourceFile: /home/mwolters138/Documents/hdf5/datasets/deep-image-96-angular.hdf5 +sourceFile: /home/mwolters138/Documents/hdf5/datasets/pass/glove-25-angular.hdf5 datasets: - all embedding: word2vec -writer: filewriter +writer: noop astra: scb: /home/mwolters138/Dev/testing/secure-connect-vector-correctness.zip clientId: IvpdaZejwNuvWeupsIkWTHeL From 7109c4d09f64431fda86e09e4b39a48b7f328c2b Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Thu, 10 Aug 2023 09:16:57 -0400 Subject: [PATCH 36/52] adding binding function for hdf5 files --- virtdata-lib-basics/pom.xml | 11 +++ .../from_long/to_vector/HdfFileToVector.java | 68 +++++++++++++++++++ .../embedding/EmbeddingGenerator.java | 24 +++++++ .../embedding/EmbeddingGeneratorFactory.java | 44 ++++++++++++ .../embedding/FloatEmbeddingGenerator.java | 35 ++++++++++ .../embedding/IntEmbeddingGenerator.java | 33 +++++++++ 6 files changed, 215 insertions(+) create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVector.java create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGenerator.java create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGeneratorFactory.java create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/FloatEmbeddingGenerator.java create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/IntEmbeddingGenerator.java diff --git a/virtdata-lib-basics/pom.xml b/virtdata-lib-basics/pom.xml index 1d069da92..468823d60 100644 --- a/virtdata-lib-basics/pom.xml +++ b/virtdata-lib-basics/pom.xml @@ -82,6 +82,17 @@ 5.1.1 test + + io.jhdf + jhdf + 0.6.10 + + + io.jhdf + jhdf + 0.6.10 + compile + diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVector.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVector.java new file mode 100644 index 000000000..7481f2c57 --- /dev/null +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVector.java @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector; + +import io.jhdf.HdfFile; +import io.jhdf.api.Dataset; +import io.nosqlbench.virtdata.api.annotations.Categories; +import io.nosqlbench.virtdata.api.annotations.Category; +import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper; +import io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector.embedding.EmbeddingGenerator; +import io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector.embedding.EmbeddingGeneratorFactory; + +import java.nio.file.Paths; +import java.util.List; +import java.util.function.LongFunction; + +/** + * This function reads a vector dataset from an HDF5 file. The dataset itself is not + * read into memory, only the metadata (the "dataset" Java Object). The lambda function + * reads a single vector from the dataset, based on the long input value. As currently + * written this class will only work for datasets with 2 dimensions where the 1st dimension + * specifies the number of vectors and the 2nd dimension specifies the number of elements in + * each vector. Only datatypes short, int, and float are supported at this time. + */ +@ThreadSafeMapper +@Categories(Category.experimental) +public class HdfFileToVector implements LongFunction> { + private final HdfFile hdfFile; + private final Dataset dataset; + private final int[] dims; + private final EmbeddingGenerator embeddingGenerator; + + public HdfFileToVector(String filename, String datasetName) { + hdfFile = new HdfFile(Paths.get(filename)); + //TODO: implement a function to get the dataset by name only without needing the full path + dataset = hdfFile.getDatasetByPath(datasetName); + dims = dataset.getDimensions(); + embeddingGenerator = EmbeddingGeneratorFactory.getGenerator(dataset.getJavaType().getSimpleName().toLowerCase()); + } + @Override + public List apply(long l) { + long[] sliceOffset = new long[dims.length]; + sliceOffset[0] = (l % dims[0]); + int[] sliceDimensions = new int[dims.length]; + sliceDimensions[0] = 1; + // Do we want to give the option of reducing vector dimensions here? + sliceDimensions[1] = dims[1]; + Object data = dataset.getData(sliceOffset, sliceDimensions); + + return embeddingGenerator.generateEmbeddingFrom(data, dims); + } + +} diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGenerator.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGenerator.java new file mode 100644 index 000000000..6bd94a12b --- /dev/null +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGenerator.java @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector.embedding; + +import java.util.List; + +public interface EmbeddingGenerator { + List generateEmbeddingFrom(Object o, int[] dims); +} diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGeneratorFactory.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGeneratorFactory.java new file mode 100644 index 000000000..3f7eac827 --- /dev/null +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGeneratorFactory.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector.embedding; + +import java.util.HashMap; +import java.util.Map; + +public class EmbeddingGeneratorFactory { + private static final Map generators = new HashMap<>(); + + public static EmbeddingGenerator getGenerator(String type) { + String typeLower = type.equalsIgnoreCase("short") ? "int" : type.toLowerCase(); + switch (typeLower) { + case "float" -> { + if (!generators.containsKey(type)) { + generators.put(type, new FloatEmbeddingGenerator()); + } + return generators.get(type); + } + case "int" -> { + if (!generators.containsKey(type)) { + generators.put(type, new IntEmbeddingGenerator()); + } + return generators.get(type); + } + default -> throw new RuntimeException("Unknown embedding type: " + type); + } + } +} diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/FloatEmbeddingGenerator.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/FloatEmbeddingGenerator.java new file mode 100644 index 000000000..c5676327b --- /dev/null +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/FloatEmbeddingGenerator.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector.embedding; + +import java.util.List; + +public class FloatEmbeddingGenerator implements EmbeddingGenerator { + + @Override + public List generateEmbeddingFrom(Object o, int[] dims) { + // in this case o will always be float[1][x] + float[] vector = ((float[][]) o)[0]; + Float[] vector2 = new Float[vector.length]; + for (int i = 0; i < vector.length; i++) { + vector2[i] = vector[i]; + } + return List.of(vector2); + } + +} diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/IntEmbeddingGenerator.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/IntEmbeddingGenerator.java new file mode 100644 index 000000000..d3a88c0b8 --- /dev/null +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/IntEmbeddingGenerator.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector.embedding; + +import java.util.List; + +public class IntEmbeddingGenerator implements EmbeddingGenerator { + @Override + public List generateEmbeddingFrom(Object o, int[] dims) { + // in this case o will always be int[1][x] + int[] vector = ((int[][]) o)[0]; + Float[] vector2 = new Float[vector.length]; + for (int i = 0; i < vector.length; i++) { + vector2[i] = (float) vector[i]; + } + return List.of(vector2); + } +} From c68cb4b3d345484936e6e6cd02199ee2ff296786 Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Fri, 11 Aug 2023 19:23:31 +0000 Subject: [PATCH 37/52] fix: upgrade io.netty:netty-handler from 4.1.94.Final to 4.1.95.Final Snyk has created this PR to upgrade io.netty:netty-handler from 4.1.94.Final to 4.1.95.Final. See this package in Maven Repository: https://mvnrepository.com/artifact/io.netty/netty-handler/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/3e4c0e79-b7a9-4806-b967-55079b66e71e?utm_source=github&utm_medium=referral&page=upgrade-pr --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 8bcf43b15..54cbdec0c 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -209,7 +209,7 @@ io.netty netty-handler - 4.1.94.Final + 4.1.95.Final io.netty From f7f0e6f73212a8d470f6492bf564ba38924325a2 Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Sun, 13 Aug 2023 03:48:42 +0000 Subject: [PATCH 38/52] fix: upgrade org.xerial.snappy:snappy-java from 1.1.10.2 to 1.1.10.3 Snyk has created this PR to upgrade org.xerial.snappy:snappy-java from 1.1.10.2 to 1.1.10.3. See this package in Maven Repository: https://mvnrepository.com/artifact/org.xerial.snappy/snappy-java/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/73081f87-c1b7-4c91-b407-1cb299565642?utm_source=github&utm_medium=referral&page=upgrade-pr --- mvn-defaults/pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 8bcf43b15..ce19f6a9c 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -184,7 +184,7 @@ org.xerial.snappy snappy-java - 1.1.10.2 + 1.1.10.3 com.datastax.oss @@ -199,7 +199,7 @@ org.xerial.snappy snappy-java - 1.1.10.2 + 1.1.10.3 com.esri.geometry From 6685ae8e2eaf49cbd5f2900f32fd04e52fb05c0e Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Sun, 13 Aug 2023 04:54:44 +0000 Subject: [PATCH 39/52] fix: upgrade org.glassfish.jersey.core:jersey-common from 3.1.2 to 3.1.3 Snyk has created this PR to upgrade org.glassfish.jersey.core:jersey-common from 3.1.2 to 3.1.3. See this package in Maven Repository: https://mvnrepository.com/artifact/org.glassfish.jersey.core/jersey-common/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/99d5f5ce-a2bd-4e05-9eff-86de84bd0b9e?utm_source=github&utm_medium=referral&page=upgrade-pr --- docsys/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docsys/pom.xml b/docsys/pom.xml index 9585b1405..0101bc59b 100644 --- a/docsys/pom.xml +++ b/docsys/pom.xml @@ -22,7 +22,7 @@ docsys http://nosqlbench.io/ - 3.1.2 + 3.1.3 From 0c577e5b6f810d8f5a5c4789ca35c57859098a2e Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Tue, 15 Aug 2023 02:08:38 +0000 Subject: [PATCH 40/52] fix: upgrade org.apache.commons:commons-lang3 from 3.12.0 to 3.13.0 Snyk has created this PR to upgrade org.apache.commons:commons-lang3 from 3.12.0 to 3.13.0. See this package in Maven Repository: https://mvnrepository.com/artifact/org.apache.commons/commons-lang3/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/bb64938c-e68b-4c38-9e2f-d9d8336c07ee?utm_source=github&utm_medium=referral&page=upgrade-pr --- adapter-kafka/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adapter-kafka/pom.xml b/adapter-kafka/pom.xml index ffb5db38f..7bd4f07c7 100644 --- a/adapter-kafka/pom.xml +++ b/adapter-kafka/pom.xml @@ -60,7 +60,7 @@ org.apache.commons commons-lang3 - 3.12.0 + 3.13.0 From 2502a970b6697f74ac4d2c8fc3629489f865f1d8 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Tue, 15 Aug 2023 10:08:33 -0400 Subject: [PATCH 41/52] code cleanup and fixed issue with executable not exiting --- .../io/nosqlbench/loader/hdf/HdfLoader.java | 8 ++++++- .../loader/hdf/readers/Hdf5Reader.java | 11 +++++++--- .../loader/hdf/writers/AstraVectorWriter.java | 10 ++++++--- .../loader/hdf/writers/FileVectorWriter.java | 1 + .../loader/hdf/writers/NoopVectorWriter.java | 5 ++++- hdf-loader/src/main/resources/config.yaml | 6 +++--- .../to_vector/HdfFileToVectorTest.java | 21 +++++++++++++++++++ 7 files changed, 51 insertions(+), 11 deletions(-) create mode 100644 virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVectorTest.java diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java index d4c05a85d..3749e7802 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/HdfLoader.java @@ -42,6 +42,7 @@ public class HdfLoader { } try { LoaderConfig config = new LoaderConfig(args[0]); + logger.info("Starting loader with config: " + config); HdfReader reader = null; VectorWriter writer = null; @@ -51,7 +52,10 @@ public class HdfLoader { logger.info("HDF4 format not yet supported"); System.exit(1); } - case HDF5 -> reader = new Hdf5Reader(config); + case HDF5 -> { + logger.info("HDF5 format selected"); + reader = new Hdf5Reader(config); + } default -> { logger.info("Unknown format: " + format); System.exit(1); @@ -59,6 +63,7 @@ public class HdfLoader { } String writerType = config.getWriter(); + logger.info("Using writer type: " + writerType); switch (writerType.toLowerCase()) { case FILEWRITER -> writer = new FileVectorWriter(config); case ASTRA -> writer = new AstraVectorWriter(config); @@ -69,6 +74,7 @@ public class HdfLoader { } } reader.setWriter(writer); + logger.info("Starting main read loop"); reader.read(); } catch (Exception e) { logger.error(e); diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java index bbca714c0..232207919 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/readers/Hdf5Reader.java @@ -46,6 +46,7 @@ public class Hdf5Reader implements HdfReader { private final ExecutorService executorService; private final LinkedBlockingQueue queue; private List datasets; + private final float[] SHUTDOWN = new float[0]; public Hdf5Reader(LoaderConfig config) { this.config = config; executorService = Executors.newCachedThreadPool(); @@ -79,7 +80,7 @@ public class Hdf5Reader implements HdfReader { extractDatasets(hdfFile); } List> futures = new ArrayList<>(); - Future writerFuture = executorService.submit(writer); + executorService.submit(writer); for (String ds : datasets) { if (ds.equalsIgnoreCase(ALL)) { continue; @@ -92,6 +93,7 @@ public class Hdf5Reader implements HdfReader { EmbeddingGenerator generator = getGenerator(type); Object data; if (dataset.getSizeInBytes() > Integer.MAX_VALUE) { + logger.info("slicing large dataset: " + ds); // TODO: For now this will be implemented to handle numeric types with // 2 dimensions where the 1st dimension is the number of vectors and the 2nd // dimension is the number of dimensions in the vector. @@ -116,9 +118,7 @@ public class Hdf5Reader implements HdfReader { } else { data = dataset.getData(); float[][] vectors = generator.generateEmbeddingFrom(data, dims); - int i = 1; for (float[] vector : vectors) { - i++; try { queue.put(vector); } catch (InterruptedException e) { @@ -138,6 +138,11 @@ public class Hdf5Reader implements HdfReader { } hdfFile.close(); writer.shutdown(); + try { + queue.put(SHUTDOWN); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } executorService.shutdown(); } } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java index a272ad1ef..120567af8 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/AstraVectorWriter.java @@ -21,12 +21,15 @@ import com.datastax.oss.driver.api.core.CqlSession; import com.datastax.oss.driver.api.core.cql.PreparedStatement; import com.datastax.oss.driver.api.core.data.CqlVector; import io.nosqlbench.loader.hdf.config.LoaderConfig; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import java.nio.file.Paths; import java.util.Map; public class AstraVectorWriter extends AbstractVectorWriter { - private CqlSession session; + private static final Logger logger = LogManager.getLogger(AstraVectorWriter.class); + private final CqlSession session; PreparedStatement insert_vector; public AstraVectorWriter(LoaderConfig config) { @@ -36,9 +39,10 @@ public class AstraVectorWriter extends AbstractVectorWriter { .withAuthCredentials(astraParams.get("clientId"), astraParams.get("clientSecret")) .withKeyspace(astraParams.get("keyspace")) .build(); + logger.info("Astra session initialized"); insert_vector = session.prepare(astraParams.get("query")); } - +//TODO: this is insanely slow. Needs work on threading/batching @Override protected void writeVector(float[] vector) { Float[] vector2 = new Float[vector.length]; @@ -46,7 +50,7 @@ public class AstraVectorWriter extends AbstractVectorWriter { vector2[i] = vector[i]; } CqlVector.Builder vectorBuilder = CqlVector.builder(); - vectorBuilder.add((Object[]) vector2); + vectorBuilder.add(vector2); session.execute(insert_vector.bind(getPartitionValue(vector), vectorBuilder.build())); } diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java index 78e327d8e..a1eacdb3f 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/FileVectorWriter.java @@ -29,6 +29,7 @@ public class FileVectorWriter extends AbstractVectorWriter { public FileVectorWriter(LoaderConfig config) throws IOException { String targetFileName = config.getTargetFile(); targetFile = new BufferedWriter(new FileWriter(targetFileName)); + logger.info("Writing to file: " + targetFileName); } @Override diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/NoopVectorWriter.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/NoopVectorWriter.java index afb0789f4..15e62f067 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/NoopVectorWriter.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/writers/NoopVectorWriter.java @@ -17,13 +17,16 @@ package io.nosqlbench.loader.hdf.writers; -import java.util.concurrent.LinkedBlockingQueue; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; public class NoopVectorWriter extends AbstractVectorWriter { + private static final Logger logger = LogManager.getLogger(NoopVectorWriter.class); @Override protected void writeVector(float[] vector) { //No-op + logger.debug(vector); } @Override diff --git a/hdf-loader/src/main/resources/config.yaml b/hdf-loader/src/main/resources/config.yaml index c8e2e1ea5..d7c1c9dfd 100644 --- a/hdf-loader/src/main/resources/config.yaml +++ b/hdf-loader/src/main/resources/config.yaml @@ -3,11 +3,11 @@ sourceFile: /home/mwolters138/Documents/hdf5/datasets/pass/glove-25-angular.hdf5 datasets: - all embedding: word2vec -writer: noop +writer: filewriter astra: scb: /home/mwolters138/Dev/testing/secure-connect-vector-correctness.zip clientId: IvpdaZejwNuvWeupsIkWTHeL clientSecret: .bxut2-OQL,dWunZeQbjZC0vMHd88UWXKS.xT,nl95zQC0B0xU9FzSWK3HSUGO11o_7pr7wG7+EMaZqegkKlr4fZ54__furPMtWPGiPp,2cZ1q15vrWwc9_-AcgeCbuf - keyspace: baselines768dot - query: INSERT INTO vectors(key, value) VALUES (?,?) + keyspace: baselines128dot + query: INSERT INTO vectors25(key, value) VALUES (?,?) targetFile: /home/mwolters138/vectors.txt diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVectorTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVectorTest.java new file mode 100644 index 000000000..28dcfbc30 --- /dev/null +++ b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVectorTest.java @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector; + +public class HdfFileToVectorTest { +} From f53870d58b51342fc16990b94ca6e663dd4f1a87 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Tue, 15 Aug 2023 12:08:07 -0400 Subject: [PATCH 42/52] added support for doubles --- .../embedding/DoubleEmbeddingGenerator.java | 63 +++++++++++++++++++ .../embedding/EmbeddingGeneratorFactory.java | 7 +++ .../embedding/FloatEmbeddingGenerator.java | 14 ++--- hdf-loader/src/main/resources/config.yaml | 2 +- 4 files changed, 77 insertions(+), 9 deletions(-) create mode 100644 hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/DoubleEmbeddingGenerator.java diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/DoubleEmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/DoubleEmbeddingGenerator.java new file mode 100644 index 000000000..d4ba8545e --- /dev/null +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/DoubleEmbeddingGenerator.java @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.loader.hdf.embedding; + +public class DoubleEmbeddingGenerator implements EmbeddingGenerator { + + @Override + public float[][] generateEmbeddingFrom(Object o, int[] dims) { + return switch (dims.length) { + case 1 -> new float[][]{convertToFloat((double[]) o)}; + case 2 -> convertToFloats((double[][]) o); + case 3 -> flatten(o, dims); + default -> throw new RuntimeException("unsupported embedding dimensionality: " + dims.length); + }; + } + + private float[][] convertToFloats(double[][] o) { + float[][] floats = new float[o.length][]; + for (int i = 0; i < o.length; i++) { + floats[i] = convertToFloat(o[i]); + } + return floats; + } + + public float[] convertToFloat(double[] doubleArray) { + if (doubleArray == null) { + return null; + } + float[] floatArray = new float[doubleArray.length]; + for (int i = 0; i < doubleArray.length; i++) { + floatArray[i] = (float) doubleArray[i]; + } + return floatArray; + } + + private float[][] flatten(Object o, int[] dims) { + double[][][] arr = (double[][][]) o; + float[][] flat = new float[dims[0]][dims[1] * dims[2]]; + for (int i = 0; i < dims[0]; i++) { + for (int j = 0; j < dims[1]; j++) { + for (int k = 0; k < dims[2]; k++) { + flat[i][j * dims[2] + k] = (float)arr[i][j][k]; + } + } + } + return flat; + } +} diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java index 266655232..6a54b2fdd 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/EmbeddingGeneratorFactory.java @@ -25,6 +25,7 @@ public class EmbeddingGeneratorFactory { public static EmbeddingGenerator getGenerator(String type) { String typeLower = type.equalsIgnoreCase("short") ? "int" : type.toLowerCase(); + if (typeLower.equals("integer")) typeLower = "int"; switch (typeLower) { case "string" -> { if (!generators.containsKey(type)) { @@ -38,6 +39,12 @@ public class EmbeddingGeneratorFactory { } return generators.get(type); } + case "double" -> { + if (!generators.containsKey(type)) { + generators.put(type, new DoubleEmbeddingGenerator()); + } + return generators.get(type); + } case "int" -> { if (!generators.containsKey(type)) { generators.put(type, new IntEmbeddingGenerator()); diff --git a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/FloatEmbeddingGenerator.java b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/FloatEmbeddingGenerator.java index a6614eac6..92aa2045e 100644 --- a/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/FloatEmbeddingGenerator.java +++ b/hdf-loader/src/main/java/io/nosqlbench/loader/hdf/embedding/FloatEmbeddingGenerator.java @@ -21,14 +21,12 @@ public class FloatEmbeddingGenerator implements EmbeddingGenerator { @Override public float[][] generateEmbeddingFrom(Object o, int[] dims) { - switch (dims.length) { - case 1: - return new float[][]{new float[]{(float) o}}; - case 2: return (float[][]) o; - case 3: return flatten(o, dims); - default: - throw new RuntimeException("unsupported embedding dimensionality: " + dims.length); - } + return switch (dims.length) { + case 1 -> new float[][]{(float[]) o}; + case 2 -> (float[][]) o; + case 3 -> flatten(o, dims); + default -> throw new RuntimeException("unsupported embedding dimensionality: " + dims.length); + }; } private float[][] flatten(Object o, int[] dims) { diff --git a/hdf-loader/src/main/resources/config.yaml b/hdf-loader/src/main/resources/config.yaml index d7c1c9dfd..c3e2338de 100644 --- a/hdf-loader/src/main/resources/config.yaml +++ b/hdf-loader/src/main/resources/config.yaml @@ -1,5 +1,5 @@ format: HDF5 -sourceFile: /home/mwolters138/Documents/hdf5/datasets/pass/glove-25-angular.hdf5 +sourceFile: /home/mwolters138/Downloads/h5ex_t_float.h5 #/home/mwolters138/Documents/hdf5/datasets/pass/glove-25-angular.hdf5 datasets: - all embedding: word2vec From eeb6cb6b608d0c1db263c1aa640b32ed04de9c24 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Tue, 15 Aug 2023 12:31:01 -0400 Subject: [PATCH 43/52] adding unit test for hdf5 file binding --- .../embedding/DoubleEmbeddingGenerator.java | 35 ++++++++++++++++++ .../embedding/EmbeddingGeneratorFactory.java | 6 +++ .../to_vector/HdfFileToVectorTest.java | 24 ++++++++++++ .../src/test/resources/data/h5ex_t_float.h5 | Bin 0 -> 2368 bytes 4 files changed, 65 insertions(+) create mode 100644 virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/DoubleEmbeddingGenerator.java create mode 100644 virtdata-lib-basics/src/test/resources/data/h5ex_t_float.h5 diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/DoubleEmbeddingGenerator.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/DoubleEmbeddingGenerator.java new file mode 100644 index 000000000..7846d0cda --- /dev/null +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/DoubleEmbeddingGenerator.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector.embedding; + +import java.util.List; + +public class DoubleEmbeddingGenerator implements EmbeddingGenerator { + + @Override + public List generateEmbeddingFrom(Object o, int[] dims) { + // in this case o will always be double[1][x] + double[] vector = ((double[][]) o)[0]; + Float[] vector2 = new Float[vector.length]; + for (int i = 0; i < vector.length; i++) { + vector2[i] = (float) vector[i]; + } + return List.of(vector2); + } + +} diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGeneratorFactory.java b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGeneratorFactory.java index 3f7eac827..2e7b029b0 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGeneratorFactory.java +++ b/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGeneratorFactory.java @@ -38,6 +38,12 @@ public class EmbeddingGeneratorFactory { } return generators.get(type); } + case "double" -> { + if (!generators.containsKey(type)) { + generators.put(type, new DoubleEmbeddingGenerator()); + } + return generators.get(type); + } default -> throw new RuntimeException("Unknown embedding type: " + type); } } diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVectorTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVectorTest.java index 28dcfbc30..e54db114a 100644 --- a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVectorTest.java +++ b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVectorTest.java @@ -17,5 +17,29 @@ package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector; +import org.junit.jupiter.api.Test; + +import java.util.List; + public class HdfFileToVectorTest { + @Test + public void testHdfFileToVector() { + final float[][] results = new float[][]{ + {0.0f,1.0f,2.0f,3.0f,4.0f,5.0f,6.0f}, + {2.0f,1.6666666f,2.4f,3.2857144f,4.2222223f,5.181818f,6.1538463f}, + {4.0f,2.3333333f,2.8f,3.5714285f,4.4444447f,5.3636365f,6.3076925f}, + {6.0f,3.0f,3.2f,3.857143f,4.6666665f,5.5454545f,6.4615383f} + }; + + HdfFileToVector hdfFileToVector = new HdfFileToVector( + "src/test/resources/data/h5ex_t_float.h5", "/DS1"); + + List read; + for (int i = 0; i < 4; i++) { + read = hdfFileToVector.apply(i); + for (int j = 0; j < 7; j++) { + assert (read.get(j) == results[i][j]); + } + } + } } diff --git a/virtdata-lib-basics/src/test/resources/data/h5ex_t_float.h5 b/virtdata-lib-basics/src/test/resources/data/h5ex_t_float.h5 new file mode 100644 index 0000000000000000000000000000000000000000..9c8cb981d838cbb655ead25873f111818a976edf GIT binary patch literal 2368 zcmeD5aB<`1lHy_j0S*oZ76t(@6Gr@p0tZfr2#gPtPk=HQp>zk7Ucm%mFfxE31A_!q zTo7tLy1I}cS62q0N|^aD8mf)KfCa+hfC-G!BPs+uTpa^I9*%(e8kR~=K+_p4FkFHS z!Aw|s^ngi_Ni#CAfzvO9U|?Wn-~cn3n3%v;FauLIh#{z850n7`2L=;v29y8HU=C0Y zlJXg$?t~^|26l)vP!B{uGh+fYX+MCn`$UV5mTmr#C0q&))@$*Ly7hL6INa=1(4pj_ xt9WdQsKZ8KYao(DvzNnR)=UuKbl9GIdt2^pKB##@4*FmV#T>do*4>bF000RoZ^i%s literal 0 HcmV?d00001 From b02df0f03a3b3335821bb6539827329d2d5368cd Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Tue, 15 Aug 2023 12:40:16 -0400 Subject: [PATCH 44/52] removed duplicate entry in pom.xml --- virtdata-lib-basics/pom.xml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/virtdata-lib-basics/pom.xml b/virtdata-lib-basics/pom.xml index 468823d60..c9d9c9a2b 100644 --- a/virtdata-lib-basics/pom.xml +++ b/virtdata-lib-basics/pom.xml @@ -87,12 +87,7 @@ jhdf 0.6.10 - - io.jhdf - jhdf - 0.6.10 - compile - + From b3c51f11ed550952a2de547a2312361f34887497 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Tue, 15 Aug 2023 13:01:38 -0400 Subject: [PATCH 45/52] fixed test issue causing build to fail --- .../to_vector/HdfFileToVectorTest.java | 23 ++++++++++++++++-- .../src/test/resources/data/h5ex_t_float.h5 | Bin 2368 -> 0 bytes 2 files changed, 21 insertions(+), 2 deletions(-) delete mode 100644 virtdata-lib-basics/src/test/resources/data/h5ex_t_float.h5 diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVectorTest.java b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVectorTest.java index e54db114a..264a42159 100644 --- a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVectorTest.java +++ b/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVectorTest.java @@ -17,13 +17,23 @@ package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector; +import org.apache.commons.io.FileUtils; import org.junit.jupiter.api.Test; +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; import java.util.List; +import static com.datastax.oss.protocol.internal.ProtocolConstants.ErrorCode.READ_TIMEOUT; +import static org.apache.logging.log4j.core.impl.ThrowableFormatOptions.FILE_NAME; + public class HdfFileToVectorTest { + private static final int CONNECT_TIMEOUT = 100; + @Test - public void testHdfFileToVector() { + public void testHdfFileToVector() throws IOException { final float[][] results = new float[][]{ {0.0f,1.0f,2.0f,3.0f,4.0f,5.0f,6.0f}, {2.0f,1.6666666f,2.4f,3.2857144f,4.2222223f,5.181818f,6.1538463f}, @@ -31,8 +41,15 @@ public class HdfFileToVectorTest { {6.0f,3.0f,3.2f,3.857143f,4.6666665f,5.5454545f,6.4615383f} }; + FileUtils.copyURLToFile( + new URL("https://support.hdfgroup.org/ftp/HDF5/examples/files/exbyapi/h5ex_t_float.h5"), + new File("h5ex_t_float.h5"), + CONNECT_TIMEOUT, + READ_TIMEOUT); + HdfFileToVector hdfFileToVector = new HdfFileToVector( - "src/test/resources/data/h5ex_t_float.h5", "/DS1"); + "h5ex_t_float.h5", + "/DS1"); List read; for (int i = 0; i < 4; i++) { @@ -41,5 +58,7 @@ public class HdfFileToVectorTest { assert (read.get(j) == results[i][j]); } } + + new File("h5ex_t_float.h5").delete(); } } diff --git a/virtdata-lib-basics/src/test/resources/data/h5ex_t_float.h5 b/virtdata-lib-basics/src/test/resources/data/h5ex_t_float.h5 deleted file mode 100644 index 9c8cb981d838cbb655ead25873f111818a976edf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2368 zcmeD5aB<`1lHy_j0S*oZ76t(@6Gr@p0tZfr2#gPtPk=HQp>zk7Ucm%mFfxE31A_!q zTo7tLy1I}cS62q0N|^aD8mf)KfCa+hfC-G!BPs+uTpa^I9*%(e8kR~=K+_p4FkFHS z!Aw|s^ngi_Ni#CAfzvO9U|?Wn-~cn3n3%v;FauLIh#{z850n7`2L=;v29y8HU=C0Y zlJXg$?t~^|26l)vP!B{uGh+fYX+MCn`$UV5mTmr#C0q&))@$*Ly7hL6INa=1(4pj_ xt9WdQsKZ8KYao(DvzNnR)=UuKbl9GIdt2^pKB##@4*FmV#T>do*4>bF000RoZ^i%s From e815fb3e6cf39da9f11aa21bafc489a80624bcb4 Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Tue, 15 Aug 2023 17:50:50 +0000 Subject: [PATCH 46/52] fix: upgrade org.apache.commons:commons-lang3 from 3.12.0 to 3.13.0 Snyk has created this PR to upgrade org.apache.commons:commons-lang3 from 3.12.0 to 3.13.0. See this package in Maven Repository: https://mvnrepository.com/artifact/org.apache.commons/commons-lang3/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/e1ef410a-5a4c-4ee4-9dd2-47ee3e1ab590?utm_source=github&utm_medium=referral&page=upgrade-pr --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 3987b53bc..9d1beb4e0 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -341,7 +341,7 @@ org.apache.commons commons-lang3 - 3.12.0 + 3.13.0 com.squareup From c06ac4e06bb5e5185ce498d2fe8fe1a643805f83 Mon Sep 17 00:00:00 2001 From: Jonathan Shook Date: Tue, 15 Aug 2023 13:52:27 -0500 Subject: [PATCH 47/52] stubbing out structure for virtdata-lib-hdf5 --- mvn-defaults/pom.xml | 5 -- pom.xml | 2 + virtdata-lib-hdf5/pom.xml | 64 +++++++++++++++++++ .../virtdata/library/hdf5/Placeholder.java | 23 +++++++ virtdata-userlibs/pom.xml | 7 ++ 5 files changed, 96 insertions(+), 5 deletions(-) create mode 100644 virtdata-lib-hdf5/pom.xml create mode 100644 virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/Placeholder.java diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 8bcf43b15..57b386837 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -181,11 +181,6 @@ 1.4.1 - - org.xerial.snappy - snappy-java - 1.1.10.2 - com.datastax.oss java-driver-query-builder diff --git a/pom.xml b/pom.xml index 29573c851..173c73523 100644 --- a/pom.xml +++ b/pom.xml @@ -77,6 +77,7 @@ virtdata-lib-random virtdata-lib-curves4 virtdata-lib-realer + virtdata-lib-hdf5 virtdata-userlibs @@ -125,6 +126,7 @@ virtdata-lib-random virtdata-lib-curves4 virtdata-lib-realer + virtdata-lib-hdf5 virtdata-userlibs diff --git a/virtdata-lib-hdf5/pom.xml b/virtdata-lib-hdf5/pom.xml new file mode 100644 index 000000000..cc4ae7530 --- /dev/null +++ b/virtdata-lib-hdf5/pom.xml @@ -0,0 +1,64 @@ + + + + 4.0.0 + + + mvn-defaults + io.nosqlbench + ${revision} + ../mvn-defaults + + + virtdata-lib-hdf5 + jar + virtdata-lib-hdf5 + http://nosqlbench.io/ + + With inspiration from other libraries + + + + + io.nosqlbench + virtdata-lib-basics + ${revision} + + + + + + + + src/main/java + + **/*.md + **/*.yaml + **/*.txt + + + + src/main/resources + + ** + + + + + + + diff --git a/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/Placeholder.java b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/Placeholder.java new file mode 100644 index 000000000..736add3d5 --- /dev/null +++ b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/Placeholder.java @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.virtdata.library.hdf5; + +public class Placeholder { + public static void main(String[] args) { + System.out.println("woo"); + } +} diff --git a/virtdata-userlibs/pom.xml b/virtdata-userlibs/pom.xml index 09be05153..c5d144b27 100644 --- a/virtdata-userlibs/pom.xml +++ b/virtdata-userlibs/pom.xml @@ -66,6 +66,13 @@ virtdata-lib-curves4 ${revision} + + + io.nosqlbench + virtdata-lib-hdf5 + ${revision} + + io.nosqlbench docsys From 3456485a3dd6d84023fde37863d78e991981c582 Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Wed, 16 Aug 2023 00:31:32 +0000 Subject: [PATCH 48/52] fix: upgrade org.apache.commons:commons-lang3 from 3.12.0 to 3.13.0 Snyk has created this PR to upgrade org.apache.commons:commons-lang3 from 3.12.0 to 3.13.0. See this package in Maven Repository: https://mvnrepository.com/artifact/org.apache.commons/commons-lang3/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/829dafa3-4836-462a-b1f4-15be99fe372a?utm_source=github&utm_medium=referral&page=upgrade-pr --- adapter-pulsar/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adapter-pulsar/pom.xml b/adapter-pulsar/pom.xml index 1b5f8e91d..1c6f28c5d 100644 --- a/adapter-pulsar/pom.xml +++ b/adapter-pulsar/pom.xml @@ -66,7 +66,7 @@ org.apache.commons commons-lang3 - 3.12.0 + 3.13.0 From 8943aea9469acf2f7a04624001a5247c3cc4a487 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Wed, 16 Aug 2023 08:21:33 -0400 Subject: [PATCH 49/52] initial checkin of hdf5 binding --- virtdata-lib-basics/pom.xml | 5 -- virtdata-lib-hdf5/pom.xml | 16 ++++- .../virtdata/library/hdf5/Placeholder.java | 23 ------- .../from_long/AbstractHdfFileToVector.java | 48 +++++++++++++++ .../to_array/HdfFileToVectorArray.java | 54 ++++++++++++++++ .../to_list/HdfFileToVectorList.java | 36 ++++------- .../helpers}/DoubleEmbeddingGenerator.java | 14 ++++- .../hdf5/helpers}/EmbeddingGenerator.java | 6 +- .../helpers}/EmbeddingGeneratorFactory.java | 2 +- .../helpers}/FloatEmbeddingGenerator.java | 9 ++- .../hdf5/helpers}/IntEmbeddingGenerator.java | 14 ++++- .../to_array/HdfFileToArrayTest.java | 61 +++++++++++++++++++ .../to_list}/HdfFileToVectorTest.java | 6 +- 13 files changed, 228 insertions(+), 66 deletions(-) delete mode 100644 virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/Placeholder.java create mode 100644 virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/AbstractHdfFileToVector.java create mode 100644 virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_array/HdfFileToVectorArray.java rename virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVector.java => virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_list/HdfFileToVectorList.java (56%) rename {virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding => virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers}/DoubleEmbeddingGenerator.java (70%) rename {virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding => virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers}/EmbeddingGenerator.java (78%) rename {virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding => virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers}/EmbeddingGeneratorFactory.java (95%) rename {virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding => virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers}/FloatEmbeddingGenerator.java (79%) rename {virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding => virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers}/IntEmbeddingGenerator.java (69%) create mode 100644 virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_array/HdfFileToArrayTest.java rename {virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector => virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_list}/HdfFileToVectorTest.java (88%) diff --git a/virtdata-lib-basics/pom.xml b/virtdata-lib-basics/pom.xml index c9d9c9a2b..1e67c931f 100644 --- a/virtdata-lib-basics/pom.xml +++ b/virtdata-lib-basics/pom.xml @@ -82,11 +82,6 @@ 5.1.1 test - - io.jhdf - jhdf - 0.6.10 - diff --git a/virtdata-lib-hdf5/pom.xml b/virtdata-lib-hdf5/pom.xml index cc4ae7530..2912a6c8f 100644 --- a/virtdata-lib-hdf5/pom.xml +++ b/virtdata-lib-hdf5/pom.xml @@ -39,6 +39,12 @@ ${revision} + + io.jhdf + jhdf + 0.6.10 + + @@ -56,8 +62,16 @@ ** + + h5ex_t_float.h5 + + + + src/test/resources + + h5ex_t_float.h5 + - diff --git a/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/Placeholder.java b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/Placeholder.java deleted file mode 100644 index 736add3d5..000000000 --- a/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/Placeholder.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2023 nosqlbench - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.nosqlbench.virtdata.library.hdf5; - -public class Placeholder { - public static void main(String[] args) { - System.out.println("woo"); - } -} diff --git a/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/AbstractHdfFileToVector.java b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/AbstractHdfFileToVector.java new file mode 100644 index 000000000..d304bf01a --- /dev/null +++ b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/AbstractHdfFileToVector.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.virtdata.library.hdf5.from_long; + +import io.jhdf.HdfFile; +import io.jhdf.api.Dataset; +import io.nosqlbench.api.content.NBIO; + +import java.nio.file.Paths; + +public abstract class AbstractHdfFileToVector { + protected final HdfFile hdfFile; + protected final Dataset dataset; + protected final int[] dims; + + public AbstractHdfFileToVector(String filename, String datasetName) { + //hdfFile = new HdfFile(NBIO.all().search(filename).first().get().asPath()); + hdfFile = new HdfFile(Paths.get(filename)); + //TODO: implement a function to get the dataset by name only without needing the full path + dataset = hdfFile.getDatasetByPath(datasetName); + dims = dataset.getDimensions(); + } + + protected Object getDataFrom(long l) { + long[] sliceOffset = new long[dims.length]; + sliceOffset[0] = (l % dims[0]); + int[] sliceDimensions = new int[dims.length]; + sliceDimensions[0] = 1; + // Do we want to give the option of reducing vector dimensions here? + sliceDimensions[1] = dims[1]; + return dataset.getData(sliceOffset, sliceDimensions); + } +} diff --git a/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_array/HdfFileToVectorArray.java b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_array/HdfFileToVectorArray.java new file mode 100644 index 000000000..1f908fbf4 --- /dev/null +++ b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_array/HdfFileToVectorArray.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.virtdata.library.hdf5.from_long.to_array; + +import io.nosqlbench.virtdata.api.annotations.Categories; +import io.nosqlbench.virtdata.api.annotations.Category; +import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper; +import io.nosqlbench.virtdata.library.hdf5.from_long.AbstractHdfFileToVector; +import io.nosqlbench.virtdata.library.hdf5.helpers.EmbeddingGenerator; +import io.nosqlbench.virtdata.library.hdf5.helpers.EmbeddingGeneratorFactory; + +import java.util.function.LongFunction; + +/** + * This function reads a vector dataset from an HDF5 file. The dataset itself is not + * read into memory, only the metadata (the "dataset" Java Object). The lambda function + * reads a single vector from the dataset, based on the long input value. As currently + * written this class will only work for datasets with 2 dimensions where the 1st dimension + * specifies the number of vectors and the 2nd dimension specifies the number of elements in + * each vector. Only datatypes short, int, and float are supported at this time. + *

+ * This implementation is specific to returning an array of floats + */ +@ThreadSafeMapper +@Categories(Category.experimental) +public class HdfFileToVectorArray extends AbstractHdfFileToVector implements LongFunction { + private final EmbeddingGenerator embeddingGenerator; + + public HdfFileToVectorArray(String filename, String datasetName) { + super(filename, datasetName); + embeddingGenerator = EmbeddingGeneratorFactory.getGenerator(dataset.getJavaType().getSimpleName().toLowerCase()); + } + @Override + public float[] apply(long l) { + Object data = getDataFrom(l); + return embeddingGenerator.generateArrayEmbeddingFrom(data, dims); + } + +} diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVector.java b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_list/HdfFileToVectorList.java similarity index 56% rename from virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVector.java rename to virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_list/HdfFileToVectorList.java index 7481f2c57..36d623043 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVector.java +++ b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_list/HdfFileToVectorList.java @@ -15,17 +15,15 @@ * */ -package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector; +package io.nosqlbench.virtdata.library.hdf5.from_long.to_list; -import io.jhdf.HdfFile; -import io.jhdf.api.Dataset; import io.nosqlbench.virtdata.api.annotations.Categories; import io.nosqlbench.virtdata.api.annotations.Category; import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper; -import io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector.embedding.EmbeddingGenerator; -import io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector.embedding.EmbeddingGeneratorFactory; +import io.nosqlbench.virtdata.library.hdf5.from_long.AbstractHdfFileToVector; +import io.nosqlbench.virtdata.library.hdf5.helpers.EmbeddingGenerator; +import io.nosqlbench.virtdata.library.hdf5.helpers.EmbeddingGeneratorFactory; -import java.nio.file.Paths; import java.util.List; import java.util.function.LongFunction; @@ -36,33 +34,23 @@ import java.util.function.LongFunction; * written this class will only work for datasets with 2 dimensions where the 1st dimension * specifies the number of vectors and the 2nd dimension specifies the number of elements in * each vector. Only datatypes short, int, and float are supported at this time. + *

+ * This implementation is specific to returning a List of Floats, so as to work with the + * normalization functions e.g. NormalizeListVector and its variants. */ @ThreadSafeMapper @Categories(Category.experimental) -public class HdfFileToVector implements LongFunction> { - private final HdfFile hdfFile; - private final Dataset dataset; - private final int[] dims; +public class HdfFileToVectorList extends AbstractHdfFileToVector implements LongFunction> { private final EmbeddingGenerator embeddingGenerator; - public HdfFileToVector(String filename, String datasetName) { - hdfFile = new HdfFile(Paths.get(filename)); - //TODO: implement a function to get the dataset by name only without needing the full path - dataset = hdfFile.getDatasetByPath(datasetName); - dims = dataset.getDimensions(); + public HdfFileToVectorList(String filename, String datasetName) { + super(filename, datasetName); embeddingGenerator = EmbeddingGeneratorFactory.getGenerator(dataset.getJavaType().getSimpleName().toLowerCase()); } @Override public List apply(long l) { - long[] sliceOffset = new long[dims.length]; - sliceOffset[0] = (l % dims[0]); - int[] sliceDimensions = new int[dims.length]; - sliceDimensions[0] = 1; - // Do we want to give the option of reducing vector dimensions here? - sliceDimensions[1] = dims[1]; - Object data = dataset.getData(sliceOffset, sliceDimensions); - - return embeddingGenerator.generateEmbeddingFrom(data, dims); + Object data = getDataFrom(l); + return embeddingGenerator.generateListEmbeddingFrom(data, dims); } } diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/DoubleEmbeddingGenerator.java b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/DoubleEmbeddingGenerator.java similarity index 70% rename from virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/DoubleEmbeddingGenerator.java rename to virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/DoubleEmbeddingGenerator.java index 7846d0cda..9c869307c 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/DoubleEmbeddingGenerator.java +++ b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/DoubleEmbeddingGenerator.java @@ -15,14 +15,14 @@ * */ -package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector.embedding; +package io.nosqlbench.virtdata.library.hdf5.helpers; import java.util.List; public class DoubleEmbeddingGenerator implements EmbeddingGenerator { @Override - public List generateEmbeddingFrom(Object o, int[] dims) { + public List generateListEmbeddingFrom(Object o, int[] dims) { // in this case o will always be double[1][x] double[] vector = ((double[][]) o)[0]; Float[] vector2 = new Float[vector.length]; @@ -32,4 +32,14 @@ public class DoubleEmbeddingGenerator implements EmbeddingGenerator { return List.of(vector2); } + @Override + public float[] generateArrayEmbeddingFrom(Object o, int[] dims) { + double[] vector = ((double[][]) o)[0]; + float[] vector2 = new float[vector.length]; + for (int i = 0; i < vector.length; i++) { + vector2[i] = (float) vector[i]; + } + return vector2; + } + } diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGenerator.java b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/EmbeddingGenerator.java similarity index 78% rename from virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGenerator.java rename to virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/EmbeddingGenerator.java index 6bd94a12b..aed942cd7 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGenerator.java +++ b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/EmbeddingGenerator.java @@ -15,10 +15,12 @@ * */ -package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector.embedding; +package io.nosqlbench.virtdata.library.hdf5.helpers; import java.util.List; public interface EmbeddingGenerator { - List generateEmbeddingFrom(Object o, int[] dims); + List generateListEmbeddingFrom(Object o, int[] dims); + + float[] generateArrayEmbeddingFrom(Object o, int[] dims); } diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGeneratorFactory.java b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/EmbeddingGeneratorFactory.java similarity index 95% rename from virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGeneratorFactory.java rename to virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/EmbeddingGeneratorFactory.java index 2e7b029b0..f717e3cde 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/EmbeddingGeneratorFactory.java +++ b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/EmbeddingGeneratorFactory.java @@ -15,7 +15,7 @@ * */ -package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector.embedding; +package io.nosqlbench.virtdata.library.hdf5.helpers; import java.util.HashMap; import java.util.Map; diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/FloatEmbeddingGenerator.java b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/FloatEmbeddingGenerator.java similarity index 79% rename from virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/FloatEmbeddingGenerator.java rename to virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/FloatEmbeddingGenerator.java index c5676327b..ca2bb1dd1 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/FloatEmbeddingGenerator.java +++ b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/FloatEmbeddingGenerator.java @@ -15,14 +15,14 @@ * */ -package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector.embedding; +package io.nosqlbench.virtdata.library.hdf5.helpers; import java.util.List; public class FloatEmbeddingGenerator implements EmbeddingGenerator { @Override - public List generateEmbeddingFrom(Object o, int[] dims) { + public List generateListEmbeddingFrom(Object o, int[] dims) { // in this case o will always be float[1][x] float[] vector = ((float[][]) o)[0]; Float[] vector2 = new Float[vector.length]; @@ -32,4 +32,9 @@ public class FloatEmbeddingGenerator implements EmbeddingGenerator { return List.of(vector2); } + @Override + public float[] generateArrayEmbeddingFrom(Object o, int[] dims) { + return ((float[][]) o)[0]; + } + } diff --git a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/IntEmbeddingGenerator.java b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/IntEmbeddingGenerator.java similarity index 69% rename from virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/IntEmbeddingGenerator.java rename to virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/IntEmbeddingGenerator.java index d3a88c0b8..bdbdc8ba8 100644 --- a/virtdata-lib-basics/src/main/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/embedding/IntEmbeddingGenerator.java +++ b/virtdata-lib-hdf5/src/main/java/io/nosqlbench/virtdata/library/hdf5/helpers/IntEmbeddingGenerator.java @@ -15,13 +15,13 @@ * */ -package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector.embedding; +package io.nosqlbench.virtdata.library.hdf5.helpers; import java.util.List; public class IntEmbeddingGenerator implements EmbeddingGenerator { @Override - public List generateEmbeddingFrom(Object o, int[] dims) { + public List generateListEmbeddingFrom(Object o, int[] dims) { // in this case o will always be int[1][x] int[] vector = ((int[][]) o)[0]; Float[] vector2 = new Float[vector.length]; @@ -30,4 +30,14 @@ public class IntEmbeddingGenerator implements EmbeddingGenerator { } return List.of(vector2); } + + @Override + public float[] generateArrayEmbeddingFrom(Object o, int[] dims) { + int[] vector = ((int[][]) o)[0]; + float[] vector2 = new float[vector.length]; + for (int i = 0; i < vector.length; i++) { + vector2[i] = (float) vector[i]; + } + return vector2; + } } diff --git a/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_array/HdfFileToArrayTest.java b/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_array/HdfFileToArrayTest.java new file mode 100644 index 000000000..b3ae56e96 --- /dev/null +++ b/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_array/HdfFileToArrayTest.java @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2023 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package io.nosqlbench.virtdata.library.hdf5.from_long.to_array; + +import org.apache.commons.io.FileUtils; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.io.IOException; +import java.net.URL; + +import static com.datastax.oss.protocol.internal.ProtocolConstants.ErrorCode.READ_TIMEOUT; + +public class HdfFileToArrayTest { + private static final int CONNECT_TIMEOUT = 100; + + @Test + public void testHdfFileToVector() throws IOException { + final float[][] results = new float[][]{ + {0.0f,1.0f,2.0f,3.0f,4.0f,5.0f,6.0f}, + {2.0f,1.6666666f,2.4f,3.2857144f,4.2222223f,5.181818f,6.1538463f}, + {4.0f,2.3333333f,2.8f,3.5714285f,4.4444447f,5.3636365f,6.3076925f}, + {6.0f,3.0f,3.2f,3.857143f,4.6666665f,5.5454545f,6.4615383f} + }; + + FileUtils.copyURLToFile( + new URL("https://support.hdfgroup.org/ftp/HDF5/examples/files/exbyapi/h5ex_t_float.h5"), + new File("h5ex_t_float.h5"), + CONNECT_TIMEOUT, + READ_TIMEOUT); + + HdfFileToVectorArray hdfFileToVector = new HdfFileToVectorArray( + "h5ex_t_float.h5", + "/DS1"); + + float[] read; + for (int i = 0; i < 4; i++) { + read = hdfFileToVector.apply(i); + for (int j = 0; j < 7; j++) { + assert (read[j] == results[i][j]); + } + } + + new File("h5ex_t_float.h5").delete(); + } +} diff --git a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVectorTest.java b/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_list/HdfFileToVectorTest.java similarity index 88% rename from virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVectorTest.java rename to virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_list/HdfFileToVectorTest.java index 264a42159..9fe377996 100644 --- a/virtdata-lib-basics/src/test/java/io/nosqlbench/virtdata/library/basics/shared/from_long/to_vector/HdfFileToVectorTest.java +++ b/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_list/HdfFileToVectorTest.java @@ -15,19 +15,17 @@ * */ -package io.nosqlbench.virtdata.library.basics.shared.from_long.to_vector; +package io.nosqlbench.virtdata.library.hdf5.from_long.to_list; import org.apache.commons.io.FileUtils; import org.junit.jupiter.api.Test; import java.io.File; import java.io.IOException; -import java.net.MalformedURLException; import java.net.URL; import java.util.List; import static com.datastax.oss.protocol.internal.ProtocolConstants.ErrorCode.READ_TIMEOUT; -import static org.apache.logging.log4j.core.impl.ThrowableFormatOptions.FILE_NAME; public class HdfFileToVectorTest { private static final int CONNECT_TIMEOUT = 100; @@ -47,7 +45,7 @@ public class HdfFileToVectorTest { CONNECT_TIMEOUT, READ_TIMEOUT); - HdfFileToVector hdfFileToVector = new HdfFileToVector( + HdfFileToVectorList hdfFileToVector = new HdfFileToVectorList( "h5ex_t_float.h5", "/DS1"); From 5a9cf9dd58bca7767d089c3713ed6a17a08609a1 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Wed, 16 Aug 2023 09:05:17 -0400 Subject: [PATCH 50/52] fixing hdf5 tests to use local file --- virtdata-lib-hdf5/pom.xml | 26 ++++-------------- .../to_array/HdfFileToArrayTest.java | 20 ++------------ .../to_list/HdfFileToVectorTest.java | 19 ++----------- .../src/test/resources/h5ex_t_float.h5 | Bin 0 -> 2368 bytes 4 files changed, 9 insertions(+), 56 deletions(-) create mode 100644 virtdata-lib-hdf5/src/test/resources/h5ex_t_float.h5 diff --git a/virtdata-lib-hdf5/pom.xml b/virtdata-lib-hdf5/pom.xml index 2912a6c8f..6f0ebf6d2 100644 --- a/virtdata-lib-hdf5/pom.xml +++ b/virtdata-lib-hdf5/pom.xml @@ -48,31 +48,15 @@ - - - src/main/java - - **/*.md - **/*.yaml - **/*.txt - - - - src/main/resources - - ** - - - h5ex_t_float.h5 - - - + + src/test/resources h5ex_t_float.h5 - - + true + + diff --git a/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_array/HdfFileToArrayTest.java b/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_array/HdfFileToArrayTest.java index b3ae56e96..5ec14883d 100644 --- a/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_array/HdfFileToArrayTest.java +++ b/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_array/HdfFileToArrayTest.java @@ -17,20 +17,12 @@ package io.nosqlbench.virtdata.library.hdf5.from_long.to_array; -import org.apache.commons.io.FileUtils; import org.junit.jupiter.api.Test; -import java.io.File; -import java.io.IOException; -import java.net.URL; - -import static com.datastax.oss.protocol.internal.ProtocolConstants.ErrorCode.READ_TIMEOUT; - public class HdfFileToArrayTest { - private static final int CONNECT_TIMEOUT = 100; @Test - public void testHdfFileToVector() throws IOException { + public void testHdfFileToVector() { final float[][] results = new float[][]{ {0.0f,1.0f,2.0f,3.0f,4.0f,5.0f,6.0f}, {2.0f,1.6666666f,2.4f,3.2857144f,4.2222223f,5.181818f,6.1538463f}, @@ -38,14 +30,8 @@ public class HdfFileToArrayTest { {6.0f,3.0f,3.2f,3.857143f,4.6666665f,5.5454545f,6.4615383f} }; - FileUtils.copyURLToFile( - new URL("https://support.hdfgroup.org/ftp/HDF5/examples/files/exbyapi/h5ex_t_float.h5"), - new File("h5ex_t_float.h5"), - CONNECT_TIMEOUT, - READ_TIMEOUT); - HdfFileToVectorArray hdfFileToVector = new HdfFileToVectorArray( - "h5ex_t_float.h5", + "src/test/resources/h5ex_t_float.h5", "/DS1"); float[] read; @@ -55,7 +41,5 @@ public class HdfFileToArrayTest { assert (read[j] == results[i][j]); } } - - new File("h5ex_t_float.h5").delete(); } } diff --git a/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_list/HdfFileToVectorTest.java b/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_list/HdfFileToVectorTest.java index 9fe377996..c2f4194cc 100644 --- a/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_list/HdfFileToVectorTest.java +++ b/virtdata-lib-hdf5/src/test/java/io/nosqlbench/virtdata/library/hdf5/from_long/to_list/HdfFileToVectorTest.java @@ -17,21 +17,14 @@ package io.nosqlbench.virtdata.library.hdf5.from_long.to_list; -import org.apache.commons.io.FileUtils; import org.junit.jupiter.api.Test; -import java.io.File; -import java.io.IOException; -import java.net.URL; import java.util.List; -import static com.datastax.oss.protocol.internal.ProtocolConstants.ErrorCode.READ_TIMEOUT; - public class HdfFileToVectorTest { - private static final int CONNECT_TIMEOUT = 100; @Test - public void testHdfFileToVector() throws IOException { + public void testHdfFileToVector() { final float[][] results = new float[][]{ {0.0f,1.0f,2.0f,3.0f,4.0f,5.0f,6.0f}, {2.0f,1.6666666f,2.4f,3.2857144f,4.2222223f,5.181818f,6.1538463f}, @@ -39,14 +32,8 @@ public class HdfFileToVectorTest { {6.0f,3.0f,3.2f,3.857143f,4.6666665f,5.5454545f,6.4615383f} }; - FileUtils.copyURLToFile( - new URL("https://support.hdfgroup.org/ftp/HDF5/examples/files/exbyapi/h5ex_t_float.h5"), - new File("h5ex_t_float.h5"), - CONNECT_TIMEOUT, - READ_TIMEOUT); - HdfFileToVectorList hdfFileToVector = new HdfFileToVectorList( - "h5ex_t_float.h5", + "src/test/resources/h5ex_t_float.h5", "/DS1"); List read; @@ -56,7 +43,5 @@ public class HdfFileToVectorTest { assert (read.get(j) == results[i][j]); } } - - new File("h5ex_t_float.h5").delete(); } } diff --git a/virtdata-lib-hdf5/src/test/resources/h5ex_t_float.h5 b/virtdata-lib-hdf5/src/test/resources/h5ex_t_float.h5 new file mode 100644 index 0000000000000000000000000000000000000000..9c8cb981d838cbb655ead25873f111818a976edf GIT binary patch literal 2368 zcmeD5aB<`1lHy_j0S*oZ76t(@6Gr@p0tZfr2#gPtPk=HQp>zk7Ucm%mFfxE31A_!q zTo7tLy1I}cS62q0N|^aD8mf)KfCa+hfC-G!BPs+uTpa^I9*%(e8kR~=K+_p4FkFHS z!Aw|s^ngi_Ni#CAfzvO9U|?Wn-~cn3n3%v;FauLIh#{z850n7`2L=;v29y8HU=C0Y zlJXg$?t~^|26l)vP!B{uGh+fYX+MCn`$UV5mTmr#C0q&))@$*Ly7hL6INa=1(4pj_ xt9WdQsKZ8KYao(DvzNnR)=UuKbl9GIdt2^pKB##@4*FmV#T>do*4>bF000RoZ^i%s literal 0 HcmV?d00001 From 6466299a688128c42be9b2a72de43222ae41bb8e Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Wed, 16 Aug 2023 16:18:10 +0000 Subject: [PATCH 51/52] fix: upgrade com.amazonaws:aws-java-sdk-s3 from 1.12.507 to 1.12.513 Snyk has created this PR to upgrade com.amazonaws:aws-java-sdk-s3 from 1.12.507 to 1.12.513. See this package in Maven Repository: https://mvnrepository.com/artifact/com.amazonaws/aws-java-sdk-s3/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/024e32ec-7f80-485c-b7bf-f69d45f933ce?utm_source=github&utm_medium=referral&page=upgrade-pr --- mvn-defaults/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index 9d1beb4e0..d8fe9a533 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -326,7 +326,7 @@ com.amazonaws aws-java-sdk-s3 - 1.12.507 + 1.12.513 com.elega9t From 5b9fff98e4c02eca70a48cda3bffdc2873e9282a Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Wed, 16 Aug 2023 19:13:00 +0000 Subject: [PATCH 52/52] fix: upgrade org.glassfish.jersey.media:jersey-media-json-jackson from 3.1.2 to 3.1.3 Snyk has created this PR to upgrade org.glassfish.jersey.media:jersey-media-json-jackson from 3.1.2 to 3.1.3. See this package in Maven Repository: https://mvnrepository.com/artifact/org.glassfish.jersey.media/jersey-media-json-jackson/ See this project in Snyk: https://app.snyk.io/org/nosqlbench/project/99d5f5ce-a2bd-4e05-9eff-86de84bd0b9e?utm_source=github&utm_medium=referral&page=upgrade-pr --- docsys/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docsys/pom.xml b/docsys/pom.xml index 0101bc59b..8b44d73d4 100644 --- a/docsys/pom.xml +++ b/docsys/pom.xml @@ -94,7 +94,7 @@ org.glassfish.jersey.media jersey-media-json-jackson - 3.1.2 + 3.1.3