diff --git a/mvn-defaults/pom.xml b/mvn-defaults/pom.xml index b2a6f0426..c66371b88 100644 --- a/mvn-defaults/pom.xml +++ b/mvn-defaults/pom.xml @@ -398,6 +398,22 @@ 2.23.1 + + io.weaviate + client + 4.8.2 + + + + com.azure + azure-search-documents + 11.7.0 + + + com.azure + azure-identity + 1.13.2 + diff --git a/nb-adapters/adapter-azure-aisearch/pom.xml b/nb-adapters/adapter-azure-aisearch/pom.xml new file mode 100644 index 000000000..da6a953ab --- /dev/null +++ b/nb-adapters/adapter-azure-aisearch/pom.xml @@ -0,0 +1,59 @@ + + + + 4.0.0 + + adapter-azure-aisearch + jar + + + mvn-defaults + io.nosqlbench + ${revision} + ../../mvn-defaults + + + ${project.artifactId} + + An nosqlbench adapter driver module for the Azure AI Search database. + + + + + io.nosqlbench + nb-annotations + ${revision} + compile + + + io.nosqlbench + adapters-api + ${revision} + compile + + + com.azure + azure-search-documents + + + com.azure + azure-identity + + + \ No newline at end of file diff --git a/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/AzureAISearchAdapterUtils.java b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/AzureAISearchAdapterUtils.java new file mode 100644 index 000000000..d281b7b94 --- /dev/null +++ b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/AzureAISearchAdapterUtils.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/*
 * Copyright (c) 2020-2024 nosqlbench
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.nosqlbench.adapter.azureaisearch;

import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang3.StringUtils;

import com.azure.search.documents.SearchDocument;
import com.azure.search.documents.util.SearchPagedIterable;

/**
 * Shared helpers for the Azure AI Search adapter: the driver selector
 * constant, list splitting for space/comma separated config values, and
 * masking of credentials before they are logged.
 */
public class AzureAISearchAdapterUtils {

    /** Driver selector name used by workload templates to pick this adapter. */
    public static final String AZURE_AI_SEARCH = "azure_aisearch";

    /** Tokens may be separated by runs of spaces and/or a comma. */
    private static final String LIST_SPLIT_REGEX = "( +| *, *)";

    /**
     * Split a space- and/or comma-separated list of names into its parts.
     *
     * @param input the raw list; must not be null or blank
     * @return the non-blank tokens of {@code input}, in order
     * @throws IllegalArgumentException if {@code input} is null or blank
     */
    public static List<String> splitNames(String input) {
        // Explicit check rather than `assert`: assertions are disabled unless the
        // JVM runs with -ea, so an assert offers no protection in production.
        // (isNotBlank already implies isNotEmpty, so a single check suffices.)
        if (StringUtils.isBlank(input)) {
            throw new IllegalArgumentException("input must not be null or blank");
        }
        return Arrays.stream(input.split(LIST_SPLIT_REGEX))
            .filter(StringUtils::isNotBlank)
            .toList();
    }

    /**
     * Split a space- and/or comma-separated list of longs into its parts.
     *
     * @param input the raw list; must not be null or blank
     * @return the parsed values, in order
     * @throws IllegalArgumentException if {@code input} is null or blank
     * @throws NumberFormatException    if a token is not a valid long
     */
    public static List<Long> splitLongs(String input) {
        if (StringUtils.isBlank(input)) {
            throw new IllegalArgumentException("input must not be null or blank");
        }
        return Arrays.stream(input.split(LIST_SPLIT_REGEX))
            .filter(StringUtils::isNotBlank)
            .map(Long::parseLong)
            .toList();
    }

    /**
     * Mask the numeric digits in the given string with '*', e.g. for logging
     * API keys without disclosing them.
     *
     * @param unmasked The string to mask; must not be null or blank
     * @return The masked string
     * @throws IllegalArgumentException if {@code unmasked} is null or blank
     */
    protected static String maskDigits(String unmasked) {
        if (StringUtils.isBlank(unmasked)) {
            throw new IllegalArgumentException("unmasked must not be null or blank");
        }
        StringBuilder masked = new StringBuilder(unmasked.length());
        for (char ch : unmasked.toCharArray()) {
            masked.append(Character.isDigit(ch) ? '*' : ch);
        }
        return masked.toString();
    }

    /**
     * Extract the string form of one field from every document in a search
     * response page.
     *
     * @param fieldName the document field to extract
     * @param response  the paged search response to iterate
     * @return the field values, one per returned document
     */
    public String[] responseFieldToStringArray(String fieldName, SearchPagedIterable response) {
        return response.stream()
            .map(searchResult -> searchResult.getDocument(SearchDocument.class).get(fieldName).toString())
            .toArray(String[]::new);
    }
}
/*
 * Copyright (c) 2024 nosqlbench
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.nosqlbench.adapter.azureaisearch;

import java.util.function.Function;

import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapters.api.activityimpl.OpMapper;
import io.nosqlbench.adapters.api.activityimpl.uniform.BaseDriverAdapter;
import io.nosqlbench.adapters.api.activityimpl.uniform.DriverAdapter;
import io.nosqlbench.nb.annotations.Service;
import io.nosqlbench.nb.api.components.core.NBComponent;
import io.nosqlbench.nb.api.config.standard.NBConfigModel;
import io.nosqlbench.nb.api.config.standard.NBConfiguration;
import io.nosqlbench.nb.api.labels.NBLabels;

import static io.nosqlbench.adapter.azureaisearch.AzureAISearchAdapterUtils.AZURE_AI_SEARCH;

/**
 * Driver adapter for Azure AI Search, registered under the
 * {@code azure_aisearch} selector. Wires the op mapper, the per-space
 * initializer, and the space configuration model into the NB runtime.
 */
@Service(value = DriverAdapter.class, selector = AZURE_AI_SEARCH)
public class AzureAISearchDriverAdapter extends BaseDriverAdapter<AzureAISearchBaseOp<?>, AzureAISearchSpace> {

    public AzureAISearchDriverAdapter(NBComponent parentComponent, NBLabels labels) {
        super(parentComponent, labels);
    }

    /** Expose the adapter's own config options merged with the space's. */
    @Override
    public NBConfigModel getConfigModel() {
        return super.getConfigModel().add(AzureAISearchSpace.getConfigModel());
    }

    /** Each named space gets its own lazily-connected {@link AzureAISearchSpace}. */
    @Override
    public Function<String, AzureAISearchSpace> getSpaceInitializer(NBConfiguration cfg) {
        return spaceName -> new AzureAISearchSpace(spaceName, cfg);
    }

    /** Map parsed op templates onto this adapter's dispensers. */
    @Override
    public OpMapper<AzureAISearchBaseOp<?>> getOpMapper() {
        return new AzureAISearchOpMapper(this);
    }
}
may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.adapter.azureaisearch; + +import static io.nosqlbench.adapter.azureaisearch.AzureAISearchAdapterUtils.AZURE_AI_SEARCH; + +import io.nosqlbench.adapter.diag.DriverAdapterLoader; +import io.nosqlbench.nb.annotations.Service; +import io.nosqlbench.nb.api.components.core.NBComponent; +import io.nosqlbench.nb.api.labels.NBLabels; + +@Service(value = DriverAdapterLoader.class, selector = AZURE_AI_SEARCH) +public class AzureAISearchDriverAdapterLoader implements DriverAdapterLoader { + @Override + public AzureAISearchDriverAdapter load(NBComponent parent, NBLabels childLabels) { + return new AzureAISearchDriverAdapter(parent, childLabels); + } +} \ No newline at end of file diff --git a/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/AzureAISearchOpMapper.java b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/AzureAISearchOpMapper.java new file mode 100644 index 000000000..5e80e70e3 --- /dev/null +++ b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/AzureAISearchOpMapper.java @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.adapter.azureaisearch; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp; +import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchBaseOpDispenser; +import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchCreateOrUpdateIndexOpDispenser; +import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchDeleteIndexOpDispenser; +import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchListIndexesOpDispenser; +import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchSearchDocumentsOpDispenser; +import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchUploadDocumentsOpDispenser; +import io.nosqlbench.adapter.azureaisearch.types.AzureAISearchOpType; +import io.nosqlbench.adapters.api.activityimpl.OpDispenser; +import io.nosqlbench.adapters.api.activityimpl.OpMapper; +import io.nosqlbench.adapters.api.templating.ParsedOp; +import io.nosqlbench.engine.api.templating.TypeAndTarget; + +public class AzureAISearchOpMapper implements OpMapper> { + private static final Logger logger = LogManager.getLogger(AzureAISearchOpMapper.class); + private final AzureAISearchDriverAdapter adapter; + + /** + * Create a new {@code AzureAISearchOpMapper} implementing the {@link OpMapper}. + * interface. 
+ * + * @param adapter The associated {@link AzureAISearchDriverAdapter} + */ + public AzureAISearchOpMapper(AzureAISearchDriverAdapter adapter) { + this.adapter = adapter; + } + + /** + * Given an instance of a {@link ParsedOp} returns the appropriate + * {@link AzureAISearchBaseOpDispenser} subclass. + * + * @param op The {@link ParsedOp} to be evaluated + * @return The correct {@link AzureAISearchBaseOpDispenser} subclass based on + * the op type + */ + @Override + public OpDispenser> apply(ParsedOp op) { + TypeAndTarget typeAndTarget = op.getTypeAndTarget(AzureAISearchOpType.class, + String.class, "type", "target"); + logger.info(() -> "Using '" + typeAndTarget.enumId + "' op type for op template '" + op.getName() + "'"); + + return switch (typeAndTarget.enumId) { + case delete_index -> new AzureAISearchDeleteIndexOpDispenser(adapter, op, typeAndTarget.targetFunction); + case create_or_update_index -> + new AzureAISearchCreateOrUpdateIndexOpDispenser(adapter, op, typeAndTarget.targetFunction); + case list_indexes -> new AzureAISearchListIndexesOpDispenser(adapter, op, typeAndTarget.targetFunction); + case upload_documents -> new AzureAISearchUploadDocumentsOpDispenser(adapter, op, typeAndTarget.targetFunction); + case search_documents -> new AzureAISearchSearchDocumentsOpDispenser(adapter, op, typeAndTarget.targetFunction); + +// default -> throw new RuntimeException("Unrecognized op type '" + typeAndTarget.enumId.name() + "' while " + +// "mapping parsed op " + op); + }; + } +} diff --git a/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/AzureAISearchSpace.java b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/AzureAISearchSpace.java new file mode 100644 index 000000000..6a76e2f1c --- /dev/null +++ b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/AzureAISearchSpace.java @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed 
/*
 * Copyright (c) 2020-2024 nosqlbench
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.nosqlbench.adapter.azureaisearch;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.credential.TokenCredential;
import com.azure.identity.DefaultAzureCredentialBuilder;
import com.azure.search.documents.SearchServiceVersion;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.indexes.SearchIndexClientBuilder;

import io.nosqlbench.nb.api.config.standard.ConfigModel;
import io.nosqlbench.nb.api.config.standard.NBConfigModel;
import io.nosqlbench.nb.api.config.standard.NBConfiguration;
import io.nosqlbench.nb.api.config.standard.Param;

/**
 * The {@code AzureAISearchSpace} class is a context object which stores all
 * stateful contextual information needed to interact with the
 * {@code Azure AI Search} service instance, most importantly the lazily
 * created {@link SearchIndexClient}.
 */
public class AzureAISearchSpace implements AutoCloseable {
    private final static Logger logger = LogManager.getLogger(AzureAISearchSpace.class);
    private final String name;
    private final NBConfiguration cfg;

    // Lazily initialized by getSearchIndexClient(); guarded by `synchronized`.
    protected SearchIndexClient searchIndexClient;

    /**
     * Create a new {@code AzureAISearchSpace} object which stores all stateful
     * contextual information needed to interact with the Azure AI Search
     * service instance.
     *
     * @param name The name of this space
     * @param cfg  The configuration ({@link NBConfiguration}) for this nb run
     */
    public AzureAISearchSpace(String name, NBConfiguration cfg) {
        this.name = name;
        this.cfg = cfg;
    }

    /**
     * Return the shared {@link SearchIndexClient} for this space, creating it
     * on first use.
     */
    public synchronized SearchIndexClient getSearchIndexClient() {
        if (searchIndexClient == null) {
            searchIndexClient = createSearchClients();
        }
        return searchIndexClient;
    }

    /**
     * Build the {@link SearchIndexClient} from this space's configuration.
     * Credential resolution order: `token_file` (first line of the file), then
     * `token`; a blank token falls back to DefaultAzureCredential.
     */
    private SearchIndexClient createSearchClients() {
        String uri = cfg.get("endpoint");
        var requiredToken = cfg.getOptional("token_file").map(Paths::get).map(tokenFilePath -> {
            try {
                return Files.readAllLines(tokenFilePath).getFirst();
            } catch (IOException e) {
                String error = "Error while reading token from file:" + tokenFilePath;
                logger.error(error, e);
                // Keep the contextual message on the rethrown exception too,
                // not only in the log.
                throw new RuntimeException(error, e);
            }
        }).orElseGet(() -> cfg.getOptional("token").orElseThrow(() -> new RuntimeException(
            "You must provide either a 'token_file' or a 'token' to configure a Azure AI Search client")));

        logger.info("{}: Creating new Azure AI Search Client with (masked) token/key [{}], uri/endpoint [{}]",
            this.name, AzureAISearchAdapterUtils.maskDigits(requiredToken), uri);

        var searchIndexClientBuilder = new SearchIndexClientBuilder().endpoint(uri);
        if (!requiredToken.isBlank()) {
            searchIndexClientBuilder = searchIndexClientBuilder.credential(new AzureKeyCredential(requiredToken));
        } else {
            // Blank token: fall back to the ambient Azure identity chain
            // (env vars, managed identity, az cli, ...).
            TokenCredential tokenCredential = new DefaultAzureCredentialBuilder().build();
            searchIndexClientBuilder = searchIndexClientBuilder.credential(tokenCredential);
        }

        String apiVersion = cfg.getOptional("api_version").orElse(SearchServiceVersion.V2024_07_01.name());
        // The documented example form is 'V2024-07-01', but the enum constants
        // use underscores (V2024_07_01); normalize so valueOf accepts both
        // spellings instead of throwing IllegalArgumentException.
        SearchServiceVersion serviceVersion = SearchServiceVersion.valueOf(apiVersion.replace('-', '_'));
        logger.warn(
            "Latest search service version supported by this client is '{}', but we're using '{}' version. Ignore this warning if both are same.",
            SearchServiceVersion.getLatest(), serviceVersion);
        return searchIndexClientBuilder.serviceVersion(serviceVersion).buildClient();
    }

    /** The configuration options accepted by this space. */
    public static NBConfigModel getConfigModel() {
        return ConfigModel.of(AzureAISearchSpace.class)
            .add(Param.optional("token_file", String.class, "the file to load the api token/key from"))
            .add(Param.defaultTo("token", "azure-aisearch-admin-key-changeme")
                .setDescription("the Azure AI Search api token/key to use to connect to the database"))
            .add(Param.defaultTo("endpoint", "localhost:8080").setDescription(
                "the URI endpoint in which the database is running. Check out https://learn.microsoft.com/en-us/azure/search/search-create-service-portal."))
            .add(Param.optional("api_version", String.class,
                "the api version to be used. Example 'V2024-07-01'. Defaults to latest service version supported by the SDK client version"))
            .asReadOnly();
    }

    @Override
    public void close() throws Exception {
        // NOTE(review): the SDK client exposes no close(); dropping the
        // reference lets a subsequent getSearchIndexClient() rebuild it.
        searchIndexClient = null;
    }
}
/*
 * Copyright (c) 2020-2024 nosqlbench
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.nosqlbench.adapter.azureaisearch.ops;

import java.util.function.LongFunction;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import com.azure.search.documents.indexes.SearchIndexClient;

import io.nosqlbench.adapters.api.activityimpl.uniform.flowtypes.CycleOp;

/**
 * Base class for all Azure AI Search cycle ops. Holds the shared
 * {@link SearchIndexClient} and the op-specific request payload; subclasses
 * implement {@link #applyOp(long)} with the actual service call.
 *
 * @param <T> the request payload type for the concrete op
 */
public abstract class AzureAISearchBaseOp<T> implements CycleOp<Object> {

    protected final static Logger logger = LogManager.getLogger(AzureAISearchBaseOp.class);

    protected final SearchIndexClient searchIndexClient;
    protected final T request;
    // Retained for subclasses that want to substitute the call target.
    protected final LongFunction<Object> apiCall;

    public AzureAISearchBaseOp(SearchIndexClient searchIndexClient, T requestParam) {
        this.searchIndexClient = searchIndexClient;
        this.request = requestParam;
        this.apiCall = this::applyOp;
    }

    public AzureAISearchBaseOp(SearchIndexClient searchIndexClient, T requestParam, LongFunction<Object> call) {
        this.searchIndexClient = searchIndexClient;
        this.request = requestParam;
        this.apiCall = call;
    }

    /**
     * Run the op for this cycle, rethrowing runtime exceptions as-is and
     * wrapping checked ones.
     */
    @Override
    public final Object apply(long value) {
        // Supplier form defers the string concatenation unless TRACE is enabled.
        logger.trace(() -> "applying op: " + this);
        try {
            return applyOp(value);
        } catch (RuntimeException rte) {
            throw rte;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /** Execute the service call for the given cycle value. */
    public abstract Object applyOp(long value);

    @Override
    public String toString() {
        return "AzureAISearchBaseOp(" + this.request.getClass().getSimpleName() + ")";
    }
}
b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchClientBaseOp.java new file mode 100644 index 000000000..0f0be2c8d --- /dev/null +++ b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchClientBaseOp.java @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.nosqlbench.adapter.azureaisearch.ops; + +import com.azure.search.documents.SearchClient; +import com.azure.search.documents.indexes.SearchIndexClient; + +public abstract class AzureAISearchClientBaseOp extends AzureAISearchBaseOp { + protected final SearchClient searchClient; + + public AzureAISearchClientBaseOp(SearchIndexClient searchIdxClient, SearchClient searchClnt, T requestParam) { + super(searchIdxClient, requestParam); + this.searchClient = searchClnt; + } +} \ No newline at end of file diff --git a/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchCreateOrUpdateIndexOp.java b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchCreateOrUpdateIndexOp.java new file mode 100644 index 000000000..e7c09e0f3 --- /dev/null +++ b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchCreateOrUpdateIndexOp.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed 
under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.nosqlbench.adapter.azureaisearch.ops; + +import com.azure.search.documents.indexes.SearchIndexClient; +import com.azure.search.documents.indexes.models.SearchIndex; + +public class AzureAISearchCreateOrUpdateIndexOp extends AzureAISearchBaseOp { + + public AzureAISearchCreateOrUpdateIndexOp(SearchIndexClient client, SearchIndex request) { + super(client, request); + } + + @Override + public Object applyOp(long value) { + SearchIndex createResponse = null; + try { + if (logger.isDebugEnabled()) { + request.getFields().forEach((field) -> { + logger.debug( + ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>SearchIndex: Name:{}-ProfileName:{}-Type:{}-Dimension:{}", + field.getName(), field.getVectorSearchProfileName(), field.getType().toString(), + field.getVectorSearchDimensions()); + }); + } + createResponse = searchIndexClient.createOrUpdateIndex(request); + logger.debug("Successfully created the collection with return code of {}", createResponse.toString()); + } catch (RuntimeException rte) { + throw rte; + } + return createResponse; + } + +} diff --git a/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchDeleteIndexOp.java b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchDeleteIndexOp.java new file mode 100644 index 000000000..c504a4014 --- /dev/null +++ 
b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchDeleteIndexOp.java @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.nosqlbench.adapter.azureaisearch.ops; + +import com.azure.search.documents.indexes.SearchIndexClient; + +public class AzureAISearchDeleteIndexOp extends AzureAISearchBaseOp { + + public AzureAISearchDeleteIndexOp(SearchIndexClient client, String request) { + super(client, request); + } + + @Override + public Object applyOp(long value) { + try { + searchIndexClient.deleteIndex(request); + logger.debug("Successfully deleted the index: {}", request); + } catch (RuntimeException rte) { + throw rte; + } + return "Deleted"; + } + +} \ No newline at end of file diff --git a/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchListIndexesOp.java b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchListIndexesOp.java new file mode 100644 index 000000000..85676fead --- /dev/null +++ b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchListIndexesOp.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.nosqlbench.adapter.azureaisearch.ops; + +import com.azure.core.http.rest.PagedIterable; +import com.azure.search.documents.indexes.SearchIndexClient; +import com.azure.search.documents.indexes.models.SearchIndex; + +public class AzureAISearchListIndexesOp extends AzureAISearchBaseOp { + + public AzureAISearchListIndexesOp(SearchIndexClient client, String request) { + super(client, request); + } + + @Override + public Object applyOp(long value) { + try { + PagedIterable response = searchIndexClient.listIndexes(); + response.forEach((index) -> { + logger.info("Indexes available are: Name: {}, ETag: {}", index.getName(), index.getETag()); + index.getFields().forEach(field -> { + logger.info( + "Field Name: {}, Field isKey?: {}, Field Dimension: {}, Field Vector Search Profile: {}", + field.getName(), field.isKey(), field.getVectorSearchDimensions(), + field.getVectorSearchProfileName()); + }); + }); + } catch (RuntimeException rte) { + throw rte; + } + return "Listed indexes"; + } + +} diff --git a/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchSearchDocumentsOp.java b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchSearchDocumentsOp.java new file mode 100644 index 000000000..af2bd8343 --- /dev/null +++ b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchSearchDocumentsOp.java @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache 
License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.nosqlbench.adapter.azureaisearch.ops; + +import com.azure.core.util.Context; +import com.azure.search.documents.SearchClient; +import com.azure.search.documents.SearchDocument; +import com.azure.search.documents.indexes.SearchIndexClient; +import com.azure.search.documents.models.SearchOptions; +import com.azure.search.documents.util.SearchPagedIterable; + +public class AzureAISearchSearchDocumentsOp extends AzureAISearchClientBaseOp { + + public AzureAISearchSearchDocumentsOp(SearchIndexClient searchIndexClient, SearchClient searchClient, + SearchOptions request) { + super(searchIndexClient, searchClient, request); + } + + @Override + public Object applyOp(long value) { + SearchPagedIterable searchDocsResponse = null; + try { + searchDocsResponse = searchClient.search(null, // we've not implemented other complex searches yet here. 
+ request, + Context.NONE); + if (logger.isInfoEnabled()) { + searchDocsResponse.forEach((r) -> { + SearchDocument doc = r.getDocument(SearchDocument.class); + logger.debug( + "Successfully searched the index and returned id: {}, score: {}, vector embedding: {}", + doc.get("id"), r.getScore(), doc.get("value")); + }); + } + } catch (RuntimeException rte) { + throw rte; + } + return searchDocsResponse; + } + +} diff --git a/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchUploadDocumentsOp.java b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchUploadDocumentsOp.java new file mode 100644 index 000000000..0b4bb59cd --- /dev/null +++ b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/ops/AzureAISearchUploadDocumentsOp.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/*
 * Copyright (c) 2020-2024 nosqlbench
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.nosqlbench.adapter.azureaisearch.ops;

import java.util.List;

import com.azure.search.documents.SearchClient;
import com.azure.search.documents.SearchDocument;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.models.IndexDocumentsResult;

/**
 * Op which uploads one {@link SearchDocument} per cycle to the index-scoped
 * client via {@link SearchClient#uploadDocuments(Iterable)}.
 */
public class AzureAISearchUploadDocumentsOp extends AzureAISearchClientBaseOp<SearchDocument> {

    public AzureAISearchUploadDocumentsOp(SearchIndexClient searchIndexClient, SearchClient searchClient,
        SearchDocument request) {
        super(searchIndexClient, searchClient, request);
    }

    /**
     * @return the {@link IndexDocumentsResult} from the service; runtime
     *         exceptions propagate to the caller.
     */
    @Override
    public Object applyOp(long value) {
        // The previous catch-and-rethrow of RuntimeException was a no-op and
        // has been removed; AzureAISearchBaseOp.apply already handles errors.
        // (Stale commented-out field-dump debug code was deleted.)
        IndexDocumentsResult uploadDocsResponse = searchClient.uploadDocuments(List.of(request));
        if (logger.isDebugEnabled()) {
            // Message corrected: this op uploads documents, it does not create
            // a collection.
            uploadDocsResponse.getResults().forEach(r -> logger.debug(
                "Successfully uploaded document with status code: {}, key: {}, succeeded?: {}, error message: {}",
                r.getStatusCode(), r.getKey(), r.isSucceeded(), r.getErrorMessage()));
        }
        return uploadDocsResponse;
    }
}
/*
 * Copyright (c) 2020-2024 nosqlbench
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.nosqlbench.adapter.azureaisearch.opsdispenser;

import java.util.function.LongFunction;

import com.azure.search.documents.indexes.SearchIndexClient;

import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.AzureAISearchSpace;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapters.api.activityimpl.BaseOpDispenser;
import io.nosqlbench.adapters.api.activityimpl.uniform.DriverAdapter;
import io.nosqlbench.adapters.api.templating.ParsedOp;

/**
 * Common scaffolding for all Azure AI Search op dispensers: resolves the
 * per-cycle {@link AzureAISearchSpace}, derives a {@link SearchIndexClient}
 * from it, and composes the parameter and op construction functions supplied
 * by concrete subclasses.
 *
 * NOTE(review): generic parameters were reconstructed; the review copy of this
 * diff had type arguments stripped — confirm against the repository.
 *
 * @param <T> the per-op parameter type produced by {@link #getParamFunc}
 */
public abstract class AzureAISearchBaseOpDispenser<T>
        extends BaseOpDispenser<AzureAISearchBaseOp<T>, AzureAISearchSpace> {

    protected final LongFunction<AzureAISearchSpace> azureAISearchSpaceFunction;
    protected final LongFunction<SearchIndexClient> clientFunction;
    private final LongFunction<AzureAISearchBaseOp<T>> opF;
    private final LongFunction<T> paramF;

    @SuppressWarnings("rawtypes")
    protected AzureAISearchBaseOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
            LongFunction<String> targetF) {
        super((DriverAdapter) adapter, op);
        this.azureAISearchSpaceFunction = adapter.getSpaceFunc(op);
        this.clientFunction = l -> {
            try {
                return this.azureAISearchSpaceFunction.apply(l).getSearchIndexClient();
            } catch (Exception e) {
                // Previously this printed the stack trace and returned null, which
                // only deferred the failure to an NPE later in the cycle. Fail fast
                // with context instead so the error surfaces at its real cause.
                throw new RuntimeException("Unable to obtain SearchIndexClient for cycle " + l, e);
            }
        };
        this.paramF = getParamFunc(this.clientFunction, op, targetF);
        this.opF = createOpFunc(this.paramF, this.clientFunction, op, targetF);
    }

    protected AzureAISearchDriverAdapter getDriverAdapter() {
        return (AzureAISearchDriverAdapter) adapter;
    }

    /** Builds the per-cycle op parameter (e.g. a SearchIndex or SearchOptions). */
    public abstract LongFunction<T> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
            LongFunction<String> targetF);

    /** Builds the per-cycle executable op from the parameter function. */
    public abstract LongFunction<AzureAISearchBaseOp<T>> createOpFunc(LongFunction<T> paramF,
            LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF);

    @Override
    public AzureAISearchBaseOp<T> getOp(long value) {
        return opF.apply(value);
    }

}
package io.nosqlbench.adapter.azureaisearch.opsdispenser;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.LongFunction;

import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.indexes.models.BinaryQuantizationCompression;
import com.azure.search.documents.indexes.models.ExhaustiveKnnAlgorithmConfiguration;
import com.azure.search.documents.indexes.models.ExhaustiveKnnParameters;
import com.azure.search.documents.indexes.models.HnswAlgorithmConfiguration;
import com.azure.search.documents.indexes.models.HnswParameters;
import com.azure.search.documents.indexes.models.ScalarQuantizationCompression;
import com.azure.search.documents.indexes.models.ScalarQuantizationParameters;
import com.azure.search.documents.indexes.models.SearchField;
import com.azure.search.documents.indexes.models.SearchFieldDataType;
import com.azure.search.documents.indexes.models.SearchIndex;
import com.azure.search.documents.indexes.models.VectorSearch;
import com.azure.search.documents.indexes.models.VectorSearchAlgorithmConfiguration;
import com.azure.search.documents.indexes.models.VectorSearchAlgorithmMetric;
import com.azure.search.documents.indexes.models.VectorSearchCompression;
import com.azure.search.documents.indexes.models.VectorSearchCompressionTarget;
import com.azure.search.documents.indexes.models.VectorSearchProfile;

import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchCreateOrUpdateIndexOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.nosqlbench.nb.api.errors.OpConfigError;

/**
 * Builds a {@link SearchIndex} definition ("fields" and "vectorSearch"
 * sections) from the op template and dispenses create-or-update index ops.
 *
 * Fixes over the previous revision:
 * - All builder state is local; the old instance fields (searchField,
 *   vsProfile) were mutated inside per-cycle lambdas, which was
 *   order-dependent and unsafe under concurrent dispensing.
 * - Multiple compressions/algorithms/profiles now accumulate; previously each
 *   map entry replaced the whole list, so only the last entry survived.
 * - hnswParameters are read from the params sub-map (previously the outer
 *   config map was scanned, so m/efConstruction/efSearch/metric never applied,
 *   and a fresh HnswParameters per key kept only the last value).
 * - exhaustiveKnn metric is read from the "metric" key (was "quantizedDataType",
 *   a copy-paste bug).
 * - Removed no-op getKind().fromString(...) calls whose results were discarded.
 *
 * NOTE(review): generic parameters were reconstructed; the review copy of this
 * diff had type arguments stripped — confirm against the repository.
 *
 * @see <a href="https://learn.microsoft.com/en-us/rest/api/searchservice/indexes/create-or-update">Create or Update Index REST API</a>
 */
public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBaseOpDispenser<SearchIndex> {

    public AzureAISearchCreateOrUpdateIndexOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
            LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    @SuppressWarnings("rawtypes")
    @Override
    public LongFunction<SearchIndex> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
            LongFunction<String> targetF) {
        LongFunction<SearchIndex> ebF = l -> new SearchIndex(targetF.apply(l));

        Optional<LongFunction<Map>> fieldsMapF = op.getAsOptionalFunction("fields", Map.class);
        if (fieldsMapF.isPresent()) {
            final LongFunction<List<SearchField>> searchFieldListF = buildFieldsStruct(op);
            final LongFunction<SearchIndex> fieldsF = ebF;
            ebF = l -> fieldsF.apply(l).setFields(searchFieldListF.apply(l));
        }

        Optional<LongFunction<Map>> vsearchMapF = op.getAsOptionalFunction("vectorSearch", Map.class);
        if (vsearchMapF.isPresent()) {
            final LongFunction<VectorSearch> vSearchF = buildVectorSearchStruct(op);
            final LongFunction<SearchIndex> vsF = ebF;
            ebF = l -> vsF.apply(l).setVectorSearch(vSearchF.apply(l));
        }

        final LongFunction<SearchIndex> lastF = ebF;
        return lastF;
    }

    /** Assembles the whole "vectorSearch" section: compressions, algorithms, profiles. */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private LongFunction<VectorSearch> buildVectorSearchStruct(ParsedOp op) {
        Optional<LongFunction<Map>> baseFunc = op.getAsOptionalFunction("vectorSearch", Map.class);
        return baseFunc.<LongFunction<VectorSearch>>map(mapLongFunc -> l -> {
            Map<String, Object> vsMap = mapLongFunc.apply(l);
            VectorSearch vectorSearch = new VectorSearch();
            vsMap.forEach((vsField, vsValue) -> {
                if (!(vsValue instanceof Map)) {
                    throw new OpConfigError(
                            "Vector Search properties must be a Map<String, Map<String, Object>>, but got "
                                    + vsValue.getClass().getSimpleName() + " instead for the inner value");
                }
                Map<String, Object> section = (Map<String, Object>) vsValue;
                if ("compressions".equals(vsField)) {
                    // Accumulate every named compression; the old code replaced the
                    // list on each entry so only the last one survived.
                    List<VectorSearchCompression> comps = new ArrayList<>();
                    section.forEach((name, cfg) -> comps.add(buildCompression(name, (Map<String, Object>) cfg)));
                    vectorSearch.setCompressions(comps);
                }
                if ("algorithms".equals(vsField)) {
                    List<VectorSearchAlgorithmConfiguration> algos = new ArrayList<>();
                    section.forEach((name, cfg) -> algos.add(buildAlgorithm(name, (Map<String, Object>) cfg)));
                    vectorSearch.setAlgorithms(algos);
                }
                if ("profiles".equals(vsField)) {
                    List<VectorSearchProfile> profiles = new ArrayList<>();
                    section.forEach((name, cfg) -> profiles.add(buildProfile(name, (Map<String, Object>) cfg)));
                    vectorSearch.setProfiles(profiles);
                }
            });
            return vectorSearch;
        }).orElse(null);
    }

    /** Builds one compression config; kind selects scalar vs binary quantization. */
    private VectorSearchCompression buildCompression(String name, Map<String, Object> cfg) {
        String kind = (String) cfg.get("kind");
        if (kind == null) {
            // No kind given: fall back to the generic compression type, as before.
            VectorSearchCompression comp = new VectorSearchCompression(name);
            applyCommonCompressionSettings(comp, cfg);
            return comp;
        }
        if ("scalarQuantization".equals(kind)) {
            ScalarQuantizationCompression sq = new ScalarQuantizationCompression(name);
            applyCommonCompressionSettings(sq, cfg);
            Object sqParamsCfg = cfg.get("scalarQuantizationParameters");
            if (sqParamsCfg instanceof Map) {
                ScalarQuantizationParameters sqParams = new ScalarQuantizationParameters();
                Object qdt = ((Map<?, ?>) sqParamsCfg).get("quantizedDataType");
                if (qdt != null) {
                    sqParams.setQuantizedDataType(VectorSearchCompressionTarget.fromString((String) qdt));
                }
                sq.setParameters(sqParams);
            }
            return sq;
        }
        // Any other explicit kind was previously treated as binary quantization.
        BinaryQuantizationCompression bq = new BinaryQuantizationCompression(name);
        applyCommonCompressionSettings(bq, cfg);
        return bq;
    }

    /** Settings shared by all compression kinds. */
    private void applyCommonCompressionSettings(VectorSearchCompression comp, Map<String, Object> cfg) {
        if (cfg.containsKey("rerankWithOriginalVectors")) {
            comp.setRerankWithOriginalVectors((Boolean) cfg.get("rerankWithOriginalVectors"));
        }
        if (cfg.containsKey("defaultOversampling")) {
            comp.setDefaultOversampling(((Number) cfg.get("defaultOversampling")).doubleValue());
        }
    }

    /** Builds one algorithm config; kind selects hnsw vs exhaustiveKnn. */
    @SuppressWarnings("unchecked")
    private VectorSearchAlgorithmConfiguration buildAlgorithm(String name, Map<String, Object> cfg) {
        String kind = (String) cfg.get("kind");
        if ("hnsw".equals(kind)) {
            HnswAlgorithmConfiguration conf = new HnswAlgorithmConfiguration(name);
            Object params = cfg.get("hnswParameters");
            if (params instanceof Map) {
                // One shared HnswParameters so every key accumulates onto it.
                HnswParameters hp = new HnswParameters();
                ((Map<String, Object>) params).forEach((k, v) -> {
                    if ("m".equals(k)) {
                        hp.setM(((Number) v).intValue());
                    }
                    if ("efConstruction".equals(k)) {
                        hp.setEfConstruction(((Number) v).intValue());
                    }
                    if ("efSearch".equals(k)) {
                        hp.setEfSearch(((Number) v).intValue());
                    }
                    if ("metric".equals(k)) {
                        hp.setMetric(VectorSearchAlgorithmMetric.fromString((String) v));
                    }
                });
                conf.setParameters(hp);
            }
            return conf;
        }
        if ("exhaustiveKnn".equals(kind)) {
            ExhaustiveKnnAlgorithmConfiguration conf = new ExhaustiveKnnAlgorithmConfiguration(name);
            Object params = cfg.get("exhaustiveKnnParameters");
            if (params instanceof Map) {
                ExhaustiveKnnParameters ep = new ExhaustiveKnnParameters();
                // REST schema uses "metric" here; the old code looked up
                // "quantizedDataType", so the metric was never applied.
                Object metric = ((Map<?, ?>) params).get("metric");
                if (metric != null) {
                    ep.setMetric(VectorSearchAlgorithmMetric.fromString((String) metric));
                }
                conf.setParameters(ep);
            }
            return conf;
        }
        throw new OpConfigError("Unsupported vector search algorithm kind '" + kind + "' for '" + name + "'");
    }

    /** Builds one vector search profile referencing an algorithm and optional compression. */
    private VectorSearchProfile buildProfile(String name, Map<String, Object> cfg) {
        String algorithm = (String) cfg.get("algorithm");
        if (algorithm == null) {
            // The old code only constructed the profile when "algorithm" happened to
            // be visited first; require it explicitly instead.
            throw new OpConfigError("Vector search profile '" + name + "' requires an 'algorithm' reference");
        }
        VectorSearchProfile profile = new VectorSearchProfile(name, algorithm);
        if (cfg.containsKey("compression")) {
            profile.setCompressionName((String) cfg.get("compression"));
        }
        return profile;
    }

    /** Builds the "fields" section into a list of {@link SearchField}s. */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private LongFunction<List<SearchField>> buildFieldsStruct(ParsedOp op) {
        Optional<LongFunction<Map>> baseFunc = op.getAsOptionalFunction("fields", Map.class);
        return baseFunc.<LongFunction<List<SearchField>>>map(mapLongFunc -> l -> {
            Map<String, Object> fMap = mapLongFunc.apply(l);
            List<SearchField> fieldsList = new ArrayList<>();
            fMap.forEach((fName, fValue) -> {
                if (!(fValue instanceof Map)) {
                    throw new OpConfigError("Fields properties must be a Map<String, Map<String, Object>>, but got "
                            + fValue.getClass().getSimpleName() + " instead for the inner value");
                }
                Map<String, Object> fCfg = (Map<String, Object>) fValue;
                String type = (String) fCfg.get("type");
                if (type == null) {
                    throw new OpConfigError("Field '" + fName + "' requires a 'type' property");
                }
                SearchField field = new SearchField(fName, SearchFieldDataType.fromString(type));
                if (fCfg.containsKey("key")) {
                    field.setKey((Boolean) fCfg.get("key"));
                }
                if (fCfg.containsKey("dimensions")) {
                    field.setVectorSearchDimensions(((Number) fCfg.get("dimensions")).intValue());
                }
                if (fCfg.containsKey("vectorSearchProfile")) {
                    field.setVectorSearchProfileName((String) fCfg.get("vectorSearchProfile"));
                }
                if (fCfg.containsKey("filterable")) {
                    field.setFilterable((Boolean) fCfg.get("filterable"));
                }
                if (fCfg.containsKey("sortable")) {
                    field.setSortable((Boolean) fCfg.get("sortable"));
                }
                if (fCfg.containsKey("searchable")) {
                    field.setSearchable((Boolean) fCfg.get("searchable"));
                }
                if (fCfg.containsKey("facetable")) {
                    field.setFacetable((Boolean) fCfg.get("facetable"));
                }
                // "retrievable" is intentionally ignored: the service derives it from
                // the 'hidden' property.
                if (fCfg.containsKey("hidden")) {
                    field.setHidden((Boolean) fCfg.get("hidden"));
                }
                fieldsList.add(field);
            });
            return fieldsList;
        }).orElse(null);
    }

    @Override
    public LongFunction<AzureAISearchBaseOp<SearchIndex>> createOpFunc(LongFunction<SearchIndex> paramF,
            LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return l -> new AzureAISearchCreateOrUpdateIndexOp(clientF.apply(l), paramF.apply(l));
    }

}
+ */ +public class AzureAISearchDeleteIndexOpDispenser extends AzureAISearchBaseOpDispenser { + + public AzureAISearchDeleteIndexOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op, + LongFunction targetF) { + super(adapter, op, targetF); + } + + @Override + public LongFunction getParamFunc(LongFunction clientF, ParsedOp op, + LongFunction targetF) { + return l -> targetF.apply(l); + } + + @Override + public LongFunction> createOpFunc(LongFunction paramF, + LongFunction clientF, ParsedOp op, LongFunction targetF) { + return l -> new AzureAISearchDeleteIndexOp(clientF.apply(l), paramF.apply(l)); + } + +} diff --git a/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/opsdispenser/AzureAISearchListIndexesOpDispenser.java b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/opsdispenser/AzureAISearchListIndexesOpDispenser.java new file mode 100644 index 000000000..eecee9bdf --- /dev/null +++ b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/opsdispenser/AzureAISearchListIndexesOpDispenser.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package io.nosqlbench.adapter.azureaisearch.opsdispenser;

import java.util.function.LongFunction;

import com.azure.search.documents.indexes.SearchIndexClient;

import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchListIndexesOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;

/**
 * Dispenses ops that enumerate the indexes visible to the configured client.
 *
 * NOTE(review): generic parameters were reconstructed; the review copy of this
 * diff had type arguments stripped — confirm against the repository.
 */
public class AzureAISearchListIndexesOpDispenser extends AzureAISearchBaseOpDispenser<String> {

    public AzureAISearchListIndexesOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
            LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    @Override
    public LongFunction<String> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
            LongFunction<String> targetF) {
        // Listing needs no extra parameters beyond the templated target name.
        return targetF::apply;
    }

    @Override
    public LongFunction<AzureAISearchBaseOp<String>> createOpFunc(LongFunction<String> paramF,
            LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return cycle -> new AzureAISearchListIndexesOp(clientF.apply(cycle), paramF.apply(cycle));
    }

}
/*
 * Copyright (c) 2020-2024 nosqlbench
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.nosqlbench.adapter.azureaisearch.opsdispenser;

import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.LongFunction;

import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.models.SearchOptions;
import com.azure.search.documents.models.VectorQuery;
import com.azure.search.documents.models.VectorSearchOptions;
import com.azure.search.documents.models.VectorizedQuery;

import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchSearchDocumentsOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.nosqlbench.nb.api.errors.OpConfigError;

/**
 * Dispenses vector-search query ops. The op template supplies a required
 * 'select' projection, an optional 'count' flag, and a required
 * 'vectorQueries' map carrying the raw vector plus optional
 * exhaustive/fields/weight/k settings.
 *
 * NOTE(review): generic parameters were reconstructed; the review copy of this
 * diff had type arguments stripped — confirm against the repository.
 *
 * @see <a href="https://learn.microsoft.com/en-us/rest/api/searchservice/documents/search-get?view=rest-searchservice-2024-07-01&tabs=HTTP#rawvectorquery">Raw vector query reference</a>
 */
public class AzureAISearchSearchDocumentsOpDispenser extends AzureAISearchBaseOpDispenser<SearchOptions> {

    public AzureAISearchSearchDocumentsOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
            LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    @Override
    public LongFunction<SearchOptions> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
            LongFunction<String> targetF) {
        LongFunction<SearchOptions> optionsF = l -> new SearchOptions();

        // Optional: ask the service to report the total matching count.
        Optional<LongFunction<Boolean>> countF = op.getAsOptionalFunction("count", Boolean.class);
        if (countF.isPresent()) {
            final LongFunction<SearchOptions> beforeCount = optionsF;
            optionsF = l -> beforeCount.apply(l).setIncludeTotalCount(countF.get().apply(l));
        }

        // Required: the projection of fields to return.
        LongFunction<String> selectF = op.getAsRequiredFunction("select", String.class);
        final LongFunction<SearchOptions> beforeSelect = optionsF;
        optionsF = l -> beforeSelect.apply(l).setSelect(selectF.apply(l));

        // Vector query settings, resolved per cycle (validation errors surface
        // at cycle time, matching the original flow).
        final LongFunction<SearchOptions> beforeVector = optionsF;
        optionsF = l -> beforeVector.apply(l).setVectorSearchOptions(buildVectorSearchOptionsStruct(op).apply(l));

        return optionsF;
    }

    @Override
    public LongFunction<AzureAISearchBaseOp<SearchOptions>> createOpFunc(LongFunction<SearchOptions> paramF,
            LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return l -> new AzureAISearchSearchDocumentsOp(clientF.apply(l),
                clientF.apply(l).getSearchClient(targetF.apply(l)), paramF.apply(l));
    }

    /** Translates the 'vectorQueries' map of the op template into SDK options. */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    private LongFunction<VectorSearchOptions> buildVectorSearchOptionsStruct(ParsedOp op) {
        if (!op.isDefined("vectorQueries")) {
            throw new OpConfigError("Must provide values for 'vectorQueries' in 'search_documents' op");
        }
        Optional<LongFunction<Map>> queriesF = op.getAsOptionalFunction("vectorQueries", Map.class);
        return queriesF.<LongFunction<VectorSearchOptions>>map(mf -> l -> {
            Map<String, Object> q = mf.apply(l);
            if (!q.containsKey("vector")) {
                throw new OpConfigError(
                        "Must provide list of float values for 'vector' field within 'vectorQueries' of 'search_documents' op");
            }
            VectorQuery query = new VectorizedQuery((List<Float>) q.get("vector"));
            if (q.containsKey("exhaustive")) {
                query.setExhaustive((Boolean) q.get("exhaustive"));
            }
            if (q.containsKey("fields")) {
                query.setFields((String) q.get("fields"));
            }
            if (q.containsKey("weight")) {
                query.setWeight(((Number) q.get("weight")).floatValue());
            }
            if (q.containsKey("k")) {
                query.setKNearestNeighborsCount(((Number) q.get("k")).intValue());
            }
            VectorSearchOptions vectorOptions = new VectorSearchOptions();
            vectorOptions.setQueries(query);
            return vectorOptions;
        }).orElse(null);
    }

}
package io.nosqlbench.adapter.azureaisearch.opsdispenser;

import java.util.Map;
import java.util.function.LongFunction;

import com.azure.search.documents.SearchDocument;
import com.azure.search.documents.indexes.SearchIndexClient;

import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchUploadDocumentsOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;

/**
 * Dispenses document-upload ops: the required 'fields' map of the op template
 * is copied verbatim into a {@link SearchDocument}.
 *
 * NOTE(review): generic parameters were reconstructed; the review copy of this
 * diff had type arguments stripped — confirm against the repository.
 */
public class AzureAISearchUploadDocumentsOpDispenser extends AzureAISearchBaseOpDispenser<SearchDocument> {

    public AzureAISearchUploadDocumentsOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
            LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    @SuppressWarnings("rawtypes")
    @Override
    public LongFunction<SearchDocument> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
            LongFunction<String> targetF) {
        // 'fields' is required; each key/value pair becomes a document property.
        // The previous initial lambda (l -> new SearchDocument()) was dead code:
        // it was unconditionally overwritten before use.
        LongFunction<Map> fieldsMapF = op.getAsRequiredFunction("fields", Map.class);
        return buildFieldsStruct(fieldsMapF);
    }

    @Override
    public LongFunction<AzureAISearchBaseOp<SearchDocument>> createOpFunc(LongFunction<SearchDocument> paramF,
            LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return l -> new AzureAISearchUploadDocumentsOp(clientF.apply(l),
                clientF.apply(l).getSearchClient(targetF.apply(l)), paramF.apply(l));
    }

    /** Copies the per-cycle fields map into a fresh SearchDocument. */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private LongFunction<SearchDocument> buildFieldsStruct(LongFunction<Map> fieldsFunction) {
        return l -> {
            SearchDocument doc = new SearchDocument();
            // SearchDocument is a Map implementation; bulk-copy instead of
            // the previous per-entry forEach.
            doc.putAll(fieldsFunction.apply(l));
            return doc;
        };
    }

}
b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/types/AzureAISearchOpType.java new file mode 100644 index 000000000..d610e69dc --- /dev/null +++ b/nb-adapters/adapter-azure-aisearch/src/main/java/io/nosqlbench/adapter/azureaisearch/types/AzureAISearchOpType.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.adapter.azureaisearch.types; + +public enum AzureAISearchOpType { + create_or_update_index, + delete_index, + list_indexes, + upload_documents, + search_documents, +} \ No newline at end of file diff --git a/nb-adapters/adapter-azure-aisearch/src/main/resources/activities/azure_aisearch_vectors_live.yaml b/nb-adapters/adapter-azure-aisearch/src/main/resources/activities/azure_aisearch_vectors_live.yaml new file mode 100644 index 000000000..3df79fab6 --- /dev/null +++ b/nb-adapters/adapter-azure-aisearch/src/main/resources/activities/azure_aisearch_vectors_live.yaml @@ -0,0 +1,174 @@ +min_version: 5.21 +description: | + This is a template for live vector search testing. + Template Variables: + + schema: Install the schema required to run the test + rampup: Measure how long it takes to load a set of embeddings + search: Measure how the system responds to queries while it + is indexing recently ingested data. 
+ search: Run vector search with a set of default (or overridden) parameters + In all of these phases, it is important to instance the metrics with distinct names. + Also, aggregates of recall should include total aggregate as well as a moving average. + TEMPLATE(token_file) + TEMPLATE(token) + +scenarios: + azure_aisearch_vectors: + delete_index: >- + run tags==block:delete_index + errors===stop + cycles===UNDEF threads===UNDEF + endpoint=TEMPLATE(azureaisearchhost) token_file=TEMPLATE(token_file) + + create_or_update_index: >- + run tags==block:create_or_update_index + errors===stop + cycles===UNDEF threads===UNDEF + endpoint=TEMPLATE(azureaisearchhost) token_file=TEMPLATE(token_file) + + list_indexes: >- + run tags==block:list_indexes + errors===stop + cycles===UNDEF threads===UNDEF + endpoint=TEMPLATE(azureaisearchhost) token_file=TEMPLATE(token_file) + + upload_documents: >- + run tags==block:upload_documents + errors===warn,counter + cycles===TEMPLATE(train_cycles,TEMPLATE(trainsize,1000)) threads===TEMPLATE(train_threads,AUTO) + token_file===TEMPLATE(token_file) endpoint===TEMPLATE(azureaisearchhost) + + search_documents: >- + run tags==block:search_documents + errors===warn,counter + cycles===TEMPLATE(testann_cycles,TEMPLATE(testsize,1000)) threads===TEMPLATE(testann_threads,AUTO) + endpoint=TEMPLATE(azureaisearchhost) token_file=TEMPLATE(token_file) + +params: + driver: azure_aisearch + instrument: true + +bindings: + id_val: Identity(); + id_val_uuid: ToHashedUUID() -> java.util.UUID + row_key: ToString() + row_key_batch: Mul(TEMPLATE(batch_size)L); ListSizedStepped(TEMPLATE(batch_size),long->ToString()); + # filetype=hdf5 for TEMPLATE(filetype,hdf5) + test_floatlist_hdf5: HdfFileToFloatList("local/testdata/TEMPLATE(dataset).hdf5", "/test"); + relevant_indices_hdf5: HdfFileToIntArray("local/testdata/TEMPLATE(dataset).hdf5", "/neighbors") + distance_floatlist_hdf5: HdfFileToFloatList("testdata/TEMPLATE(dataset).hdf5", "/distance") + # TODO - remove the 
'local' keyword in path + train_floatlist_hdf5: HdfFileToFloatList("local/testdata/TEMPLATE(dataset).hdf5", "/train"); + train_floatlist_hdf5_batch: Mul(TEMPLATE(batch_size)L); ListSizedStepped(TEMPLATE(batch_size),HdfFileToFloatList("testdata/TEMPLATE(dataset).hdf5", "/train")); + # filetype=fvec for TEMPLATE(filetype,fvec) + test_floatlist_fvec: FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_query_vectors.fvec"); + relevant_indices_fvec: IVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_indices_query.ivec"); + distance_floatlist_fvec: FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(testsize)_distances_count.fvec",TEMPLATE(dimensions),0); + train_floatlist_fvec: FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_base_vectors.fvec",TEMPLATE(dimensions),0); + train_floatlist_fvec_batch: Mul(TEMPLATE(batch_size,10)L); ListSizedStepped(TEMPLATE(batch_size),FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_base_vectors.fvec",TEMPLATE(dimensions),0)); + +############################################## +# NOTE: An Azure AI Search index name must start and end with alphanumeric characters and contain only lowercase letters, digits or dashes. 
+############################################## + +blocks: + delete_index: + ops: + # https://learn.microsoft.com/en-us/rest/api/searchservice/indexes/delete?view=rest-searchservice-2024-07-01&tabs=HTTP + delete_idx_op: + delete_index: "TEMPLATE(collection)" + + create_or_update_index: + ops: + # https://learn.microsoft.com/en-us/rest/api/searchservice/indexes/create-or-update?view=rest-searchservice-2024-07-01&tabs=HTTP + create_or_update_index_op: + create_or_update_index: "TEMPLATE(collection)" + fields: + id: + type: "Edm.String" # Data types - https://learn.microsoft.com/en-us/rest/api/searchservice/supported-data-types#edm-data-types-for-vector-fields + key: true + filterable: true + sortable: true + searchable: true + facetable: false + retrievable: true + hidden: false + value: + type: "Collection(Edm.Single)" + dimensions: TEMPLATE(dimensions) + vectorSearchProfile: "vector-profile-hnsw-scalar-1" + hidden: false + searchable: true + retrievable: true + filterable: false + sortable: false + facetable: false + vectorSearch: + compressions: + scalar-quantization-1: + kind: "scalarQuantization" + rerankWithOriginalVectors: true + defaultOversampling: 1 + scalarQuantizationParameters: + quantizedDataType: "int8" + algorithms: + hnsw-sq-1: + kind: "hnsw" # or "exhaustiveKnn" + hnswParameters: + m: 32 + efConstruction: 100 + efSearch: 100 + metric: "TEMPLATE(similarity_function)" + #exhaustiveKnnParameters: + #metric: "TEMPLATE(similarity_function)" + profiles: + vector-profile-hnsw-scalar-1: + algorithm: "hnsw-sq-1" + compression: "scalar-quantization-1" + + list_indexes: + ops: + # https://learn.microsoft.com/en-us/rest/api/searchservice/indexes/list?view=rest-searchservice-2024-07-01&tabs=HTTP + list_indexes_op: + list_indexes: "DUMMY_PLACEHOLDER" + + upload_documents: + ops: + upload_documents_op: + upload_documents: "TEMPLATE(collection)" + fields: + id: "{row_key}" + value: "{train_floatlist_TEMPLATE(filetype)}" + + search_documents: + ops: + 
search_documents_op: + search_documents: "TEMPLATE(collection)" + count: false + select: "id, value" + vectorQueries: + kind: "vector" + vector: "{test_floatlist_TEMPLATE(filetype)}" + exhaustive: false + fields: "value" + weight: 1.0 + k: TEMPLATE(select_limit,100) + verifier-init: | + relevancy= new io.nosqlbench.nb.api.engine.metrics.wrappers.RelevancyMeasures(_parsed_op); + for (int k in List.of(100)) { + relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.recall("recall",k)); + relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.precision("precision",k)); + relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.F1("F1",k)); + relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.reciprocal_rank("RR",k)); + relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.average_precision("AP",k)); + } + verifier: | + // driver-specific function + actual_indices=io.nosqlbench.adapter.azureaisearch.AzureAISearchAdapterUtils.responseFieldToStringArray("id",result) + System.out.println("actual_indices ------>>>>: " + actual_indices); + // driver-agnostic function + relevancy.accept({relevant_indices_TEMPLATE(filetype)},actual_indices); + // because we are "verifying" although this needs to be reorganized + return true; + \ No newline at end of file diff --git a/nb-adapters/adapter-azure-aisearch/src/main/resources/azure-ai-search.md b/nb-adapters/adapter-azure-aisearch/src/main/resources/azure-ai-search.md new file mode 100644 index 000000000..e5ac8720a --- /dev/null +++ b/nb-adapters/adapter-azure-aisearch/src/main/resources/azure-ai-search.md @@ -0,0 +1,67 @@ +# Azure AI Search driver adapter +The Azure AI Search driver adapter is a NoSQLBench adapter for the `azure-aisearch` driver, a Java driver +for connecting to and performing operations on an instance of a Azure AI Search vector database. 
The driver is +leveraged from GitHub at https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/search/azure-search-documents/. + +## Run Commands (Remove prior to merge) + +### Create Collection Schema +``` +java -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar weaviate_vector_live weaviate_vectors.rampup dimensions=25 testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=Glove_25 weaviatehost=letsweave-czgwdrw9.weaviate.network token_file=${workspace_loc:/nosqlbench}/local/weaviate/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:weaviate_1255,instance:vectors,vendor:weaviate_wcd" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5 +``` + +### Delete Collection +``` +java -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.delete_index dimensions=25 testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 similarity_function=cosine azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azure_aisearch,instance:vectors,vendor:azure_aisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5 +``` + +### List Indexes +``` +java --enable-preview -jar 
${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.list_indexes dimensions=25 similarity_function=cosine testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azureaisearch,instance:vectors,vendor:azureaisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5 +``` + +### Upload Documents +``` +java --enable-preview -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.upload_documents dimensions=25 similarity_function=cosine testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azureaisearch,instance:vectors,vendor:azureaisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5 +``` + +### Search Documents +``` +java --enable-preview -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.search_documents dimensions=25 
similarity_function=cosine testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azureaisearch,instance:vectors,vendor:azureaisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
+```
+
+
+## Activity Parameters
+
+The following parameters must be supplied to the adapter at runtime in order to successfully connect to an
+instance of the [Azure AI Search database](https://learn.microsoft.com/en-us/rest/api/searchservice/?view=rest-searchservice-2024-07-01):
+
+* `token` - In order to use the Azure AI Search service you must have an account. Once the service instance is created you can [obtain
+  an api key/token](https://learn.microsoft.com/en-us/azure/search/search-security-api-keys). This key will need to be
+  provided any time a database connection is desired. Alternatively, the api key can be stored in a file securely and
+  referenced via the `token_file` config option pointing to the path of the file.
+* `endpoint` - When a collection/index is created in the database the URI (aka endpoint) must be specified as well. The adapter will
+  use the default value of `localhost:8080` if none is provided at runtime.
+* `api_version` - the api version to be used by the search client. Defaults to the latest service/api version supported
+  by the version of client SDK.
+
+## Op Templates
+
+The Azure AI Search adapter supports [**all basic operations**](../java/io/nosqlbench/adapter/azure-aisearch/ops) supported by the [Java
+client SDK published by Azure AI Search](https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/search/azure-search-documents). The official Azure AI Search API reference can be
+found at https://learn.microsoft.com/en-us/rest/api/searchservice/operation-groups?view=rest-searchservice-2024-07-01.
+
+The operations include a full-fledged support for key APIs available in the Java SDK client.
+The following are a couple high level API operations.
+
+* Create or Update Index
+* Delete Index
+* List Indexes
+* Upload Documents (vectors)
+* (Vector) Search Documents (vectors)
+
+## Examples
+
+Check out the [full example workload available here](./activities/azure_aisearch_vectors_live.yaml).
+
+---
diff --git a/nb-adapters/adapter-milvus/pom.xml b/nb-adapters/adapter-milvus/pom.xml
index 0189735b1..001a6e4fe 100644
--- a/nb-adapters/adapter-milvus/pom.xml
+++ b/nb-adapters/adapter-milvus/pom.xml
@@ -55,7 +55,7 @@
 io.milvus
 milvus-sdk-java
- 2.4.1
+ 2.3.5
diff --git a/nb-adapters/adapter-milvus/src/main/java/io/nosqlbench/adapter/milvus/opdispensers/MilvusOpUtils.java b/nb-adapters/adapter-milvus/src/main/java/io/nosqlbench/adapter/milvus/opdispensers/MilvusOpUtils.java
index 2bdeac342..7039923af 100644
--- a/nb-adapters/adapter-milvus/src/main/java/io/nosqlbench/adapter/milvus/opdispensers/MilvusOpUtils.java
+++ b/nb-adapters/adapter-milvus/src/main/java/io/nosqlbench/adapter/milvus/opdispensers/MilvusOpUtils.java
@@ -16,17 +16,18 @@
-import com.alibaba.fastjson.JSONObject;
-import io.milvus.param.dml.InsertParam;
-import io.nosqlbench.adapters.api.templating.ParsedOp;
-import io.nosqlbench.nb.api.errors.OpConfigError;
-
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.function.LongFunction;

+import com.alibaba.fastjson.JSONObject;
+
+import io.milvus.param.dml.InsertParam; +import io.nosqlbench.adapters.api.templating.ParsedOp; +import io.nosqlbench.nb.api.errors.OpConfigError; + public class MilvusOpUtils { public static Optional>> getHighLevelRowsFunction(ParsedOp op, String opfield) { @@ -69,8 +70,6 @@ public class MilvusOpUtils { ParsedOp valueTemplate = op.getAsSubOp(opfield, ParsedOp.SubOpNaming.SubKey); Map testFieldsValues = valueTemplate.apply(0L); - List> fieldsF = new ArrayList<>(testFieldsValues.size()); - for (String fieldName : testFieldsValues.keySet()) { Object testFieldValue = testFieldsValues.get(fieldName); if (!(testFieldValue instanceof List list)) { diff --git a/nb-adapters/adapter-qdrant/src/main/java/io/nosqlbench/adapter/qdrant/opdispensers/QdrantCreateCollectionOpDispenser.java b/nb-adapters/adapter-qdrant/src/main/java/io/nosqlbench/adapter/qdrant/opdispensers/QdrantCreateCollectionOpDispenser.java index e9d162ed9..0f0805785 100644 --- a/nb-adapters/adapter-qdrant/src/main/java/io/nosqlbench/adapter/qdrant/opdispensers/QdrantCreateCollectionOpDispenser.java +++ b/nb-adapters/adapter-qdrant/src/main/java/io/nosqlbench/adapter/qdrant/opdispensers/QdrantCreateCollectionOpDispenser.java @@ -16,20 +16,37 @@ package io.nosqlbench.adapter.qdrant.opdispensers; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.function.LongFunction; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + import io.nosqlbench.adapter.qdrant.QdrantDriverAdapter; import io.nosqlbench.adapter.qdrant.ops.QdrantBaseOp; import io.nosqlbench.adapter.qdrant.ops.QdrantCreateCollectionOp; import io.nosqlbench.adapters.api.templating.ParsedOp; import io.nosqlbench.nb.api.errors.OpConfigError; import io.qdrant.client.QdrantClient; -import io.qdrant.client.grpc.Collections.*; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.util.HashMap; -import java.util.Map; -import 
java.util.Optional; -import java.util.function.LongFunction; +import io.qdrant.client.grpc.Collections.BinaryQuantization; +import io.qdrant.client.grpc.Collections.CompressionRatio; +import io.qdrant.client.grpc.Collections.CreateCollection; +import io.qdrant.client.grpc.Collections.HnswConfigDiff; +import io.qdrant.client.grpc.Collections.OptimizersConfigDiff; +import io.qdrant.client.grpc.Collections.ProductQuantization; +import io.qdrant.client.grpc.Collections.QuantizationConfig; +import io.qdrant.client.grpc.Collections.QuantizationType; +import io.qdrant.client.grpc.Collections.ScalarQuantization; +import io.qdrant.client.grpc.Collections.ShardingMethod; +import io.qdrant.client.grpc.Collections.SparseIndexConfig; +import io.qdrant.client.grpc.Collections.SparseVectorConfig; +import io.qdrant.client.grpc.Collections.SparseVectorParams; +import io.qdrant.client.grpc.Collections.VectorParams; +import io.qdrant.client.grpc.Collections.VectorParamsMap; +import io.qdrant.client.grpc.Collections.VectorsConfig; +import io.qdrant.client.grpc.Collections.WalConfigDiff; public class QdrantCreateCollectionOpDispenser extends QdrantBaseOpDispenser { private static final Logger logger = LogManager.getLogger(QdrantCreateCollectionOpDispenser.class); @@ -275,43 +292,6 @@ public class QdrantCreateCollectionOpDispenser extends QdrantBaseOpDispenserHNSW Config - */ - @Deprecated - private HnswConfigDiff buildHnswConfigDiff(ParsedOp fieldSpec) { - HnswConfigDiff.Builder hnswConfigBuilder = HnswConfigDiff.newBuilder(); - fieldSpec.getOptionalStaticValue("hnsw_config", Map.class).ifPresent(hnswConfigData -> { - if (hnswConfigData.isEmpty()) { - return; - } else { - if (hnswConfigData.containsKey("ef_construct")) { - hnswConfigBuilder.setEfConstruct(((Number) hnswConfigData.get("ef_construct")).longValue()); - } - if (hnswConfigData.containsKey("m")) { - hnswConfigBuilder.setM(((Number) hnswConfigData.get("m")).intValue()); - } - if 
(hnswConfigData.containsKey("full_scan_threshold")) { - hnswConfigBuilder.setFullScanThreshold(((Number) hnswConfigData.get("full_scan_threshold")).intValue()); - } - if (hnswConfigData.containsKey("max_indexing_threads")) { - hnswConfigBuilder.setMaxIndexingThreads(((Number) hnswConfigData.get("max_indexing_threads")).intValue()); - } - if (hnswConfigData.containsKey("on_disk")) { - hnswConfigBuilder.setOnDisk((Boolean) hnswConfigData.get("on_disk")); - } - if (hnswConfigData.containsKey("payload_m")) { - hnswConfigBuilder.setPayloadM(((Number) hnswConfigData.get("payload_m")).intValue()); - } - } - }); - return hnswConfigBuilder.build(); - } - private LongFunction buildHnswConfigDiff(LongFunction hnswConfigDiffMapLongFunc) { return l -> this.buildHnswConfigDiff(hnswConfigDiffMapLongFunc.apply(l)); } diff --git a/nb-adapters/adapter-weaviate/pom.xml b/nb-adapters/adapter-weaviate/pom.xml new file mode 100644 index 000000000..6df5514d6 --- /dev/null +++ b/nb-adapters/adapter-weaviate/pom.xml @@ -0,0 +1,40 @@ + + + 4.0.0 + + adapter-weaviate + jar + + + mvn-defaults + io.nosqlbench + ${revision} + ../../mvn-defaults + + + ${project.artifactId} + + An nosqlbench adapter driver module for the Weaviate database. + + + + + io.nosqlbench + nb-annotations + ${revision} + compile + + + io.nosqlbench + adapters-api + ${revision} + compile + + + io.weaviate + client + + + diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/WeaviateAdapterUtils.java b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/WeaviateAdapterUtils.java new file mode 100644 index 000000000..7d42eabb5 --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/WeaviateAdapterUtils.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.nosqlbench.adapter.weaviate; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.StringUtils; + +public class WeaviateAdapterUtils { + public static final String WEAVIATE = "weaviate"; + + public static List splitNames(String input) { + assert StringUtils.isNotBlank(input) && StringUtils.isNotEmpty(input); + return Arrays.stream(input.split("( +| *, *)")).filter(StringUtils::isNotBlank).toList(); + } + + public static List splitLongs(String input) { + assert StringUtils.isNotBlank(input) && StringUtils.isNotEmpty(input); + return Arrays.stream(input.split("( +| *, *)")).filter(StringUtils::isNotBlank).map(Long::parseLong).toList(); + } + + /** + * Mask the digits in the given string with '*' + * + * @param unmasked The string to mask + * @return The masked string + */ + protected static String maskDigits(String unmasked) { + assert StringUtils.isNotBlank(unmasked) && StringUtils.isNotEmpty(unmasked); + int inputLength = unmasked.length(); + StringBuilder masked = new StringBuilder(inputLength); + for (char ch : unmasked.toCharArray()) { + if (Character.isDigit(ch)) { + masked.append("*"); + } else { + masked.append(ch); + } + } + return masked.toString(); + } + +// public static int[] intArrayFromMilvusSearchResults(String fieldName, R result) { +// SearchResultsWrapper wrapper = new SearchResultsWrapper(result.getData().getResults()); +// List fieldData = (List) wrapper.getFieldData(fieldName, 0); +// int[] indices = new int[fieldData.size()]; +// for (int i = 0; i < indices.length; i++) { +// 
indices[i] = Integer.parseInt(fieldData.get(i)); +// } +// return indices; +// } +} diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/WeaviateDriverAdapter.java b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/WeaviateDriverAdapter.java new file mode 100644 index 000000000..cbb3c1214 --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/WeaviateDriverAdapter.java @@ -0,0 +1,57 @@ + +/* + * Copyright (c) 2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.nosqlbench.adapter.weaviate; + + +import static io.nosqlbench.adapter.weaviate.WeaviateAdapterUtils.WEAVIATE; + +import java.util.function.Function; + +import io.nosqlbench.adapter.weaviate.ops.WeaviateBaseOp; +import io.nosqlbench.adapters.api.activityimpl.OpMapper; +import io.nosqlbench.adapters.api.activityimpl.uniform.BaseDriverAdapter; +import io.nosqlbench.adapters.api.activityimpl.uniform.DriverAdapter; +import io.nosqlbench.nb.annotations.Service; +import io.nosqlbench.nb.api.components.core.NBComponent; +import io.nosqlbench.nb.api.config.standard.NBConfigModel; +import io.nosqlbench.nb.api.config.standard.NBConfiguration; +import io.nosqlbench.nb.api.labels.NBLabels; + +@Service(value = DriverAdapter.class, selector = WEAVIATE) +public class WeaviateDriverAdapter extends BaseDriverAdapter, WeaviateSpace> { + + public WeaviateDriverAdapter(NBComponent parentComponent, NBLabels labels) { + super(parentComponent, labels); + } + + @Override + public OpMapper> getOpMapper() { + return new WeaviateOpMapper(this); + } + + @Override + public Function getSpaceInitializer(NBConfiguration cfg) { + return (s) -> new WeaviateSpace(s, cfg); + } + + @Override + public NBConfigModel getConfigModel() { + return super.getConfigModel().add(WeaviateSpace.getConfigModel()); + } + +} + diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/WeaviateDriverAdapterLoader.java b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/WeaviateDriverAdapterLoader.java new file mode 100644 index 000000000..392e96a86 --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/WeaviateDriverAdapterLoader.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.adapter.weaviate; + +import static io.nosqlbench.adapter.weaviate.WeaviateAdapterUtils.WEAVIATE; + +import io.nosqlbench.adapter.diag.DriverAdapterLoader; +import io.nosqlbench.nb.annotations.Service; +import io.nosqlbench.nb.api.components.core.NBComponent; +import io.nosqlbench.nb.api.labels.NBLabels; + +@Service(value = DriverAdapterLoader.class, selector = WEAVIATE) +public class WeaviateDriverAdapterLoader implements DriverAdapterLoader { + @Override + public WeaviateDriverAdapter load(NBComponent parent, NBLabels childLabels) { + return new WeaviateDriverAdapter(parent, childLabels); + } +} diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/WeaviateOpMapper.java b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/WeaviateOpMapper.java new file mode 100644 index 000000000..e985045d5 --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/WeaviateOpMapper.java @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.adapter.weaviate; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import io.nosqlbench.adapter.weaviate.ops.WeaviateBaseOp; +import io.nosqlbench.adapter.weaviate.opsdispensers.WeaviateBaseOpDispenser; +import io.nosqlbench.adapter.weaviate.opsdispensers.WeaviateCreateCollectionOpDispenser; +import io.nosqlbench.adapter.weaviate.opsdispensers.WeaviateCreateObjectsOpDispenser; +import io.nosqlbench.adapter.weaviate.opsdispensers.WeaviateDeleteCollectionOpDispenser; +import io.nosqlbench.adapter.weaviate.opsdispensers.WeaviateGetCollectionSchemaOpDispenser; +import io.nosqlbench.adapter.weaviate.types.WeaviateOpType; +import io.nosqlbench.adapters.api.activityimpl.OpDispenser; +import io.nosqlbench.adapters.api.activityimpl.OpMapper; +import io.nosqlbench.adapters.api.templating.ParsedOp; +import io.nosqlbench.engine.api.templating.TypeAndTarget; + +public class WeaviateOpMapper implements OpMapper> { + private static final Logger logger = LogManager.getLogger(WeaviateOpMapper.class); + private final WeaviateDriverAdapter adapter; + + /** + * Create a new WeaviateOpMapper implementing the {@link OpMapper} interface. 
+ * + * @param adapter The associated {@link WeaviateDriverAdapter} + */ + public WeaviateOpMapper(WeaviateDriverAdapter adapter) { + this.adapter = adapter; + } + + /** + * Given an instance of a {@link ParsedOp} returns the appropriate + * {@link WeaviateBaseOpDispenser} subclass + * + * @param op The {@link ParsedOp} to be evaluated + * @return The correct {@link WeaviateBaseOpDispenser} subclass based on the op + * type + */ + @Override + public OpDispenser> apply(ParsedOp op) { + TypeAndTarget typeAndTarget = op.getTypeAndTarget(WeaviateOpType.class, String.class, + "type", "target"); + logger.info(() -> "Using '" + typeAndTarget.enumId + "' op type for op template '" + op.getName() + "'"); + + return switch (typeAndTarget.enumId) { + case delete_collection -> new WeaviateDeleteCollectionOpDispenser(adapter, op, typeAndTarget.targetFunction); + case create_collection -> new WeaviateCreateCollectionOpDispenser(adapter, op, typeAndTarget.targetFunction); + case get_collection_schema -> + new WeaviateGetCollectionSchemaOpDispenser(adapter, op, typeAndTarget.targetFunction); + case create_objects -> new WeaviateCreateObjectsOpDispenser(adapter, op, typeAndTarget.targetFunction); +// default -> throw new RuntimeException("Unrecognized op type '" + typeAndTarget.enumId.name() + "' while " + +// "mapping parsed op " + op); + }; + } +} + diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/WeaviateSpace.java b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/WeaviateSpace.java new file mode 100644 index 000000000..3ba4c734f --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/WeaviateSpace.java @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.adapter.weaviate; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import io.nosqlbench.nb.api.config.standard.ConfigModel; +import io.nosqlbench.nb.api.config.standard.NBConfigModel; +import io.nosqlbench.nb.api.config.standard.NBConfiguration; +import io.nosqlbench.nb.api.config.standard.Param; +import io.nosqlbench.nb.api.errors.OpConfigError; +import io.weaviate.client.Config; +import io.weaviate.client.WeaviateAuthClient; +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.v1.auth.exception.AuthException; + +/** + * The {@code WeaviateSpace} class is a context object which stores all stateful + * contextual information needed to interact with the Weaviate database + * instance. + * + * @see Weaviate + * quick start guide + * @see Weaviate cloud + * quick start guide + * @see Weaviate Java + * client + */ +public class WeaviateSpace implements AutoCloseable { + private final static Logger logger = LogManager.getLogger(WeaviateSpace.class); + private final String name; + private final NBConfiguration cfg; + + protected WeaviateClient client; + + /** + * Create a new WeaviateSpace Object which stores all stateful contextual + * information needed to interact with the Weaviate database instance. 
+ * + * @param name The name of this space + * @param cfg The configuration ({@link NBConfiguration}) for this nb run + */ + public WeaviateSpace(String name, NBConfiguration cfg) { + this.name = name; + this.cfg = cfg; + } + + public synchronized WeaviateClient getClient() throws AuthException { + if (client == null) { + client = createClient(); + } + return client; + } + + private WeaviateClient createClient() throws AuthException { + String uri = cfg.get("uri"); + String scheme = cfg.getOptional("scheme").orElse("https"); + + var requiredToken = cfg.getOptional("token_file") + .map(Paths::get) + .map( + tokenFilePath -> { + try { + return Files.readAllLines(tokenFilePath).getFirst(); + } catch (IOException e) { + String error = "Error while reading token from file:" + tokenFilePath; + logger.error(error, e); + throw new RuntimeException(e); + } + } + ).orElseGet( + () -> cfg.getOptional("token") + .orElseThrow(() -> new RuntimeException("You must provide either a token_file or a token to " + + "configure a Weaviate client")) + ); + if (requiredToken != null + && (cfg.getOptional("username").isPresent() || cfg.getOptional("password").isPresent())) { + throw new OpConfigError("Username/Password combo cannot be used together with token/tokenFile"); + } + + logger.info("{}: Creating new Weaviate Client with (masked) token [{}], uri/endpoint [{}]", + this.name, WeaviateAdapterUtils.maskDigits(requiredToken), uri); + Config config = new Config(scheme, uri); + + if (cfg.getOptional("username").isPresent() && cfg.getOptional("password").isPresent()) { + return WeaviateAuthClient.clientPassword(config, cfg.getOptional("username").get(), + cfg.getOptional("password").get(), null); + } else { + return WeaviateAuthClient.apiKey(config, requiredToken); + } + } + + public static NBConfigModel getConfigModel() { + return ConfigModel.of(WeaviateSpace.class) + .add( + Param.optional("token_file", String.class, "the file to load the api token from") + ) + .add( + 
Param.defaultTo("token", "weaviate") + .setDescription("the Weaviate api token to use to connect to the database") + ) + .add( + Param.defaultTo("scheme", "http") + .setDescription("the scheme of the database. Defaults to http.")) + .add(Param.defaultTo("uri", "localhost:8080").setDescription( + "the URI endpoint in which the database is running. Do not provide any suffix like https:// here.") + ) + .add(Param.optional("username") + .setDescription("Username to be used for non-WCD clusters. Need Password config too.")) + .add(Param.optional("password") + .setDescription("Password to be used for non-WCD clusters. Need Username config too.")) + .add( + Param.defaultTo("timeout_ms", 3000) + .setDescription("sets the timeout in milliseconds for all requests. Defaults to 3000ms.") + ) + .asReadOnly(); + } + + @Override + public void close() throws Exception { + client = null; + } +} diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/ops/WeaviateBaseOp.java b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/ops/WeaviateBaseOp.java new file mode 100644 index 000000000..44b4e1c42 --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/ops/WeaviateBaseOp.java @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.nosqlbench.adapter.weaviate.ops; + + +import java.util.function.LongFunction; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import io.nosqlbench.adapters.api.activityimpl.uniform.flowtypes.CycleOp; +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.base.Result; + +public abstract class WeaviateBaseOp implements CycleOp { + + protected final static Logger logger = LogManager.getLogger(WeaviateBaseOp.class); + + protected final WeaviateClient client; + protected final T request; + protected final LongFunction apiCall; + + public WeaviateBaseOp(WeaviateClient client, T requestParam) { + this.client = client; + this.request = requestParam; + this.apiCall = this::applyOp; + } + + public WeaviateBaseOp(WeaviateClient client, T requestParam, LongFunction call) { + this.client = client; + this.request = requestParam; + this.apiCall = call; + } + + @SuppressWarnings("unchecked") + @Override + public final Object apply(long value) { + logger.trace("applying op: " + this); + + try { + Object result = applyOp(value); + if (result instanceof Result) { + +// Result result = client.misc().readyChecker().run(); +// +// if (result.hasErrors()) { +// System.out.println(result.getError()); +// return; +// } +// System.out.println(result.getResult()); + if (((Result) result).hasErrors()) { + logger.error("Result status: {}", ((Result) result).getError().toString()); + } + } else { + logger.warn("Op '" + this.toString() + "' did not return a Result 'Result' type." 
+ + " Exception handling will be bypassed" + ); + } +// return (Result result).getResult(); + return result; + } catch (Exception e) { + if (e instanceof RuntimeException rte) { + throw rte; + } else { + throw new RuntimeException(e); + } + } + }; + + public abstract Object applyOp(long value); + + @Override + public String toString() { + return "WeaviateOp(" + this.request.getClass().getSimpleName() + ")"; + } +} diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/ops/WeaviateCreateCollectionOp.java b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/ops/WeaviateCreateCollectionOp.java new file mode 100644 index 000000000..274f8e582 --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/ops/WeaviateCreateCollectionOp.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.nosqlbench.adapter.weaviate.ops; + +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.base.Result; +import io.weaviate.client.v1.schema.model.WeaviateClass; + +public class WeaviateCreateCollectionOp extends WeaviateBaseOp { + + public WeaviateCreateCollectionOp(WeaviateClient client, WeaviateClass request) { + super(client, request); + } + + @Override + public Object applyOp(long value) { + Result createResponse = null; + try { + createResponse = client.schema().classCreator().withClass(request).run(); + logger.debug("Successfully created the collection with return code of {}", createResponse.getResult()); + } catch (RuntimeException rte) { + throw rte; + } + return createResponse; + } + +} diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/ops/WeaviateCreateObjectsOp.java b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/ops/WeaviateCreateObjectsOp.java new file mode 100644 index 000000000..8e4da739c --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/ops/WeaviateCreateObjectsOp.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.nosqlbench.adapter.weaviate.ops; + +import java.util.Collections; +import java.util.HashMap; + +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.base.Result; +import io.weaviate.client.v1.data.api.ObjectCreator; +import io.weaviate.client.v1.data.model.WeaviateObject; +import io.weaviate.client.v1.data.replication.model.ConsistencyLevel; + +public class WeaviateCreateObjectsOp extends WeaviateBaseOp { + + public WeaviateCreateObjectsOp(WeaviateClient client, ObjectCreator request) { + super(client, request); + } + + @SuppressWarnings("serial") + @Override + public Object applyOp(long value) { + Result response = null; + try { + response = client.data().creator().withClassName("Glove25").withProperties(new HashMap() { + { + put("key", "This is a key"); + put("value", "This is the value for a given key"); + } + }).withVector(Collections.nCopies(25, 0.12345f).toArray(new Float[0])) + .withConsistencyLevel(ConsistencyLevel.QUORUM).run(); + logger.debug("Successfully inserted objects in the collection: {}", response.getResult()); + } catch (RuntimeException rte) { + throw rte; + } + return response; + } + +} diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/ops/WeaviateDeleteCollectionOp.java b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/ops/WeaviateDeleteCollectionOp.java new file mode 100644 index 000000000..7bb2cc1e4 --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/ops/WeaviateDeleteCollectionOp.java @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.nosqlbench.adapter.weaviate.ops; + +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.base.Result; + +public class WeaviateDeleteCollectionOp extends WeaviateBaseOp { + + public WeaviateDeleteCollectionOp(WeaviateClient client, String request) { + super(client, request); + } + + @Override + public Object applyOp(long value) { + Result delColResponse = null; + try { + delColResponse = client.schema().classDeleter().withClassName(request).run(); + if (delColResponse.hasErrors()) { + logger.error("Delete collection operation has errors {}", + delColResponse.getError().toString()); + } + logger.debug("Successfully deleted the collection: {}", delColResponse.getResult().toString()); + } catch (RuntimeException rte) { + throw rte; + } + return delColResponse; + } + +} diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/ops/WeaviateGetCollectionSchemaOp.java b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/ops/WeaviateGetCollectionSchemaOp.java new file mode 100644 index 000000000..7df05f2d8 --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/ops/WeaviateGetCollectionSchemaOp.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.nosqlbench.adapter.weaviate.ops; + +import com.google.gson.GsonBuilder; + +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.base.Result; +import io.weaviate.client.v1.schema.model.Schema; + +public class WeaviateGetCollectionSchemaOp extends WeaviateBaseOp { + + public WeaviateGetCollectionSchemaOp(WeaviateClient client, String request) { + super(client, request); + } + + @Override + public Object applyOp(long value) { + Result getColSchemaResponse = null; + try { + getColSchemaResponse = client.schema().getter().run(); + if (getColSchemaResponse.hasErrors()) { + logger.error("Get all collection schema operation has errors {}", + getColSchemaResponse.getError().toString()); + } + logger.info("Successfully fetched entire schema for all the collections: \n{}", + new GsonBuilder().setPrettyPrinting().create() + .toJson(getColSchemaResponse.getResult())); + } catch (RuntimeException rte) { + throw rte; + } + return getColSchemaResponse; + } + +} diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateBaseOpDispenser.java b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateBaseOpDispenser.java new file mode 100644 index 000000000..e2f220bd4 --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateBaseOpDispenser.java @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not 
use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.nosqlbench.adapter.weaviate.opsdispensers; + +import java.util.function.LongFunction; + +import io.nosqlbench.adapter.weaviate.WeaviateDriverAdapter; +import io.nosqlbench.adapter.weaviate.WeaviateSpace; +import io.nosqlbench.adapter.weaviate.ops.WeaviateBaseOp; +import io.nosqlbench.adapters.api.activityimpl.BaseOpDispenser; +import io.nosqlbench.adapters.api.activityimpl.uniform.DriverAdapter; +import io.nosqlbench.adapters.api.templating.ParsedOp; +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.v1.auth.exception.AuthException; + +public abstract class WeaviateBaseOpDispenser extends BaseOpDispenser, WeaviateSpace> { + + protected final LongFunction weaviateSpaceFunction; + protected final LongFunction clientFunction; + private final LongFunction> opF; + private final LongFunction paramF; + + protected WeaviateBaseOpDispenser(WeaviateDriverAdapter adapter, ParsedOp op, LongFunction targetF) { + super((DriverAdapter) adapter, op); + this.weaviateSpaceFunction = adapter.getSpaceFunc(op); + this.clientFunction = (long l) -> { + try { + return this.weaviateSpaceFunction.apply(l).getClient(); + } catch (AuthException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return null; + }; + this.paramF = getParamFunc(this.clientFunction, op, targetF); + this.opF = createOpFunc(paramF, this.clientFunction, op, targetF); + } + + protected WeaviateDriverAdapter getDriverAdapter() { + return (WeaviateDriverAdapter) adapter; + } + + public 
abstract LongFunction getParamFunc(LongFunction clientF, ParsedOp op, + LongFunction targetF); + + public abstract LongFunction> createOpFunc(LongFunction paramF, + LongFunction clientF, ParsedOp op, LongFunction targetF); + + @Override + public WeaviateBaseOp getOp(long value) { + return opF.apply(value); + } + +} diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateBaseOpDispenser.java.new b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateBaseOpDispenser.java.new new file mode 100644 index 000000000..250a0b8de --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateBaseOpDispenser.java.new @@ -0,0 +1,61 @@ +package io.nosqlbench.adapter.weaviate.opsdispensers; + +/* + * Copyright (c) 2022 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + + +import java.util.function.LongFunction; + +import io.nosqlbench.adapter.weaviate.WeaviateDriverAdapter; +import io.nosqlbench.adapter.weaviate.WeaviateSpace; +import io.nosqlbench.adapter.weaviate.ops.WeaviateBaseOp; +import io.nosqlbench.adapters.api.activityimpl.BaseOpDispenser; +import io.nosqlbench.adapters.api.activityimpl.uniform.DriverAdapter; +import io.nosqlbench.adapters.api.templating.ParsedOp; +import io.weaviate.client.WeaviateClient; + +public abstract class WeaviateBaseOpDispenser extends BaseOpDispenser, WeaviateSpace> { + + protected final LongFunction weaviateSpaceFunction; + protected final LongFunction clientFunction; + private final LongFunction> opF; + private final LongFunction paramF; + + protected WeaviateBaseOpDispenser(WeaviateDriverAdapter adapter, ParsedOp op, LongFunction targetF) { + super((DriverAdapter) adapter, op); + this.weaviateSpaceFunction = adapter.getSpaceFunc(op); + this.clientFunction = (long l) -> this.weaviateSpaceFunction.apply(l).getClient(); + this.paramF = getParamFunc(this.clientFunction, op, targetF); + this.opF = createOpFunc(paramF, this.clientFunction, op, targetF); + } + + protected WeaviateDriverAdapter getDriverAdapter() { + return (WeaviateDriverAdapter) adapter; + } + + public abstract LongFunction getParamFunc(LongFunction clientF, ParsedOp op, + LongFunction targetF); + + public abstract LongFunction> createOpFunc(LongFunction paramF, + LongFunction clientF, ParsedOp op, LongFunction targetF); + + @Override + public WeaviateBaseOp getOp(long value) { + return opF.apply(value); + } + +} diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateCreateCollectionOpDispenser.java b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateCreateCollectionOpDispenser.java new file mode 100644 index 000000000..bdf474b0d --- /dev/null +++ 
b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateCreateCollectionOpDispenser.java @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.nosqlbench.adapter.weaviate.opsdispensers; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.LongFunction; + +import io.nosqlbench.adapter.weaviate.WeaviateDriverAdapter; +import io.nosqlbench.adapter.weaviate.ops.WeaviateBaseOp; +import io.nosqlbench.adapter.weaviate.ops.WeaviateCreateCollectionOp; +import io.nosqlbench.adapters.api.templating.ParsedOp; +import io.nosqlbench.nb.api.errors.OpConfigError; +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.v1.misc.model.BQConfig; +import io.weaviate.client.v1.misc.model.BQConfig.BQConfigBuilder; +import io.weaviate.client.v1.misc.model.MultiTenancyConfig; +import io.weaviate.client.v1.misc.model.MultiTenancyConfig.MultiTenancyConfigBuilder; +import io.weaviate.client.v1.misc.model.PQConfig; +import io.weaviate.client.v1.misc.model.PQConfig.Encoder; +import io.weaviate.client.v1.misc.model.PQConfig.PQConfigBuilder; +import io.weaviate.client.v1.misc.model.ReplicationConfig; +import io.weaviate.client.v1.misc.model.ReplicationConfig.ReplicationConfigBuilder; +import 
io.weaviate.client.v1.misc.model.VectorIndexConfig; +import io.weaviate.client.v1.misc.model.VectorIndexConfig.VectorIndexConfigBuilder; +import io.weaviate.client.v1.schema.model.Property; +import io.weaviate.client.v1.schema.model.Property.PropertyBuilder; +import io.weaviate.client.v1.schema.model.WeaviateClass; +import io.weaviate.client.v1.schema.model.WeaviateClass.VectorConfig; +import io.weaviate.client.v1.schema.model.WeaviateClass.VectorConfig.VectorConfigBuilder; +import io.weaviate.client.v1.schema.model.WeaviateClass.WeaviateClassBuilder;; + +/** + * @see API + * Reference + * @see Collection + * docs + */ +public class WeaviateCreateCollectionOpDispenser extends WeaviateBaseOpDispenser { + + public WeaviateCreateCollectionOpDispenser(WeaviateDriverAdapter adapter, ParsedOp op, + LongFunction targetF) { + super(adapter, op, targetF); + } + + @SuppressWarnings("rawtypes") + @Override + public LongFunction getParamFunc(LongFunction clientF, ParsedOp op, + LongFunction targetF) { + + LongFunction ebF = l -> WeaviateClass.builder().className(targetF.apply(l)); + + ebF = op.enhanceFuncOptionally(ebF, "description", String.class, WeaviateClassBuilder::description); + ebF = op.enhanceFuncOptionally(ebF, "vectorizer", String.class, WeaviateClassBuilder::vectorizer); + + if (op.isDefined("properties")) { + LongFunction> propertiesListF = buildPropertiesStruct(op); + final LongFunction propertiesF = ebF; + ebF = l -> propertiesF.apply(l).properties(propertiesListF.apply(l)); + } + + Optional> rfF = op.getAsOptionalFunction("replicationConfig", Map.class); + if (rfF.isPresent()) { + final LongFunction rfFunc = ebF; + LongFunction repConfF = buildReplicationConfig(rfF.get()); + ebF = l -> rfFunc.apply(l).replicationConfig(repConfF.apply(l)); + } + + Optional> mtcF = op.getAsOptionalFunction("multiTenancyConfig", Map.class); + if (mtcF.isPresent()) { + final LongFunction multiTCFunc = ebF; + LongFunction multiTenantConfF = buildMultiTenancyConfig(mtcF.get()); + ebF 
= l -> multiTCFunc.apply(l).multiTenancyConfig(multiTenantConfF.apply(l)); + } + + ebF = op.enhanceFuncOptionally(ebF, "vectorIndexType", String.class, WeaviateClassBuilder::vectorIndexType); + if(op.isDefined("vectorIndexConfig")) { + LongFunction vecIdxConfF = buildVectorIndexConfig(op); + final LongFunction vectorIndexConfigF = ebF; + ebF = l -> vectorIndexConfigF.apply(l).vectorIndexConfig(vecIdxConfF.apply(l)); + } + + if (op.isDefined("vectorConfig")) { + LongFunction> vecConfF = buildVectorConfig(op); + final LongFunction vectorConfigFunc = ebF; + ebF = l -> vectorConfigFunc.apply(l).vectorConfig(vecConfF.apply(l)); + } + + final LongFunction lastF = ebF; + return l -> lastF.apply(l).build(); + } + + @SuppressWarnings({ "unchecked", "rawtypes" }) + private LongFunction> buildVectorConfig(ParsedOp op) { + Optional> baseFunc = op.getAsOptionalFunction("vectorConfig", Map.class); + return baseFunc.>>map(mapLongFunc -> l -> { + Map vcMap = mapLongFunc.apply(l); + Map finalVecConf = new HashMap<>(); + vcMap.forEach((k, v) -> { + if (v instanceof Map) { + VectorConfigBuilder vcBdr = VectorConfig.builder(); + + ((Map) v).forEach((innerKey, innerVal) -> { + if ("vectorizer".equals(innerKey)) { + Map vectorizerMap = new HashMap<>(1); + vectorizerMap.put((String) innerVal, (String) innerVal); + vcBdr.vectorizer(vectorizerMap); + // https://weaviate.io/developers/weaviate/api/rest#tag/schema/post/schema + // Result status: WeaviateError(statusCode=422, + // messages=[WeaviateErrorMessage(message=class.VectorConfig.Vectorizer must be + // an object, got , throwable=null)]) + + // WeaviateError(statusCode=422, + // messages=[WeaviateErrorMessage(message=class.VectorConfig.Vectorizer must + // consist only 1 configuration, got: 0, throwable=null)]) + logger.warn("For now, vectorizer is not properly implemented in named vector as its uncler"); + } + if ("vectorIndexType".equals(innerKey)) { + vcBdr.vectorIndexType((String) innerVal); + } + if 
("vectorIndexConfig".equals(innerKey)) { + vcBdr.vectorIndexConfig(extractVectorIndexConfig((Map) innerVal)); + } + }); + + finalVecConf.put(k, vcBdr.build()); + } else { + throw new OpConfigError("Expected a map type for the value of '" + k + + "' named vector, but received " + v.getClass().getSimpleName()); + } + }); + return finalVecConf; + }).orElse(null); + } + + @SuppressWarnings({ "unchecked", "rawtypes" }) + private LongFunction buildVectorIndexConfig(ParsedOp op) { + Optional> baseFunc = op.getAsOptionalFunction("vectorIndexConfig", Map.class); + return baseFunc.>map(mapLongFunc -> l -> { + Map nvMap = mapLongFunc.apply(l); + return extractVectorIndexConfig(nvMap); + }).orElse(null); + } + + @SuppressWarnings({ "unchecked" }) + private VectorIndexConfig extractVectorIndexConfig(Map nvMap) { + VectorIndexConfigBuilder vecIdxConfBuilder = VectorIndexConfig.builder(); + nvMap.forEach((key, value) -> { + if (key.equals("cleanupIntervalSeconds")) { + vecIdxConfBuilder.cleanupIntervalSeconds(((Number) value).intValue()); + } + if (key.equals("distance")) { + vecIdxConfBuilder.distance((String) value); + } + if (key.equals("ef")) { + vecIdxConfBuilder.ef(((Number) value).intValue()); + } + if (key.equals("efConstruction")) { + vecIdxConfBuilder.efConstruction(((Number) value).intValue()); + } + if (key.equals("maxConnections")) { + vecIdxConfBuilder.maxConnections(((Number) value).intValue()); + } + if (key.equals("dynamicEfMin")) { + vecIdxConfBuilder.dynamicEfMin(((Number) value).intValue()); + } + if (key.equals("dynamicEfMax")) { + vecIdxConfBuilder.dynamicEfMax(((Number) value).intValue()); + } + if (key.equals("dynamicEfFactor")) { + vecIdxConfBuilder.dynamicEfFactor(((Number) value).intValue()); + } + if (key.equals("flatSearchCutoff")) { + vecIdxConfBuilder.flatSearchCutoff(((Number) value).intValue()); + } + if (key.equals("skip")) { + vecIdxConfBuilder.skip((Boolean) value); + } + if (key.equals("vectorCacheMaxObjects")) { + 
vecIdxConfBuilder.vectorCacheMaxObjects(((Number) value).longValue()); + } + if (key.equals("bq")) { + BQConfigBuilder bqConfBuilder = BQConfig.builder(); + if (value instanceof Map) { + ((Map) value).forEach((bqKey, bqVal) -> { + if (bqKey.equals("enabled")) { + bqConfBuilder.enabled((Boolean) bqVal); + } + if (bqKey.equals("rescoreLimit")) { + bqConfBuilder.rescoreLimit(((Number) bqVal).longValue()); + } + if (bqKey.equals("cache")) { + bqConfBuilder.cache((Boolean) bqVal); + } + }); + } else { + throw new OpConfigError("Expected a map type for 'bq' leaf of the configuration"); + } + vecIdxConfBuilder.bq(bqConfBuilder.build()); + } + if (key.equals("pq")) { + PQConfigBuilder pqConfBuilder = PQConfig.builder(); + if (value instanceof Map) { + ((Map) value).forEach((pqKey, pqVal) -> { + if (pqKey.equals("enabled")) { + pqConfBuilder.enabled((Boolean) pqVal); + } + if (pqKey.equals("rescoreLimit")) { + pqConfBuilder.rescoreLimit(((Number) pqVal).longValue()); + } + if (pqKey.equals("bitCompression")) { + pqConfBuilder.bitCompression((Boolean) pqVal); + } + if (pqKey.equals("segments")) { + pqConfBuilder.segments(((Number) pqVal).intValue()); + } + if (pqKey.equals("centroids")) { + pqConfBuilder.centroids(((Number) pqVal).intValue()); + } + if (pqKey.equals("trainingLimit")) { + pqConfBuilder.trainingLimit(((Number) pqVal).intValue()); + } + if (pqKey.equals("cache")) { + pqConfBuilder.cache((Boolean) pqVal); + } + if (pqKey.equals("encoder")) { + PQConfig.Encoder.EncoderBuilder encBuilder = Encoder.builder(); + if (pqVal instanceof Map) { + ((Map) pqVal).forEach((encKey, encVal) -> { + if (encKey.equals("type")) { + encBuilder.type((String) encVal); + } + if (encKey.equals("distribution")) { + encBuilder.distribution((String) encVal); + } + }); + } else { + throw new OpConfigError( + "Expected a map type for 'encoder's value of the configuration, but got " + + value.getClass().getSimpleName()); + } + pqConfBuilder.encoder(encBuilder.build()); + } + }); + } else { + 
throw new OpConfigError("Expected a map type for 'bq' leaf of the configuration, but got " + + value.getClass().getSimpleName()); + } + vecIdxConfBuilder.pq(pqConfBuilder.build()); + } + }); + return vecIdxConfBuilder.build(); + } + + @SuppressWarnings({ "unchecked", "rawtypes" }) + private LongFunction> buildPropertiesStruct(ParsedOp op) { +// if (!op.isDefined("properties")) { +// throw new OpConfigError("Must provide values for 'properties' in 'create_collection' op"); +// } + + Optional> baseFunc = op.getAsOptionalFunction("properties", Map.class); + return baseFunc.>>map(mapLongFunc -> l -> { + Map nvMap = mapLongFunc.apply(l); + List propertyList = new ArrayList<>(); + nvMap.forEach((name, value) -> { + // TODO -- see if we need to set name at the higher (i.e. Property) level or + // just at NetstedProperty level + PropertyBuilder propBuilder = Property.builder().name(name); + if (value instanceof Map) { + ((Map) value).forEach((innerKey, innerValue) -> { + if (innerKey.equals("description")) { + propBuilder.description((String) innerValue); + } + if (innerKey.equals("dataType")) { + // logger.info(">>>>>>> property name '{}' has dataType '{}'", name, + // innerValue); + // https://weaviate.io/developers/weaviate/config-refs/datatypes#datatype-cross-reference + // is unsupported at this time in NB driver + propBuilder.dataType(Arrays.asList((String) innerValue)); + } + if (innerKey.equals("tokenization")) { + propBuilder.tokenization(((String) innerValue)); + } + if (innerKey.equals("indexFilterable")) { + propBuilder.indexFilterable((Boolean) innerValue); + } + if (innerKey.equals("indexSearchable")) { + propBuilder.indexSearchable((Boolean) innerValue); + } + }); + } else { + throw new OpConfigError( + "Named vector properties must be a Map>, but got " + + value.getClass().getSimpleName() + " instead for the inner value"); + } + propertyList.add(propBuilder.build()); + }); + return propertyList; + }).orElse(null); + } + + @SuppressWarnings({ "rawtypes", 
"unchecked" }) + private LongFunction buildMultiTenancyConfig(LongFunction mapLongFunction) { + return l -> { + MultiTenancyConfigBuilder mtcB = MultiTenancyConfig.builder(); + mapLongFunction.apply(l).forEach((key, val) -> { + if (key.equals("enabled")) { + mtcB.enabled((Boolean) val); + } + }); + return mtcB.build(); + }; + } + + @SuppressWarnings({ "unchecked", "rawtypes" }) + private LongFunction buildReplicationConfig(LongFunction mapLongFunction) { + return l -> { + ReplicationConfigBuilder rConfB = ReplicationConfig.builder(); + mapLongFunction.apply(l).forEach((key, val) -> { + if (key.equals("factor")) { + rConfB.factor(((Number) val).intValue()); + } + }); + return rConfB.build(); + }; + } + + @Override + public LongFunction> createOpFunc(LongFunction paramF, + LongFunction clientF, ParsedOp op, LongFunction targetF) { + return l -> new WeaviateCreateCollectionOp(clientF.apply(l), paramF.apply(l)); + } + +} diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateCreateObjectsOpDispenser.java b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateCreateObjectsOpDispenser.java new file mode 100644 index 000000000..cefafe291 --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateCreateObjectsOpDispenser.java @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.nosqlbench.adapter.weaviate.opsdispensers; + +import java.util.function.LongFunction; + +import io.nosqlbench.adapter.weaviate.WeaviateDriverAdapter; +import io.nosqlbench.adapter.weaviate.ops.WeaviateBaseOp; +import io.nosqlbench.adapter.weaviate.ops.WeaviateCreateObjectsOp; +import io.nosqlbench.adapters.api.templating.ParsedOp; +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.v1.data.api.ObjectCreator; + +/** + * Create objects. + * + * @see Create + * Objects. + * @see Create + * Objects - REST API. + */ +public class WeaviateCreateObjectsOpDispenser extends WeaviateBaseOpDispenser { + + public WeaviateCreateObjectsOpDispenser(WeaviateDriverAdapter adapter, ParsedOp op, LongFunction targetF) { + super(adapter, op, targetF); + } + + public LongFunction getParamFunc(LongFunction clientF, ParsedOp op, + LongFunction targetF) { +// LongFunction ebF = l -> ObjectCreator.builder().className(targetF.apply(l)); +// +// final LongFunction lastF = ebF; +// return l -> lastF.apply(l).build(); + + @SuppressWarnings("deprecation") + LongFunction ebF = l -> { + try { + return ObjectCreator.class.newInstance().withClassName(targetF.apply(l)); + } catch (InstantiationException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IllegalAccessException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return null; + }; + return l -> ebF.apply(l); + } + + @Override + public LongFunction> createOpFunc(LongFunction paramF, + LongFunction clientF, ParsedOp op, LongFunction targetF) { + return l -> new WeaviateCreateObjectsOp(clientF.apply(l), paramF.apply(l)); + } + +} diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateDeleteCollectionOpDispenser.java 
b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateDeleteCollectionOpDispenser.java new file mode 100644 index 000000000..1558cdbc3 --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateDeleteCollectionOpDispenser.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.nosqlbench.adapter.weaviate.opsdispensers; + +import java.util.function.LongFunction; + +import io.nosqlbench.adapter.weaviate.WeaviateDriverAdapter; +import io.nosqlbench.adapter.weaviate.ops.WeaviateBaseOp; +import io.nosqlbench.adapter.weaviate.ops.WeaviateDeleteCollectionOp; +import io.nosqlbench.adapters.api.templating.ParsedOp; +import io.weaviate.client.WeaviateClient; + +/** + * Delete a Weaviate collection. + * + * @see Delete + * Collection docs. + * @see Delete + * Collection REST API. 
+ */ +public class WeaviateDeleteCollectionOpDispenser extends WeaviateBaseOpDispenser { + + public WeaviateDeleteCollectionOpDispenser(WeaviateDriverAdapter adapter, ParsedOp op, + LongFunction targetF) { + super(adapter, op, targetF); + } + + @Override + public LongFunction getParamFunc(LongFunction clientF, ParsedOp op, + LongFunction targetF) { +// LongFunction ebF = l -> targetF.apply(l); +// +// final LongFunction lastF = ebF; +// return l -> lastF.apply(l); + return l -> targetF.apply(l); + } + + @Override + public LongFunction> createOpFunc(LongFunction paramF, + LongFunction clientF, ParsedOp op, LongFunction targetF) { + return l -> new WeaviateDeleteCollectionOp(clientF.apply(l), paramF.apply(l)); + } + +} diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateGetCollectionSchemaOpDispenser.java b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateGetCollectionSchemaOpDispenser.java new file mode 100644 index 000000000..322877ab3 --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/opsdispensers/WeaviateGetCollectionSchemaOpDispenser.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.nosqlbench.adapter.weaviate.opsdispensers; + +import java.util.function.LongFunction; + +import io.nosqlbench.adapter.weaviate.WeaviateDriverAdapter; +import io.nosqlbench.adapter.weaviate.ops.WeaviateBaseOp; +import io.nosqlbench.adapter.weaviate.ops.WeaviateGetCollectionSchemaOp; +import io.nosqlbench.adapters.api.templating.ParsedOp; +import io.weaviate.client.WeaviateClient; + +public class WeaviateGetCollectionSchemaOpDispenser extends WeaviateBaseOpDispenser { + + public WeaviateGetCollectionSchemaOpDispenser(WeaviateDriverAdapter adapter, ParsedOp op, + LongFunction targetF) { + super(adapter, op, targetF); + } + + @Override + public LongFunction getParamFunc(LongFunction clientF, ParsedOp op, + LongFunction targetF) { + return l -> targetF.apply(l); + } + + @Override + public LongFunction> createOpFunc(LongFunction paramF, + LongFunction clientF, ParsedOp op, LongFunction targetF) { + return l -> new WeaviateGetCollectionSchemaOp(clientF.apply(l), paramF.apply(l)); + } + +} diff --git a/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/types/WeaviateOpType.java b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/types/WeaviateOpType.java new file mode 100644 index 000000000..6d572ff57 --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/java/io/nosqlbench/adapter/weaviate/types/WeaviateOpType.java @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2020-2024 nosqlbench + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.nosqlbench.adapter.weaviate.types; + +public enum WeaviateOpType { + create_collection, + delete_collection, + get_collection_schema, + create_objects, +} diff --git a/nb-adapters/adapter-weaviate/src/main/resources/activities/weaviate_vector_live.yaml b/nb-adapters/adapter-weaviate/src/main/resources/activities/weaviate_vector_live.yaml new file mode 100644 index 000000000..fc7e61963 --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/resources/activities/weaviate_vector_live.yaml @@ -0,0 +1,327 @@ +min_version: 5.21 +description: | + This is a template for live vector search testing. + Template Variables: + + schema: Install the schema required to run the test + rampup: Measure how long it takes to load a set of embeddings + search: Measure how the system responds to queries while it + is indexing recently ingested data. + search: Run vector search with a set of default (or overridden) parameters + In all of these phases, it is important to instance the metrics with distinct names. + Also, aggregates of recall should include total aggregate as well as a moving average. 
+ +scenarios: + weaviate_vectors: + delete_collection: >- + run tags==block:delete_collection + errors===stop + cycles===UNDEF threads===UNDEF + uri=TEMPLATE(weaviatehost) token_file=TEMPLATE(token_file) + schema_collection: >- + run tags==block:schema_collection + errors===stop + cycles===UNDEF threads===UNDEF + uri=TEMPLATE(weaviatehost) token_file=TEMPLATE(token_file) + get_collection_schema: >- + run tags==block:get_collection_schema + errors===stop + cycles===UNDEF threads===UNDEF + uri=TEMPLATE(weaviatehost) token_file=TEMPLATE(token_file) + # TODO - ,retry should be added to errors for rampup + rampup: >- + run tags==block:rampup + errors===warn,counter + cycles===TEMPLATE(train_cycles,TEMPLATE(trainsize,1000)) threads===TEMPLATE(train_threads,AUTO) + uri=TEMPLATE(weaviatehost) token_file=TEMPLATE(token_file) + count_vectors: >- + run tags==block:count_vectors + errors===stop + cycles===UNDEF threads===UNDEF + uri=TEMPLATE(weaviatehost) token_file=TEMPLATE(token_file) + search_points: >- + run tags==block:search_points + errors===warn,counter + cycles===TEMPLATE(testann_cycles,TEMPLATE(testsize,1000)) threads===TEMPLATE(testann_threads,AUTO) + uri=TEMPLATE(weaviatehost) token_file=TEMPLATE(token_file) + +params: + driver: weaviate + instrument: true + +bindings: + id_val: Identity(); + id_val_uuid: ToHashedUUID() -> java.util.UUID + row_key: ToString() + row_key_batch: Mul(TEMPLATE(batch_size)L); ListSizedStepped(TEMPLATE(batch_size),long->ToString()); + # filetype=hdf5 for TEMPLATE(filetype,hdf5) + test_floatlist_hdf5: HdfFileToFloatList("testdata/TEMPLATE(dataset).hdf5", "/test"); + relevant_indices_hdf5: HdfFileToIntArray("testdata/TEMPLATE(dataset).hdf5", "/neighbors") + distance_floatlist_hdf5: HdfFileToFloatList("testdata/TEMPLATE(dataset).hdf5", "/distance") + # TODO - remove the local path + train_floatlist_hdf5: HdfFileToFloatList("local/testdata/TEMPLATE(dataset).hdf5", "/train"); + train_floatlist_hdf5_batch: Mul(TEMPLATE(batch_size)L); 
ListSizedStepped(TEMPLATE(batch_size),HdfFileToFloatList("testdata/TEMPLATE(dataset).hdf5", "/train")); + # filetype=fvec for TEMPLATE(filetype,fvec) + test_floatlist_fvec: FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_query_vectors.fvec"); + relevant_indices_fvec: IVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_indices_query.ivec"); + distance_floatlist_fvec: FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(testsize)_distances_count.fvec",TEMPLATE(dimensions),0); + train_floatlist_fvec: FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_base_vectors.fvec",TEMPLATE(dimensions),0); + train_floatlist_fvec_batch: Mul(TEMPLATE(batch_size,10)L); ListSizedStepped(TEMPLATE(batch_size),FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_base_vectors.fvec",TEMPLATE(dimensions),0)); + +blocks: + delete_collection: + ops: + # https://weaviate.io/developers/weaviate/manage-data/collections#delete-a-collection + delete_col_op: + delete_collection: "TEMPLATE(collection)" + + schema_collection1: + ops: + # https://weaviate.io/developers/weaviate/config-refs/schema#collection-object + create_col_op: + create_collection: "TEMPLATE(collection)" + description: "This is a key/value collection with value being the vector embedding" + vectorizer: "none" + vectorIndexType: "hnsw" + properties: + key: + description: "the key column, similar to partition key in Cassandra" + dataType: "text" + tokenization: "word" + indexFilterable: true + indexSearchable: true + value: + description: "the actual vector value column that stores embeddings" + dataType: "number[]" + indexFilterable: true + + schema_collection: + ops: + # https://weaviate.io/developers/weaviate/config-refs/schema#collection-object + create_col_op: + create_collection: "TEMPLATE(collection)" + description: "This is a key/value collection with value being the vector embedding" + vectorConfig: + # below 'value' is a named-vector + value: + vectorizer: "none" + vectorIndexType: "hnsw" + 
vectorIndexConfig: + #hnsw specifc configs + cleanupIntervalSeconds: 300 + distance: "cosine" + ef: 64 # -1 for dynamic ef, https://weaviate.io/developers/weaviate/concepts/vector-index#dynamic-ef + efConstruction: 128 + maxConnections: 32 # default 64 + dynamicEfMin: 100 + dynamicEfMax: 500 + dynamicEfFactor: 8 + flatSearchCutoff: 40000 + skip: false + vectorCacheMaxObjects: 1e12 + #pq index specific configs + #pq: + #enabled: false + #trainingLimit: 100000 + #segments: 2 + #centroids: 256 + #encoder: + #type: "kmeans" + #distribution: "log-normal" + #distribution: "log-normal" + #flat index specific + #vectorCacheMaxObjects: 1e12 + #bq specific configs + #bq: + #enabled: false + #rescoreLimit: -1 + #cache: false + #dynamic index specific configs: + #distance: "cosine" + #hnsw: + #... + #flat: + #... + #threshold: 10000 + properties: + key: + description: "the key column, similar to partition key in Cassandra" + dataType: "text" + tokenization: "word" + indexFilterable: true + indexSearchable: true + value: + description: "the actual vector value column that stores embeddings" + dataType: "number[]" + indexFilterable: true + replicationConfig: + factor: 3 + multiTenancyConfig: + enabled: true + + get_collection_schema: + ops: + get_collection_schema_op: + get_collection_schema: "TEMPLATE(collection)" + + rampup: + ops: + create_objects_op: + create_objects: "TEMPLATE(collection)" + objects: + - id: "{id_val_uuid}" + #properties: + # key: "{row_key}" + vectors: + # Only named vectors are supported at this time + value: "{train_floatlist_TEMPLATE(filetype)}" + #tenant: "" + + search_points: + ops: + search_points_op: + search_points: "TEMPLATE(collection)" + timeout: 300 # 5 minutes + # https://github.com/weaviate/weaviate/blob/v1.9.0/lib/api/src/grpc/proto/points.proto#L21-L25 + # 0 - All, 1 - Majority, 2 - Quorum + consistency: "Quorum" + with_payload: true + with_vector: true + limit: TEMPLATE(select_limit,100) + # Another option to set with payload is as follows 
+ # with_payload: ["key1"] + # Another option to set with payload is as follows + # with_payload: + # include: ["key1"] + # exclude: ["key2"] + vector: + - name: "value" + values: "{test_floatlist_TEMPLATE(filetype)}" + #indices: "[1,7]" + verifier-init: | + relevancy= new io.nosqlbench.nb.api.engine.metrics.wrappers.RelevancyMeasures(_parsed_op); + for (int k in List.of(100)) { + relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.recall("recall",k)); + relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.precision("precision",k)); + relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.F1("F1",k)); + relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.reciprocal_rank("RR",k)); + relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.average_precision("AP",k)); + } + verifier: | + // driver-specific function + actual_indices=io.nosqlbench.adapter.weaviate.weaviateAdapterUtils.searchPointsResponseIdNumToIntArray(result) + // System.out.println("actual_indices ------>>>>: " + actual_indices); + // driver-agnostic function + relevancy.accept({relevant_indices_TEMPLATE(filetype)},actual_indices); + // because we are "verifying" although this needs to be reorganized + return true; + # More complex filtering available. 
See 'count_points' below for an example filter structure + + create_payload_index: + ops: + create_payload_index_op: + create_payload_index: "TEMPLATE(collection)" + field_name: "field17" + field_type: "Keyword" + ordering: "Strong" + wait: true + # https://github.com/weaviate/weaviate/blob/v1.9.2/lib/api/src/grpc/proto/collections.proto#L395-L400 + + count_vectors: + ops: + count_points_op: + count_points: "TEMPLATE(collection)" + exact: true + # More complex filtering logic could be provided as follows + #filter: + # - clause: "must" + # condition: "match" + # key: "field1" + # value: "abc1" + # - clause: "must_not" + # condition: "match_any" + # key: "field2" + # value: + # - "abc2" + # - "abc3" + # - clause: "should" + # condition: "range" + # key: "field3" + # # any one of below + # value: + # gte: 10 + # lte: 20 + # gt: null + # lt: null + # - clause: "must" + # condition: "nested" + # key: "field4" + # nested: + # - condition: "match" + # key: "field5[].whatsup" + # value: "ni24maddy" + # - condition: "match" + # key: "field6" + # value: true + # - clause: "should" + # condition: "has_id" + # # no other keys are supported for this type + # key: "id" + # value: + # - 1 + # - 2 + # - 3 + # - clause: "should" + # condition: "match" + # key: "field7" + # # special case of match is text + # text: "abc7" + # - clause: "should" + # condition: "geo_bounding_box" + # key: "field8" + # value: + # top_left: + # lat: 40.7128 + # lon: -74.0060 + # bottom_right: + # lat: 40.7128 + # lon: -74.0060 + # - clause: "must_not" + # condition: "geo_radius" + # key: "field9" + # value: + # center: + # lat: 40.7128 + # lon: -74.0060 + # radius: 100.0 + # - clause: "must" + # condition: "geo_polygon" + # key: "field10" + # value: + # exterior_points: + # - lat: 30.7128 + # lon: -34.0060 + # interior_points: + # - lat: 42.7128 + # lon: -54.0060 + # - clause: "should" + # condition: "values_count" + # key: "field11" + # # Any one of below + # value: + # gte: 1 + # lte: 10 + # gt: null 
+ # lt: null + # - clause: "must_not" + # condition: "is_empty" + # key: "field12" + # - clause: "must" + # condition: "is_null" + # key: "field13" + # - clause: "must" + # condition: "match_except" + # key: "field14" + # value: + # - 1 + # - 2 diff --git a/nb-adapters/adapter-weaviate/src/main/resources/weaviate.md b/nb-adapters/adapter-weaviate/src/main/resources/weaviate.md new file mode 100644 index 000000000..76f73c027 --- /dev/null +++ b/nb-adapters/adapter-weaviate/src/main/resources/weaviate.md @@ -0,0 +1,69 @@ +# Weaviate driver adapter + +The Weaviate driver adapter is a NoSQLBench adapter for the `weaviate` driver, an open-source Java driver +for connecting to and performing operations on an instance of a Weaviate vector database. The driver is +leveraged from GitHub at https://github.com/weaviate/java-client. + +## Run Commands (Remove prior to merge) + +### Create Collection Schema +``` +java -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar weaviate_vector_live weaviate_vectors.rampup dimensions=25 testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=Glove_25 weaviatehost=letsweave-czgwdrw9.weaviate.network token_file=${workspace_loc:/nosqlbench}/local/weaviate/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:weaviate_1255,instance:vectors,vendor:weaviate_wcd" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5 +``` + +### Delete Collection +``` +java -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar weaviate_vector_live weaviate_vectors.delete_collection dimensions=25 testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=Glove_25 
weaviatehost=letsweave-czgwdrw9.weaviate.network token_file=${workspace_loc:/nosqlbench}/local/weaviate/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:weaviate_1255,instance:vectors,vendor:weaviate_wcd" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5 +``` + +### Get all schema +``` +java -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar weaviate_vector_live weaviate_vectors.get_collection_schema dimensions=25 testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=Glove_25 weaviatehost=letsweave-czgwdrw9.weaviate.network token_file=${workspace_loc:/nosqlbench}/local/weaviate/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:weaviate_1255,instance:vectors,vendor:weaviate_wcd" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5 +``` + +### Insert objects +``` +TODO +``` + +### Read objects +``` +TODO +``` + +### TODO - Work on the below + +## activity parameters + +The following parameters must be supplied to the adapter at runtime in order to successfully connect to an +instance of the [Weaviate database](https://weaviate.io/developers/weaviate): + +* `token` - In order to use the Weaviate database you must have an account. Once the account is created you can [request + an api key/token](https://weaviate.io/developers/wcs/quickstart#explore-the-details-panel). 
This key will need to be
+  provided any time a database connection is desired. Alternatively, the api key can be stored in a file securely and
+  referenced via the `token_file` config option pointing to the path of the file.
+* `uri` - When a collection/index is created in the database the URI (aka endpoint) must be specified as well. The adapter will
+  use the default value of `localhost:8080` if none is provided at runtime. Remember to *not* provide the `https://`
+  prefix.
+* `scheme` - the scheme of the database connection. Defaults to `http`.
+
+## Op Templates
+
+The Weaviate adapter supports [**all basic operations**](../java/io/nosqlbench/adapter/weaviate/ops) supported by the [Java
+driver published by Weaviate](https://github.com/weaviate/java-client). The official Weaviate API reference can be found at
+https://weaviate.io/developers/weaviate/api/rest.
+
+The operations include full-fledged support for key APIs available in the Weaviate Java driver.
+The following are a couple of high-level API operations.
+
+* Create Collection
+* Delete Collection
+* Get Entire Schema
+* Create Objects (vectors)
+* Read Objects (vectors)
+
+## Examples
+
+Check out the [full example available here](./activities/weaviate_vector_live.yaml).
+ +--- diff --git a/nb-adapters/nb-adapters-included/pom.xml b/nb-adapters/nb-adapters-included/pom.xml index 5e75acc35..7bd4e450a 100644 --- a/nb-adapters/nb-adapters-included/pom.xml +++ b/nb-adapters/nb-adapters-included/pom.xml @@ -116,7 +116,7 @@ adapter-jdbc-include - true + false @@ -228,7 +228,7 @@ adapter-kafka-include - false + true @@ -242,7 +242,7 @@ adapter-amqp-include - false + true @@ -256,7 +256,7 @@ adapter-qdrant-include - false + true @@ -267,6 +267,34 @@ + + adapter-weaviate-include + + true + + + + io.nosqlbench + adapter-weaviate + ${revision} + + + + + + adapter-azure-aisearch-include + + true + + + + io.nosqlbench + adapter-azure-aisearch + ${revision} + + + + diff --git a/nb-adapters/pom.xml b/nb-adapters/pom.xml index 4d9012544..e0c717aaa 100644 --- a/nb-adapters/pom.xml +++ b/nb-adapters/pom.xml @@ -87,7 +87,7 @@ adapter-jdbc-module - true + false adapter-jdbc @@ -157,7 +157,7 @@ adapter-kafka-module - false + true adapter-kafka @@ -167,7 +167,7 @@ adapter-amqp-module - false + true adapter-amqp @@ -187,12 +187,32 @@ adapter-qdrant-module - false + true adapter-qdrant + + adapter-weaviate-module + + true + + + adapter-weaviate + + + + + adapter-azure-aisearch-module + + true + + + adapter-azure-aisearch + + +