Merge pull request #1984 from nosqlbench/driver/weaviate

Weaviate & Azure AI Search vector database driver adapter
This commit is contained in:
Madhavan 2024-08-14 08:08:00 -04:00 committed by GitHub
commit c8b81c0fa1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
48 changed files with 3658 additions and 61 deletions

View File

@ -398,6 +398,22 @@
<version>2.23.1</version>
</dependency>
<dependency>
<groupId>io.weaviate</groupId>
<artifactId>client</artifactId>
<version>4.8.2</version>
</dependency>
<dependency>
<groupId>com.azure</groupId>
<artifactId>azure-search-documents</artifactId>
<version>11.7.0</version>
</dependency>
<dependency>
<groupId>com.azure</groupId>
<artifactId>azure-identity</artifactId>
<version>1.13.2</version>
</dependency>
</dependencies>
</dependencyManagement>

View File

@ -0,0 +1,59 @@
<!--
  ~ Copyright (c) 2020-2024 nosqlbench
  ~
  ~ Licensed under the Apache License, Version 2.0 (the "License");
  ~ you may not use this file except in compliance with the License.
  ~ You may obtain a copy of the License at
  ~
  ~     http://www.apache.org/licenses/LICENSE-2.0
  ~
  ~ Unless required by applicable law or agreed to in writing, software
  ~ distributed under the License is distributed on an "AS IS" BASIS,
  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  ~ See the License for the specific language governing permissions and
  ~ limitations under the License.
  -->
<!-- The XML declaration is intentionally omitted (kept commented out below) because
     the license header above must not be preceded by it in this file layout. -->
<!--<?xml version="1.0" encoding="UTF-8"?>-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <artifactId>adapter-azure-aisearch</artifactId>
    <packaging>jar</packaging>
    <parent>
        <artifactId>mvn-defaults</artifactId>
        <groupId>io.nosqlbench</groupId>
        <version>${revision}</version>
        <relativePath>../../mvn-defaults</relativePath>
    </parent>
    <name>${project.artifactId}</name>
    <description>
        A nosqlbench adapter driver module for the Azure AI Search database.
    </description>
    <dependencies>
        <dependency>
            <groupId>io.nosqlbench</groupId>
            <artifactId>nb-annotations</artifactId>
            <version>${revision}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>io.nosqlbench</groupId>
            <artifactId>adapters-api</artifactId>
            <version>${revision}</version>
            <scope>compile</scope>
        </dependency>
        <!-- Versions for the Azure artifacts below are managed in mvn-defaults dependencyManagement. -->
        <dependency>
            <groupId>com.azure</groupId>
            <artifactId>azure-search-documents</artifactId>
        </dependency>
        <dependency>
            <groupId>com.azure</groupId>
            <artifactId>azure-identity</artifactId>
        </dependency>
    </dependencies>
</project>

View File

@ -0,0 +1,65 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import com.azure.search.documents.SearchDocument;
import com.azure.search.documents.util.SearchPagedIterable;
public class AzureAISearchAdapterUtils {
    /** Driver adapter selector name used by {@code @Service} annotations and workload templates. */
    public static final String AZURE_AI_SEARCH = "azure_aisearch";

    /**
     * Splits a delimited string into its non-blank tokens. Delimiters are runs of
     * spaces and/or a comma with optional surrounding spaces,
     * e.g. {@code "a b, c ,d"} yields {@code [a, b, c, d]}.
     *
     * @param input the delimited string; must be non-null and non-blank
     * @return an unmodifiable list of the non-blank tokens
     * @throws IllegalArgumentException if {@code input} is null or blank
     */
    public static List<String> splitNames(String input) {
        // isNotBlank already implies isNotEmpty, so a single check suffices. An explicit
        // exception replaces the original `assert`, which is disabled at runtime by default.
        if (StringUtils.isBlank(input)) {
            throw new IllegalArgumentException("input must not be null or blank");
        }
        return Arrays.stream(input.split("( +| *, *)")).filter(StringUtils::isNotBlank).toList();
    }

    /**
     * Splits a delimited string of numbers into a list of longs, using the same
     * delimiters as {@link #splitNames(String)}.
     *
     * @param input the delimited string; must be non-null and non-blank
     * @return an unmodifiable list of the parsed values
     * @throws IllegalArgumentException if {@code input} is null or blank
     * @throws NumberFormatException    if any token is not a valid long
     */
    public static List<Long> splitLongs(String input) {
        if (StringUtils.isBlank(input)) {
            throw new IllegalArgumentException("input must not be null or blank");
        }
        return Arrays.stream(input.split("( +| *, *)")).filter(StringUtils::isNotBlank).map(Long::parseLong).toList();
    }

    /**
     * Mask the numeric digits in the given string with '*'. Used to avoid logging
     * secrets such as api keys in the clear.
     *
     * @param unmasked The string to mask; must be non-null and non-blank
     * @return The masked string
     * @throws IllegalArgumentException if {@code unmasked} is null or blank
     */
    protected static String maskDigits(String unmasked) {
        if (StringUtils.isBlank(unmasked)) {
            throw new IllegalArgumentException("unmasked must not be null or blank");
        }
        StringBuilder masked = new StringBuilder(unmasked.length());
        for (char ch : unmasked.toCharArray()) {
            masked.append(Character.isDigit(ch) ? '*' : ch);
        }
        return masked.toString();
    }

    /**
     * Extracts the string form of the named field from every document in the
     * given search response.
     *
     * @param fieldName the document field to extract
     * @param response  the paged search response to drain
     * @return one string per result document, in iteration order
     */
    public String[] responseFieldToStringArray(String fieldName, SearchPagedIterable response) {
        return response.stream()
                .map(searchResult -> searchResult.getDocument(SearchDocument.class).get(fieldName).toString())
                .toArray(String[]::new);
    }
}

View File

@ -0,0 +1,55 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch;
import java.util.function.Function;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapters.api.activityimpl.OpMapper;
import io.nosqlbench.adapters.api.activityimpl.uniform.BaseDriverAdapter;
import io.nosqlbench.adapters.api.activityimpl.uniform.DriverAdapter;
import io.nosqlbench.nb.annotations.Service;
import io.nosqlbench.nb.api.components.core.NBComponent;
import io.nosqlbench.nb.api.config.standard.NBConfigModel;
import io.nosqlbench.nb.api.config.standard.NBConfiguration;
import io.nosqlbench.nb.api.labels.NBLabels;
import static io.nosqlbench.adapter.azureaisearch.AzureAISearchAdapterUtils.AZURE_AI_SEARCH;
@Service(value = DriverAdapter.class, selector = AZURE_AI_SEARCH)
public class AzureAISearchDriverAdapter extends BaseDriverAdapter<AzureAISearchBaseOp<?>, AzureAISearchSpace> {

    /**
     * Builds the Azure AI Search driver adapter under the given parent component,
     * carrying the supplied labels.
     */
    public AzureAISearchDriverAdapter(NBComponent parentComponent, NBLabels labels) {
        super(parentComponent, labels);
    }

    /** Supplies the op mapper which resolves parsed op templates to concrete dispensers. */
    @Override
    public OpMapper<AzureAISearchBaseOp<?>> getOpMapper() {
        return new AzureAISearchOpMapper(this);
    }

    /** Supplies a factory which builds one {@link AzureAISearchSpace} per space name. */
    @Override
    public Function<String, ? extends AzureAISearchSpace> getSpaceInitializer(NBConfiguration cfg) {
        return spaceName -> new AzureAISearchSpace(spaceName, cfg);
    }

    /** Combines the base adapter configuration model with the space-specific parameters. */
    @Override
    public NBConfigModel getConfigModel() {
        NBConfigModel baseModel = super.getConfigModel();
        return baseModel.add(AzureAISearchSpace.getConfigModel());
    }
}

View File

@ -0,0 +1,32 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch;
import static io.nosqlbench.adapter.azureaisearch.AzureAISearchAdapterUtils.AZURE_AI_SEARCH;
import io.nosqlbench.adapter.diag.DriverAdapterLoader;
import io.nosqlbench.nb.annotations.Service;
import io.nosqlbench.nb.api.components.core.NBComponent;
import io.nosqlbench.nb.api.labels.NBLabels;
@Service(value = DriverAdapterLoader.class, selector = AZURE_AI_SEARCH)
public class AzureAISearchDriverAdapterLoader implements DriverAdapterLoader {

    /**
     * Instantiates an {@link AzureAISearchDriverAdapter} under the given parent
     * component, attaching the provided child labels.
     */
    @Override
    public AzureAISearchDriverAdapter load(NBComponent parentComponent, NBLabels labels) {
        return new AzureAISearchDriverAdapter(parentComponent, labels);
    }
}

View File

@ -0,0 +1,75 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchBaseOpDispenser;
import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchCreateOrUpdateIndexOpDispenser;
import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchDeleteIndexOpDispenser;
import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchListIndexesOpDispenser;
import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchSearchDocumentsOpDispenser;
import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchUploadDocumentsOpDispenser;
import io.nosqlbench.adapter.azureaisearch.types.AzureAISearchOpType;
import io.nosqlbench.adapters.api.activityimpl.OpDispenser;
import io.nosqlbench.adapters.api.activityimpl.OpMapper;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.nosqlbench.engine.api.templating.TypeAndTarget;
public class AzureAISearchOpMapper implements OpMapper<AzureAISearchBaseOp<?>> {
    private static final Logger logger = LogManager.getLogger(AzureAISearchOpMapper.class);
    private final AzureAISearchDriverAdapter adapter;

    /**
     * Creates an op mapper bound to the given {@link AzureAISearchDriverAdapter}.
     *
     * @param adapter The associated {@link AzureAISearchDriverAdapter}
     */
    public AzureAISearchOpMapper(AzureAISearchDriverAdapter adapter) {
        this.adapter = adapter;
    }

    /**
     * Resolves a {@link ParsedOp} to the matching {@link AzureAISearchBaseOpDispenser}
     * subclass based on the op template's declared type.
     *
     * @param op The {@link ParsedOp} to be evaluated
     * @return The dispenser for the resolved op type
     */
    @Override
    public OpDispenser<? extends AzureAISearchBaseOp<?>> apply(ParsedOp op) {
        TypeAndTarget<AzureAISearchOpType, String> resolved =
                op.getTypeAndTarget(AzureAISearchOpType.class, String.class, "type", "target");
        logger.info(() -> "Using '" + resolved.enumId + "' op type for op template '" + op.getName() + "'");

        // The switch is exhaustive over AzureAISearchOpType, so no default branch is required;
        // adding a new enum constant will surface as a compile error here.
        return switch (resolved.enumId) {
            case create_or_update_index ->
                new AzureAISearchCreateOrUpdateIndexOpDispenser(adapter, op, resolved.targetFunction);
            case delete_index ->
                new AzureAISearchDeleteIndexOpDispenser(adapter, op, resolved.targetFunction);
            case list_indexes ->
                new AzureAISearchListIndexesOpDispenser(adapter, op, resolved.targetFunction);
            case upload_documents ->
                new AzureAISearchUploadDocumentsOpDispenser(adapter, op, resolved.targetFunction);
            case search_documents ->
                new AzureAISearchSearchDocumentsOpDispenser(adapter, op, resolved.targetFunction);
        };
    }
}

View File

@ -0,0 +1,141 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.credential.TokenCredential;
import com.azure.identity.DefaultAzureCredentialBuilder;
import com.azure.search.documents.SearchServiceVersion;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.indexes.SearchIndexClientBuilder;
import io.nosqlbench.nb.api.config.standard.ConfigModel;
import io.nosqlbench.nb.api.config.standard.NBConfigModel;
import io.nosqlbench.nb.api.config.standard.NBConfiguration;
import io.nosqlbench.nb.api.config.standard.Param;
/**
* The {@code AzureAISearchSpace} class is a context object which stores all
* stateful contextual information needed to interact with the
* <b>{@code Azure AI Search}</b> database instance.
*
* @see <a href=
* "https://learn.microsoft.com/en-us/azure/developer/java/sdk/troubleshooting-dependency-version-conflict">Troubleshooting
* guide</a>
* @see <a href=
* "https://learn.microsoft.com/en-us/java/api/overview/azure/search-documents-readme?view=azure-java-stable">AI
* Search quick start guide</a>
* @see <a href=
* "https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/search/azure-search-documents/">Azure
* AI Search Java searchIndexClient</a>
*/
public class AzureAISearchSpace implements AutoCloseable {
    private final static Logger logger = LogManager.getLogger(AzureAISearchSpace.class);

    // Space name; used only for log correlation in this class.
    private final String name;
    // Activity configuration; read lazily when the client is first constructed.
    private final NBConfiguration cfg;

    // Lazily-created, shared client instance; guarded by the synchronized accessor below.
    protected SearchIndexClient searchIndexClient;
    // protected SearchClient searchClient;

    /**
     * Create a new {@code AzureAISearchSpace} Object which stores all stateful
     * contextual information needed to interact with the <b>Azure AI Search</b>
     * database instance.
     *
     * @param name The name of this space
     * @param cfg  The configuration ({@link NBConfiguration}) for this nb run
     */
    public AzureAISearchSpace(String name, NBConfiguration cfg) {
        this.name = name;
        this.cfg = cfg;
    }

    /**
     * Returns the lazily-initialized {@link SearchIndexClient} for this space.
     * Synchronized so that concurrent callers observe exactly one client instance.
     */
    public synchronized SearchIndexClient getSearchIndexClient() {
        if (searchIndexClient == null) {
            searchIndexClient = createSearchClients();
        }
        return searchIndexClient;
    }

    // public synchronized SearchClient getSearchClient() {
    // if (searchClient == null) {
    // createSearchClients();
    // }
    // return searchClient;
    // }

    /**
     * Builds the {@link SearchIndexClient} from configuration: resolves the endpoint,
     * the api token/key (from 'token_file' or 'token'), and the service api version.
     */
    private SearchIndexClient createSearchClients() {
        String uri = cfg.get("endpoint");
        // 'token_file' takes precedence over 'token'; only the first line of the file is used.
        var requiredToken = cfg.getOptional("token_file").map(Paths::get).map(tokenFilePath -> {
            try {
                return Files.readAllLines(tokenFilePath).getFirst();
            } catch (IOException e) {
                String error = "Error while reading token from file:" + tokenFilePath;
                logger.error(error, e);
                throw new RuntimeException(e);
            }
        }).orElseGet(() -> cfg.getOptional("token").orElseThrow(() -> new RuntimeException(
                "You must provide either a 'token_file' or a 'token' to configure a Azure AI Search client")));
        // NOTE(review): 'token' is declared with a defaultTo in getConfigModel() below, so the
        // orElseThrow above appears unreachable and the blank-token branch below (credential-chain
        // auth via DefaultAzureCredentialBuilder) seems only reachable with an explicitly blank
        // token — confirm this is the intended way to select managed-identity auth.
        logger.info("{}: Creating new Azure AI Search Client with (masked) token/key [{}], uri/endpoint [{}]",
                this.name, AzureAISearchAdapterUtils.maskDigits(requiredToken), uri);
        var searchIndexClientBuilder = new SearchIndexClientBuilder().endpoint(uri);
        // var searchClientBuilder = new SearchClientBuilder().endpoint(uri);
        if (!requiredToken.isBlank()) {
            // Key-based auth: the admin/query key is passed as an AzureKeyCredential.
            searchIndexClientBuilder = searchIndexClientBuilder.credential(new AzureKeyCredential(requiredToken));
            // searchClientBuilder = searchClientBuilder.credential(new AzureKeyCredential(requiredToken));
        } else {
            // Token-based auth: fall back to the default Azure credential chain
            // (environment, managed identity, CLI login, ...).
            TokenCredential tokenCredential = new DefaultAzureCredentialBuilder().build();
            searchIndexClientBuilder = searchIndexClientBuilder.credential(tokenCredential);
            // searchClientBuilder = searchClientBuilder.credential(tokenCredential);
        }
        // Should we leave these below to leverage the SearchServiceVersion.getLatest()?
        // The configured value must match a SearchServiceVersion enum constant name (e.g. 'V2024_07_01').
        String apiVersion = cfg.getOptional("api_version").orElse(SearchServiceVersion.V2024_07_01.name());
        logger.warn(
                "Latest search service version supported by this client is '{}', but we're using '{}' version. Ignore this warning if both are same.",
                SearchServiceVersion.getLatest(), apiVersion);
        // TODO - try to find a way to get rid of placeholder
        // this.searchClient = searchClientBuilder.serviceVersion(SearchServiceVersion.valueOf(apiVersion))
        // .indexName("PLACEHOLDER").buildClient();
        return searchIndexClientBuilder.serviceVersion(SearchServiceVersion.valueOf(apiVersion)).buildClient();
    }

    /**
     * Declares the space-level configuration parameters: token_file, token,
     * endpoint, and api_version.
     */
    public static NBConfigModel getConfigModel() {
        return ConfigModel.of(AzureAISearchSpace.class)
                .add(Param.optional("token_file", String.class, "the file to load the api token/key from"))
                .add(Param.defaultTo("token", "azure-aisearch-admin-key-changeme")
                        .setDescription("the Azure AI Search api token/key to use to connect to the database"))
                .add(Param.defaultTo("endpoint", "localhost:8080").setDescription(
                        "the URI endpoint in which the database is running. Check out https://learn.microsoft.com/en-us/azure/search/search-create-service-portal."))
                .add(Param.optional("api_version", String.class,
                        "the api version to be used. Example 'V2024-07-01'. Defaults to latest service version supported by the SDK client version"))
                .asReadOnly();
    }

    /**
     * Drops the cached client reference. The SDK client exposes no explicit
     * shutdown hook here, so this only enables re-initialization / GC.
     */
    @Override
    public void close() throws Exception {
        searchIndexClient = null;
    }
}

View File

@ -0,0 +1,78 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.ops;
import java.util.function.LongFunction;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.azure.search.documents.indexes.SearchIndexClient;
import io.nosqlbench.adapters.api.activityimpl.uniform.flowtypes.CycleOp;
/**
 * Base type for all Azure AI Search operations. Holds the shared
 * {@link SearchIndexClient}, the op-specific request payload, and funnels
 * execution through {@link #applyOp(long)}.
 *
 * @param <T> the request payload type for the concrete operation
 */
public abstract class AzureAISearchBaseOp<T> implements CycleOp<Object> {

    protected final static Logger logger = LogManager.getLogger(AzureAISearchBaseOp.class);

    protected final SearchIndexClient searchIndexClient;
    protected final T request;
    protected final LongFunction<Object> apiCall;

    /**
     * @param searchIndexClient the shared index client for this space
     * @param requestParam      the op-specific request payload
     */
    public AzureAISearchBaseOp(SearchIndexClient searchIndexClient, T requestParam) {
        this.searchIndexClient = searchIndexClient;
        this.request = requestParam;
        this.apiCall = this::applyOp;
    }

    /**
     * @param searchIndexClient the shared index client for this space
     * @param requestParam      the op-specific request payload
     * @param call              an alternate call to invoke instead of {@link #applyOp(long)}
     */
    public AzureAISearchBaseOp(SearchIndexClient searchIndexClient, T requestParam, LongFunction<Object> call) {
        this.searchIndexClient = searchIndexClient;
        this.request = requestParam;
        this.apiCall = call;
    }

    /**
     * Executes {@link #applyOp(long)} for the given cycle, rethrowing runtime
     * exceptions as-is and wrapping checked exceptions in {@link RuntimeException}.
     */
    @Override
    public final Object apply(long value) {
        // Parameterized logging avoids the string concatenation when TRACE is disabled.
        logger.trace("applying op: {}", this);
        try {
            return applyOp(value);
        } catch (RuntimeException rte) {
            // Runtime exceptions propagate unchanged so callers see the original type.
            throw rte;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /** Performs the concrete operation for the given cycle value. */
    public abstract Object applyOp(long value);

    @Override
    public String toString() {
        return "AzureAISearchBaseOp(" + this.request.getClass().getSimpleName() + ")";
    }
}

View File

@ -0,0 +1,28 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.ops;
import com.azure.search.documents.SearchClient;
import com.azure.search.documents.indexes.SearchIndexClient;
/**
 * Base type for operations which act on documents within a specific index and
 * therefore need a {@link SearchClient} in addition to the index-level client.
 *
 * @param <T> the request payload type for the concrete operation
 */
public abstract class AzureAISearchClientBaseOp<T> extends AzureAISearchBaseOp<T> {

    protected final SearchClient searchClient;

    /**
     * @param searchIndexClient the index-level client shared by all ops in the space
     * @param searchClient      the document-level client bound to the target index
     * @param requestPayload    the op-specific request payload
     */
    public AzureAISearchClientBaseOp(SearchIndexClient searchIndexClient, SearchClient searchClient,
            T requestPayload) {
        super(searchIndexClient, requestPayload);
        this.searchClient = searchClient;
    }
}

View File

@ -0,0 +1,47 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.ops;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.indexes.models.SearchIndex;
/**
 * Creates the given search index, or updates it if an index with the same name
 * already exists.
 */
public class AzureAISearchCreateOrUpdateIndexOp extends AzureAISearchBaseOp<SearchIndex> {

    public AzureAISearchCreateOrUpdateIndexOp(SearchIndexClient client, SearchIndex request) {
        super(client, request);
    }

    /**
     * Issues the create-or-update call and returns the resulting {@link SearchIndex}.
     * Runtime exceptions propagate to the caller (handled by the base op's apply()).
     */
    @Override
    public Object applyOp(long value) {
        if (logger.isDebugEnabled()) {
            request.getFields().forEach(field -> logger.debug(
                    "SearchIndex field: Name:{}-ProfileName:{}-Type:{}-Dimension:{}",
                    field.getName(), field.getVectorSearchProfileName(), field.getType().toString(),
                    field.getVectorSearchDimensions()));
        }
        SearchIndex createResponse = searchIndexClient.createOrUpdateIndex(request);
        logger.debug("Successfully created or updated the index: {}", createResponse.toString());
        return createResponse;
    }
}

View File

@ -0,0 +1,37 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.ops;
import com.azure.search.documents.indexes.SearchIndexClient;
/**
 * Deletes the search index whose name is given as the request payload.
 */
public class AzureAISearchDeleteIndexOp extends AzureAISearchBaseOp<String> {

    public AzureAISearchDeleteIndexOp(SearchIndexClient client, String request) {
        super(client, request);
    }

    /**
     * Deletes the index named by {@code request} and returns a fixed status string.
     * Runtime exceptions propagate to the caller (handled by the base op's apply()).
     */
    @Override
    public Object applyOp(long value) {
        searchIndexClient.deleteIndex(request);
        logger.debug("Successfully deleted the index: {}", request);
        return "Deleted";
    }
}

View File

@ -0,0 +1,47 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.ops;
import com.azure.core.http.rest.PagedIterable;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.indexes.models.SearchIndex;
/**
 * Lists all indexes in the search service and logs each index's name, ETag,
 * and field metadata.
 */
public class AzureAISearchListIndexesOp extends AzureAISearchBaseOp<String> {

    public AzureAISearchListIndexesOp(SearchIndexClient client, String request) {
        super(client, request);
    }

    /**
     * Enumerates the service's indexes, logging each at INFO level, and returns a
     * fixed status string. Runtime exceptions propagate to the caller (handled by
     * the base op's apply()).
     */
    @Override
    public Object applyOp(long value) {
        PagedIterable<SearchIndex> response = searchIndexClient.listIndexes();
        response.forEach(index -> {
            logger.info("Indexes available are: Name: {}, ETag: {}", index.getName(), index.getETag());
            index.getFields().forEach(field -> logger.info(
                    "Field Name: {}, Field isKey?: {}, Field Dimension: {}, Field Vector Search Profile: {}",
                    field.getName(), field.isKey(), field.getVectorSearchDimensions(),
                    field.getVectorSearchProfileName()));
        });
        return "Listed indexes";
    }
}

View File

@ -0,0 +1,53 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.ops;
import com.azure.core.util.Context;
import com.azure.search.documents.SearchClient;
import com.azure.search.documents.SearchDocument;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.models.SearchOptions;
import com.azure.search.documents.util.SearchPagedIterable;
/**
 * Executes a document search against the bound index using the configured
 * {@link SearchOptions} (e.g. vector search parameters).
 */
public class AzureAISearchSearchDocumentsOp extends AzureAISearchClientBaseOp<SearchOptions> {

    public AzureAISearchSearchDocumentsOp(SearchIndexClient searchIndexClient, SearchClient searchClient,
            SearchOptions request) {
        super(searchIndexClient, searchClient, request);
    }

    /**
     * Runs the search and returns the {@link SearchPagedIterable} response.
     * Runtime exceptions propagate to the caller (handled by the base op's apply()).
     */
    @Override
    public Object applyOp(long value) {
        // A null search text is passed because only options-driven (e.g. vector) search
        // is implemented here; other complex searches are not supported yet.
        SearchPagedIterable searchDocsResponse = searchClient.search(null, request, Context.NONE);
        // Guard with isDebugEnabled to match the log level used inside; the original
        // guard checked INFO while logging at DEBUG, wasting iteration at INFO level.
        if (logger.isDebugEnabled()) {
            searchDocsResponse.forEach(result -> {
                SearchDocument doc = result.getDocument(SearchDocument.class);
                logger.debug(
                        "Successfully searched the index and returned id: {}, score: {}, vector embedding: {}",
                        doc.get("id"), result.getScore(), doc.get("value"));
            });
        }
        return searchDocsResponse;
    }
}

View File

@ -0,0 +1,56 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.ops;
import java.util.List;
import com.azure.search.documents.SearchClient;
import com.azure.search.documents.SearchDocument;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.models.IndexDocumentsResult;
/**
 * Uploads a single {@link SearchDocument} to the bound index.
 */
public class AzureAISearchUploadDocumentsOp extends AzureAISearchClientBaseOp<SearchDocument> {

    public AzureAISearchUploadDocumentsOp(SearchIndexClient searchIndexClient, SearchClient searchClient,
            SearchDocument request) {
        super(searchIndexClient, searchClient, request);
    }

    /**
     * Uploads the request document and returns the {@link IndexDocumentsResult}.
     * Runtime exceptions propagate to the caller (handled by the base op's apply()).
     */
    @Override
    public Object applyOp(long value) {
        IndexDocumentsResult uploadDocsResponse = searchClient.uploadDocuments(List.of(request));
        if (logger.isDebugEnabled()) {
            uploadDocsResponse.getResults().forEach(result -> logger.debug(
                    "Successfully uploaded the document with return status code: {}, key: {}, succeeded?: {}, error message: {}",
                    result.getStatusCode(), result.getKey(), result.isSucceeded(), result.getErrorMessage()));
        }
        return uploadDocsResponse;
    }
}

View File

@ -0,0 +1,71 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.nosqlbench.adapter.azureaisearch.opsdispenser;
import java.util.function.LongFunction;
import com.azure.search.documents.indexes.SearchIndexClient;
import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.AzureAISearchSpace;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapters.api.activityimpl.BaseOpDispenser;
import io.nosqlbench.adapters.api.activityimpl.uniform.DriverAdapter;
import io.nosqlbench.adapters.api.templating.ParsedOp;
/**
 * Base dispenser for Azure AI Search ops. Resolves the per-cycle space and its
 * {@link SearchIndexClient}, then delegates op construction to the subclass
 * hooks {@link #getParamFunc} and {@link #createOpFunc}.
 *
 * @param <T> the request payload type produced for each cycle
 */
public abstract class AzureAISearchBaseOpDispenser<T>
        extends BaseOpDispenser<AzureAISearchBaseOp<T>, AzureAISearchSpace> {

    protected final LongFunction<AzureAISearchSpace> azureAISearchSpaceFunction;
    protected final LongFunction<SearchIndexClient> clientFunction;
    private final LongFunction<? extends AzureAISearchBaseOp<T>> opF;
    private final LongFunction<T> paramF;

    @SuppressWarnings("rawtypes")
    protected AzureAISearchBaseOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
            LongFunction<String> targetF) {
        super((DriverAdapter) adapter, op);
        this.azureAISearchSpaceFunction = adapter.getSpaceFunc(op);
        this.clientFunction = (long l) -> {
            // Propagate client-initialization failures instead of swallowing them and
            // returning null, which would only surface later as an opaque NPE.
            try {
                return this.azureAISearchSpaceFunction.apply(l).getSearchIndexClient();
            } catch (RuntimeException rte) {
                throw rte;
            } catch (Exception e) {
                throw new RuntimeException("Unable to obtain the Azure AI Search index client", e);
            }
        };
        this.paramF = getParamFunc(this.clientFunction, op, targetF);
        this.opF = createOpFunc(paramF, this.clientFunction, op, targetF);
    }

    /** Narrows the inherited adapter reference to the concrete adapter type. */
    protected AzureAISearchDriverAdapter getDriverAdapter() {
        return (AzureAISearchDriverAdapter) adapter;
    }

    /** Builds the per-cycle request payload function for this op type. */
    public abstract LongFunction<T> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
            LongFunction<String> targetF);

    /** Builds the per-cycle op constructor function for this op type. */
    public abstract LongFunction<AzureAISearchBaseOp<T>> createOpFunc(LongFunction<T> paramF,
            LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF);

    @Override
    public AzureAISearchBaseOp<T> getOp(long value) {
        return opF.apply(value);
    }
}

View File

@ -0,0 +1,407 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.opsdispenser;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.LongFunction;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.indexes.models.BinaryQuantizationCompression;
import com.azure.search.documents.indexes.models.ExhaustiveKnnAlgorithmConfiguration;
import com.azure.search.documents.indexes.models.ExhaustiveKnnParameters;
import com.azure.search.documents.indexes.models.HnswAlgorithmConfiguration;
import com.azure.search.documents.indexes.models.HnswParameters;
import com.azure.search.documents.indexes.models.ScalarQuantizationCompression;
import com.azure.search.documents.indexes.models.ScalarQuantizationParameters;
import com.azure.search.documents.indexes.models.SearchField;
import com.azure.search.documents.indexes.models.SearchFieldDataType;
import com.azure.search.documents.indexes.models.SearchIndex;
import com.azure.search.documents.indexes.models.VectorSearch;
import com.azure.search.documents.indexes.models.VectorSearchAlgorithmConfiguration;
import com.azure.search.documents.indexes.models.VectorSearchAlgorithmMetric;
import com.azure.search.documents.indexes.models.VectorSearchCompression;
import com.azure.search.documents.indexes.models.VectorSearchCompressionTarget;
import com.azure.search.documents.indexes.models.VectorSearchProfile;
import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchCreateOrUpdateIndexOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.nosqlbench.nb.api.errors.OpConfigError;
/**
* @see <a href=
* "https://learn.microsoft.com/en-us/rest/api/searchservice/indexes/create-or-update?view=rest-searchservice-2024-07-01&tabs=HTTP">API
* Reference</a>
* @see <a href=
* "https://learn.microsoft.com/en-us/java/api/com.azure.search.documents?view=azure-java-stable">Index
* docs</a>
*/
public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBaseOpDispenser<SearchIndex> {
    // NOTE(review): both fields below are instance state mutated from inside the
    // per-cycle lambdas further down. If a dispenser is driven from multiple
    // threads this is a data race, and each assignment relies on a particular
    // map-key ordering ("type" before other field keys, "algorithm" before
    // "compression") to be set before first use — TODO confirm and consider
    // making these locals inside the lambdas.
    private SearchField searchField;
    private VectorSearchProfile vsProfile;

    public AzureAISearchCreateOrUpdateIndexOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
            LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    /**
     * Builds the per-cycle {@link SearchIndex} definition from the op template:
     * the index name comes from {@code targetF}, and the optional "fields" and
     * "vectorSearch" sections are translated into SDK model objects.
     */
    @SuppressWarnings("rawtypes")
    @Override
    public LongFunction<SearchIndex> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
            LongFunction<String> targetF) {
        logger.debug(">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>getParamFunc");
        LongFunction<SearchIndex> ebF = l -> new SearchIndex(targetF.apply(l));
        // Optional "fields" section -> index schema (List<SearchField>).
        Optional<LongFunction<Map>> fieldsMapF = op.getAsOptionalFunction("fields", Map.class);
        if (fieldsMapF.isPresent()) {
            final LongFunction<List<SearchField>> searchFieldListF = buildFieldsStruct(op);
            final LongFunction<SearchIndex> fieldsF = ebF;
            ebF = l -> fieldsF.apply(l).setFields(searchFieldListF.apply(l));
        }
        // Optional "vectorSearch" section -> algorithms/compressions/profiles.
        Optional<LongFunction<Map>> vsearchMapF = op.getAsOptionalFunction("vectorSearch", Map.class);
        if (vsearchMapF.isPresent()) {
            final LongFunction<VectorSearch> vSearchF = buildVectorSearchStruct(op);
            final LongFunction<SearchIndex> vsF = ebF;
            ebF = l -> vsF.apply(l).setVectorSearch(vSearchF.apply(l));
        }
        final LongFunction<SearchIndex> lastF = ebF;
        return l -> lastF.apply(l);
    }

    /**
     * Translates the "vectorSearch" map of the op template into a
     * {@link VectorSearch} model. The map is expected to have the shape
     * {@code {compressions|algorithms|profiles: {name: {property: value, ...}}}};
     * any non-map value raises an {@link OpConfigError}.
     */
    @SuppressWarnings({ "unchecked", "rawtypes", "static-access" })
    private LongFunction<VectorSearch> buildVectorSearchStruct(ParsedOp op) {
        Optional<LongFunction<Map>> baseFunc = op.getAsOptionalFunction("vectorSearch", Map.class);
        return baseFunc.<LongFunction<VectorSearch>>map(mapLongFunc -> l -> {
            Map<String, Object> vsMap = mapLongFunc.apply(l);
            VectorSearch vectorSearch = new VectorSearch();
            vsMap.forEach((vsField, vsValue) -> {
                logger.debug(
                        ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>vsField:{} vsValue:{}",
                        vsField, vsValue);
                if (vsValue instanceof Map) {
                    ((Map<String, Object>) vsValue).forEach((innerKey, innerValue) -> {
                        logger.debug(
                                ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>innerKey:{} innerValue:{}",
                                innerKey, innerValue);
                        // --- compressions: scalarQuantization | binaryQuantization | generic ---
                        if ("compressions".equals(vsField)) {
                            List<VectorSearchCompression> vsCompList = new ArrayList<>();
                            String kind;
                            if (((Map<String, Object>) innerValue).containsKey("kind")) {
                                kind = (String) ((Map<String, Object>) innerValue).get("kind");
                                logger.debug(
                                        ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>compressions>>>>kind:{}",
                                        kind);
                                if (kind.equals("scalarQuantization")) {
                                    // innerKey is the user-chosen compression configuration name.
                                    ScalarQuantizationCompression sqComp = new ScalarQuantizationCompression(innerKey);
                                    ((Map<String, Object>) innerValue).forEach((compressKey, compressValue) -> {
                                        logger.debug(
                                                ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>kind:{} compressKey:{} compressValue:{}",
                                                kind, compressKey, compressValue);
                                        if (compressKey.equals("kind")) {
                                            // NOTE(review): fromString is a static factory whose
                                            // result is discarded, so this statement is a no-op
                                            // (hence the "static-access" suppression); the kind is
                                            // already fixed by the constructor — TODO confirm.
                                            sqComp.getKind().fromString((String) compressValue);
                                        }
                                        if (compressKey.equals("rerankWithOriginalVectors")) {
                                            sqComp.setRerankWithOriginalVectors((Boolean) compressValue);
                                        }
                                        if (compressKey.equals("defaultOversampling")) {
                                            sqComp.setDefaultOversampling(((Number) compressValue).doubleValue());
                                        }
                                        if (compressKey.equals("scalarQuantizationParameters")) {
                                            ScalarQuantizationParameters sqParams = new ScalarQuantizationParameters();
                                            ((Map<String, Object>) compressValue).forEach((sqKey, sqVal) -> {
                                                if (sqKey.equals("quantizedDataType")) {
                                                    sqParams.setQuantizedDataType(
                                                            VectorSearchCompressionTarget.fromString((String) sqVal));
                                                }
                                            });
                                            sqComp.setParameters(sqParams);
                                        }
                                    });
                                    vsCompList.add(sqComp);
                                    // vsCompList.add(buildVectorSearchCompression(bqComp, compressKey, compressValue, true));
                                } else {
                                    // BinaryQuantization is assumed here
                                    BinaryQuantizationCompression bqComp = new BinaryQuantizationCompression(innerKey);
                                    ((Map<String, Object>) innerValue).forEach((compressKey, compressValue) -> {
                                        logger.debug(
                                                ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>kind:{} compressKey:{} compressValue:{}",
                                                kind, compressKey, compressValue);
                                        if (compressKey.equals("kind")) {
                                            // NOTE(review): same discarded-static-result no-op as
                                            // in the scalarQuantization branch above.
                                            bqComp.getKind().fromString((String) compressValue);
                                        }
                                        if (compressKey.equals("rerankWithOriginalVectors")) {
                                            bqComp.setRerankWithOriginalVectors((Boolean) compressValue);
                                        }
                                        if (compressKey.equals("defaultOversampling")) {
                                            bqComp.setDefaultOversampling(((Number) compressValue).doubleValue());
                                        }
                                    });
                                    vsCompList.add(bqComp);
                                    // vsCompList.add(
                                    // buildVectorSearchCompression(bqComp, compressKey, compressValue, false));
                                }
                            } else {
                                // No "kind" key: fall back to the base compression type.
                                VectorSearchCompression vsComp = new VectorSearchCompression(innerKey);
                                ((Map<String, Object>) innerValue).forEach((compressKey, compressValue) -> {
                                    logger.debug(
                                            ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>kind:{} compressKey:{} compressValue:{}",
                                            null, compressKey, compressValue);
                                    if (compressKey.equals("kind")) {
                                        // NOTE(review): discarded static result — no-op, as above.
                                        vsComp.getKind().fromString((String) compressValue);
                                    }
                                    if (compressKey.equals("rerankWithOriginalVectors")) {
                                        vsComp.setRerankWithOriginalVectors((Boolean) compressValue);
                                    }
                                    if (compressKey.equals("defaultOversampling")) {
                                        vsComp.setDefaultOversampling(((Number) compressValue).doubleValue());
                                    }
                                });
                                vsCompList.add(vsComp);
                            }
                            vectorSearch.setCompressions(vsCompList);
                            vectorSearch.getCompressions().forEach((comp) -> {
                                logger.debug(
                                        ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>compressions FINAL: Name:{}",
                                        comp.getCompressionName());
                            });
                        }
                        // --- algorithms: hnsw | exhaustiveKnn ---
                        if ("algorithms".equals(vsField)) {
                            List<VectorSearchAlgorithmConfiguration> vsAlgoList = new ArrayList<>();
                            String kind;
                            if (((Map<String, Object>) innerValue).containsKey("kind")) {
                                kind = (String) ((Map<String, Object>) innerValue).get("kind");
                                logger.debug(
                                        ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms>>>>kind:{}",
                                        kind);
                                if ("hnsw".equals(kind)) {
                                    HnswAlgorithmConfiguration hnswAlgoConf = new HnswAlgorithmConfiguration(innerKey);
                                    ((Map<String, Object>) innerValue).forEach((hnswKey, hnswValue) -> {
                                        logger.debug(
                                                ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms>>>>kind:{} hnswKey:{} hnswValue:{}",
                                                kind, hnswKey, hnswValue);
                                        if ("hnswParameters".equals(hnswKey)) {
                                            // NOTE(review): this iterates innerValue (the algorithm
                                            // map) rather than hnswValue (the hnswParameters map),
                                            // and constructs a fresh HnswParameters per key — the
                                            // last setParameters call wins. TODO confirm whether
                                            // hnswValue was intended here.
                                            ((Map<String, Object>) innerValue)
                                                    .forEach((hnswParamsKey, hnswParamsValue) -> {
                                                        logger.debug(
                                                                ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms>>>>kind:{} hnswKey:{} hnswValue:{} hnswParamsKey:{} hnswParamsValue:{}",
                                                                kind, hnswKey, hnswValue, hnswParamsKey,
                                                                hnswParamsValue);
                                                        HnswParameters hnswParams = new HnswParameters();
                                                        if ("m".equals(hnswParamsKey)) {
                                                            hnswParams.setM(((Number) hnswParamsValue).intValue());
                                                        }
                                                        if ("efConstruction".equals(hnswParamsKey)) {
                                                            hnswParams.setEfConstruction(
                                                                    ((Number) hnswParamsValue).intValue());
                                                        }
                                                        if ("efSearch".equals(hnswParamsKey)) {
                                                            hnswParams
                                                                    .setEfSearch(((Number) hnswParamsValue).intValue());
                                                        }
                                                        if ("metric".equals(hnswParamsKey)) {
                                                            hnswParams.setMetric(VectorSearchAlgorithmMetric
                                                                    .fromString((String) hnswParamsValue));
                                                        }
                                                        hnswAlgoConf.setParameters(hnswParams);
                                                    });
                                        }
                                    });
                                    vsAlgoList.add(hnswAlgoConf);
                                }
                                if ("exhaustiveKnn".equals(kind)) {
                                    ExhaustiveKnnAlgorithmConfiguration exhausKnnAlgoConf = new ExhaustiveKnnAlgorithmConfiguration(
                                            innerKey);
                                    ((Map<String, Object>) innerValue).forEach((algoKey, algoValue) -> {
                                        logger.debug(
                                                ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms>>>>kind:{} algoKey:{} algoValue:{}",
                                                kind, algoKey, algoValue);
                                        if (algoKey.equals("exhaustiveKnnParameters")) {
                                            ExhaustiveKnnParameters eKnnParms = new ExhaustiveKnnParameters();
                                            ((Map<String, Object>) algoValue).forEach((ekpKey, ekpVal) -> {
                                                logger.debug(
                                                        ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms>>>>kind:{} algoKey:{} algoValue:{} ekpKey:{} ekpVal:{}",
                                                        kind, algoKey, algoValue, ekpKey, ekpVal);
                                                if (ekpKey.equals("quantizedDataType")) {
                                                    // NOTE(review): the key name and the setter
                                                    // disagree — this reads "quantizedDataType" but
                                                    // sets the similarity metric. The workload
                                                    // template supplies "metric" under
                                                    // exhaustiveKnnParameters; this looks like it
                                                    // should test for "metric" — TODO confirm.
                                                    eKnnParms.setMetric(
                                                            VectorSearchAlgorithmMetric.fromString((String) ekpVal));
                                                }
                                            });
                                            exhausKnnAlgoConf.setParameters(eKnnParms);
                                        }
                                    });
                                    vsAlgoList.add(exhausKnnAlgoConf);
                                }
                            }
                            vectorSearch.setAlgorithms(vsAlgoList);
                            vectorSearch.getAlgorithms().forEach((algo) -> {
                                logger.debug(
                                        ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms FINAL: Name:{}",
                                        algo.getName());
                            });
                        }
                        // --- profiles: bind a profile name to an algorithm and optional compression ---
                        if ("profiles".equals(vsField)) {
                            logger.debug(
                                    ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles");
                            List<VectorSearchProfile> vsProfileList = new ArrayList<>();
                            // VectorSearchProfile vsProfile = new VectorSearchProfile(innerKey, null);
                            ((Map<String, Object>) vsValue).forEach((profKey, profVal) -> {
                                logger.debug(
                                        ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles: profKey:{} profVal:{}",
                                        profKey, profVal);
                                ((Map<String, Object>) profVal).forEach((pK, pV) -> {
                                    logger.debug(
                                            ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles: profKey:{} profVal:{} pK:{} pV:{}",
                                            profKey, profVal, pK, pV);
                                    // NOTE(review): vsProfile (instance field) is only assigned on
                                    // the "algorithm" key; if "compression" is encountered first —
                                    // map ordering is not guaranteed — this NPEs or reuses a stale
                                    // profile from a prior cycle. TODO confirm.
                                    if ("algorithm".equals(pK)) {
                                        vsProfile = new VectorSearchProfile(profKey, (String) pV);
                                    }
                                    if ("compression".equals(pK)) {
                                        vsProfile.setCompressionName((String) pV);
                                    }
                                    logger.debug(
                                            ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles: Name:{}>>>AlgoName:{}>>>CompressionName:{}",
                                            vsProfile.getName(), vsProfile.getAlgorithmConfigurationName(),
                                            vsProfile.getCompressionName());
                                });
                                vsProfileList.add(vsProfile);
                            });
                            vectorSearch.setProfiles(vsProfileList);
                            vectorSearch.getProfiles().forEach((profile) -> {
                                logger.debug(
                                        ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles FINAL: Name:{} AlgorithmConfName:{}",
                                        profile.getName(), profile.getAlgorithmConfigurationName());
                            });
                        }
                    });
                } else {
                    throw new OpConfigError(
                            "Vector Search properties must be a Map<String, Map<String, Object>>, but got "
                                    + vsValue.getClass().getSimpleName() + " instead for the inner value");
                }
            });
            return vectorSearch;
        }).orElse(null);
    }

    /**
     * Copies common compression properties from a template key/value pair onto an
     * existing {@link VectorSearchCompression}.
     * NOTE(review): this helper appears to be unused — its only call sites are
     * commented out in buildVectorSearchStruct above; candidate for removal.
     */
    @SuppressWarnings({ "unchecked", "static-access" })
    private VectorSearchCompression buildVectorSearchCompression(VectorSearchCompression vsComp, String key, Object val,
            boolean isSQ) {
        if (key.equals("kind")) {
            // NOTE(review): discarded static result — no-op, as in buildVectorSearchStruct.
            vsComp.getKind().fromString((String) val);
        }
        if (key.equals("rerankWithOriginalVectors")) {
            vsComp.setRerankWithOriginalVectors((Boolean) val);
        }
        if (key.equals("defaultOversampling")) {
            vsComp.setDefaultOversampling(((Number) val).doubleValue());
        }
        if (isSQ) {
            if (key.equals("scalarQuantizationParameters")) {
                ScalarQuantizationParameters sqParams = new ScalarQuantizationParameters();
                ((Map<String, Object>) val).forEach((sqKey, sqVal) -> {
                    if (sqKey.equals("quantizedDataType")) {
                        sqParams.setQuantizedDataType(VectorSearchCompressionTarget.fromString((String) sqVal));
                    }
                });
                ((ScalarQuantizationCompression) vsComp).setParameters(sqParams);
            }
        }
        return vsComp;
    }

    /**
     * Translates the "fields" map of the op template into the index schema, one
     * {@link SearchField} per entry. Each value must itself be a map of field
     * properties; anything else raises an {@link OpConfigError}.
     * NOTE(review): relies on the "type" key being seen before any other property
     * of the same field, since searchField (an instance field) is only constructed
     * on "type" — TODO confirm key ordering is guaranteed by the op template.
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private LongFunction<List<SearchField>> buildFieldsStruct(ParsedOp op) {
        logger.debug(">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>buildFieldsStruct");
        Optional<LongFunction<Map>> baseFunc = op.getAsOptionalFunction("fields", Map.class);
        return baseFunc.<LongFunction<List<SearchField>>>map(mapLongFunc -> l -> {
            Map<String, Object> fMap = mapLongFunc.apply(l);
            List<SearchField> fieldsList = new ArrayList<>();
            fMap.forEach((fName, fValue) -> {
                if (fValue instanceof Map) {
                    logger.debug(
                            ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>buildFieldsStruct>>>>fName:{} fValue:{}",
                            fName, fValue);
                    ((Map<String, Object>) fValue).forEach((innerKey, innerValue) -> {
                        logger.debug(
                                ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>buildFieldsStruct>>>>fName:{} fValue:{} fName:{} fValue:{}",
                                fName, fValue, innerKey, innerValue);
                        if (innerKey.equals("type")) {
                            searchField = new SearchField(fName, SearchFieldDataType.fromString((String) innerValue));
                        }
                        if (innerKey.equals("key")) {
                            searchField.setKey((Boolean) innerValue);
                        }
                        if (innerKey.equals("dimensions")) {
                            searchField.setVectorSearchDimensions(((Number) innerValue).intValue());
                        }
                        if (innerKey.equals("vectorSearchProfile")) {
                            searchField.setVectorSearchProfileName((String) innerValue);
                            logger.debug("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% {} %n",
                                    searchField.getVectorSearchProfileName());
                        }
                        if (innerKey.equals("filterable")) {
                            searchField.setFilterable((Boolean) innerValue);
                        }
                        if (innerKey.equals("sortable")) {
                            searchField.setSortable(((Boolean) innerValue));
                        }
                        if (innerKey.equals("searchable")) {
                            searchField.setSearchable((Boolean) innerValue);
                        }
                        if (innerKey.equals("facetable")) {
                            searchField.setFacetable((Boolean) innerValue);
                        }
                        if (innerKey.equals("retrievable")) {
                            // For now we're ignoring this as this is internally set to 'hidden' property's
                            // value by the searchIndexClient
                        }
                        if (innerKey.equals("hidden")) {
                            searchField.setHidden((Boolean) innerValue);
                        }
                    });
                } else {
                    throw new OpConfigError(
                            "Fields properties must be a Map<String, Map<String, Object>>, but got "
                                    + fValue.getClass().getSimpleName() + " instead for the inner value");
                }
                fieldsList.add(searchField);
                if (logger.isDebugEnabled()) {
                    fieldsList.forEach((field) -> {
                        logger.debug(
                                ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>buildFieldsStruct>>>> fields FINAL: Name:{} VSProfileName:{}",
                                field.getName(), field.getVectorSearchProfileName());
                    });
                }
            });
            return fieldsList;
        }).orElse(null);
    }

    /** Binds client and per-cycle index definition into the executable op. */
    @Override
    public LongFunction<AzureAISearchBaseOp<SearchIndex>> createOpFunc(LongFunction<SearchIndex> paramF,
            LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return l -> new AzureAISearchCreateOrUpdateIndexOp(clientF.apply(l), paramF.apply(l));
    }
}

View File

@ -0,0 +1,56 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.opsdispenser;
import java.util.function.LongFunction;
import com.azure.search.documents.indexes.SearchIndexClient;
import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchDeleteIndexOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
/**
* Delete an Azure AI Search index.
*
* @see <a href=
* "https://learn.microsoft.com/en-us/rest/api/searchservice/indexes/delete?view=rest-searchservice-2024-07-01&tabs=HTTP">Delete
* Index docs</a>.
* @see <a href=
* "https://learn.microsoft.com/en-us/rest/api/searchservice/">REST
* API</a>.
*/
public class AzureAISearchDeleteIndexOpDispenser extends AzureAISearchBaseOpDispenser<String> {

    /** Constructs a dispenser for delete-index ops from the parsed op template. */
    public AzureAISearchDeleteIndexOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
            LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    /** The delete op's only parameter is the target index name itself. */
    @Override
    public LongFunction<String> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
            LongFunction<String> targetF) {
        return targetF::apply;
    }

    /** Binds the per-cycle client and index name into a delete-index op. */
    @Override
    public LongFunction<AzureAISearchBaseOp<String>> createOpFunc(LongFunction<String> paramF,
            LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return cycle -> new AzureAISearchDeleteIndexOp(clientF.apply(cycle), paramF.apply(cycle));
    }
}

View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.opsdispenser;
import java.util.function.LongFunction;
import com.azure.search.documents.indexes.SearchIndexClient;
import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchListIndexesOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
public class AzureAISearchListIndexesOpDispenser extends AzureAISearchBaseOpDispenser<String> {

    /** Constructs a dispenser for list-indexes ops from the parsed op template. */
    public AzureAISearchListIndexesOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
            LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    /** Listing takes no real parameters; the target string is passed through as-is. */
    @Override
    public LongFunction<String> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
            LongFunction<String> targetF) {
        return targetF::apply;
    }

    /** Binds the per-cycle client and pass-through parameter into a list-indexes op. */
    @Override
    public LongFunction<AzureAISearchBaseOp<String>> createOpFunc(LongFunction<String> paramF,
            LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return cycle -> new AzureAISearchListIndexesOp(clientF.apply(cycle), paramF.apply(cycle));
    }
}

View File

@ -0,0 +1,101 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.opsdispenser;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.LongFunction;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.models.SearchOptions;
import com.azure.search.documents.models.VectorQuery;
import com.azure.search.documents.models.VectorSearchOptions;
import com.azure.search.documents.models.VectorizedQuery;
import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchSearchDocumentsOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.nosqlbench.nb.api.errors.OpConfigError;
/**
* @see https://learn.microsoft.com/en-us/rest/api/searchservice/documents/search-get?view=rest-searchservice-2024-07-01&tabs=HTTP#rawvectorquery
* @see https://learn.microsoft.com/en-us/azure/search/vector-search-how-to-query?tabs=query-2024-07-01%2Cfilter-2024-07-01%2Cbuiltin-portal#vector-query-request
*/
public class AzureAISearchSearchDocumentsOpDispenser extends AzureAISearchBaseOpDispenser<SearchOptions> {

    public AzureAISearchSearchDocumentsOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
            LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    /**
     * Builds the per-cycle {@link SearchOptions}: optional "count" flag, required
     * "select" projection, and the vector query derived from "vectorQueries".
     */
    @Override
    public LongFunction<SearchOptions> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
            LongFunction<String> targetF) {
        LongFunction<SearchOptions> ebF = l -> new SearchOptions();
        Optional<LongFunction<Boolean>> countFunc = op.getAsOptionalFunction("count", Boolean.class);
        if (countFunc.isPresent()) {
            final LongFunction<SearchOptions> countLF = ebF;
            ebF = l -> countLF.apply(l).setIncludeTotalCount(countFunc.get().apply(l));
        }
        LongFunction<String> selectFunc = op.getAsRequiredFunction("select", String.class);
        final LongFunction<SearchOptions> selectLF = ebF;
        ebF = l -> selectLF.apply(l).setSelect(selectFunc.apply(l));
        // Resolve the vector-search options function ONCE at dispenser-construction
        // time; previously buildVectorSearchOptionsStruct(op) was re-invoked inside
        // the lambda, re-parsing the op template on every single cycle.
        final LongFunction<VectorSearchOptions> vsoF = buildVectorSearchOptionsStruct(op);
        final LongFunction<SearchOptions> vqLF = ebF;
        ebF = l -> vqLF.apply(l).setVectorSearchOptions(vsoF.apply(l));
        final LongFunction<SearchOptions> lastF = ebF;
        return l -> lastF.apply(l);
    }

    /** Binds the index client, per-target search client, and options into the op. */
    @Override
    public LongFunction<AzureAISearchBaseOp<SearchOptions>> createOpFunc(LongFunction<SearchOptions> paramF,
            LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return l -> new AzureAISearchSearchDocumentsOp(clientF.apply(l),
                clientF.apply(l).getSearchClient(targetF.apply(l)), paramF.apply(l));
    }

    /**
     * Translates the required "vectorQueries" map into {@link VectorSearchOptions}.
     * The "vector" key (a list of floats) is mandatory; "exhaustive", "fields",
     * "weight", and "k" are optional query tuning knobs.
     *
     * @throws OpConfigError if "vectorQueries" or its "vector" entry is missing
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    private LongFunction<VectorSearchOptions> buildVectorSearchOptionsStruct(ParsedOp op) {
        if (!op.isDefined("vectorQueries")) {
            throw new OpConfigError("Must provide values for 'vectorQueries' in 'search_documents' op");
        }
        Optional<LongFunction<Map>> baseFunc = op.getAsOptionalFunction("vectorQueries", Map.class);
        return baseFunc.<LongFunction<VectorSearchOptions>>map(mapLongFunc -> l -> {
            Map<String, Object> vsoMap = mapLongFunc.apply(l);
            VectorSearchOptions vsOpts = new VectorSearchOptions();
            if (!vsoMap.containsKey("vector")) {
                throw new OpConfigError(
                        "Must provide list of float values for 'vector' field within 'vectorQueries' of 'search_documents' op");
            }
            VectorQuery vectorizableQuery = new VectorizedQuery((List<Float>) vsoMap.get("vector"));
            if (vsoMap.containsKey("exhaustive")) {
                vectorizableQuery.setExhaustive((Boolean) vsoMap.get("exhaustive"));
            }
            if (vsoMap.containsKey("fields")) {
                vectorizableQuery.setFields(new String[] { (String) vsoMap.get("fields") });
            }
            if (vsoMap.containsKey("weight")) {
                vectorizableQuery.setWeight(((Number) vsoMap.get("weight")).floatValue());
            }
            if (vsoMap.containsKey("k")) {
                vectorizableQuery.setKNearestNeighborsCount(((Number) vsoMap.get("k")).intValue());
            }
            vsOpts.setQueries(vectorizableQuery);
            return vsOpts;
        }).orElse(null);
    }
}

View File

@ -0,0 +1,76 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.opsdispenser;
import java.util.Map;
import java.util.function.LongFunction;
import com.azure.search.documents.SearchDocument;
import com.azure.search.documents.indexes.SearchIndexClient;
import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchUploadDocumentsOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
/**
* @see <a href=
* "https://learn.microsoft.com/en-us/rest/api/searchservice/documents/?view=rest-searchservice-2024-07-01&tabs=HTTP">API
* Reference</a>
* @see <a href=
* "https://learn.microsoft.com/en-us/java/api/com.azure.search.documents?view=azure-java-stable">Index
* docs</a>
*/
public class AzureAISearchUploadDocumentsOpDispenser extends AzureAISearchBaseOpDispenser<SearchDocument> {

    public AzureAISearchUploadDocumentsOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
            LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    /**
     * Builds the per-cycle {@link SearchDocument} from the required "fields" map
     * of the op template. (The previous version created a throwaway
     * {@code l -> new SearchDocument()} lambda that was immediately overwritten —
     * dead code removed.)
     */
    @SuppressWarnings("rawtypes")
    @Override
    public LongFunction<SearchDocument> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
            LongFunction<String> targetF) {
        LongFunction<Map> fieldsMapF = op.getAsRequiredFunction("fields", Map.class);
        return buildFieldsStruct(fieldsMapF);
    }

    /** Binds the index client, per-target search client, and document into the op. */
    @Override
    public LongFunction<AzureAISearchBaseOp<SearchDocument>> createOpFunc(LongFunction<SearchDocument> paramF,
            LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return l -> new AzureAISearchUploadDocumentsOp(clientF.apply(l),
                clientF.apply(l).getSearchClient(targetF.apply(l)), paramF.apply(l));
    }

    /** Copies the rendered fields map wholesale into a fresh SearchDocument. */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private LongFunction<SearchDocument> buildFieldsStruct(LongFunction<Map> fieldsFunction) {
        return l -> {
            Map<String, Object> fields = fieldsFunction.apply(l);
            var doc = new SearchDocument();
            doc.putAll(fields);
            return doc;
        };
    }
}

View File

@ -0,0 +1,25 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.types;
/**
 * The op types supported by the Azure AI Search driver adapter. Constant names
 * are lowercase with underscores because they are matched against the op-name
 * keys used in workload YAML (e.g. {@code create_or_update_index: ...}).
 */
public enum AzureAISearchOpType {
    // Create a new index, or update it in place if it already exists.
    create_or_update_index,
    // Drop an index by name.
    delete_index,
    // Enumerate the indexes visible to the configured endpoint.
    list_indexes,
    // Upload (index) documents into an existing index.
    upload_documents,
    // Run a (vector) search query against an index.
    search_documents,
}

View File

@ -0,0 +1,174 @@
min_version: 5.21
description: |
This is a template for live vector search testing.
Template Variables:
schema: Install the schema required to run the test
rampup: Measure how long it takes to load a set of embeddings
 search_and_index: Measure how the system responds to queries while it
 is indexing recently ingested data.
 search: Run vector search with a set of default (or overridden) parameters
In all of these phases, it is important to instance the metrics with distinct names.
Also, aggregates of recall should include total aggregate as well as a moving average.
TEMPLATE(token_file)
TEMPLATE(token)
scenarios:
azure_aisearch_vectors:
delete_index: >-
run tags==block:delete_index
errors===stop
cycles===UNDEF threads===UNDEF
endpoint=TEMPLATE(azureaisearchhost) token_file=TEMPLATE(token_file)
create_or_update_index: >-
run tags==block:create_or_update_index
errors===stop
cycles===UNDEF threads===UNDEF
endpoint=TEMPLATE(azureaisearchhost) token_file=TEMPLATE(token_file)
list_indexes: >-
run tags==block:list_indexes
errors===stop
cycles===UNDEF threads===UNDEF
endpoint=TEMPLATE(azureaisearchhost) token_file=TEMPLATE(token_file)
upload_documents: >-
run tags==block:upload_documents
errors===warn,counter
cycles===TEMPLATE(train_cycles,TEMPLATE(trainsize,1000)) threads===TEMPLATE(train_threads,AUTO)
token_file===TEMPLATE(token_file) endpoint===TEMPLATE(azureaisearchhost)
search_documents: >-
run tags==block:search_documents
errors===warn,counter
cycles===TEMPLATE(testann_cycles,TEMPLATE(testsize,1000)) threads===TEMPLATE(testann_threads,AUTO)
endpoint=TEMPLATE(azureaisearchhost) token_file=TEMPLATE(token_file)
params:
driver: azure_aisearch
instrument: true
bindings:
id_val: Identity();
id_val_uuid: ToHashedUUID() -> java.util.UUID
row_key: ToString()
row_key_batch: Mul(TEMPLATE(batch_size)L); ListSizedStepped(TEMPLATE(batch_size),long->ToString());
# filetype=hdf5 for TEMPLATE(filetype,hdf5)
test_floatlist_hdf5: HdfFileToFloatList("local/testdata/TEMPLATE(dataset).hdf5", "/test");
relevant_indices_hdf5: HdfFileToIntArray("local/testdata/TEMPLATE(dataset).hdf5", "/neighbors")
distance_floatlist_hdf5: HdfFileToFloatList("testdata/TEMPLATE(dataset).hdf5", "/distance")
# TODO - remove the 'local' keyword in path
train_floatlist_hdf5: HdfFileToFloatList("local/testdata/TEMPLATE(dataset).hdf5", "/train");
train_floatlist_hdf5_batch: Mul(TEMPLATE(batch_size)L); ListSizedStepped(TEMPLATE(batch_size),HdfFileToFloatList("testdata/TEMPLATE(dataset).hdf5", "/train"));
# filetype=fvec for TEMPLATE(filetype,fvec)
test_floatlist_fvec: FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_query_vectors.fvec");
relevant_indices_fvec: IVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_indices_query.ivec");
distance_floatlist_fvec: FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(testsize)_distances_count.fvec",TEMPLATE(dimensions),0);
train_floatlist_fvec: FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_base_vectors.fvec",TEMPLATE(dimensions),0);
train_floatlist_fvec_batch: Mul(TEMPLATE(batch_size,10)L); ListSizedStepped(TEMPLATE(batch_size),FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_base_vectors.fvec",TEMPLATE(dimensions),0));
##############################################
# NOTE: An Azure AI Search index name must start and end with alphanumeric characters and contain only lowercase letters, digits or dashes.
##############################################
blocks:
delete_index:
ops:
# https://learn.microsoft.com/en-us/rest/api/searchservice/indexes/delete?view=rest-searchservice-2024-07-01&tabs=HTTP
delete_idx_op:
delete_index: "TEMPLATE(collection)"
create_or_update_index:
ops:
# https://learn.microsoft.com/en-us/rest/api/searchservice/indexes/create-or-update?view=rest-searchservice-2024-07-01&tabs=HTTP
create_or_update_index_op:
create_or_update_index: "TEMPLATE(collection)"
fields:
id:
type: "Edm.String" # Data types - https://learn.microsoft.com/en-us/rest/api/searchservice/supported-data-types#edm-data-types-for-vector-fields
key: true
filterable: true
sortable: true
searchable: true
facetable: false
retrievable: true
hidden: false
value:
type: "Collection(Edm.Single)"
dimensions: TEMPLATE(dimensions)
vectorSearchProfile: "vector-profile-hnsw-scalar-1"
hidden: false
searchable: true
retrievable: true
filterable: false
sortable: false
facetable: false
vectorSearch:
compressions:
scalar-quantization-1:
kind: "scalarQuantization"
rerankWithOriginalVectors: true
defaultOversampling: 1
scalarQuantizationParameters:
quantizedDataType: "int8"
algorithms:
hnsw-sq-1:
kind: "hnsw" # or "exhaustiveKnn"
hnswParameters:
m: 32
efConstruction: 100
efSearch: 100
metric: "TEMPLATE(similarity_function)"
#exhaustiveKnnParameters:
#metric: "TEMPLATE(similarity_function)"
profiles:
vector-profile-hnsw-scalar-1:
algorithm: "hnsw-sq-1"
compression: "scalar-quantization-1"
list_indexes:
ops:
# https://learn.microsoft.com/en-us/rest/api/searchservice/indexes/list?view=rest-searchservice-2024-07-01&tabs=HTTP
list_indexes_op:
list_indexes: "DUMMY_PLACEHOLDER"
upload_documents:
ops:
upload_documents_op:
upload_documents: "TEMPLATE(collection)"
fields:
id: "{row_key}"
value: "{train_floatlist_TEMPLATE(filetype)}"
search_documents:
ops:
search_documents_op:
search_documents: "TEMPLATE(collection)"
count: false
select: "id, value"
vectorQueries:
kind: "vector"
vector: "{test_floatlist_TEMPLATE(filetype)}"
exhaustive: false
fields: "value"
weight: 1.0
k: TEMPLATE(select_limit,100)
verifier-init: |
relevancy= new io.nosqlbench.nb.api.engine.metrics.wrappers.RelevancyMeasures(_parsed_op);
for (int k in List.of(100)) {
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.recall("recall",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.precision("precision",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.F1("F1",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.reciprocal_rank("RR",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.average_precision("AP",k));
}
verifier: |
// driver-specific function
actual_indices=io.nosqlbench.adapter.azureaisearch.AzureAISearchAdapterUtils.responseFieldToStringArray("id",result)
System.out.println("actual_indices ------>>>>: " + actual_indices);
// driver-agnostic function
relevancy.accept({relevant_indices_TEMPLATE(filetype)},actual_indices);
// because we are "verifying" although this needs to be reorganized
return true;

View File

@ -0,0 +1,67 @@
# Azure AI Search driver adapter
The Azure AI Search driver adapter is a NoSQLBench adapter for the `azure-aisearch` driver, a Java driver
for connecting to and performing operations on an instance of a Azure AI Search vector database. The driver is
leveraged from GitHub at https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/search/azure-search-documents/.
## Run Commands (Remove prior to merge)
### Create Collection Schema
```
java -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.create_or_update_index dimensions=25 testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 similarity_function=cosine azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azure_aisearch,instance:vectors,vendor:azure_aisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
### Delete Collection
```
java -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.delete_index dimensions=25 testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 similarity_function=cosine azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azure_aisearch,instance:vectors,vendor:azure_aisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
### List Indexes
```
java --enable-preview -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.list_indexes dimensions=25 similarity_function=cosine testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azureaisearch,instance:vectors,vendor:azureaisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
### Upload Documents
```
java --enable-preview -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.upload_documents dimensions=25 similarity_function=cosine testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azureaisearch,instance:vectors,vendor:azureaisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
### Search Documents
```
java --enable-preview -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.search_documents dimensions=25 similarity_function=cosine testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azureaisearch,instance:vectors,vendor:azureaisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
## Activity Parameters
The following parameters must be supplied to the adapter at runtime in order to successfully connect to an
instance of the [Azure AI Search database](https://learn.microsoft.com/en-us/rest/api/searchservice/?view=rest-searchservice-2024-07-01):
* `token` - In order to use the Azure AI Search service you must have an account. Once the account and search service are
created you can [retrieve an api key/token](https://learn.microsoft.com/en-us/azure/search/search-security-api-keys). This key will need to be
provided any time a database connection is desired. Alternatively, the api key can be stored in a file securely and
referenced via the `token_file` config option pointing to the path of the file.
* `endpoint` - When a collection/index is created in the database the URI (aka endpoint) must be specified as well. The adapter will
use the default value of `localhost:8080` if none is provided at runtime.
* `api_version` - the api version to be used by the search client. Defaults to the latest service/api version supported
by the version of client SDK.
## Op Templates
The Azure AI Search adapter supports [**all basic operations**](../java/io/nosqlbench/adapter/azure-aisearch/ops) supported by the [Java
client SDK published by Azure](https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/search/azure-search-documents). The official Azure AI Search API reference can be
found at https://learn.microsoft.com/en-us/rest/api/searchservice/operation-groups?view=rest-searchservice-2024-07-01.
The operations include a full-fledged support for key APIs available in the Java SDK client.
The following are a couple high level API operations.
* Create or Update Index
* Delete Index
* List Indexes
* Upload Documents (vectors)
* (Vector) Search Documents (vectors)
## Examples
Check out the [full example workload available here](./activities/azure_aisearch_vectors_live.yaml).
---

View File

@ -55,7 +55,7 @@
<dependency>
<groupId>io.milvus</groupId>
<artifactId>milvus-sdk-java</artifactId>
<version>2.4.1</version>
<version>2.3.5</version>
</dependency>
<!-- https://mvnrepository.com/artifact/ch.qos.reload4j/reload4j replaces log4j 1.X-->
<dependency>

View File

@ -16,17 +16,18 @@
package io.nosqlbench.adapter.milvus.opdispensers;
import com.alibaba.fastjson.JSONObject;
import io.milvus.param.dml.InsertParam;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.nosqlbench.nb.api.errors.OpConfigError;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.LongFunction;
import com.alibaba.fastjson.JSONObject;
import io.milvus.param.dml.InsertParam;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.nosqlbench.nb.api.errors.OpConfigError;
public class MilvusOpUtils {
public static Optional<LongFunction<List<JSONObject>>> getHighLevelRowsFunction(ParsedOp op, String opfield) {
@ -69,8 +70,6 @@ public class MilvusOpUtils {
ParsedOp valueTemplate = op.getAsSubOp(opfield, ParsedOp.SubOpNaming.SubKey);
Map<String, Object> testFieldsValues = valueTemplate.apply(0L);
List<LongFunction<InsertParam.Field>> fieldsF = new ArrayList<>(testFieldsValues.size());
for (String fieldName : testFieldsValues.keySet()) {
Object testFieldValue = testFieldsValues.get(fieldName);
if (!(testFieldValue instanceof List<?> list)) {

View File

@ -16,20 +16,37 @@
package io.nosqlbench.adapter.qdrant.opdispensers;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.function.LongFunction;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import io.nosqlbench.adapter.qdrant.QdrantDriverAdapter;
import io.nosqlbench.adapter.qdrant.ops.QdrantBaseOp;
import io.nosqlbench.adapter.qdrant.ops.QdrantCreateCollectionOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.nosqlbench.nb.api.errors.OpConfigError;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.grpc.Collections.*;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.function.LongFunction;
import io.qdrant.client.grpc.Collections.BinaryQuantization;
import io.qdrant.client.grpc.Collections.CompressionRatio;
import io.qdrant.client.grpc.Collections.CreateCollection;
import io.qdrant.client.grpc.Collections.HnswConfigDiff;
import io.qdrant.client.grpc.Collections.OptimizersConfigDiff;
import io.qdrant.client.grpc.Collections.ProductQuantization;
import io.qdrant.client.grpc.Collections.QuantizationConfig;
import io.qdrant.client.grpc.Collections.QuantizationType;
import io.qdrant.client.grpc.Collections.ScalarQuantization;
import io.qdrant.client.grpc.Collections.ShardingMethod;
import io.qdrant.client.grpc.Collections.SparseIndexConfig;
import io.qdrant.client.grpc.Collections.SparseVectorConfig;
import io.qdrant.client.grpc.Collections.SparseVectorParams;
import io.qdrant.client.grpc.Collections.VectorParams;
import io.qdrant.client.grpc.Collections.VectorParamsMap;
import io.qdrant.client.grpc.Collections.VectorsConfig;
import io.qdrant.client.grpc.Collections.WalConfigDiff;
public class QdrantCreateCollectionOpDispenser extends QdrantBaseOpDispenser<CreateCollection> {
private static final Logger logger = LogManager.getLogger(QdrantCreateCollectionOpDispenser.class);
@ -275,43 +292,6 @@ public class QdrantCreateCollectionOpDispenser extends QdrantBaseOpDispenser<Cre
return qcBuilder.build();
}
/**
* Build the {@link HnswConfigDiff} from the provided {@link ParsedOp}.
*
* @param fieldSpec The {@link ParsedOp} containing the hnsw config data
* @return The {@link HnswConfigDiff} built from the provided {@link ParsedOp}
* @see <a href="https://qdrant.tech/documentation/concepts/indexing/#vector-index">HNSW Config</a>
*/
@Deprecated
private HnswConfigDiff buildHnswConfigDiff(ParsedOp fieldSpec) {
HnswConfigDiff.Builder hnswConfigBuilder = HnswConfigDiff.newBuilder();
fieldSpec.getOptionalStaticValue("hnsw_config", Map.class).ifPresent(hnswConfigData -> {
if (hnswConfigData.isEmpty()) {
return;
} else {
if (hnswConfigData.containsKey("ef_construct")) {
hnswConfigBuilder.setEfConstruct(((Number) hnswConfigData.get("ef_construct")).longValue());
}
if (hnswConfigData.containsKey("m")) {
hnswConfigBuilder.setM(((Number) hnswConfigData.get("m")).intValue());
}
if (hnswConfigData.containsKey("full_scan_threshold")) {
hnswConfigBuilder.setFullScanThreshold(((Number) hnswConfigData.get("full_scan_threshold")).intValue());
}
if (hnswConfigData.containsKey("max_indexing_threads")) {
hnswConfigBuilder.setMaxIndexingThreads(((Number) hnswConfigData.get("max_indexing_threads")).intValue());
}
if (hnswConfigData.containsKey("on_disk")) {
hnswConfigBuilder.setOnDisk((Boolean) hnswConfigData.get("on_disk"));
}
if (hnswConfigData.containsKey("payload_m")) {
hnswConfigBuilder.setPayloadM(((Number) hnswConfigData.get("payload_m")).intValue());
}
}
});
return hnswConfigBuilder.build();
}
private LongFunction<HnswConfigDiff> buildHnswConfigDiff(LongFunction<Map> hnswConfigDiffMapLongFunc) {
return l -> this.buildHnswConfigDiff(hnswConfigDiffMapLongFunc.apply(l));
}

View File

@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>adapter-weaviate</artifactId>
<packaging>jar</packaging>
<parent>
<artifactId>mvn-defaults</artifactId>
<groupId>io.nosqlbench</groupId>
<version>${revision}</version>
<relativePath>../../mvn-defaults</relativePath>
</parent>
<name>${project.artifactId}</name>
<description>
A nosqlbench adapter driver module for the Weaviate database.
</description>
<dependencies>
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>nb-annotations</artifactId>
<version>${revision}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>adapters-api</artifactId>
<version>${revision}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>io.weaviate</groupId>
<artifactId>client</artifactId>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,65 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.weaviate;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
/**
 * Shared static helpers for the Weaviate driver adapter: the service selector
 * constant, string-splitting utilities for op parameters, and a digit-masking
 * helper used when logging credentials.
 */
public class WeaviateAdapterUtils {
    /** Service selector under which this adapter is registered. */
    public static final String WEAVIATE = "weaviate";

    private WeaviateAdapterUtils() {
        // static utility class; never instantiated
    }

    /**
     * Split a space- and/or comma-separated string into its non-blank tokens.
     *
     * @param input the raw string; must be non-blank
     * @return the non-blank tokens, in order of appearance
     */
    public static List<String> splitNames(String input) {
        // isNotBlank already implies isNotEmpty, so a single check suffices.
        assert StringUtils.isNotBlank(input);
        return Arrays.stream(input.split("( +| *, *)"))
            .filter(StringUtils::isNotBlank)
            .toList();
    }

    /**
     * Split a space- and/or comma-separated string of integers into longs.
     *
     * @param input the raw string; must be non-blank
     * @return the parsed long values, in order of appearance
     * @throws NumberFormatException if any token is not a valid long
     */
    public static List<Long> splitLongs(String input) {
        assert StringUtils.isNotBlank(input);
        return Arrays.stream(input.split("( +| *, *)"))
            .filter(StringUtils::isNotBlank)
            .map(Long::parseLong)
            .toList();
    }

    /**
     * Mask the digits in the given string with '*'
     *
     * @param unmasked The string to mask
     * @return The masked string
     */
    protected static String maskDigits(String unmasked) {
        assert StringUtils.isNotBlank(unmasked);
        StringBuilder masked = new StringBuilder(unmasked.length());
        for (char ch : unmasked.toCharArray()) {
            masked.append(Character.isDigit(ch) ? '*' : ch);
        }
        return masked.toString();
    }
}

View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.weaviate;
import static io.nosqlbench.adapter.weaviate.WeaviateAdapterUtils.WEAVIATE;
import java.util.function.Function;
import io.nosqlbench.adapter.weaviate.ops.WeaviateBaseOp;
import io.nosqlbench.adapters.api.activityimpl.OpMapper;
import io.nosqlbench.adapters.api.activityimpl.uniform.BaseDriverAdapter;
import io.nosqlbench.adapters.api.activityimpl.uniform.DriverAdapter;
import io.nosqlbench.nb.annotations.Service;
import io.nosqlbench.nb.api.components.core.NBComponent;
import io.nosqlbench.nb.api.config.standard.NBConfigModel;
import io.nosqlbench.nb.api.config.standard.NBConfiguration;
import io.nosqlbench.nb.api.labels.NBLabels;
/**
 * NoSQLBench driver adapter for the Weaviate vector database. Wires the op
 * mapper, the per-space state initializer, and the adapter's config model
 * into the uniform driver framework.
 */
@Service(value = DriverAdapter.class, selector = WEAVIATE)
public class WeaviateDriverAdapter extends BaseDriverAdapter<WeaviateBaseOp<?>, WeaviateSpace> {

    public WeaviateDriverAdapter(NBComponent parentComponent, NBLabels labels) {
        super(parentComponent, labels);
    }

    /** Returns the mapper which resolves parsed op templates to Weaviate ops. */
    @Override
    public OpMapper<WeaviateBaseOp<?>> getOpMapper() {
        return new WeaviateOpMapper(this);
    }

    /** Combines the base adapter config model with the Weaviate-specific one. */
    @Override
    public NBConfigModel getConfigModel() {
        return super.getConfigModel().add(WeaviateSpace.getConfigModel());
    }

    /** Creates one {@link WeaviateSpace} per named space on demand. */
    @Override
    public Function<String, ? extends WeaviateSpace> getSpaceInitializer(NBConfiguration cfg) {
        return spaceName -> new WeaviateSpace(spaceName, cfg);
    }
}

View File

@ -0,0 +1,32 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.weaviate;
import static io.nosqlbench.adapter.weaviate.WeaviateAdapterUtils.WEAVIATE;
import io.nosqlbench.adapter.diag.DriverAdapterLoader;
import io.nosqlbench.nb.annotations.Service;
import io.nosqlbench.nb.api.components.core.NBComponent;
import io.nosqlbench.nb.api.labels.NBLabels;
/**
 * SPI entry point which instantiates the {@link WeaviateDriverAdapter} when
 * the "weaviate" driver is selected.
 */
@Service(value = DriverAdapterLoader.class, selector = WEAVIATE)
public class WeaviateDriverAdapterLoader implements DriverAdapterLoader {
    /** Builds a new adapter instance attached to the given parent component. */
    @Override
    public WeaviateDriverAdapter load(NBComponent parent, NBLabels childLabels) {
        return new WeaviateDriverAdapter(parent, childLabels);
    }
}

View File

@ -0,0 +1,72 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.weaviate;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import io.nosqlbench.adapter.weaviate.ops.WeaviateBaseOp;
import io.nosqlbench.adapter.weaviate.opsdispensers.WeaviateBaseOpDispenser;
import io.nosqlbench.adapter.weaviate.opsdispensers.WeaviateCreateCollectionOpDispenser;
import io.nosqlbench.adapter.weaviate.opsdispensers.WeaviateCreateObjectsOpDispenser;
import io.nosqlbench.adapter.weaviate.opsdispensers.WeaviateDeleteCollectionOpDispenser;
import io.nosqlbench.adapter.weaviate.opsdispensers.WeaviateGetCollectionSchemaOpDispenser;
import io.nosqlbench.adapter.weaviate.types.WeaviateOpType;
import io.nosqlbench.adapters.api.activityimpl.OpDispenser;
import io.nosqlbench.adapters.api.activityimpl.OpMapper;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.nosqlbench.engine.api.templating.TypeAndTarget;
/**
 * Resolves a {@link ParsedOp} to the matching Weaviate op dispenser based on
 * the op template's declared type.
 */
public class WeaviateOpMapper implements OpMapper<WeaviateBaseOp<?>> {
    private static final Logger logger = LogManager.getLogger(WeaviateOpMapper.class);

    private final WeaviateDriverAdapter adapter;

    /**
     * Create a new WeaviateOpMapper implementing the {@link OpMapper} interface.
     *
     * @param adapter The associated {@link WeaviateDriverAdapter}
     */
    public WeaviateOpMapper(WeaviateDriverAdapter adapter) {
        this.adapter = adapter;
    }

    /**
     * Given an instance of a {@link ParsedOp} returns the appropriate
     * {@link WeaviateBaseOpDispenser} subclass.
     *
     * @param op The {@link ParsedOp} to be evaluated
     * @return The correct {@link WeaviateBaseOpDispenser} subclass based on the op type
     */
    @Override
    public OpDispenser<? extends WeaviateBaseOp<?>> apply(ParsedOp op) {
        TypeAndTarget<WeaviateOpType, String> target =
            op.getTypeAndTarget(WeaviateOpType.class, String.class, "type", "target");
        logger.info(() -> "Using '" + target.enumId + "' op type for op template '" + op.getName() + "'");
        // The switch is exhaustive over WeaviateOpType, so the compiler will
        // flag any newly added enum constant that is not handled here.
        return switch (target.enumId) {
            case create_collection -> new WeaviateCreateCollectionOpDispenser(adapter, op, target.targetFunction);
            case delete_collection -> new WeaviateDeleteCollectionOpDispenser(adapter, op, target.targetFunction);
            case get_collection_schema ->
                new WeaviateGetCollectionSchemaOpDispenser(adapter, op, target.targetFunction);
            case create_objects -> new WeaviateCreateObjectsOpDispenser(adapter, op, target.targetFunction);
        };
    }
}

View File

@ -0,0 +1,142 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.weaviate;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import io.nosqlbench.nb.api.config.standard.ConfigModel;
import io.nosqlbench.nb.api.config.standard.NBConfigModel;
import io.nosqlbench.nb.api.config.standard.NBConfiguration;
import io.nosqlbench.nb.api.config.standard.Param;
import io.nosqlbench.nb.api.errors.OpConfigError;
import io.weaviate.client.Config;
import io.weaviate.client.WeaviateAuthClient;
import io.weaviate.client.WeaviateClient;
import io.weaviate.client.v1.auth.exception.AuthException;
/**
* The {@code WeaviateSpace} class is a context object which stores all stateful
* contextual information needed to interact with the Weaviate database
* instance.
*
* @see <a href="https://weaviate.io/developers/weaviate/quickstart">Weaviate
* quick start guide</a>
* @see <a href="https://weaviate.io/developers/wcs/quickstart">Weaviate cloud
* quick start guide</a>
* @see <a href="https://github.com/weaviate/java-client">Weaviate Java
* client</a>
*/
public class WeaviateSpace implements AutoCloseable {
    private final static Logger logger = LogManager.getLogger(WeaviateSpace.class);
    // Name of this space, used only for log messages here.
    private final String name;
    // Run configuration; read lazily when the client is first requested.
    private final NBConfiguration cfg;

    // Lazily created, shared client handle; guarded by getClient()'s lock.
    protected WeaviateClient client;

    /**
     * Create a new WeaviateSpace Object which stores all stateful contextual
     * information needed to interact with the <b>Weaviate</b> database instance.
     *
     * @param name The name of this space
     * @param cfg The configuration ({@link NBConfiguration}) for this nb run
     */
    public WeaviateSpace(String name, NBConfiguration cfg) {
        this.name = name;
        this.cfg = cfg;
    }

    /**
     * Returns the shared client for this space, creating it on first use.
     *
     * @throws AuthException if client authentication fails during creation
     */
    public synchronized WeaviateClient getClient() throws AuthException {
        if (client == null) {
            client = createClient();
        }
        return client;
    }

    /**
     * Builds a client from the configured uri/scheme and credentials. The token
     * is read from {@code token_file} (first line) when present, otherwise from
     * the {@code token} config option.
     *
     * @throws AuthException if the Weaviate auth handshake fails
     */
    private WeaviateClient createClient() throws AuthException {
        String uri = cfg.get("uri");
        // NOTE(review): the fallback here is "https", but getConfigModel() below
        // declares "scheme" with a default of "http" — if the config model default
        // always applies, this orElse is dead and the effective default is "http".
        // Confirm which default is intended.
        String scheme = cfg.getOptional("scheme").orElse("https");

        var requiredToken = cfg.getOptional("token_file")
            .map(Paths::get)
            .map(
                tokenFilePath -> {
                    try {
                        // Only the first line of the token file is used.
                        return Files.readAllLines(tokenFilePath).getFirst();
                    } catch (IOException e) {
                        String error = "Error while reading token from file:" + tokenFilePath;
                        logger.error(error, e);
                        throw new RuntimeException(e);
                    }
                }
            ).orElseGet(
                () -> cfg.getOptional("token")
                    .orElseThrow(() -> new RuntimeException("You must provide either a token_file or a token to " +
                        "configure a Weaviate client"))
            );
        // NOTE(review): requiredToken can never be null here (orElseGet/orElseThrow
        // guarantee a value), and "token" has a declared default below, so this
        // check appears to reject username/password whenever they are provided —
        // which would make the clientPassword branch further down unreachable.
        // Confirm whether the check should only apply to explicitly-set tokens.
        if (requiredToken != null
            && (cfg.getOptional("username").isPresent() || cfg.getOptional("password").isPresent())) {
            throw new OpConfigError("Username/Password combo cannot be used together with token/tokenFile");
        }

        logger.info("{}: Creating new Weaviate Client with (masked) token [{}], uri/endpoint [{}]",
            this.name, WeaviateAdapterUtils.maskDigits(requiredToken), uri);
        // NOTE(review): the "timeout_ms" option declared in the config model is
        // not applied to this Config — confirm whether it should be wired in.
        Config config = new Config(scheme, uri);
        if (cfg.getOptional("username").isPresent() && cfg.getOptional("password").isPresent()) {
            return WeaviateAuthClient.clientPassword(config, cfg.getOptional("username").get(),
                cfg.getOptional("password").get(), null);
        } else {
            return WeaviateAuthClient.apiKey(config, requiredToken);
        }
    }

    /** Declares the configuration options accepted by this adapter space. */
    public static NBConfigModel getConfigModel() {
        return ConfigModel.of(WeaviateSpace.class)
            .add(
                Param.optional("token_file", String.class, "the file to load the api token from")
            )
            .add(
                Param.defaultTo("token", "weaviate")
                    .setDescription("the Weaviate api token to use to connect to the database")
            )
            .add(
                Param.defaultTo("scheme", "http")
                    .setDescription("the scheme of the database. Defaults to http."))
            .add(Param.defaultTo("uri", "localhost:8080").setDescription(
                "the URI endpoint in which the database is running. Do not provide any suffix like https:// here.")
            )
            .add(Param.optional("username")
                .setDescription("Username to be used for non-WCD clusters. Need Password config too."))
            .add(Param.optional("password")
                .setDescription("Password to be used for non-WCD clusters. Need Username config too."))
            .add(
                Param.defaultTo("timeout_ms", 3000)
                    .setDescription("sets the timeout in milliseconds for all requests. Defaults to 3000ms.")
            )
            .asReadOnly();
    }

    @Override
    public void close() throws Exception {
        // Drops the reference so a later getClient() would rebuild it; no
        // explicit shutdown is performed on the client itself.
        client = null;
    }
}

View File

@ -0,0 +1,90 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.weaviate.ops;
import java.util.function.LongFunction;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import io.nosqlbench.adapters.api.activityimpl.uniform.flowtypes.CycleOp;
import io.weaviate.client.WeaviateClient;
import io.weaviate.client.base.Result;
/**
 * Base type for all Weaviate operations. Holds the shared client handle and the
 * op-specific request payload; subclasses implement {@link #applyOp(long)} to
 * perform the actual API call.
 *
 * @param <T> the request type consumed by the concrete op
 */
public abstract class WeaviateBaseOp<T> implements CycleOp<Object> {

    protected final static Logger logger = LogManager.getLogger(WeaviateBaseOp.class);

    protected final WeaviateClient client;
    protected final T request;
    // Optional indirection over applyOp; retained for subclasses that supply
    // a custom call function via the second constructor.
    protected final LongFunction<Object> apiCall;

    public WeaviateBaseOp(WeaviateClient client, T requestParam) {
        this.client = client;
        this.request = requestParam;
        this.apiCall = this::applyOp;
    }

    public WeaviateBaseOp(WeaviateClient client, T requestParam, LongFunction<Object> call) {
        this.client = client;
        this.request = requestParam;
        this.apiCall = call;
    }

    /**
     * Executes the op for the given cycle. The Weaviate client reports failures
     * inside the returned {@link Result} rather than by throwing, so errors are
     * surfaced in the log here; checked exceptions are rethrown wrapped in a
     * {@link RuntimeException}.
     *
     * @param value the cycle number
     * @return whatever {@link #applyOp(long)} produced, typically a {@link Result}
     */
    @Override
    public final Object apply(long value) {
        logger.trace("applying op: {}", this);
        try {
            Object result = applyOp(value);
            // Pattern-matching instanceof avoids the previous unchecked cast to
            // Result<Boolean>; hasErrors()/getError() do not depend on the type arg.
            if (result instanceof Result<?> weaviateResult) {
                if (weaviateResult.hasErrors()) {
                    logger.error("Result status: {}", weaviateResult.getError().toString());
                }
            } else {
                logger.warn("Op '{}' did not return a Result 'Result<Boolean>' type."
                    + " Exception handling will be bypassed", this);
            }
            return result;
        } catch (RuntimeException rte) {
            throw rte;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /** Performs the concrete Weaviate API call for the given cycle. */
    public abstract Object applyOp(long value);

    @Override
    public String toString() {
        return "WeaviateOp(" + this.request.getClass().getSimpleName() + ")";
    }
}

View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.weaviate.ops;
import io.weaviate.client.WeaviateClient;
import io.weaviate.client.base.Result;
import io.weaviate.client.v1.schema.model.WeaviateClass;
/**
 * Creates a Weaviate collection (class) from the supplied {@link WeaviateClass}
 * schema definition.
 */
public class WeaviateCreateCollectionOp extends WeaviateBaseOp<WeaviateClass> {

    public WeaviateCreateCollectionOp(WeaviateClient client, WeaviateClass request) {
        super(client, request);
    }

    /**
     * Runs the schema class-creator with the configured {@link WeaviateClass}.
     * No try/catch is needed here: the base class {@code apply()} already wraps
     * exceptions, and the original catch block merely rethrew RuntimeExceptions.
     *
     * @return the {@link Result} reported by the Weaviate client
     */
    @Override
    public Object applyOp(long value) {
        Result<Boolean> createResponse = client.schema().classCreator().withClass(request).run();
        logger.debug("Successfully created the collection with return code of {}", createResponse.getResult());
        return createResponse;
    }
}

View File

@ -0,0 +1,52 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.weaviate.ops;
import java.util.Collections;
import java.util.HashMap;
import io.weaviate.client.WeaviateClient;
import io.weaviate.client.base.Result;
import io.weaviate.client.v1.data.api.ObjectCreator;
import io.weaviate.client.v1.data.model.WeaviateObject;
import io.weaviate.client.v1.data.replication.model.ConsistencyLevel;
/**
 * Inserts an object (properties plus vector) into a Weaviate collection.
 */
public class WeaviateCreateObjectsOp extends WeaviateBaseOp<ObjectCreator> {

    public WeaviateCreateObjectsOp(WeaviateClient client, ObjectCreator request) {
        super(client, request);
    }

    @Override
    public Object applyOp(long value) {
        // TODO(review): this ignores the configured 'request' ObjectCreator and writes a
        // fixed placeholder object into the hard-coded "Glove25" class — confirm intent.
        // The anonymous HashMap subclass (double-brace init) was replaced with a plain
        // map, removing the need for @SuppressWarnings("serial").
        var properties = new HashMap<String, Object>();
        properties.put("key", "This is a key");
        properties.put("value", "This is the value for a given key");

        Result<WeaviateObject> response = client.data().creator()
            .withClassName("Glove25")
            .withProperties(properties)
            .withVector(Collections.nCopies(25, 0.12345f).toArray(new Float[0]))
            .withConsistencyLevel(ConsistencyLevel.QUORUM)
            .run();
        logger.debug("Successfully inserted objects in the collection: {}", response.getResult());
        return response;
    }
}

View File

@ -0,0 +1,43 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.weaviate.ops;
import io.weaviate.client.WeaviateClient;
import io.weaviate.client.base.Result;
public class WeaviateDeleteCollectionOp extends WeaviateBaseOp<String> {
public WeaviateDeleteCollectionOp(WeaviateClient client, String request) {
super(client, request);
}
@Override
public Object applyOp(long value) {
Result<Boolean> delColResponse = null;
try {
delColResponse = client.schema().classDeleter().withClassName(request).run();
if (delColResponse.hasErrors()) {
logger.error("Delete collection operation has errors {}",
delColResponse.getError().toString());
}
logger.debug("Successfully deleted the collection: {}", delColResponse.getResult().toString());
} catch (RuntimeException rte) {
throw rte;
}
return delColResponse;
}
}

View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.weaviate.ops;
import com.google.gson.GsonBuilder;
import io.weaviate.client.WeaviateClient;
import io.weaviate.client.base.Result;
import io.weaviate.client.v1.schema.model.Schema;
/**
 * Fetches the schema of all collections from the Weaviate instance.
 */
public class WeaviateGetCollectionSchemaOp extends WeaviateBaseOp<String> {

    public WeaviateGetCollectionSchemaOp(WeaviateClient client, String request) {
        super(client, request);
    }

    /**
     * Runs the schema getter. The previous catch-and-rethrow of RuntimeException
     * was a no-op and was removed; pretty-printing is now skipped when the call
     * failed, since the result may be null in that case.
     */
    @Override
    public Object applyOp(long value) {
        Result<Schema> getColSchemaResponse = client.schema().getter().run();
        if (getColSchemaResponse.hasErrors()) {
            logger.error("Get all collection schema operation has errors {}",
                getColSchemaResponse.getError());
        } else {
            logger.info("Successfully fetched entire schema for all the collections: \n{}",
                new GsonBuilder().setPrettyPrinting().create()
                    .toJson(getColSchemaResponse.getResult()));
        }
        return getColSchemaResponse;
    }
}

View File

@ -0,0 +1,69 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.nosqlbench.adapter.weaviate.opsdispensers;
import java.util.function.LongFunction;
import io.nosqlbench.adapter.weaviate.WeaviateDriverAdapter;
import io.nosqlbench.adapter.weaviate.WeaviateSpace;
import io.nosqlbench.adapter.weaviate.ops.WeaviateBaseOp;
import io.nosqlbench.adapters.api.activityimpl.BaseOpDispenser;
import io.nosqlbench.adapters.api.activityimpl.uniform.DriverAdapter;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.weaviate.client.WeaviateClient;
import io.weaviate.client.v1.auth.exception.AuthException;
/**
 * Base dispenser for Weaviate ops. Resolves the per-cycle space and client,
 * then builds the op parameter and the op itself via the abstract hooks.
 *
 * @param <T> the request type carried by the dispensed {@link WeaviateBaseOp}
 */
public abstract class WeaviateBaseOpDispenser<T> extends BaseOpDispenser<WeaviateBaseOp<T>, WeaviateSpace> {

    protected final LongFunction<WeaviateSpace> weaviateSpaceFunction;
    protected final LongFunction<WeaviateClient> clientFunction;
    private final LongFunction<? extends WeaviateBaseOp<T>> opF;
    private final LongFunction<T> paramF;

    protected WeaviateBaseOpDispenser(WeaviateDriverAdapter adapter, ParsedOp op, LongFunction<String> targetF) {
        super((DriverAdapter) adapter, op);
        this.weaviateSpaceFunction = adapter.getSpaceFunc(op);
        this.clientFunction = (long l) -> {
            try {
                return this.weaviateSpaceFunction.apply(l).getClient();
            } catch (AuthException e) {
                // Fail fast with the cause preserved, instead of printing the stack
                // trace and returning a null client that would NPE later in the op.
                throw new RuntimeException("Failed to authenticate with Weaviate", e);
            }
        };
        this.paramF = getParamFunc(this.clientFunction, op, targetF);
        this.opF = createOpFunc(paramF, this.clientFunction, op, targetF);
    }

    protected WeaviateDriverAdapter getDriverAdapter() {
        return (WeaviateDriverAdapter) adapter;
    }

    /** Builds the per-cycle op parameter of type {@code T} from the op template. */
    public abstract LongFunction<T> getParamFunc(LongFunction<WeaviateClient> clientF, ParsedOp op,
        LongFunction<String> targetF);

    /** Builds the per-cycle op from the parameter and client functions. */
    public abstract LongFunction<WeaviateBaseOp<T>> createOpFunc(LongFunction<T> paramF,
        LongFunction<WeaviateClient> clientF, ParsedOp op, LongFunction<String> targetF);

    @Override
    public WeaviateBaseOp<T> getOp(long value) {
        return opF.apply(value);
    }
}

View File

@ -0,0 +1,61 @@
package io.nosqlbench.adapter.weaviate.opsdispensers;
/*
* Copyright (c) 2022 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.util.function.LongFunction;
import io.nosqlbench.adapter.weaviate.WeaviateDriverAdapter;
import io.nosqlbench.adapter.weaviate.WeaviateSpace;
import io.nosqlbench.adapter.weaviate.ops.WeaviateBaseOp;
import io.nosqlbench.adapters.api.activityimpl.BaseOpDispenser;
import io.nosqlbench.adapters.api.activityimpl.uniform.DriverAdapter;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.weaviate.client.WeaviateClient;
/**
 * Base dispenser for Weaviate ops: resolves the per-cycle space and client,
 * then delegates parameter and op construction to the abstract hooks.
 *
 * NOTE(review): a class with the same fully-qualified name exists in another
 * file of this change set — confirm which copy is intended to survive.
 *
 * @param <T> the request type carried by the dispensed {@link WeaviateBaseOp}
 */
public abstract class WeaviateBaseOpDispenser<T> extends BaseOpDispenser<WeaviateBaseOp<T>, WeaviateSpace> {

    protected final LongFunction<WeaviateSpace> weaviateSpaceFunction;
    protected final LongFunction<WeaviateClient> clientFunction;
    private final LongFunction<? extends WeaviateBaseOp<T>> opF;
    private final LongFunction<T> paramF;

    protected WeaviateBaseOpDispenser(WeaviateDriverAdapter adapter, ParsedOp op, LongFunction<String> targetF) {
        super((DriverAdapter) adapter, op);
        weaviateSpaceFunction = adapter.getSpaceFunc(op);
        // Resolve the client lazily per cycle from the space.
        clientFunction = cycle -> weaviateSpaceFunction.apply(cycle).getClient();
        paramF = getParamFunc(clientFunction, op, targetF);
        opF = createOpFunc(paramF, clientFunction, op, targetF);
    }

    protected WeaviateDriverAdapter getDriverAdapter() {
        return (WeaviateDriverAdapter) adapter;
    }

    /** Builds the per-cycle op parameter of type {@code T} from the op template. */
    public abstract LongFunction<T> getParamFunc(LongFunction<WeaviateClient> clientF, ParsedOp op,
        LongFunction<String> targetF);

    /** Builds the per-cycle op from the parameter and client functions. */
    public abstract LongFunction<WeaviateBaseOp<T>> createOpFunc(LongFunction<T> paramF,
        LongFunction<WeaviateClient> clientF, ParsedOp op, LongFunction<String> targetF);

    @Override
    public WeaviateBaseOp<T> getOp(long value) {
        return opF.apply(value);
    }
}

View File

@ -0,0 +1,353 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.weaviate.opsdispensers;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.LongFunction;
import io.nosqlbench.adapter.weaviate.WeaviateDriverAdapter;
import io.nosqlbench.adapter.weaviate.ops.WeaviateBaseOp;
import io.nosqlbench.adapter.weaviate.ops.WeaviateCreateCollectionOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.nosqlbench.nb.api.errors.OpConfigError;
import io.weaviate.client.WeaviateClient;
import io.weaviate.client.v1.misc.model.BQConfig;
import io.weaviate.client.v1.misc.model.BQConfig.BQConfigBuilder;
import io.weaviate.client.v1.misc.model.MultiTenancyConfig;
import io.weaviate.client.v1.misc.model.MultiTenancyConfig.MultiTenancyConfigBuilder;
import io.weaviate.client.v1.misc.model.PQConfig;
import io.weaviate.client.v1.misc.model.PQConfig.Encoder;
import io.weaviate.client.v1.misc.model.PQConfig.PQConfigBuilder;
import io.weaviate.client.v1.misc.model.ReplicationConfig;
import io.weaviate.client.v1.misc.model.ReplicationConfig.ReplicationConfigBuilder;
import io.weaviate.client.v1.misc.model.VectorIndexConfig;
import io.weaviate.client.v1.misc.model.VectorIndexConfig.VectorIndexConfigBuilder;
import io.weaviate.client.v1.schema.model.Property;
import io.weaviate.client.v1.schema.model.Property.PropertyBuilder;
import io.weaviate.client.v1.schema.model.WeaviateClass;
import io.weaviate.client.v1.schema.model.WeaviateClass.VectorConfig;
import io.weaviate.client.v1.schema.model.WeaviateClass.VectorConfig.VectorConfigBuilder;
import io.weaviate.client.v1.schema.model.WeaviateClass.WeaviateClassBuilder;;
/**
 * Dispenser for Weaviate create-collection ops. Translates the parsed op
 * template into a {@link WeaviateClass} definition, including optional
 * replication, multi-tenancy, vector-index, named-vector, and property
 * configuration.
 *
 * @see <a href=
 *      "https://weaviate.io/developers/weaviate/api/rest#tag/schema/post/schema">API
 *      Reference</a>
 * @see <a href=
 *      "https://weaviate.io/developers/weaviate/manage-data/collections">Collection
 *      docs</a>
 */
public class WeaviateCreateCollectionOpDispenser extends WeaviateBaseOpDispenser<WeaviateClass> {

    public WeaviateCreateCollectionOpDispenser(WeaviateDriverAdapter adapter, ParsedOp op,
        LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    /**
     * Builds the per-cycle {@link WeaviateClass} from the op template. Each
     * optional template field wraps the builder function so only defined
     * fields are applied.
     */
    @SuppressWarnings("rawtypes")
    @Override
    public LongFunction<WeaviateClass> getParamFunc(LongFunction<WeaviateClient> clientF, ParsedOp op,
        LongFunction<String> targetF) {
        LongFunction<WeaviateClassBuilder> ebF = l -> WeaviateClass.builder().className(targetF.apply(l));

        ebF = op.enhanceFuncOptionally(ebF, "description", String.class, WeaviateClassBuilder::description);
        ebF = op.enhanceFuncOptionally(ebF, "vectorizer", String.class, WeaviateClassBuilder::vectorizer);

        if (op.isDefined("properties")) {
            LongFunction<List<Property>> propertiesListF = buildPropertiesStruct(op);
            final LongFunction<WeaviateClassBuilder> propertiesF = ebF;
            ebF = l -> propertiesF.apply(l).properties(propertiesListF.apply(l));
        }

        Optional<LongFunction<Map>> rfF = op.getAsOptionalFunction("replicationConfig", Map.class);
        if (rfF.isPresent()) {
            final LongFunction<WeaviateClassBuilder> rfFunc = ebF;
            LongFunction<ReplicationConfig> repConfF = buildReplicationConfig(rfF.get());
            ebF = l -> rfFunc.apply(l).replicationConfig(repConfF.apply(l));
        }

        Optional<LongFunction<Map>> mtcF = op.getAsOptionalFunction("multiTenancyConfig", Map.class);
        if (mtcF.isPresent()) {
            final LongFunction<WeaviateClassBuilder> multiTCFunc = ebF;
            LongFunction<MultiTenancyConfig> multiTenantConfF = buildMultiTenancyConfig(mtcF.get());
            ebF = l -> multiTCFunc.apply(l).multiTenancyConfig(multiTenantConfF.apply(l));
        }

        ebF = op.enhanceFuncOptionally(ebF, "vectorIndexType", String.class, WeaviateClassBuilder::vectorIndexType);

        if (op.isDefined("vectorIndexConfig")) {
            LongFunction<VectorIndexConfig> vecIdxConfF = buildVectorIndexConfig(op);
            final LongFunction<WeaviateClassBuilder> vectorIndexConfigF = ebF;
            ebF = l -> vectorIndexConfigF.apply(l).vectorIndexConfig(vecIdxConfF.apply(l));
        }

        if (op.isDefined("vectorConfig")) {
            LongFunction<Map<String, VectorConfig>> vecConfF = buildVectorConfig(op);
            final LongFunction<WeaviateClassBuilder> vectorConfigFunc = ebF;
            ebF = l -> vectorConfigFunc.apply(l).vectorConfig(vecConfF.apply(l));
        }

        final LongFunction<WeaviateClassBuilder> lastF = ebF;
        return l -> lastF.apply(l).build();
    }

    /** Builds the named-vector configuration map from the 'vectorConfig' template field. */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private LongFunction<Map<String, VectorConfig>> buildVectorConfig(ParsedOp op) {
        Optional<LongFunction<Map>> baseFunc = op.getAsOptionalFunction("vectorConfig", Map.class);
        return baseFunc.<LongFunction<Map<String, VectorConfig>>>map(mapLongFunc -> l -> {
            Map<String, Object> vcMap = mapLongFunc.apply(l);
            Map<String, VectorConfig> finalVecConf = new HashMap<>();
            vcMap.forEach((k, v) -> {
                if (v instanceof Map) {
                    VectorConfigBuilder vcBdr = VectorConfig.builder();
                    ((Map<String, Object>) v).forEach((innerKey, innerVal) -> {
                        if ("vectorizer".equals(innerKey)) {
                            Map<String, Object> vectorizerMap = new HashMap<>(1);
                            vectorizerMap.put((String) innerVal, (String) innerVal);
                            vcBdr.vectorizer(vectorizerMap);
                            // https://weaviate.io/developers/weaviate/api/rest#tag/schema/post/schema
                            // The REST API rejects this shape with 422 errors such as
                            // "class.VectorConfig.Vectorizer must be an object, got <nil>" and
                            // "class.VectorConfig.Vectorizer must consist only 1 configuration, got: 0".
                            logger.warn("For now, vectorizer is not properly implemented in named vector as it is unclear");
                        }
                        if ("vectorIndexType".equals(innerKey)) {
                            vcBdr.vectorIndexType((String) innerVal);
                        }
                        if ("vectorIndexConfig".equals(innerKey)) {
                            vcBdr.vectorIndexConfig(extractVectorIndexConfig((Map<String, Object>) innerVal));
                        }
                    });
                    finalVecConf.put(k, vcBdr.build());
                } else {
                    throw new OpConfigError("Expected a map type for the value of '" + k
                        + "' named vector, but received " + v.getClass().getSimpleName());
                }
            });
            return finalVecConf;
        }).orElse(null);
    }

    /** Builds the class-level vector index configuration from the 'vectorIndexConfig' template field. */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private LongFunction<VectorIndexConfig> buildVectorIndexConfig(ParsedOp op) {
        Optional<LongFunction<Map>> baseFunc = op.getAsOptionalFunction("vectorIndexConfig", Map.class);
        return baseFunc.<LongFunction<VectorIndexConfig>>map(mapLongFunc -> l -> {
            Map<String, Object> nvMap = mapLongFunc.apply(l);
            return extractVectorIndexConfig(nvMap);
        }).orElse(null);
    }

    /**
     * Translates a raw key/value map into a {@link VectorIndexConfig}, including
     * nested 'bq' and 'pq' (with nested 'encoder') compression settings.
     */
    @SuppressWarnings({ "unchecked" })
    private VectorIndexConfig extractVectorIndexConfig(Map<String, Object> nvMap) {
        VectorIndexConfigBuilder vecIdxConfBuilder = VectorIndexConfig.builder();
        nvMap.forEach((key, value) -> {
            if (key.equals("cleanupIntervalSeconds")) {
                vecIdxConfBuilder.cleanupIntervalSeconds(((Number) value).intValue());
            }
            if (key.equals("distance")) {
                vecIdxConfBuilder.distance((String) value);
            }
            if (key.equals("ef")) {
                vecIdxConfBuilder.ef(((Number) value).intValue());
            }
            if (key.equals("efConstruction")) {
                vecIdxConfBuilder.efConstruction(((Number) value).intValue());
            }
            if (key.equals("maxConnections")) {
                vecIdxConfBuilder.maxConnections(((Number) value).intValue());
            }
            if (key.equals("dynamicEfMin")) {
                vecIdxConfBuilder.dynamicEfMin(((Number) value).intValue());
            }
            if (key.equals("dynamicEfMax")) {
                vecIdxConfBuilder.dynamicEfMax(((Number) value).intValue());
            }
            if (key.equals("dynamicEfFactor")) {
                vecIdxConfBuilder.dynamicEfFactor(((Number) value).intValue());
            }
            if (key.equals("flatSearchCutoff")) {
                vecIdxConfBuilder.flatSearchCutoff(((Number) value).intValue());
            }
            if (key.equals("skip")) {
                vecIdxConfBuilder.skip((Boolean) value);
            }
            if (key.equals("vectorCacheMaxObjects")) {
                vecIdxConfBuilder.vectorCacheMaxObjects(((Number) value).longValue());
            }
            if (key.equals("bq")) {
                BQConfigBuilder bqConfBuilder = BQConfig.builder();
                if (value instanceof Map<?, ?>) {
                    ((Map<String, Object>) value).forEach((bqKey, bqVal) -> {
                        if (bqKey.equals("enabled")) {
                            bqConfBuilder.enabled((Boolean) bqVal);
                        }
                        if (bqKey.equals("rescoreLimit")) {
                            bqConfBuilder.rescoreLimit(((Number) bqVal).longValue());
                        }
                        if (bqKey.equals("cache")) {
                            bqConfBuilder.cache((Boolean) bqVal);
                        }
                    });
                } else {
                    throw new OpConfigError("Expected a map type for 'bq' leaf of the configuration");
                }
                vecIdxConfBuilder.bq(bqConfBuilder.build());
            }
            if (key.equals("pq")) {
                PQConfigBuilder pqConfBuilder = PQConfig.builder();
                if (value instanceof Map<?, ?>) {
                    ((Map<String, Object>) value).forEach((pqKey, pqVal) -> {
                        if (pqKey.equals("enabled")) {
                            pqConfBuilder.enabled((Boolean) pqVal);
                        }
                        if (pqKey.equals("rescoreLimit")) {
                            pqConfBuilder.rescoreLimit(((Number) pqVal).longValue());
                        }
                        if (pqKey.equals("bitCompression")) {
                            pqConfBuilder.bitCompression((Boolean) pqVal);
                        }
                        if (pqKey.equals("segments")) {
                            pqConfBuilder.segments(((Number) pqVal).intValue());
                        }
                        if (pqKey.equals("centroids")) {
                            pqConfBuilder.centroids(((Number) pqVal).intValue());
                        }
                        if (pqKey.equals("trainingLimit")) {
                            pqConfBuilder.trainingLimit(((Number) pqVal).intValue());
                        }
                        if (pqKey.equals("cache")) {
                            pqConfBuilder.cache((Boolean) pqVal);
                        }
                        if (pqKey.equals("encoder")) {
                            PQConfig.Encoder.EncoderBuilder encBuilder = Encoder.builder();
                            if (pqVal instanceof Map<?, ?>) {
                                ((Map<String, Object>) pqVal).forEach((encKey, encVal) -> {
                                    if (encKey.equals("type")) {
                                        encBuilder.type((String) encVal);
                                    }
                                    if (encKey.equals("distribution")) {
                                        encBuilder.distribution((String) encVal);
                                    }
                                });
                            } else {
                                // Fixed: report the encoder value's own type, not the outer 'pq' map's.
                                throw new OpConfigError(
                                    "Expected a map type for 'encoder's value of the configuration, but got "
                                        + pqVal.getClass().getSimpleName());
                            }
                            pqConfBuilder.encoder(encBuilder.build());
                        }
                    });
                } else {
                    // Fixed copy-paste: this branch handles 'pq', not 'bq'.
                    throw new OpConfigError("Expected a map type for 'pq' leaf of the configuration, but got "
                        + value.getClass().getSimpleName());
                }
                vecIdxConfBuilder.pq(pqConfBuilder.build());
            }
        });
        return vecIdxConfBuilder.build();
    }

    /** Builds the collection's property list from the 'properties' template field. */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private LongFunction<List<Property>> buildPropertiesStruct(ParsedOp op) {
        Optional<LongFunction<Map>> baseFunc = op.getAsOptionalFunction("properties", Map.class);
        return baseFunc.<LongFunction<List<Property>>>map(mapLongFunc -> l -> {
            Map<String, Object> nvMap = mapLongFunc.apply(l);
            List<Property> propertyList = new ArrayList<>();
            nvMap.forEach((name, value) -> {
                // TODO -- see if we need to set name at the higher (i.e. Property) level or
                // just at NestedProperty level
                PropertyBuilder propBuilder = Property.builder().name(name);
                if (value instanceof Map) {
                    ((Map<String, Object>) value).forEach((innerKey, innerValue) -> {
                        if (innerKey.equals("description")) {
                            propBuilder.description((String) innerValue);
                        }
                        if (innerKey.equals("dataType")) {
                            // https://weaviate.io/developers/weaviate/config-refs/datatypes#datatype-cross-reference
                            // is unsupported at this time in NB driver
                            propBuilder.dataType(Arrays.asList((String) innerValue));
                        }
                        if (innerKey.equals("tokenization")) {
                            propBuilder.tokenization(((String) innerValue));
                        }
                        if (innerKey.equals("indexFilterable")) {
                            propBuilder.indexFilterable((Boolean) innerValue);
                        }
                        if (innerKey.equals("indexSearchable")) {
                            propBuilder.indexSearchable((Boolean) innerValue);
                        }
                    });
                } else {
                    throw new OpConfigError(
                        "Named vector properties must be a Map<String, Map<String, Object>>, but got "
                            + value.getClass().getSimpleName() + " instead for the inner value");
                }
                propertyList.add(propBuilder.build());
            });
            return propertyList;
        }).orElse(null);
    }

    /** Builds the multi-tenancy configuration (currently only 'enabled'). */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    private LongFunction<MultiTenancyConfig> buildMultiTenancyConfig(LongFunction<Map> mapLongFunction) {
        return l -> {
            MultiTenancyConfigBuilder mtcB = MultiTenancyConfig.builder();
            mapLongFunction.apply(l).forEach((key, val) -> {
                if (key.equals("enabled")) {
                    mtcB.enabled((Boolean) val);
                }
            });
            return mtcB.build();
        };
    }

    /** Builds the replication configuration (currently only 'factor'). */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private LongFunction<ReplicationConfig> buildReplicationConfig(LongFunction<Map> mapLongFunction) {
        return l -> {
            ReplicationConfigBuilder rConfB = ReplicationConfig.builder();
            mapLongFunction.apply(l).forEach((key, val) -> {
                if (key.equals("factor")) {
                    rConfB.factor(((Number) val).intValue());
                }
            });
            return rConfB.build();
        };
    }

    @Override
    public LongFunction<WeaviateBaseOp<WeaviateClass>> createOpFunc(LongFunction<WeaviateClass> paramF,
        LongFunction<WeaviateClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return l -> new WeaviateCreateCollectionOp(clientF.apply(l), paramF.apply(l));
    }
}

View File

@ -0,0 +1,72 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.weaviate.opsdispensers;
import java.util.function.LongFunction;
import io.nosqlbench.adapter.weaviate.WeaviateDriverAdapter;
import io.nosqlbench.adapter.weaviate.ops.WeaviateBaseOp;
import io.nosqlbench.adapter.weaviate.ops.WeaviateCreateObjectsOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.weaviate.client.WeaviateClient;
import io.weaviate.client.v1.data.api.ObjectCreator;
/**
 * Create objects.
 *
 * @see <a href=
 *      "https://weaviate.io/developers/weaviate/manage-data/create">Create
 *      Objects</a>.
 * @see <a href=
 *      "https://weaviate.io/developers/weaviate/api/rest#tag/objects/post/objects">Create
 *      Objects - REST API</a>.
 */
public class WeaviateCreateObjectsOpDispenser extends WeaviateBaseOpDispenser<ObjectCreator> {

    public WeaviateCreateObjectsOpDispenser(WeaviateDriverAdapter adapter, ParsedOp op, LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    /**
     * Builds an {@link ObjectCreator} targeting the named collection for each cycle.
     * Uses {@code getDeclaredConstructor().newInstance()} instead of the deprecated
     * {@code Class.newInstance()}, and fails fast with the cause preserved instead of
     * printing a stack trace and returning a null creator (which would NPE later).
     */
    @Override
    public LongFunction<ObjectCreator> getParamFunc(LongFunction<WeaviateClient> clientF, ParsedOp op,
        LongFunction<String> targetF) {
        return l -> {
            try {
                return ObjectCreator.class.getDeclaredConstructor().newInstance()
                    .withClassName(targetF.apply(l));
            } catch (ReflectiveOperationException e) {
                throw new RuntimeException("Unable to instantiate ObjectCreator", e);
            }
        };
    }

    @Override
    public LongFunction<WeaviateBaseOp<ObjectCreator>> createOpFunc(LongFunction<ObjectCreator> paramF,
        LongFunction<WeaviateClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return l -> new WeaviateCreateObjectsOp(clientF.apply(l), paramF.apply(l));
    }
}

View File

@ -0,0 +1,59 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.weaviate.opsdispensers;
import java.util.function.LongFunction;
import io.nosqlbench.adapter.weaviate.WeaviateDriverAdapter;
import io.nosqlbench.adapter.weaviate.ops.WeaviateBaseOp;
import io.nosqlbench.adapter.weaviate.ops.WeaviateDeleteCollectionOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.weaviate.client.WeaviateClient;
/**
 * Delete a Weaviate collection.
 *
 * @see <a href=
 *      "https://weaviate.io/developers/weaviate/manage-data/collections#delete-a-collection">Delete
 *      Collection docs</a>.
 * @see <a href=
 *      "https://weaviate.io/developers/weaviate/api/rest#tag/schema/delete/schema/{className}">Delete
 *      Collection REST API</a>.
 */
public class WeaviateDeleteCollectionOpDispenser extends WeaviateBaseOpDispenser<String> {

    public WeaviateDeleteCollectionOpDispenser(WeaviateDriverAdapter adapter, ParsedOp op,
        LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    /** The op parameter is simply the target collection name. */
    @Override
    public LongFunction<String> getParamFunc(LongFunction<WeaviateClient> clientF, ParsedOp op,
        LongFunction<String> targetF) {
        return targetF::apply;
    }

    @Override
    public LongFunction<WeaviateBaseOp<String>> createOpFunc(LongFunction<String> paramF,
        LongFunction<WeaviateClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return cycle -> new WeaviateDeleteCollectionOp(clientF.apply(cycle), paramF.apply(cycle));
    }
}

View File

@ -0,0 +1,45 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.weaviate.opsdispensers;
import java.util.function.LongFunction;
import io.nosqlbench.adapter.weaviate.WeaviateDriverAdapter;
import io.nosqlbench.adapter.weaviate.ops.WeaviateBaseOp;
import io.nosqlbench.adapter.weaviate.ops.WeaviateGetCollectionSchemaOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.weaviate.client.WeaviateClient;
/**
 * Dispenses ops that fetch the collection schema from the Weaviate instance.
 */
public class WeaviateGetCollectionSchemaOpDispenser extends WeaviateBaseOpDispenser<String> {

    public WeaviateGetCollectionSchemaOpDispenser(WeaviateDriverAdapter adapter, ParsedOp op,
        LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    /** The op parameter is simply the target collection name. */
    @Override
    public LongFunction<String> getParamFunc(LongFunction<WeaviateClient> clientF, ParsedOp op,
        LongFunction<String> targetF) {
        return targetF::apply;
    }

    @Override
    public LongFunction<WeaviateBaseOp<String>> createOpFunc(LongFunction<String> paramF,
        LongFunction<WeaviateClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return cycle -> new WeaviateGetCollectionSchemaOp(clientF.apply(cycle), paramF.apply(cycle));
    }
}

View File

@ -0,0 +1,24 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.weaviate.types;
/**
 * The op types supported by the Weaviate driver adapter. Lowercase names match
 * the op keys used in workload YAML templates.
 */
public enum WeaviateOpType {
    // Create a collection (a Weaviate "class") from a schema definition.
    create_collection,
    // Delete a collection by name.
    delete_collection,
    // Fetch the schema of the collections.
    get_collection_schema,
    // Insert objects (properties plus vector embeddings) into a collection.
    create_objects,
}

View File

@ -0,0 +1,327 @@
min_version: 5.21
description: |
This is a template for live vector search testing.
Template Variables:
schema: Install the schema required to run the test
rampup: Measure how long it takes to load a set of embeddings
    search: Run vector search with a set of default (or overridden) parameters,
      measuring how the system responds to queries while it is still indexing
      recently ingested data.
In all of these phases, it is important to instance the metrics with distinct names.
Also, aggregates of recall should include total aggregate as well as a moving average.
scenarios:
weaviate_vectors:
delete_collection: >-
run tags==block:delete_collection
errors===stop
cycles===UNDEF threads===UNDEF
uri=TEMPLATE(weaviatehost) token_file=TEMPLATE(token_file)
schema_collection: >-
run tags==block:schema_collection
errors===stop
cycles===UNDEF threads===UNDEF
uri=TEMPLATE(weaviatehost) token_file=TEMPLATE(token_file)
get_collection_schema: >-
run tags==block:get_collection_schema
errors===stop
cycles===UNDEF threads===UNDEF
uri=TEMPLATE(weaviatehost) token_file=TEMPLATE(token_file)
# TODO - ,retry should be added to errors for rampup
rampup: >-
run tags==block:rampup
errors===warn,counter
cycles===TEMPLATE(train_cycles,TEMPLATE(trainsize,1000)) threads===TEMPLATE(train_threads,AUTO)
uri=TEMPLATE(weaviatehost) token_file=TEMPLATE(token_file)
count_vectors: >-
run tags==block:count_vectors
errors===stop
cycles===UNDEF threads===UNDEF
uri=TEMPLATE(weaviatehost) token_file=TEMPLATE(token_file)
search_points: >-
run tags==block:search_points
errors===warn,counter
cycles===TEMPLATE(testann_cycles,TEMPLATE(testsize,1000)) threads===TEMPLATE(testann_threads,AUTO)
uri=TEMPLATE(weaviatehost) token_file=TEMPLATE(token_file)
params:
driver: weaviate
instrument: true
bindings:
id_val: Identity();
id_val_uuid: ToHashedUUID() -> java.util.UUID
row_key: ToString()
row_key_batch: Mul(TEMPLATE(batch_size)L); ListSizedStepped(TEMPLATE(batch_size),long->ToString());
# filetype=hdf5 for TEMPLATE(filetype,hdf5)
test_floatlist_hdf5: HdfFileToFloatList("testdata/TEMPLATE(dataset).hdf5", "/test");
relevant_indices_hdf5: HdfFileToIntArray("testdata/TEMPLATE(dataset).hdf5", "/neighbors")
distance_floatlist_hdf5: HdfFileToFloatList("testdata/TEMPLATE(dataset).hdf5", "/distance")
# TODO - remove the local path
train_floatlist_hdf5: HdfFileToFloatList("local/testdata/TEMPLATE(dataset).hdf5", "/train");
train_floatlist_hdf5_batch: Mul(TEMPLATE(batch_size)L); ListSizedStepped(TEMPLATE(batch_size),HdfFileToFloatList("testdata/TEMPLATE(dataset).hdf5", "/train"));
# filetype=fvec for TEMPLATE(filetype,fvec)
test_floatlist_fvec: FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_query_vectors.fvec");
relevant_indices_fvec: IVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_indices_query.ivec");
distance_floatlist_fvec: FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(testsize)_distances_count.fvec",TEMPLATE(dimensions),0);
train_floatlist_fvec: FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_base_vectors.fvec",TEMPLATE(dimensions),0);
train_floatlist_fvec_batch: Mul(TEMPLATE(batch_size,10)L); ListSizedStepped(TEMPLATE(batch_size),FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_base_vectors.fvec",TEMPLATE(dimensions),0));
blocks:
delete_collection:
ops:
# https://weaviate.io/developers/weaviate/manage-data/collections#delete-a-collection
delete_col_op:
delete_collection: "TEMPLATE(collection)"
schema_collection1:
ops:
# https://weaviate.io/developers/weaviate/config-refs/schema#collection-object
create_col_op:
create_collection: "TEMPLATE(collection)"
description: "This is a key/value collection with value being the vector embedding"
vectorizer: "none"
vectorIndexType: "hnsw"
properties:
key:
description: "the key column, similar to partition key in Cassandra"
dataType: "text"
tokenization: "word"
indexFilterable: true
indexSearchable: true
value:
description: "the actual vector value column that stores embeddings"
dataType: "number[]"
indexFilterable: true
schema_collection:
ops:
# https://weaviate.io/developers/weaviate/config-refs/schema#collection-object
create_col_op:
create_collection: "TEMPLATE(collection)"
description: "This is a key/value collection with value being the vector embedding"
vectorConfig:
# below 'value' is a named-vector
value:
vectorizer: "none"
vectorIndexType: "hnsw"
vectorIndexConfig:
          #hnsw specific configs
cleanupIntervalSeconds: 300
distance: "cosine"
ef: 64 # -1 for dynamic ef, https://weaviate.io/developers/weaviate/concepts/vector-index#dynamic-ef
efConstruction: 128
maxConnections: 32 # default 64
dynamicEfMin: 100
dynamicEfMax: 500
dynamicEfFactor: 8
flatSearchCutoff: 40000
skip: false
vectorCacheMaxObjects: 1e12
#pq index specific configs
#pq:
#enabled: false
#trainingLimit: 100000
#segments: 2
#centroids: 256
#encoder:
#type: "kmeans"
          #distribution: "log-normal"
#flat index specific
#vectorCacheMaxObjects: 1e12
#bq specific configs
#bq:
#enabled: false
#rescoreLimit: -1
#cache: false
#dynamic index specific configs:
#distance: "cosine"
#hnsw:
#...
#flat:
#...
#threshold: 10000
properties:
key:
description: "the key column, similar to partition key in Cassandra"
dataType: "text"
tokenization: "word"
indexFilterable: true
indexSearchable: true
value:
description: "the actual vector value column that stores embeddings"
dataType: "number[]"
indexFilterable: true
replicationConfig:
factor: 3
multiTenancyConfig:
enabled: true
get_collection_schema:
ops:
get_collection_schema_op:
get_collection_schema: "TEMPLATE(collection)"
rampup:
ops:
create_objects_op:
create_objects: "TEMPLATE(collection)"
objects:
- id: "{id_val_uuid}"
#properties:
# key: "{row_key}"
vectors:
# Only named vectors are supported at this time
value: "{train_floatlist_TEMPLATE(filetype)}"
#tenant: ""
search_points:
ops:
search_points_op:
search_points: "TEMPLATE(collection)"
timeout: 300 # 5 minutes
      # NOTE(review): link copied from the Qdrant adapter — Weaviate has no points.proto; verify these consistency values apply here
      # https://github.com/qdrant/qdrant/blob/v1.9.0/lib/api/src/grpc/proto/points.proto#L21-L25
# 0 - All, 1 - Majority, 2 - Quorum
consistency: "Quorum"
with_payload: true
with_vector: true
limit: TEMPLATE(select_limit,100)
# Another option to set with payload is as follows
# with_payload: ["key1"]
# Another option to set with payload is as follows
# with_payload:
# include: ["key1"]
# exclude: ["key2"]
vector:
- name: "value"
values: "{test_floatlist_TEMPLATE(filetype)}"
#indices: "[1,7]"
verifier-init: |
relevancy= new io.nosqlbench.nb.api.engine.metrics.wrappers.RelevancyMeasures(_parsed_op);
for (int k in List.of(100)) {
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.recall("recall",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.precision("precision",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.F1("F1",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.reciprocal_rank("RR",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.average_precision("AP",k));
}
verifier: |
// driver-specific function
actual_indices=io.nosqlbench.adapter.weaviate.weaviateAdapterUtils.searchPointsResponseIdNumToIntArray(result)
// System.out.println("actual_indices ------>>>>: " + actual_indices);
// driver-agnostic function
relevancy.accept({relevant_indices_TEMPLATE(filetype)},actual_indices);
// because we are "verifying" although this needs to be reorganized
return true;
# More complex filtering available. See 'count_points' below for an example filter structure
create_payload_index:
ops:
create_payload_index_op:
create_payload_index: "TEMPLATE(collection)"
field_name: "field17"
field_type: "Keyword"
ordering: "Strong"
wait: true
  # NOTE(review): link copied from the Qdrant adapter — Weaviate has no collections.proto; verify this op applies here
  # https://github.com/qdrant/qdrant/blob/v1.9.2/lib/api/src/grpc/proto/collections.proto#L395-L400
count_vectors:
ops:
count_points_op:
count_points: "TEMPLATE(collection)"
exact: true
# More complex filtering logic could be provided as follows
#filter:
# - clause: "must"
# condition: "match"
# key: "field1"
# value: "abc1"
# - clause: "must_not"
# condition: "match_any"
# key: "field2"
# value:
# - "abc2"
# - "abc3"
# - clause: "should"
# condition: "range"
# key: "field3"
# # any one of below
# value:
# gte: 10
# lte: 20
# gt: null
# lt: null
# - clause: "must"
# condition: "nested"
# key: "field4"
# nested:
# - condition: "match"
# key: "field5[].whatsup"
# value: "ni24maddy"
# - condition: "match"
# key: "field6"
# value: true
# - clause: "should"
# condition: "has_id"
# # no other keys are supported for this type
# key: "id"
# value:
# - 1
# - 2
# - 3
# - clause: "should"
# condition: "match"
# key: "field7"
# # special case of match is text
# text: "abc7"
# - clause: "should"
# condition: "geo_bounding_box"
# key: "field8"
# value:
# top_left:
# lat: 40.7128
# lon: -74.0060
# bottom_right:
# lat: 40.7128
# lon: -74.0060
# - clause: "must_not"
# condition: "geo_radius"
# key: "field9"
# value:
# center:
# lat: 40.7128
# lon: -74.0060
# radius: 100.0
# - clause: "must"
# condition: "geo_polygon"
# key: "field10"
# value:
# exterior_points:
# - lat: 30.7128
# lon: -34.0060
# interior_points:
# - lat: 42.7128
# lon: -54.0060
# - clause: "should"
# condition: "values_count"
# key: "field11"
# # Any one of below
# value:
# gte: 1
# lte: 10
# gt: null
# lt: null
# - clause: "must_not"
# condition: "is_empty"
# key: "field12"
# - clause: "must"
# condition: "is_null"
# key: "field13"
# - clause: "must"
# condition: "match_except"
# key: "field14"
# value:
# - 1
# - 2

View File

@ -0,0 +1,69 @@
# Weaviate driver adapter
The Weaviate driver adapter is a NoSQLBench adapter for the `weaviate` driver, an open-source Java driver
for connecting to and performing operations on an instance of a Weaviate vector database. The driver is
available on GitHub at https://github.com/weaviate/java-client.
## Run Commands (Remove prior to merge)
### Create Collection Schema
```
java -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar weaviate_vector_live weaviate_vectors.rampup dimensions=25 testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=Glove_25 weaviatehost=letsweave-czgwdrw9.weaviate.network token_file=${workspace_loc:/nosqlbench}/local/weaviate/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:weaviate_1255,instance:vectors,vendor:weaviate_wcd" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
### Delete Collection
```
java -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar weaviate_vector_live weaviate_vectors.delete_collection dimensions=25 testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=Glove_25 weaviatehost=letsweave-czgwdrw9.weaviate.network token_file=${workspace_loc:/nosqlbench}/local/weaviate/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:weaviate_1255,instance:vectors,vendor:weaviate_wcd" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
### Get all schema
```
java -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar weaviate_vector_live weaviate_vectors.get_collection_schema dimensions=25 testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=Glove_25 weaviatehost=letsweave-czgwdrw9.weaviate.network token_file=${workspace_loc:/nosqlbench}/local/weaviate/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:weaviate_1255,instance:vectors,vendor:weaviate_wcd" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
### Insert objects
```
TODO
```
### Read objects
```
TODO
```
### TODO - Work on the below
## activity parameters
The following parameters must be supplied to the adapter at runtime in order to successfully connect to an
instance of the [Weaviate database](https://weaviate.io/developers/weaviate):
* `token` - In order to use the Weaviate database you must have an account. Once the account is created you can [request
an api key/token](https://weaviate.io/developers/wcs/quickstart#explore-the-details-panel). This key will need to be
provided any time a database connection is desired. Alternatively, the api key can be stored in a file securely and
referenced via the `token_file` config option pointing to the path of the file.
* `uri` - When a collection/index is created in the database the URI (aka endpoint) must be specified as well. The adapter will
use the default value of `localhost:8080` if none is provided at runtime. Remember to *not* include the `https://`
prefix.
* `scheme` - the scheme used to connect to the database. Defaults to `http`.
## Op Templates
The Weaviate adapter supports [**all basic operations**](../java/io/nosqlbench/adapter/weaviate/ops) supported by the [Java
driver published by Weaviate](https://github.com/weaviate/java-client). The official Weaviate API reference can be found at
https://weaviate.io/developers/weaviate/api/rest.
The operations include full-fledged support for key APIs available in the Weaviate Java driver.
The following are a couple high level API operations.
* Create Collection
* Delete Collection
* Get Entire Schema
* Create Objects (vectors)
* Read Objects (vectors)
## Examples
Check out the [full example available here](./activities/weaviate_vectors_live.yaml).
---

View File

@ -116,7 +116,7 @@
<profile>
<id>adapter-jdbc-include</id>
<activation>
<activeByDefault>true</activeByDefault>
<activeByDefault>false</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -228,7 +228,7 @@
<profile>
<id>adapter-kafka-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -242,7 +242,7 @@
<profile>
<id>adapter-amqp-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -256,7 +256,7 @@
<profile>
<id>adapter-qdrant-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -267,6 +267,34 @@
</dependencies>
</profile>
<profile>
<id>adapter-weaviate-include</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>adapter-weaviate</artifactId>
<version>${revision}</version>
</dependency>
</dependencies>
</profile>
<profile>
<id>adapter-azure-aisearch-include</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>adapter-azure-aisearch</artifactId>
<version>${revision}</version>
</dependency>
</dependencies>
</profile>
</profiles>
</project>

View File

@ -87,7 +87,7 @@
<profile>
<id>adapter-jdbc-module</id>
<activation>
<activeByDefault>true</activeByDefault>
<activeByDefault>false</activeByDefault>
</activation>
<modules>
<module>adapter-jdbc</module>
@ -157,7 +157,7 @@
<profile>
<id>adapter-kafka-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-kafka</module>
@ -167,7 +167,7 @@
<profile>
<id>adapter-amqp-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-amqp</module>
@ -187,12 +187,32 @@
<profile>
<id>adapter-qdrant-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-qdrant</module>
</modules>
</profile>
<profile>
<id>adapter-weaviate-module</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-weaviate</module>
</modules>
</profile>
<profile>
<id>adapter-azure-aisearch-module</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-azure-aisearch</module>
</modules>
</profile>
</profiles>
</project>