Initial azure aisearch draft

This commit is contained in:
Madhavan Sridharan 2024-08-13 11:47:26 -04:00
parent 8fd79b912f
commit 049e7d614c
26 changed files with 1853 additions and 25 deletions

View File

@ -404,6 +404,16 @@
<version>4.8.2</version>
</dependency>
<dependency>
<groupId>com.azure</groupId>
<artifactId>azure-search-documents</artifactId>
<version>11.7.0</version>
</dependency>
<dependency>
<groupId>com.azure</groupId>
<artifactId>azure-identity</artifactId>
<version>1.13.2</version>
</dependency>
</dependencies>
</dependencyManagement>

View File

@ -0,0 +1,59 @@
<!--
~ Copyright (c) 2020-2024 nosqlbench
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<!--<?xml version="1.0" encoding="UTF-8"?>-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>adapter-azure-aisearch</artifactId>
<packaging>jar</packaging>
<parent>
<artifactId>mvn-defaults</artifactId>
<groupId>io.nosqlbench</groupId>
<version>${revision}</version>
<relativePath>../../mvn-defaults</relativePath>
</parent>
<name>${project.artifactId}</name>
<description>
A nosqlbench adapter driver module for the Azure AI Search database.
</description>
<dependencies>
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>nb-annotations</artifactId>
<version>${revision}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>adapters-api</artifactId>
<version>${revision}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.azure</groupId>
<artifactId>azure-search-documents</artifactId>
</dependency>
<dependency>
<groupId>com.azure</groupId>
<artifactId>azure-identity</artifactId>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,65 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import com.azure.search.documents.SearchDocument;
import com.azure.search.documents.util.SearchPagedIterable;
/**
 * Helper routines shared by the Azure AI Search adapter: parsing of list-like
 * strings, masking of values before they are logged, and extraction of a field
 * from a search response.
 */
public class AzureAISearchAdapterUtils {
    /** Selector name under which the adapter services are registered. */
    public static final String AZURE_AI_SEARCH = "azure_aisearch";

    /** Item separator: one or more spaces, or a comma with optional surrounding spaces. */
    private static final String ITEM_SEPARATOR_REGEX = "( +| *, *)";

    /**
     * Split a space- or comma-separated list into its non-blank items.
     *
     * @param input The list to split; must not be blank (checked with an assertion)
     * @return The non-blank items in their original order, as an unmodifiable list
     */
    public static List<String> splitNames(String input) {
        // isNotBlank already implies isNotEmpty, so a single check is sufficient.
        assert StringUtils.isNotBlank(input);
        return Arrays.stream(input.split(ITEM_SEPARATOR_REGEX)).filter(StringUtils::isNotBlank).toList();
    }

    /**
     * Split a space- or comma-separated list of integers into {@code Long} values.
     *
     * @param input The list to split; must not be blank (checked with an assertion)
     * @return The parsed values in their original order, as an unmodifiable list
     * @throws NumberFormatException if an item is not a valid {@code long}
     */
    public static List<Long> splitLongs(String input) {
        assert StringUtils.isNotBlank(input);
        return Arrays.stream(input.split(ITEM_SEPARATOR_REGEX)).filter(StringUtils::isNotBlank).map(Long::parseLong)
            .toList();
    }

    /**
     * Mask the numeric digits in the given string with '*'.
     * <p>
     * A {@code null} or empty value is returned unchanged, so callers may mask an
     * optional value without tripping an assertion when it is absent.
     *
     * @param unmasked The string to mask
     * @return The masked string
     */
    protected static String maskDigits(String unmasked) {
        if (unmasked == null || unmasked.isEmpty()) {
            return unmasked;
        }
        StringBuilder masked = new StringBuilder(unmasked.length());
        for (int i = 0; i < unmasked.length(); i++) {
            char ch = unmasked.charAt(i);
            masked.append(Character.isDigit(ch) ? '*' : ch);
        }
        return masked.toString();
    }

    /**
     * Collect the string form of one field from every document in a search
     * response.
     *
     * @param fieldName The document field to read
     * @param response  The search response to walk
     * @return One entry per search result, in response order
     */
    public String[] responseFieldToStringArray(String fieldName, SearchPagedIterable response) {
        // NOTE(review): presumably throws NullPointerException when a document lacks
        // the field (toString() on a missing value) - confirm against callers.
        return response.stream()
            .map(searchResult -> searchResult.getDocument(SearchDocument.class).get(fieldName).toString())
            .toArray(String[]::new);
    }
}

View File

@ -0,0 +1,55 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch;
import java.util.function.Function;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapters.api.activityimpl.OpMapper;
import io.nosqlbench.adapters.api.activityimpl.uniform.BaseDriverAdapter;
import io.nosqlbench.adapters.api.activityimpl.uniform.DriverAdapter;
import io.nosqlbench.nb.annotations.Service;
import io.nosqlbench.nb.api.components.core.NBComponent;
import io.nosqlbench.nb.api.config.standard.NBConfigModel;
import io.nosqlbench.nb.api.config.standard.NBConfiguration;
import io.nosqlbench.nb.api.labels.NBLabels;
import static io.nosqlbench.adapter.azureaisearch.AzureAISearchAdapterUtils.AZURE_AI_SEARCH;
/**
 * Driver adapter that wires the Azure AI Search op mapper, space initializer and
 * configuration model into the adapter framework.
 */
@Service(value = DriverAdapter.class, selector = AZURE_AI_SEARCH)
public class AzureAISearchDriverAdapter extends BaseDriverAdapter<AzureAISearchBaseOp<?>, AzureAISearchSpace> {

    /**
     * @param parentComponent The component this adapter is attached to
     * @param labels          The labels passed on to the base adapter
     */
    public AzureAISearchDriverAdapter(NBComponent parentComponent, NBLabels labels) {
        super(parentComponent, labels);
    }

    /**
     * @return A new {@link AzureAISearchOpMapper} bound to this adapter
     */
    @Override
    public OpMapper<AzureAISearchBaseOp<?>> getOpMapper() {
        return new AzureAISearchOpMapper(this);
    }

    /**
     * @param cfg The configuration handed to every space that gets created
     * @return A function creating an {@link AzureAISearchSpace} for a given space name
     */
    @Override
    public Function<String, ? extends AzureAISearchSpace> getSpaceInitializer(NBConfiguration cfg) {
        return spaceName -> new AzureAISearchSpace(spaceName, cfg);
    }

    /**
     * @return The base adapter config model extended with the space's parameters
     */
    @Override
    public NBConfigModel getConfigModel() {
        NBConfigModel spaceModel = AzureAISearchSpace.getConfigModel();
        return super.getConfigModel().add(spaceModel);
    }
}

View File

@ -0,0 +1,32 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch;
import static io.nosqlbench.adapter.azureaisearch.AzureAISearchAdapterUtils.AZURE_AI_SEARCH;
import io.nosqlbench.adapter.diag.DriverAdapterLoader;
import io.nosqlbench.nb.annotations.Service;
import io.nosqlbench.nb.api.components.core.NBComponent;
import io.nosqlbench.nb.api.labels.NBLabels;
/**
 * Service-loadable factory that creates {@link AzureAISearchDriverAdapter}
 * instances.
 */
@Service(value = DriverAdapterLoader.class, selector = AZURE_AI_SEARCH)
public class AzureAISearchDriverAdapterLoader implements DriverAdapterLoader {

    /**
     * @param parent      The component the new adapter is attached to
     * @param childLabels The labels given to the new adapter
     * @return A newly constructed adapter
     */
    @Override
    public AzureAISearchDriverAdapter load(NBComponent parent, NBLabels childLabels) {
        AzureAISearchDriverAdapter driverAdapter = new AzureAISearchDriverAdapter(parent, childLabels);
        return driverAdapter;
    }
}

View File

@ -0,0 +1,75 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchBaseOpDispenser;
import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchCreateOrUpdateIndexOpDispenser;
import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchDeleteIndexOpDispenser;
import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchListIndexesOpDispenser;
import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchSearchDocumentsOpDispenser;
import io.nosqlbench.adapter.azureaisearch.opsdispenser.AzureAISearchUploadDocumentsOpDispenser;
import io.nosqlbench.adapter.azureaisearch.types.AzureAISearchOpType;
import io.nosqlbench.adapters.api.activityimpl.OpDispenser;
import io.nosqlbench.adapters.api.activityimpl.OpMapper;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.nosqlbench.engine.api.templating.TypeAndTarget;
/**
 * Maps a {@link ParsedOp} onto the op dispenser matching its
 * {@link AzureAISearchOpType}.
 */
public class AzureAISearchOpMapper implements OpMapper<AzureAISearchBaseOp<?>> {
    private static final Logger logger = LogManager.getLogger(AzureAISearchOpMapper.class);
    private final AzureAISearchDriverAdapter adapter;

    /**
     * Create a new {@code AzureAISearchOpMapper} implementing the {@link OpMapper}
     * interface.
     *
     * @param adapter The associated {@link AzureAISearchDriverAdapter}
     */
    public AzureAISearchOpMapper(AzureAISearchDriverAdapter adapter) {
        this.adapter = adapter;
    }

    /**
     * Given an instance of a {@link ParsedOp} returns the appropriate
     * {@link AzureAISearchBaseOpDispenser} subclass.
     *
     * @param op The {@link ParsedOp} to be evaluated
     * @return The correct {@link AzureAISearchBaseOpDispenser} subclass based on
     *         the op type
     */
    @Override
    public OpDispenser<? extends AzureAISearchBaseOp<?>> apply(ParsedOp op) {
        TypeAndTarget<AzureAISearchOpType, String> typeAndTarget = op.getTypeAndTarget(AzureAISearchOpType.class,
            String.class, "type", "target");
        logger.info(() -> "Using '" + typeAndTarget.enumId + "' op type for op template '" + op.getName() + "'");

        var targetF = typeAndTarget.targetFunction;
        // The switch covers every enum constant, so no default branch is needed.
        return switch (typeAndTarget.enumId) {
            case delete_index -> new AzureAISearchDeleteIndexOpDispenser(adapter, op, targetF);
            case create_or_update_index -> new AzureAISearchCreateOrUpdateIndexOpDispenser(adapter, op, targetF);
            case list_indexes -> new AzureAISearchListIndexesOpDispenser(adapter, op, targetF);
            case upload_documents -> new AzureAISearchUploadDocumentsOpDispenser(adapter, op, targetF);
            case search_documents -> new AzureAISearchSearchDocumentsOpDispenser(adapter, op, targetF);
        };
    }
}

View File

@ -0,0 +1,141 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.credential.TokenCredential;
import com.azure.identity.DefaultAzureCredentialBuilder;
import com.azure.search.documents.SearchServiceVersion;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.indexes.SearchIndexClientBuilder;
import io.nosqlbench.nb.api.config.standard.ConfigModel;
import io.nosqlbench.nb.api.config.standard.NBConfigModel;
import io.nosqlbench.nb.api.config.standard.NBConfiguration;
import io.nosqlbench.nb.api.config.standard.Param;
/**
 * The {@code AzureAISearchSpace} class is a context object which stores all
 * stateful contextual information needed to interact with the
 * <b>{@code Azure AI Search}</b> database instance.
 *
 * @see <a href=
 *      "https://learn.microsoft.com/en-us/azure/developer/java/sdk/troubleshooting-dependency-version-conflict">Troubleshooting
 *      guide</a>
 * @see <a href=
 *      "https://learn.microsoft.com/en-us/java/api/overview/azure/search-documents-readme?view=azure-java-stable">AI
 *      Search quick start guide</a>
 * @see <a href=
 *      "https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/search/azure-search-documents/">Azure
 *      AI Search Java client</a>
 */
public class AzureAISearchSpace implements AutoCloseable {
    private final static Logger logger = LogManager.getLogger(AzureAISearchSpace.class);
    private final String name;
    private final NBConfiguration cfg;
    /** Lazily created client; guarded by this object's monitor. */
    protected SearchIndexClient searchIndexClient;

    /**
     * Create a new {@code AzureAISearchSpace} Object which stores all stateful
     * contextual information needed to interact with the <b>Azure AI Search</b>
     * database instance.
     *
     * @param name The name of this space
     * @param cfg  The configuration ({@link NBConfiguration}) for this nb run
     */
    public AzureAISearchSpace(String name, NBConfiguration cfg) {
        this.name = name;
        this.cfg = cfg;
    }

    /**
     * Return the index client of this space, creating it on first use.
     *
     * @return The shared {@link SearchIndexClient}
     */
    public synchronized SearchIndexClient getSearchIndexClient() {
        if (searchIndexClient == null) {
            searchIndexClient = createSearchClients();
        }
        return searchIndexClient;
    }

    /**
     * Build the index client from the {@code endpoint}, {@code token_file} /
     * {@code token} and {@code api_version} configuration values.
     *
     * @return A newly built {@link SearchIndexClient}
     */
    private SearchIndexClient createSearchClients() {
        String uri = cfg.get("endpoint");
        String requiredToken = resolveToken();

        // Only mask when there is something to mask; a blank token selects the
        // default-credential branch below.
        // NOTE(review): maskDigits hides digits only, so the non-digit characters of
        // the key still reach the log - consider masking the whole value.
        String maskedToken = requiredToken.isBlank() ? requiredToken
            : AzureAISearchAdapterUtils.maskDigits(requiredToken);
        logger.info("{}: Creating new Azure AI Search Client with (masked) token/key [{}], uri/endpoint [{}]",
            this.name, maskedToken, uri);

        var searchIndexClientBuilder = new SearchIndexClientBuilder().endpoint(uri);
        if (!requiredToken.isBlank()) {
            searchIndexClientBuilder = searchIndexClientBuilder.credential(new AzureKeyCredential(requiredToken));
        } else {
            TokenCredential tokenCredential = new DefaultAzureCredentialBuilder().build();
            searchIndexClientBuilder = searchIndexClientBuilder.credential(tokenCredential);
        }

        // Accept both the enum constant name ('V2024_07_01') and the hyphenated form
        // shown in the parameter description ('V2024-07-01').
        SearchServiceVersion serviceVersion = cfg.getOptional("api_version")
            .map(version -> version.strip().replace('-', '_'))
            .map(SearchServiceVersion::valueOf)
            .orElse(SearchServiceVersion.V2024_07_01);
        SearchServiceVersion latestVersion = SearchServiceVersion.getLatest();
        if (serviceVersion != latestVersion) {
            logger.warn(
                "Latest search service version supported by this client is '{}', but we're using '{}' version. Ignore this warning if both are same.",
                latestVersion, serviceVersion);
        }
        return searchIndexClientBuilder.serviceVersion(serviceVersion).buildClient();
    }

    /**
     * Resolve the api token/key: the first line of {@code token_file} when that
     * parameter is set, otherwise the {@code token} parameter.
     *
     * @return The token; may be blank
     * @throws RuntimeException if the token file cannot be read or is empty, or if
     *                          neither parameter yields a value
     */
    private String resolveToken() {
        return cfg.getOptional("token_file").map(Paths::get).map(tokenFilePath -> {
            try {
                var lines = Files.readAllLines(tokenFilePath);
                if (lines.isEmpty()) {
                    throw new RuntimeException("Token file is empty: " + tokenFilePath);
                }
                // Drop surrounding whitespace (for example a trailing carriage return).
                return lines.getFirst().strip();
            } catch (IOException e) {
                String error = "Error while reading token from file:" + tokenFilePath;
                logger.error(error, e);
                throw new RuntimeException(error, e);
            }
        }).orElseGet(() -> cfg.getOptional("token").orElseThrow(() -> new RuntimeException(
            "You must provide either a 'token_file' or a 'token' to configure a Azure AI Search client")));
    }

    /**
     * @return The read-only configuration model describing the parameters this
     *         space reads
     */
    public static NBConfigModel getConfigModel() {
        return ConfigModel.of(AzureAISearchSpace.class)
            .add(Param.optional("token_file", String.class, "the file to load the api token/key from"))
            .add(Param.defaultTo("token", "azure-aisearch-admin-key-changeme")
                .setDescription("the Azure AI Search api token/key to use to connect to the database"))
            .add(Param.defaultTo("endpoint", "localhost:8080").setDescription(
                "the URI endpoint in which the database is running. Check out https://learn.microsoft.com/en-us/azure/search/search-create-service-portal."))
            .add(Param.optional("api_version", String.class,
                "the api version to be used. Example 'V2024-07-01'. Defaults to latest service version supported by the SDK client version"))
            .asReadOnly();
    }

    /**
     * Drop the reference to the client so that a later
     * {@link #getSearchIndexClient()} call builds a fresh one.
     */
    @Override
    public synchronized void close() throws Exception {
        // Same monitor as getSearchIndexClient(), which reads and writes this field.
        searchIndexClient = null;
    }
}

View File

@ -0,0 +1,78 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.ops;
import java.util.function.LongFunction;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.azure.search.documents.indexes.SearchIndexClient;
import io.nosqlbench.adapters.api.activityimpl.uniform.flowtypes.CycleOp;
/**
 * Base class of all Azure AI Search ops. It holds the index client and the
 * request payload, and funnels execution through {@link #applyOp(long)}.
 *
 * @param <T> The type of the request payload carried by the op
 */
public abstract class AzureAISearchBaseOp<T> implements CycleOp<Object> {
    protected final static Logger logger = LogManager.getLogger(AzureAISearchBaseOp.class);
    protected final SearchIndexClient searchIndexClient;
    protected final T request;
    // NOTE(review): stored but never invoked in this class - apply() calls
    // applyOp() directly. Confirm whether a custom call was meant to be used.
    protected final LongFunction<Object> apiCall;

    /**
     * @param searchIndexClient The index client the op runs against
     * @param requestParam      The request payload
     */
    public AzureAISearchBaseOp(SearchIndexClient searchIndexClient, T requestParam) {
        this.searchIndexClient = searchIndexClient;
        this.request = requestParam;
        this.apiCall = this::applyOp;
    }

    /**
     * @param searchIndexClient The index client the op runs against
     * @param requestParam      The request payload
     * @param call              The function stored in {@link #apiCall}
     */
    public AzureAISearchBaseOp(SearchIndexClient searchIndexClient, T requestParam, LongFunction<Object> call) {
        this.searchIndexClient = searchIndexClient;
        this.request = requestParam;
        this.apiCall = call;
    }

    /**
     * Run {@link #applyOp(long)} for the given cycle.
     *
     * @param value The cycle value
     * @return Whatever {@link #applyOp(long)} returns
     * @throws RuntimeException as thrown by the op; any other exception is wrapped
     *                          in a {@code RuntimeException}
     */
    @Override
    public final Object apply(long value) {
        // Parameterized so toString() is only evaluated when trace logging is on.
        logger.trace("applying op: {}", this);
        try {
            return applyOp(value);
        } catch (Exception e) {
            if (e instanceof RuntimeException rte) {
                throw rte;
            } else {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Execute the op.
     *
     * @param value The cycle value
     * @return The op result
     */
    public abstract Object applyOp(long value);

    @Override
    public String toString() {
        // Guard against a null request so that logging the op cannot itself fail.
        String requestType = (this.request == null) ? "null" : this.request.getClass().getSimpleName();
        return "AzureAISearchBaseOp(" + requestType + ")";
    }
}

View File

@ -0,0 +1,28 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.ops;
import com.azure.search.documents.SearchClient;
import com.azure.search.documents.indexes.SearchIndexClient;
/**
 * Base class for ops that need a {@link SearchClient} in addition to the
 * {@link SearchIndexClient} held by {@link AzureAISearchBaseOp}.
 *
 * @param <T> The type of the request payload carried by the op
 */
public abstract class AzureAISearchClientBaseOp<T> extends AzureAISearchBaseOp<T> {
    /** The document-level client used by subclasses. */
    protected final SearchClient searchClient;

    /**
     * @param searchIdxClient The index client passed on to the base op
     * @param searchClnt      The search client stored for subclasses
     * @param requestParam    The request payload
     */
    public AzureAISearchClientBaseOp(SearchIndexClient searchIdxClient, SearchClient searchClnt, T requestParam) {
        super(searchIdxClient, requestParam);
        searchClient = searchClnt;
    }
}

View File

@ -0,0 +1,47 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.ops;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.indexes.models.SearchIndex;
/**
 * Op that creates or updates a search index from a {@link SearchIndex}
 * definition.
 */
public class AzureAISearchCreateOrUpdateIndexOp extends AzureAISearchBaseOp<SearchIndex> {

    /**
     * @param client  The index client the op runs against
     * @param request The index definition to create or update
     */
    public AzureAISearchCreateOrUpdateIndexOp(SearchIndexClient client, SearchIndex request) {
        super(client, request);
    }

    /**
     * Send the index definition to the service.
     *
     * @param value The cycle value (unused)
     * @return The {@link SearchIndex} returned by the client
     */
    @Override
    public Object applyOp(long value) {
        if (logger.isDebugEnabled()) {
            request.getFields().forEach((field) -> {
                logger.debug(
                    ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>SearchIndex: Name:{}-ProfileName:{}-Type:{}-Dimension:{}",
                    field.getName(), field.getVectorSearchProfileName(), field.getType().toString(),
                    field.getVectorSearchDimensions());
            });
        }
        // Runtime exceptions propagate unchanged; the former catch only rethrew them.
        SearchIndex createResponse = searchIndexClient.createOrUpdateIndex(request);
        // Pass the object itself so it is only rendered when debug logging is enabled.
        logger.debug("Successfully created the collection with return code of {}", createResponse);
        return createResponse;
    }
}

View File

@ -0,0 +1,37 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.ops;
import com.azure.search.documents.indexes.SearchIndexClient;
/**
 * Op that deletes the index whose name is carried as the request.
 */
public class AzureAISearchDeleteIndexOp extends AzureAISearchBaseOp<String> {

    /**
     * @param client  The index client the op runs against
     * @param request The name of the index to delete
     */
    public AzureAISearchDeleteIndexOp(SearchIndexClient client, String request) {
        super(client, request);
    }

    /**
     * Delete the index; any runtime exception from the client propagates as is.
     *
     * @param value The cycle value (unused)
     * @return The fixed string {@code "Deleted"}
     */
    @Override
    public Object applyOp(long value) {
        searchIndexClient.deleteIndex(request);
        logger.debug("Successfully deleted the index: {}", request);
        return "Deleted";
    }
}

View File

@ -0,0 +1,47 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.ops;
import com.azure.core.http.rest.PagedIterable;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.indexes.models.SearchIndex;
/**
 * Op that lists the indexes of the service and logs each index with its fields.
 */
public class AzureAISearchListIndexesOp extends AzureAISearchBaseOp<String> {

    /**
     * @param client  The index client the op runs against
     * @param request The request value handed to the base op (not read by this op)
     */
    public AzureAISearchListIndexesOp(SearchIndexClient client, String request) {
        super(client, request);
    }

    /**
     * List all indexes and log them at info level; any runtime exception from the
     * client propagates as is.
     *
     * @param value The cycle value (unused)
     * @return The fixed string {@code "Listed indexes"}
     */
    @Override
    public Object applyOp(long value) {
        PagedIterable<SearchIndex> indexes = searchIndexClient.listIndexes();
        for (SearchIndex index : indexes) {
            logger.info("Indexes available are: Name: {}, ETag: {}", index.getName(), index.getETag());
            for (var field : index.getFields()) {
                logger.info(
                    "Field Name: {}, Field isKey?: {}, Field Dimension: {}, Field Vector Search Profile: {}",
                    field.getName(), field.isKey(), field.getVectorSearchDimensions(),
                    field.getVectorSearchProfileName());
            }
        }
        return "Listed indexes";
    }
}

View File

@ -0,0 +1,53 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.ops;
import com.azure.core.util.Context;
import com.azure.search.documents.SearchClient;
import com.azure.search.documents.SearchDocument;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.models.SearchOptions;
import com.azure.search.documents.util.SearchPagedIterable;
/**
 * Op that runs a search described by {@link SearchOptions} against the index
 * bound to the {@link SearchClient}.
 */
public class AzureAISearchSearchDocumentsOp extends AzureAISearchClientBaseOp<SearchOptions> {

    /**
     * @param searchIndexClient The index client passed on to the base op
     * @param searchClient      The client the search is issued on
     * @param request           The search options
     */
    public AzureAISearchSearchDocumentsOp(SearchIndexClient searchIndexClient, SearchClient searchClient,
        SearchOptions request) {
        super(searchIndexClient, searchClient, request);
    }

    /**
     * Issue the search; any runtime exception from the client propagates as is.
     *
     * @param value The cycle value (unused)
     * @return The {@link SearchPagedIterable} returned by the client
     */
    @Override
    public Object applyOp(long value) {
        SearchPagedIterable searchDocsResponse = searchClient.search(null, // we've not implemented other complex searches yet here.
            request,
            Context.NONE);
        // The guard must match the level of the statement it protects; guarding on
        // INFO walked the whole response without logging anything.
        if (logger.isDebugEnabled()) {
            searchDocsResponse.forEach((r) -> {
                SearchDocument doc = r.getDocument(SearchDocument.class);
                logger.debug(
                    "Successfully searched the index and returned id: {}, score: {}, vector embedding: {}",
                    doc.get("id"), r.getScore(), doc.get("value"));
            });
        }
        return searchDocsResponse;
    }
}

View File

@ -0,0 +1,56 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.ops;
import java.util.List;
import com.azure.search.documents.SearchClient;
import com.azure.search.documents.SearchDocument;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.models.IndexDocumentsResult;
/**
 * Op that uploads a single {@link SearchDocument} to the index bound to the
 * {@link SearchClient}.
 */
public class AzureAISearchUploadDocumentsOp extends AzureAISearchClientBaseOp<SearchDocument> {

    /**
     * @param searchIndexClient The index client passed on to the base op
     * @param searchClient      The client the upload is issued on
     * @param request           The document to upload
     */
    public AzureAISearchUploadDocumentsOp(SearchIndexClient searchIndexClient, SearchClient searchClient,
        SearchDocument request) {
        super(searchIndexClient, searchClient, request);
    }

    /**
     * Upload the document as a one-element batch; any runtime exception from the
     * client propagates as is.
     *
     * @param value The cycle value (unused)
     * @return The {@link IndexDocumentsResult} returned by the client
     */
    @Override
    public Object applyOp(long value) {
        IndexDocumentsResult uploadDocsResponse = searchClient.uploadDocuments(List.of(request));
        if (logger.isDebugEnabled()) {
            for (var result : uploadDocsResponse.getResults()) {
                logger.debug(
                    "Successfully created the collection with return status code: {}, key: {}, succeeded?: {}, error message: {}",
                    result.getStatusCode(), result.getKey(), result.isSucceeded(), result.getErrorMessage());
            }
        }
        return uploadDocsResponse;
    }
}

View File

@ -0,0 +1,71 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.nosqlbench.adapter.azureaisearch.opsdispenser;
import java.util.function.LongFunction;
import com.azure.search.documents.indexes.SearchIndexClient;
import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.AzureAISearchSpace;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapters.api.activityimpl.BaseOpDispenser;
import io.nosqlbench.adapters.api.activityimpl.uniform.DriverAdapter;
import io.nosqlbench.adapters.api.templating.ParsedOp;
/**
 * Base class of the Azure AI Search op dispensers. It resolves the space and
 * its index client per cycle and delegates construction of the request and op
 * functions to subclasses.
 *
 * @param <T> The type of the request payload of the dispensed ops
 */
public abstract class AzureAISearchBaseOpDispenser<T>
    extends BaseOpDispenser<AzureAISearchBaseOp<T>, AzureAISearchSpace> {
    protected final LongFunction<AzureAISearchSpace> azureAISearchSpaceFunction;
    protected final LongFunction<SearchIndexClient> clientFunction;
    private final LongFunction<? extends AzureAISearchBaseOp<T>> opF;
    private final LongFunction<T> paramF;

    /**
     * @param adapter The driver adapter owning this dispenser
     * @param op      The parsed op template
     * @param targetF The function yielding the op target for a cycle
     */
    @SuppressWarnings("rawtypes")
    protected AzureAISearchBaseOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
        LongFunction<String> targetF) {
        super((DriverAdapter) adapter, op);
        this.azureAISearchSpaceFunction = adapter.getSpaceFunc(op);
        // Let client-creation failures propagate: returning null here only deferred
        // the failure to an unrelated NullPointerException in the op.
        this.clientFunction = (long l) -> this.azureAISearchSpaceFunction.apply(l).getSearchIndexClient();
        // Subclass hooks are invoked from this constructor, so implementations must
        // not rely on their own instance fields being initialized yet.
        this.paramF = getParamFunc(this.clientFunction, op, targetF);
        this.opF = createOpFunc(paramF, this.clientFunction, op, targetF);
    }

    /**
     * @return The owning adapter, cast to its concrete type
     */
    protected AzureAISearchDriverAdapter getDriverAdapter() {
        return (AzureAISearchDriverAdapter) adapter;
    }

    /**
     * Build the function that produces the request payload for a cycle.
     *
     * @param clientF The function yielding the index client for a cycle
     * @param op      The parsed op template
     * @param targetF The function yielding the op target for a cycle
     * @return The request-producing function
     */
    public abstract LongFunction<T> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
        LongFunction<String> targetF);

    /**
     * Build the function that produces the op for a cycle.
     *
     * @param paramF  The request-producing function from {@link #getParamFunc}
     * @param clientF The function yielding the index client for a cycle
     * @param op      The parsed op template
     * @param targetF The function yielding the op target for a cycle
     * @return The op-producing function
     */
    public abstract LongFunction<AzureAISearchBaseOp<T>> createOpFunc(LongFunction<T> paramF,
        LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF);

    /**
     * @param value The cycle value
     * @return The op for that cycle
     */
    @Override
    public AzureAISearchBaseOp<T> getOp(long value) {
        return opF.apply(value);
    }
}

View File

@ -0,0 +1,407 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.opsdispenser;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.LongFunction;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.indexes.models.BinaryQuantizationCompression;
import com.azure.search.documents.indexes.models.ExhaustiveKnnAlgorithmConfiguration;
import com.azure.search.documents.indexes.models.ExhaustiveKnnParameters;
import com.azure.search.documents.indexes.models.HnswAlgorithmConfiguration;
import com.azure.search.documents.indexes.models.HnswParameters;
import com.azure.search.documents.indexes.models.ScalarQuantizationCompression;
import com.azure.search.documents.indexes.models.ScalarQuantizationParameters;
import com.azure.search.documents.indexes.models.SearchField;
import com.azure.search.documents.indexes.models.SearchFieldDataType;
import com.azure.search.documents.indexes.models.SearchIndex;
import com.azure.search.documents.indexes.models.VectorSearch;
import com.azure.search.documents.indexes.models.VectorSearchAlgorithmConfiguration;
import com.azure.search.documents.indexes.models.VectorSearchAlgorithmMetric;
import com.azure.search.documents.indexes.models.VectorSearchCompression;
import com.azure.search.documents.indexes.models.VectorSearchCompressionTarget;
import com.azure.search.documents.indexes.models.VectorSearchProfile;
import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchCreateOrUpdateIndexOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.nosqlbench.nb.api.errors.OpConfigError;
/**
 * Dispenses {@link AzureAISearchCreateOrUpdateIndexOp}s. For every cycle a {@link SearchIndex} named after
 * the op target is assembled from the optional {@code fields} and {@code vectorSearch} op fields.
 * <p>
 * All intermediate objects are built in locals, so the per-cycle functions keep no state in this instance.
 *
 * @see <a href=
 *      "https://learn.microsoft.com/en-us/rest/api/searchservice/indexes/create-or-update?view=rest-searchservice-2024-07-01&tabs=HTTP">API
 *      Reference</a>
 * @see <a href=
 *      "https://learn.microsoft.com/en-us/java/api/com.azure.search.documents?view=azure-java-stable">Index
 *      docs</a>
 */
public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBaseOpDispenser<SearchIndex> {

    /**
     * @param adapter the driver adapter that owns the spaces
     * @param op      the parsed op template
     * @param targetF yields the index name for a cycle
     */
    public AzureAISearchCreateOrUpdateIndexOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
            LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    /**
     * Builds the per-cycle {@link SearchIndex} factory: a new index named by {@code targetF}, with fields
     * and vector search settings applied when the op template defines them.
     */
    @SuppressWarnings("rawtypes")
    @Override
    public LongFunction<SearchIndex> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
            LongFunction<String> targetF) {
        logger.debug(">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>getParamFunc");
        LongFunction<SearchIndex> ebF = l -> new SearchIndex(targetF.apply(l));

        Optional<LongFunction<Map>> fieldsMapF = op.getAsOptionalFunction("fields", Map.class);
        if (fieldsMapF.isPresent()) {
            final LongFunction<List<SearchField>> searchFieldListF = buildFieldsStruct(op);
            final LongFunction<SearchIndex> fieldsF = ebF;
            ebF = l -> fieldsF.apply(l).setFields(searchFieldListF.apply(l));
        }

        Optional<LongFunction<Map>> vsearchMapF = op.getAsOptionalFunction("vectorSearch", Map.class);
        if (vsearchMapF.isPresent()) {
            final LongFunction<VectorSearch> vSearchF = buildVectorSearchStruct(op);
            final LongFunction<SearchIndex> vsF = ebF;
            ebF = l -> vsF.apply(l).setVectorSearch(vSearchF.apply(l));
        }

        return ebF;
    }

    /**
     * Builds the per-cycle {@link VectorSearch} factory from the {@code vectorSearch} op field. The
     * recognized sections are {@code compressions}, {@code algorithms} and {@code profiles}; each must be a
     * map keyed by entry name. Other section names are ignored.
     *
     * @return the factory, or {@code null} when the op template has no {@code vectorSearch} field
     * @throws OpConfigError (from the returned function) when a section is not a map
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private LongFunction<VectorSearch> buildVectorSearchStruct(ParsedOp op) {
        Optional<LongFunction<Map>> baseFunc = op.getAsOptionalFunction("vectorSearch", Map.class);
        return baseFunc.<LongFunction<VectorSearch>>map(mapLongFunc -> l -> {
            Map<String, Object> vsMap = mapLongFunc.apply(l);
            VectorSearch vectorSearch = new VectorSearch();
            vsMap.forEach((vsField, vsValue) -> {
                logger.debug(
                        ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>vsField:{} vsValue:{}",
                        vsField, vsValue);
                if (!(vsValue instanceof Map)) {
                    throw new OpConfigError(
                            "Vector Search properties must be a Map<String, Map<String, Object>>, but got "
                                    + typeName(vsValue) + " instead for the inner value");
                }
                Map<String, Object> section = (Map<String, Object>) vsValue;
                // Each section is converted exactly once, so every configured entry ends up in the result
                // (the list used to be recreated per entry, keeping only the last one).
                if ("compressions".equals(vsField)) {
                    vectorSearch.setCompressions(buildCompressions(section));
                } else if ("algorithms".equals(vsField)) {
                    vectorSearch.setAlgorithms(buildAlgorithms(section));
                } else if ("profiles".equals(vsField)) {
                    vectorSearch.setProfiles(buildProfiles(section));
                }
            });
            return vectorSearch;
        }).orElse(null);
    }

    /**
     * Converts the {@code compressions} section into compression configurations, one per entry.
     * <p>
     * An entry whose {@code kind} is {@code scalarQuantization} becomes a
     * {@link ScalarQuantizationCompression}; any other declared kind is assumed to be binary quantization;
     * an entry without {@code kind} becomes a plain {@link VectorSearchCompression}.
     */
    private List<VectorSearchCompression> buildCompressions(Map<String, Object> compressions) {
        List<VectorSearchCompression> vsCompList = new ArrayList<>();
        compressions.forEach((name, value) -> {
            Map<String, Object> conf = asMap("Vector Search compression '" + name + "'", value);
            Object kind = conf.get("kind");
            logger.debug(
                    ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>compressions>>>>kind:{}",
                    kind);
            final boolean isSQ = "scalarQuantization".equals(kind);
            final VectorSearchCompression vsComp;
            if (isSQ) {
                vsComp = new ScalarQuantizationCompression(name);
            } else if (conf.containsKey("kind")) {
                // BinaryQuantization is assumed here
                vsComp = new BinaryQuantizationCompression(name);
            } else {
                vsComp = new VectorSearchCompression(name);
            }
            conf.forEach((compressKey, compressValue) -> {
                logger.debug(
                        ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>kind:{} compressKey:{} compressValue:{}",
                        kind, compressKey, compressValue);
                buildVectorSearchCompression(vsComp, compressKey, compressValue, isSQ);
            });
            vsCompList.add(vsComp);
        });
        vsCompList.forEach(comp -> logger.debug(
                ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>compressions FINAL: Name:{}",
                comp.getCompressionName()));
        return vsCompList;
    }

    /**
     * Applies one configuration property to a compression object.
     * <p>
     * The {@code kind} property is intentionally not applied here: the kind is determined by the concrete
     * compression class, and the former {@code getKind().fromString(..)} call only invoked a static factory
     * whose result was discarded.
     *
     * @param vsComp the compression to configure
     * @param key    the property name
     * @param val    the property value
     * @param isSQ   whether {@code vsComp} is a {@link ScalarQuantizationCompression}
     * @return {@code vsComp}
     */
    private VectorSearchCompression buildVectorSearchCompression(VectorSearchCompression vsComp, String key, Object val,
            boolean isSQ) {
        if ("rerankWithOriginalVectors".equals(key)) {
            vsComp.setRerankWithOriginalVectors((Boolean) val);
        }
        if ("defaultOversampling".equals(key)) {
            vsComp.setDefaultOversampling(((Number) val).doubleValue());
        }
        if (isSQ && "scalarQuantizationParameters".equals(key)) {
            ScalarQuantizationParameters sqParams = new ScalarQuantizationParameters();
            asMap("scalarQuantizationParameters", val).forEach((sqKey, sqVal) -> {
                if ("quantizedDataType".equals(sqKey)) {
                    sqParams.setQuantizedDataType(VectorSearchCompressionTarget.fromString((String) sqVal));
                }
            });
            ((ScalarQuantizationCompression) vsComp).setParameters(sqParams);
        }
        return vsComp;
    }

    /**
     * Converts the {@code algorithms} section into algorithm configurations, one per entry. The supported
     * kinds are {@code hnsw} and {@code exhaustiveKnn}; entries of any other kind are skipped with a warning.
     */
    private List<VectorSearchAlgorithmConfiguration> buildAlgorithms(Map<String, Object> algorithms) {
        List<VectorSearchAlgorithmConfiguration> vsAlgoList = new ArrayList<>();
        algorithms.forEach((name, value) -> {
            Map<String, Object> conf = asMap("Vector Search algorithm '" + name + "'", value);
            Object kind = conf.get("kind");
            logger.debug(
                    ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms>>>>kind:{}",
                    kind);
            if ("hnsw".equals(kind)) {
                HnswAlgorithmConfiguration hnswAlgoConf = new HnswAlgorithmConfiguration(name);
                if (conf.containsKey("hnswParameters")) {
                    hnswAlgoConf.setParameters(
                            buildHnswParameters(asMap("hnswParameters", conf.get("hnswParameters"))));
                }
                vsAlgoList.add(hnswAlgoConf);
            } else if ("exhaustiveKnn".equals(kind)) {
                ExhaustiveKnnAlgorithmConfiguration exhausKnnAlgoConf = new ExhaustiveKnnAlgorithmConfiguration(name);
                if (conf.containsKey("exhaustiveKnnParameters")) {
                    exhausKnnAlgoConf.setParameters(buildExhaustiveKnnParameters(
                            asMap("exhaustiveKnnParameters", conf.get("exhaustiveKnnParameters"))));
                }
                vsAlgoList.add(exhausKnnAlgoConf);
            } else {
                logger.warn("Ignoring vector search algorithm '{}' with unsupported kind '{}'", name, kind);
            }
        });
        vsAlgoList.forEach(algo -> logger.debug(
                ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms FINAL: Name:{}",
                algo.getName()));
        return vsAlgoList;
    }

    /**
     * Reads {@code m}, {@code efConstruction}, {@code efSearch} and {@code metric} from the
     * {@code hnswParameters} map into a single {@link HnswParameters} instance.
     * <p>
     * Previously the enclosing algorithm map was iterated instead of this map and a fresh parameter object
     * was created per key, so none of these settings ever reached the index definition.
     */
    private HnswParameters buildHnswParameters(Map<String, Object> params) {
        HnswParameters hnswParams = new HnswParameters();
        params.forEach((key, value) -> {
            if ("m".equals(key)) {
                hnswParams.setM(((Number) value).intValue());
            }
            if ("efConstruction".equals(key)) {
                hnswParams.setEfConstruction(((Number) value).intValue());
            }
            if ("efSearch".equals(key)) {
                hnswParams.setEfSearch(((Number) value).intValue());
            }
            if ("metric".equals(key)) {
                hnswParams.setMetric(VectorSearchAlgorithmMetric.fromString((String) value));
            }
        });
        return hnswParams;
    }

    /**
     * Reads the similarity metric from the {@code exhaustiveKnnParameters} map.
     */
    private ExhaustiveKnnParameters buildExhaustiveKnnParameters(Map<String, Object> params) {
        ExhaustiveKnnParameters eKnnParms = new ExhaustiveKnnParameters();
        params.forEach((key, value) -> {
            // "metric" is the documented property name. "quantizedDataType" is the key the first draft read
            // the metric from; it is still accepted so existing workloads keep behaving the same.
            if ("metric".equals(key) || "quantizedDataType".equals(key)) {
                eKnnParms.setMetric(VectorSearchAlgorithmMetric.fromString((String) value));
            }
        });
        return eKnnParms;
    }

    /**
     * Converts the {@code profiles} section into vector search profiles, one per entry. Every profile must
     * name its {@code algorithm}; {@code compression} is optional. The properties may appear in any order.
     *
     * @throws OpConfigError when a profile does not declare {@code algorithm} as a string
     */
    private List<VectorSearchProfile> buildProfiles(Map<String, Object> profiles) {
        logger.debug(
                ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles");
        List<VectorSearchProfile> vsProfileList = new ArrayList<>();
        profiles.forEach((profKey, profVal) -> {
            logger.debug(
                    ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles: profKey:{} profVal:{}",
                    profKey, profVal);
            Map<String, Object> conf = asMap("Vector Search profile '" + profKey + "'", profVal);
            Object algorithm = conf.get("algorithm");
            if (!(algorithm instanceof String)) {
                throw new OpConfigError("Vector Search profile '" + profKey
                        + "' must name its algorithm configuration in the 'algorithm' property");
            }
            VectorSearchProfile vsProfile = new VectorSearchProfile(profKey, (String) algorithm);
            if (conf.containsKey("compression")) {
                vsProfile.setCompressionName((String) conf.get("compression"));
            }
            logger.debug(
                    ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles: Name:{}>>>AlgoName:{}>>>CompressionName:{}",
                    vsProfile.getName(), vsProfile.getAlgorithmConfigurationName(),
                    vsProfile.getCompressionName());
            vsProfileList.add(vsProfile);
        });
        vsProfileList.forEach(profile -> logger.debug(
                ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles FINAL: Name:{} AlgorithmConfName:{}",
                profile.getName(), profile.getAlgorithmConfigurationName()));
        return vsProfileList;
    }

    /**
     * Builds the per-cycle field list factory from the {@code fields} op field, a map from field name to a
     * map of field properties.
     *
     * @return the factory, or {@code null} when the op template has no {@code fields} field
     * @throws OpConfigError (from the returned function) when a field definition is not a map or lacks a
     *                       {@code type}
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private LongFunction<List<SearchField>> buildFieldsStruct(ParsedOp op) {
        logger.debug(">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>buildFieldsStruct");
        Optional<LongFunction<Map>> baseFunc = op.getAsOptionalFunction("fields", Map.class);
        return baseFunc.<LongFunction<List<SearchField>>>map(mapLongFunc -> l -> {
            Map<String, Object> fMap = mapLongFunc.apply(l);
            List<SearchField> fieldsList = new ArrayList<>();
            fMap.forEach((fName, fValue) -> {
                if (!(fValue instanceof Map)) {
                    throw new OpConfigError(
                            "Fields properties must be a Map<String, Map<String, Object>>, but got "
                                    + typeName(fValue) + " instead for the inner value");
                }
                logger.debug(
                        ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>buildFieldsStruct>>>>fName:{} fValue:{}",
                        fName, fValue);
                fieldsList.add(buildSearchField(fName, (Map<String, Object>) fValue));
            });
            if (logger.isDebugEnabled()) {
                fieldsList.forEach(field -> logger.debug(
                        ">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>buildFieldsStruct>>>> fields FINAL: Name:{} VSProfileName:{}",
                        field.getName(), field.getVectorSearchProfileName()));
            }
            return fieldsList;
        }).orElse(null);
    }

    /**
     * Builds one {@link SearchField}. The {@code type} property is looked up first, so the remaining
     * properties can be listed in any order and a field without a type is reported instead of silently
     * re-using the previously built field.
     *
     * @param fName the field name
     * @param props the field properties
     * @throws OpConfigError when {@code type} is missing or not a string
     */
    private SearchField buildSearchField(String fName, Map<String, Object> props) {
        Object type = props.get("type");
        if (!(type instanceof String)) {
            throw new OpConfigError(
                    "Field '" + fName + "' must declare its data type as a string in the 'type' property");
        }
        SearchField searchField = new SearchField(fName, SearchFieldDataType.fromString((String) type));
        props.forEach((innerKey, innerValue) -> {
            switch (innerKey) {
            case "key":
                searchField.setKey((Boolean) innerValue);
                break;
            case "dimensions":
                searchField.setVectorSearchDimensions(((Number) innerValue).intValue());
                break;
            case "vectorSearchProfile":
                searchField.setVectorSearchProfileName((String) innerValue);
                break;
            case "filterable":
                searchField.setFilterable((Boolean) innerValue);
                break;
            case "sortable":
                searchField.setSortable((Boolean) innerValue);
                break;
            case "searchable":
                searchField.setSearchable((Boolean) innerValue);
                break;
            case "facetable":
                searchField.setFacetable((Boolean) innerValue);
                break;
            case "hidden":
                searchField.setHidden((Boolean) innerValue);
                break;
            case "retrievable":
                // For now we're ignoring this as this is internally set to 'hidden' property's
                // value by the searchIndexClient
                break;
            default:
                // "type" was consumed above; unrecognized properties are ignored.
                break;
            }
        });
        return searchField;
    }

    /**
     * Validates that a nested configuration value is a map and returns it typed.
     *
     * @param context what the value configures; used in the error message
     * @param value   the raw value from the op template
     * @throws OpConfigError when {@code value} is not a map
     */
    @SuppressWarnings("unchecked")
    private static Map<String, Object> asMap(String context, Object value) {
        if (!(value instanceof Map)) {
            throw new OpConfigError(
                    context + " must be a map of properties, but got " + typeName(value) + " instead");
        }
        return (Map<String, Object>) value;
    }

    /** Null-safe simple class name for error messages. */
    private static String typeName(Object value) {
        return value == null ? "null" : value.getClass().getSimpleName();
    }

    /**
     * Creates, per cycle, an {@link AzureAISearchCreateOrUpdateIndexOp} from the cycle's client and index
     * definition.
     */
    @Override
    public LongFunction<AzureAISearchBaseOp<SearchIndex>> createOpFunc(LongFunction<SearchIndex> paramF,
            LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return l -> new AzureAISearchCreateOrUpdateIndexOp(clientF.apply(l), paramF.apply(l));
    }
}

View File

@ -0,0 +1,56 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.opsdispenser;
import java.util.function.LongFunction;
import com.azure.search.documents.indexes.SearchIndexClient;
import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchDeleteIndexOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
/**
* Delete an Azure AI Search index.
*
* @see <a href=
* "https://learn.microsoft.com/en-us/rest/api/searchservice/indexes/delete?view=rest-searchservice-2024-07-01&tabs=HTTP">Delete
* Index docs</a>.
* @see <a href=
* "https://learn.microsoft.com/en-us/rest/api/searchservice/">REST
* API</a>.
*/
public class AzureAISearchDeleteIndexOpDispenser extends AzureAISearchBaseOpDispenser<String> {

    /**
     * @param adapter the driver adapter that owns the spaces
     * @param op      the parsed op template
     * @param targetF yields the op target for a cycle
     */
    public AzureAISearchDeleteIndexOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
            LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    /**
     * The op parameter is simply the op target for the cycle.
     */
    @Override
    public LongFunction<String> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
            LongFunction<String> targetF) {
        return cycle -> targetF.apply(cycle);
    }

    /**
     * Creates, per cycle, an {@link AzureAISearchDeleteIndexOp} from the cycle's client and parameter.
     */
    @Override
    public LongFunction<AzureAISearchBaseOp<String>> createOpFunc(LongFunction<String> paramF,
            LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return cycle -> {
            SearchIndexClient client = clientF.apply(cycle);
            String indexName = paramF.apply(cycle);
            return new AzureAISearchDeleteIndexOp(client, indexName);
        };
    }
}

View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.opsdispenser;
import java.util.function.LongFunction;
import com.azure.search.documents.indexes.SearchIndexClient;
import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchListIndexesOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
/**
 * Dispenses {@link AzureAISearchListIndexesOp}s. The op target is forwarded to the op as its parameter.
 */
public class AzureAISearchListIndexesOpDispenser extends AzureAISearchBaseOpDispenser<String> {

    /**
     * @param adapter the driver adapter that owns the spaces
     * @param op      the parsed op template
     * @param targetF yields the op target for a cycle
     */
    public AzureAISearchListIndexesOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
            LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    /**
     * The op parameter is simply the op target for the cycle.
     */
    @Override
    public LongFunction<String> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
            LongFunction<String> targetF) {
        return cycle -> targetF.apply(cycle);
    }

    /**
     * Creates, per cycle, an {@link AzureAISearchListIndexesOp} from the cycle's client and parameter.
     */
    @Override
    public LongFunction<AzureAISearchBaseOp<String>> createOpFunc(LongFunction<String> paramF,
            LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return cycle -> {
            SearchIndexClient client = clientF.apply(cycle);
            String target = paramF.apply(cycle);
            return new AzureAISearchListIndexesOp(client, target);
        };
    }
}

View File

@ -0,0 +1,101 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.opsdispenser;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.LongFunction;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.models.SearchOptions;
import com.azure.search.documents.models.VectorQuery;
import com.azure.search.documents.models.VectorSearchOptions;
import com.azure.search.documents.models.VectorizedQuery;
import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchSearchDocumentsOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.nosqlbench.nb.api.errors.OpConfigError;
/**
 * Dispenses {@link AzureAISearchSearchDocumentsOp}s. For every cycle a {@link SearchOptions} object is
 * assembled from the op fields {@code count} (optional), {@code select} (required) and
 * {@code vectorQueries} (required).
 *
 * @see <a href=
 *      "https://learn.microsoft.com/en-us/rest/api/searchservice/documents/search-get?view=rest-searchservice-2024-07-01&tabs=HTTP#rawvectorquery">Search
 *      documents REST API</a>
 * @see <a href=
 *      "https://learn.microsoft.com/en-us/azure/search/vector-search-how-to-query?tabs=query-2024-07-01%2Cfilter-2024-07-01%2Cbuiltin-portal#vector-query-request">Vector
 *      query request</a>
 */
public class AzureAISearchSearchDocumentsOpDispenser extends AzureAISearchBaseOpDispenser<SearchOptions> {

    /**
     * @param adapter the driver adapter that owns the spaces
     * @param op      the parsed op template
     * @param targetF yields the name of the index to search for a cycle
     */
    public AzureAISearchSearchDocumentsOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
            LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    /**
     * Builds the per-cycle {@link SearchOptions} factory.
     *
     * @throws OpConfigError when the op template does not define {@code vectorQueries}
     */
    @Override
    public LongFunction<SearchOptions> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
            LongFunction<String> targetF) {
        LongFunction<SearchOptions> ebF = l -> new SearchOptions();

        Optional<LongFunction<Boolean>> countFunc = op.getAsOptionalFunction("count", Boolean.class);
        if (countFunc.isPresent()) {
            final LongFunction<Boolean> countF = countFunc.get();
            final LongFunction<SearchOptions> countLF = ebF;
            ebF = l -> countLF.apply(l).setIncludeTotalCount(countF.apply(l));
        }

        LongFunction<String> selectFunc = op.getAsRequiredFunction("select", String.class);
        final LongFunction<SearchOptions> selectLF = ebF;
        ebF = l -> selectLF.apply(l).setSelect(selectFunc.apply(l));

        // Resolve the vector query function once, while the dispenser is being built. It used to be
        // rebuilt inside the per-cycle lambda, which re-parsed the op template on every cycle and deferred
        // the "missing vectorQueries" configuration error to run time.
        final LongFunction<VectorSearchOptions> vsOptsF = buildVectorSearchOptionsStruct(op);
        final LongFunction<SearchOptions> vqLF = ebF;
        ebF = l -> vqLF.apply(l).setVectorSearchOptions(vsOptsF.apply(l));

        return ebF;
    }

    /**
     * Creates, per cycle, an {@link AzureAISearchSearchDocumentsOp} from the cycle's index client, the
     * search client for the target index, and the search options.
     */
    @Override
    public LongFunction<AzureAISearchBaseOp<SearchOptions>> createOpFunc(LongFunction<SearchOptions> paramF,
            LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return l -> {
            // Resolve the index client once per cycle and derive the search client from it.
            SearchIndexClient indexClient = clientF.apply(l);
            return new AzureAISearchSearchDocumentsOp(indexClient,
                    indexClient.getSearchClient(targetF.apply(l)), paramF.apply(l));
        };
    }

    /**
     * Builds the per-cycle {@link VectorSearchOptions} factory from the {@code vectorQueries} op field, a
     * map with a mandatory {@code vector} entry and the optional entries {@code exhaustive},
     * {@code fields}, {@code weight} and {@code k}.
     *
     * @throws OpConfigError when {@code vectorQueries} is not defined; the returned function throws it
     *                       when the map has no {@code vector} entry
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    private LongFunction<VectorSearchOptions> buildVectorSearchOptionsStruct(ParsedOp op) {
        if (!op.isDefined("vectorQueries")) {
            throw new OpConfigError("Must provide values for 'vectorQueries' in 'search_documents' op");
        }
        Optional<LongFunction<Map>> baseFunc = op.getAsOptionalFunction("vectorQueries", Map.class);
        return baseFunc.<LongFunction<VectorSearchOptions>>map(mapLongFunc -> l -> {
            Map<String, Object> vsoMap = mapLongFunc.apply(l);
            VectorSearchOptions vsOpts = new VectorSearchOptions();
            if (!vsoMap.containsKey("vector")) {
                throw new OpConfigError(
                        "Must provide list of float values for 'vector' field within 'vectorQueries' of 'search_documents' op");
            }
            VectorQuery vectorizableQuery = new VectorizedQuery((List<Float>) vsoMap.get("vector"));
            if (vsoMap.containsKey("exhaustive")) {
                vectorizableQuery.setExhaustive((Boolean) vsoMap.get("exhaustive"));
            }
            if (vsoMap.containsKey("fields")) {
                vectorizableQuery.setFields(new String[] { (String) vsoMap.get("fields") });
            }
            if (vsoMap.containsKey("weight")) {
                vectorizableQuery.setWeight(((Number) vsoMap.get("weight")).floatValue());
            }
            if (vsoMap.containsKey("k")) {
                vectorizableQuery.setKNearestNeighborsCount(((Number) vsoMap.get("k")).intValue());
            }
            vsOpts.setQueries(vectorizableQuery);
            return vsOpts;
        }).orElseThrow(
                // Never hand back a null function: a null would only fail later, on the first cycle.
                () -> new OpConfigError("Must provide values for 'vectorQueries' in 'search_documents' op"));
    }
}

View File

@ -0,0 +1,76 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.opsdispenser;
import java.util.Map;
import java.util.function.LongFunction;
import com.azure.search.documents.SearchDocument;
import com.azure.search.documents.indexes.SearchIndexClient;
import io.nosqlbench.adapter.azureaisearch.AzureAISearchDriverAdapter;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchUploadDocumentsOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;
/**
* @see <a href=
* "https://learn.microsoft.com/en-us/rest/api/searchservice/documents/?view=rest-searchservice-2024-07-01&tabs=HTTP">API
* Reference</a>
* @see <a href=
* "https://learn.microsoft.com/en-us/java/api/com.azure.search.documents?view=azure-java-stable">Index
* docs</a>
*/
public class AzureAISearchUploadDocumentsOpDispenser extends AzureAISearchBaseOpDispenser<SearchDocument> {

    /**
     * @param adapter the driver adapter that owns the spaces
     * @param op      the parsed op template
     * @param targetF yields the name of the index to upload to for a cycle
     */
    public AzureAISearchUploadDocumentsOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
            LongFunction<String> targetF) {
        super(adapter, op, targetF);
    }

    /**
     * Builds the per-cycle {@link SearchDocument} factory from the required {@code fields} op field.
     */
    @SuppressWarnings("rawtypes")
    @Override
    public LongFunction<SearchDocument> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
            LongFunction<String> targetF) {
        LongFunction<Map> rawFieldsF = op.getAsRequiredFunction("fields", Map.class);
        final LongFunction<SearchDocument> documentF = buildFieldsStruct(rawFieldsF);
        return cycle -> documentF.apply(cycle);
    }

    /**
     * Creates, per cycle, an {@link AzureAISearchUploadDocumentsOp} from the cycle's index client, the
     * search client for the target index, and the document to upload.
     */
    @Override
    public LongFunction<AzureAISearchBaseOp<SearchDocument>> createOpFunc(LongFunction<SearchDocument> paramF,
            LongFunction<SearchIndexClient> clientF, ParsedOp op, LongFunction<String> targetF) {
        return cycle -> new AzureAISearchUploadDocumentsOp(
                clientF.apply(cycle),
                clientF.apply(cycle).getSearchClient(targetF.apply(cycle)),
                paramF.apply(cycle));
    }

    /**
     * Wraps a map-producing function so that each cycle's entries are copied into a new
     * {@link SearchDocument}.
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private LongFunction<SearchDocument> buildFieldsStruct(LongFunction<Map> fieldsFunction) {
        return cycle -> {
            Map<String, Object> source = fieldsFunction.apply(cycle);
            SearchDocument document = new SearchDocument();
            for (Map.Entry<String, Object> entry : source.entrySet()) {
                document.put(entry.getKey(), entry.getValue());
            }
            return document;
        };
    }
}

View File

@ -0,0 +1,25 @@
/*
* Copyright (c) 2020-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.azureaisearch.types;
/**
 * The op types of the Azure AI Search adapter. Each constant is spelled exactly like the op field that
 * names the operation in a workload template (for example {@code delete_index: "my-index"}); presumably
 * the adapter's op mapper, which is not part of this file, matches them by name.
 */
public enum AzureAISearchOpType {
    /** Create an index, or update it when it already exists. */
    create_or_update_index,

    /** Delete an index. */
    delete_index,

    /** List indexes. */
    list_indexes,

    /** Upload documents to an index. */
    upload_documents,

    /** Search the documents of an index. */
    search_documents;
}

View File

@ -0,0 +1,174 @@
min_version: 5.21
description: |
This is a template for live vector search testing.
Phases:
delete_index: Delete the index used by the test
create_or_update_index: Install the schema required to run the test
list_indexes: List the indexes available on the service
upload_documents: Measure how long it takes to load a set of embeddings
search_documents: Run vector search with a set of default (or overridden) parameters, and
measure how the system responds to queries while it is indexing recently ingested data.
In all of these phases, it is important to instance the metrics with distinct names.
Also, aggregates of recall should include total aggregate as well as a moving average.
TEMPLATE(token_file)
TEMPLATE(token)
scenarios:
azure_aisearch_vectors:
delete_index: >-
run tags==block:delete_index
errors===stop
cycles===UNDEF threads===UNDEF
endpoint=TEMPLATE(azureaisearchhost) token_file=TEMPLATE(token_file)
create_or_update_index: >-
run tags==block:create_or_update_index
errors===stop
cycles===UNDEF threads===UNDEF
endpoint=TEMPLATE(azureaisearchhost) token_file=TEMPLATE(token_file)
list_indexes: >-
run tags==block:list_indexes
errors===stop
cycles===UNDEF threads===UNDEF
endpoint=TEMPLATE(azureaisearchhost) token_file=TEMPLATE(token_file)
upload_documents: >-
run tags==block:upload_documents
errors===warn,counter
cycles===TEMPLATE(train_cycles,TEMPLATE(trainsize,1000)) threads===TEMPLATE(train_threads,AUTO)
token_file===TEMPLATE(token_file) endpoint===TEMPLATE(azureaisearchhost)
search_documents: >-
run tags==block:search_documents
errors===warn,counter
cycles===TEMPLATE(testann_cycles,TEMPLATE(testsize,1000)) threads===TEMPLATE(testann_threads,AUTO)
endpoint=TEMPLATE(azureaisearchhost) token_file=TEMPLATE(token_file)
params:
driver: azure_aisearch
instrument: true
bindings:
id_val: Identity();
id_val_uuid: ToHashedUUID() -> java.util.UUID
row_key: ToString()
row_key_batch: Mul(TEMPLATE(batch_size)L); ListSizedStepped(TEMPLATE(batch_size),long->ToString());
# filetype=hdf5 for TEMPLATE(filetype,hdf5)
test_floatlist_hdf5: HdfFileToFloatList("local/testdata/TEMPLATE(dataset).hdf5", "/test");
relevant_indices_hdf5: HdfFileToIntArray("local/testdata/TEMPLATE(dataset).hdf5", "/neighbors")
distance_floatlist_hdf5: HdfFileToFloatList("testdata/TEMPLATE(dataset).hdf5", "/distance")
# TODO - remove the 'local' keyword in path
train_floatlist_hdf5: HdfFileToFloatList("local/testdata/TEMPLATE(dataset).hdf5", "/train");
train_floatlist_hdf5_batch: Mul(TEMPLATE(batch_size)L); ListSizedStepped(TEMPLATE(batch_size),HdfFileToFloatList("testdata/TEMPLATE(dataset).hdf5", "/train"));
# filetype=fvec for TEMPLATE(filetype,fvec)
test_floatlist_fvec: FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_query_vectors.fvec");
relevant_indices_fvec: IVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_indices_query.ivec");
distance_floatlist_fvec: FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(testsize)_distances_count.fvec",TEMPLATE(dimensions),0);
train_floatlist_fvec: FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_base_vectors.fvec",TEMPLATE(dimensions),0);
train_floatlist_fvec_batch: Mul(TEMPLATE(batch_size,10)L); ListSizedStepped(TEMPLATE(batch_size),FVecReader("testdata/TEMPLATE(dataset)_TEMPLATE(trainsize)_base_vectors.fvec",TEMPLATE(dimensions),0));
##############################################
# NOTE: An Azure AI Search index name must start and end with alphanumeric characters and contain only lowercase letters, digits or dashes.
##############################################
blocks:
delete_index:
ops:
# https://learn.microsoft.com/en-us/rest/api/searchservice/indexes/delete?view=rest-searchservice-2024-07-01&tabs=HTTP
delete_idx_op:
delete_index: "TEMPLATE(collection)"
# Block: creates or updates the index named by the `collection` template parameter.
# The index has two fields: a string key field `id` and a vector field `value`.
create_or_update_index:
ops:
# https://learn.microsoft.com/en-us/rest/api/searchservice/indexes/create-or-update?view=rest-searchservice-2024-07-01&tabs=HTTP
create_or_update_index_op:
create_or_update_index: "TEMPLATE(collection)"
fields:
# Document key field (key: true).
id:
type: "Edm.String" # Data types - https://learn.microsoft.com/en-us/rest/api/searchservice/supported-data-types#edm-data-types-for-vector-fields
key: true
filterable: true
sortable: true
searchable: true
facetable: false
retrievable: true
hidden: false
# Vector field. Its dimension comes from the `dimensions` template parameter and
# `vectorSearchProfile` must name an entry defined under vectorSearch.profiles below.
value:
type: "Collection(Edm.Single)"
dimensions: TEMPLATE(dimensions)
vectorSearchProfile: "vector-profile-hnsw-scalar-1"
hidden: false
searchable: true
retrievable: true
filterable: false
sortable: false
facetable: false
# Vector search configuration. The entries reference each other by name:
# profiles.<name>.algorithm -> algorithms.<name>, profiles.<name>.compression -> compressions.<name>.
vectorSearch:
compressions:
scalar-quantization-1:
kind: "scalarQuantization"
rerankWithOriginalVectors: true
defaultOversampling: 1
scalarQuantizationParameters:
quantizedDataType: "int8"
algorithms:
hnsw-sq-1:
kind: "hnsw" # or "exhaustiveKnn"
hnswParameters:
m: 32
efConstruction: 100
efSearch: 100
# Distance metric is taken from the `similarity_function` template parameter.
metric: "TEMPLATE(similarity_function)"
# Alternative parameter block for kind "exhaustiveKnn" (kept disabled):
#exhaustiveKnnParameters:
#metric: "TEMPLATE(similarity_function)"
profiles:
vector-profile-hnsw-scalar-1:
algorithm: "hnsw-sq-1"
compression: "scalar-quantization-1"
# Block: lists the indexes of the search service.
list_indexes:
ops:
# https://learn.microsoft.com/en-us/rest/api/searchservice/indexes/list?view=rest-searchservice-2024-07-01&tabs=HTTP
list_indexes_op:
# NOTE(review): the value looks like a placeholder only; presumably listing needs no
# target name and the op template merely requires some value - confirm against the adapter.
list_indexes: "DUMMY_PLACEHOLDER"
# Block: uploads documents into the index named by the `collection` template parameter.
upload_documents:
ops:
upload_documents_op:
upload_documents: "TEMPLATE(collection)"
# Document payload; field names match the `id` and `value` fields of the index definition.
fields:
# Values come from the `row_key` binding and the `train_floatlist_<filetype>` binding,
# where <filetype> is substituted from the `filetype` template parameter.
id: "{row_key}"
value: "{train_floatlist_TEMPLATE(filetype)}"
# Block: runs a vector query against the index named by the `collection` template parameter
# and feeds the ids of the returned documents into relevancy measures.
search_documents:
ops:
search_documents_op:
search_documents: "TEMPLATE(collection)"
count: false
select: "id, value"
# Vector query over the `value` field. The query vector comes from the
# `test_floatlist_<filetype>` binding; `k` uses the `select_limit` template parameter (default 100).
vectorQueries:
kind: "vector"
vector: "{test_floatlist_TEMPLATE(filetype)}"
exhaustive: false
fields: "value"
weight: 1.0
k: TEMPLATE(select_limit,100)
# verifier-init registers recall, precision, F1, reciprocal rank and average precision
# functions for k=100 on a RelevancyMeasures instance.
# verifier extracts the "id" field of the result via AzureAISearchAdapterUtils and passes it,
# together with the `relevant_indices_<filetype>` binding, to relevancy.accept; it always returns true.
# NOTE(review): the verifier prints actual_indices with System.out.println each time it runs -
# this looks like leftover debugging output; consider removing it before benchmarking.
# NOTE(review): the k in List.of(100) is hard-coded while the query's k is templated through
# select_limit - confirm whether the two are meant to stay in sync.
verifier-init: |
relevancy= new io.nosqlbench.nb.api.engine.metrics.wrappers.RelevancyMeasures(_parsed_op);
for (int k in List.of(100)) {
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.recall("recall",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.precision("precision",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.F1("F1",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.reciprocal_rank("RR",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.average_precision("AP",k));
}
verifier: |
// driver-specific function
actual_indices=io.nosqlbench.adapter.azureaisearch.AzureAISearchAdapterUtils.responseFieldToStringArray("id",result)
System.out.println("actual_indices ------>>>>: " + actual_indices);
// driver-agnostic function
relevancy.accept({relevant_indices_TEMPLATE(filetype)},actual_indices);
// because we are "verifying" although this needs to be reorganized
return true;

View File

@ -0,0 +1,67 @@
# Azure AI Search driver adapter
The Azure AI Search driver adapter is a NoSQLBench adapter for the `azure-aisearch` driver, a Java driver
for connecting to and performing operations on an instance of an Azure AI Search vector database. The driver
source is available on GitHub at https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/search/azure-search-documents/.
## Run Commands (Remove prior to merge)
### Create or Update Index
```
java --enable-preview -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.create_or_update_index dimensions=25 similarity_function=cosine testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azureaisearch,instance:vectors,vendor:azureaisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
### Delete Index
```
java -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.delete_index dimensions=25 testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 similarity_function=cosine azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azure_aisearch,instance:vectors,vendor:azure_aisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
### List Indexes
```
java --enable-preview -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.list_indexes dimensions=25 similarity_function=cosine testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azureaisearch,instance:vectors,vendor:azureaisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
### Upload Documents
```
java --enable-preview -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.upload_documents dimensions=25 similarity_function=cosine testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azureaisearch,instance:vectors,vendor:azureaisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
### Search Documents
```
java --enable-preview -jar ${workspace_loc:/nosqlbench}/nb5/target/nb5.jar azure_aisearch_vectors_live azure_aisearch_vectors.search_documents dimensions=25 similarity_function=cosine testsize=10000 trainsize=1183514 dataset=glove-25-angular filetype=hdf5 collection=glove_25 azureaisearchhost=https://stratperf-aisearch-central-india-free-tier.search.windows.net token_file=${workspace_loc:/nosqlbench}/local/azure_aisearch/apikey --progress console:1s -v --add-labels "dimensions:25,dataset=glove-25" --add-labels="target:azureaisearch,instance:vectors,vendor:azureaisearch" --report-prompush-to https://vector-perf.feat.apps.paas.datastax.com:8427/api/v1/import/prometheus/metrics/job/nosqlbench/instance/vectors --annotators "[{'type':'log','level':'info'},{'type':'grafana','baseurl':'https://vector-perf.feat.apps.paas.datastax.com/'}]" --report-interval 10 --show-stacktraces --logs-max 5
```
## Activity Parameters
The following parameters must be supplied to the adapter at runtime in order to successfully connect to an
instance of the [Azure AI Search database](https://learn.microsoft.com/en-us/rest/api/searchservice/?view=rest-searchservice-2024-07-01):
* `token` - In order to use the Azure AI Search service you must have an Azure account and a provisioned search service. Once the service is created you can [obtain
an api key/token](https://learn.microsoft.com/en-us/azure/search/search-security-api-keys). This key will need to be
provided any time a database connection is desired. Alternatively, the api key can be stored in a file securely and
referenced via the `token_file` config option pointing to the path of the file.
* `endpoint` - When a collection/index is created in the database the URI (aka endpoint) must be specified as well. The adapter will
use the default value of `localhost:8080` if none is provided at runtime.
* `api_version` - the api version to be used by the search client. Defaults to the latest service/api version supported
by the version of client SDK.
## Op Templates
The Azure AI Search adapter supports [**all basic operations**](../java/io/nosqlbench/adapter/azureaisearch/ops) supported by the [Java
client SDK published by Azure](https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/search/azure-search-documents/). The official Azure AI Search API reference can be
found at https://learn.microsoft.com/en-us/rest/api/searchservice/operation-groups?view=rest-searchservice-2024-07-01.
The operations include full-fledged support for the key APIs available in the Java SDK client.
The following high-level API operations are currently supported:
* Create or Update Index
* Delete Index
* List Indexes
* Upload Documents (vectors)
* (Vector) Search Documents (vectors)
## Examples
Check out the [full example workload available here](./activities/azure_aisearch_vectors_live.yaml).
---

View File

@ -55,7 +55,7 @@
<dependency>
<groupId>io.milvus</groupId>
<artifactId>milvus-sdk-java</artifactId>
<version>2.4.1</version>
<version>2.3.5</version>
</dependency>
<!-- https://mvnrepository.com/artifact/ch.qos.reload4j/reload4j replaces log4j 1.X-->
<dependency>

View File

@ -74,7 +74,7 @@
<profile>
<id>adapter-cqld4-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -102,7 +102,7 @@
<profile>
<id>adapter-http-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -130,7 +130,7 @@
<profile>
<id>adapter-tcp-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -144,7 +144,7 @@
<profile>
<id>adapter-dataapi-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -158,7 +158,7 @@
<profile>
<id>adapter-dynamodb-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -172,7 +172,7 @@
<profile>
<id>adapter-mongodb-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -186,7 +186,7 @@
<profile>
<id>adapter-pulsar-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -200,7 +200,7 @@
<profile>
<id>adapter-s4j-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -214,7 +214,7 @@
<profile>
<id>adapter-neo4j-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -228,7 +228,7 @@
<profile>
<id>adapter-kafka-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -242,7 +242,7 @@
<profile>
<id>adapter-amqp-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -256,7 +256,7 @@
<profile>
<id>adapter-qdrant-include</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
@ -281,6 +281,20 @@
</dependencies>
</profile>
<!-- Profile that adds the adapter-azure-aisearch artifact (same ${revision} as this build) as a dependency; active by default. -->
<profile>
<id>adapter-azure-aisearch-include</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>adapter-azure-aisearch</artifactId>
<version>${revision}</version>
</dependency>
</dependencies>
</profile>
</profiles>
</project>

View File

@ -57,7 +57,7 @@
<profile>
<id>adapter-cqld4-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-cqld4</module>
@ -77,7 +77,7 @@
<profile>
<id>adapter-http-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-http</module>
@ -97,7 +97,7 @@
<profile>
<id>adapter-tcp-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-tcp</module>
@ -107,7 +107,7 @@
<profile>
<id>adapter-dynamodb-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-dynamodb</module>
@ -117,7 +117,7 @@
<profile>
<id>adapter-mongodb-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-mongodb</module>
@ -127,7 +127,7 @@
<profile>
<id>adapter-neo4j-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-neo4j</module>
@ -137,7 +137,7 @@
<profile>
<id>adapter-pulsar-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-pulsar</module>
@ -147,7 +147,7 @@
<profile>
<id>adapter-s4j-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-s4j</module>
@ -157,7 +157,7 @@
<profile>
<id>adapter-kafka-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-kafka</module>
@ -167,7 +167,7 @@
<profile>
<id>adapter-amqp-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-amqp</module>
@ -177,7 +177,7 @@
<profile>
<id>adapter-dataapi-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-dataapi</module>
@ -187,7 +187,7 @@
<profile>
<id>adapter-qdrant-module</id>
<activation>
<activeByDefault>false</activeByDefault>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-qdrant</module>
@ -204,5 +204,15 @@
</modules>
</profile>
<!-- Profile that adds the adapter-azure-aisearch module to the build; active by default. -->
<profile>
<id>adapter-azure-aisearch-module</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>adapter-azure-aisearch</module>
</modules>
</profile>
</profiles>
</project>