Merge pull request #2012 from nosqlbench/driver/weaviate

Code cleanups in Azure AI Search driver adapter
This commit is contained in:
Madhavan 2024-08-14 23:14:28 -04:00 committed by GitHub
commit 30783a385f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 70 additions and 194 deletions

View File

@ -24,7 +24,6 @@ import com.azure.search.documents.SearchDocument;
import com.azure.search.documents.util.SearchPagedIterable; import com.azure.search.documents.util.SearchPagedIterable;
public class AzureAISearchAdapterUtils { public class AzureAISearchAdapterUtils {
public static final String AZURE_AI_SEARCH = "azure_aisearch"; public static final String AZURE_AI_SEARCH = "azure_aisearch";
public static List<String> splitNames(String input) { public static List<String> splitNames(String input) {
@ -57,9 +56,18 @@ public class AzureAISearchAdapterUtils {
return masked.toString(); return masked.toString();
} }
public String[] responseFieldToStringArray(String fieldName, SearchPagedIterable response) { /**
return response.stream() * Prepares an integer array of the indices of keys containing the result
.map(searchResult -> searchResult.getDocument(SearchDocument.class).get(fieldName).toString()) * vectors.
.toArray(String[]::new); *
* @param field field to search for the index values of the vectors.
* @param response results from which we need to search for the indexes.
* @return an {@code int[]} of the indexes of the vectors.
*/
public static int[] searchDocumentsResponseIdToIntArray(String field, SearchPagedIterable response) {
return response.stream().mapToInt(r -> {
SearchDocument returnObj = r.getDocument(SearchDocument.class);
return Integer.valueOf((String) returnObj.get(field));
}).toArray();
} }
} }

View File

@ -68,8 +68,8 @@ public class AzureAISearchOpMapper implements OpMapper<AzureAISearchBaseOp<?>> {
case upload_documents -> new AzureAISearchUploadDocumentsOpDispenser(adapter, op, typeAndTarget.targetFunction); case upload_documents -> new AzureAISearchUploadDocumentsOpDispenser(adapter, op, typeAndTarget.targetFunction);
case search_documents -> new AzureAISearchSearchDocumentsOpDispenser(adapter, op, typeAndTarget.targetFunction); case search_documents -> new AzureAISearchSearchDocumentsOpDispenser(adapter, op, typeAndTarget.targetFunction);
// default -> throw new RuntimeException("Unrecognized op type '" + typeAndTarget.enumId.name() + "' while " + // default -> throw new RuntimeException(
// "mapping parsed op " + op); // "Unrecognized op type '" + typeAndTarget.enumId.name() + "' while " + "mapping parsed op " + op);
}; };
} }
} }

View File

@ -56,7 +56,6 @@ public class AzureAISearchSpace implements AutoCloseable {
private final NBConfiguration cfg; private final NBConfiguration cfg;
protected SearchIndexClient searchIndexClient; protected SearchIndexClient searchIndexClient;
// protected SearchClient searchClient;
/** /**
* Create a new {@code AzureAISearchSpace} Object which stores all stateful * Create a new {@code AzureAISearchSpace} Object which stores all stateful
@ -78,13 +77,6 @@ public class AzureAISearchSpace implements AutoCloseable {
return searchIndexClient; return searchIndexClient;
} }
// public synchronized SearchClient getSearchClient() {
// if (searchClient == null) {
// createSearchClients();
// }
// return searchClient;
// }
private SearchIndexClient createSearchClients() { private SearchIndexClient createSearchClients() {
String uri = cfg.get("endpoint"); String uri = cfg.get("endpoint");
var requiredToken = cfg.getOptional("token_file").map(Paths::get).map(tokenFilePath -> { var requiredToken = cfg.getOptional("token_file").map(Paths::get).map(tokenFilePath -> {
@ -98,27 +90,21 @@ public class AzureAISearchSpace implements AutoCloseable {
}).orElseGet(() -> cfg.getOptional("token").orElseThrow(() -> new RuntimeException( }).orElseGet(() -> cfg.getOptional("token").orElseThrow(() -> new RuntimeException(
"You must provide either a 'token_file' or a 'token' to configure a Azure AI Search client"))); "You must provide either a 'token_file' or a 'token' to configure a Azure AI Search client")));
logger.info("{}: Creating new Azure AI Search Client with (masked) token/key [{}], uri/endpoint [{}]", logger.info(() -> "Creating new Azure AI Search Client with (masked) token/key ["
this.name, AzureAISearchAdapterUtils.maskDigits(requiredToken), uri); + AzureAISearchAdapterUtils.maskDigits(requiredToken) + "], uri/endpoint [" + uri + "]");
var searchIndexClientBuilder = new SearchIndexClientBuilder().endpoint(uri); var searchIndexClientBuilder = new SearchIndexClientBuilder().endpoint(uri);
// var searchClientBuilder = new SearchClientBuilder().endpoint(uri);
if (!requiredToken.isBlank()) { if (!requiredToken.isBlank()) {
searchIndexClientBuilder = searchIndexClientBuilder.credential(new AzureKeyCredential(requiredToken)); searchIndexClientBuilder = searchIndexClientBuilder.credential(new AzureKeyCredential(requiredToken));
// searchClientBuilder = searchClientBuilder.credential(new AzureKeyCredential(requiredToken));
} else { } else {
TokenCredential tokenCredential = new DefaultAzureCredentialBuilder().build(); TokenCredential tokenCredential = new DefaultAzureCredentialBuilder().build();
searchIndexClientBuilder = searchIndexClientBuilder.credential(tokenCredential); searchIndexClientBuilder = searchIndexClientBuilder.credential(tokenCredential);
// searchClientBuilder = searchClientBuilder.credential(tokenCredential);
} }
// Should we leave these below to leverage the SearchServiceVersion.getLatest()? // Should we leave these below to leverage the SearchServiceVersion.getLatest()?
String apiVersion = cfg.getOptional("api_version").orElse(SearchServiceVersion.V2024_07_01.name()); String apiVersion = cfg.getOptional("api_version").orElse(SearchServiceVersion.V2024_07_01.name());
logger.warn( logger.warn(
"Latest search service version supported by this client is '{}', but we're using '{}' version. Ignore this warning if both are same.", () -> "Latest search service version supported by this client is '" + SearchServiceVersion.getLatest()
SearchServiceVersion.getLatest(), apiVersion); + "', but we're using '" + apiVersion + "' version. Ignore this warning if both are same.");
// TODO - try to find a way to get rid of placeholder
// this.searchClient = searchClientBuilder.serviceVersion(SearchServiceVersion.valueOf(apiVersion))
// .indexName("PLACEHOLDER").buildClient();
return searchIndexClientBuilder.serviceVersion(SearchServiceVersion.valueOf(apiVersion)).buildClient(); return searchIndexClientBuilder.serviceVersion(SearchServiceVersion.valueOf(apiVersion)).buildClient();
} }

View File

@ -31,30 +31,24 @@ public abstract class AzureAISearchBaseOp<T> implements CycleOp<Object> {
protected final static Logger logger = LogManager.getLogger(AzureAISearchBaseOp.class); protected final static Logger logger = LogManager.getLogger(AzureAISearchBaseOp.class);
protected final SearchIndexClient searchIndexClient; protected final SearchIndexClient searchIndexClient;
// protected final SearchClient searchClient;
protected final T request; protected final T request;
protected final LongFunction<Object> apiCall; protected final LongFunction<Object> apiCall;
public AzureAISearchBaseOp(SearchIndexClient searchIndexClient, T requestParam) { public AzureAISearchBaseOp(SearchIndexClient searchIndexClient, T requestParam) {
this.searchIndexClient = searchIndexClient; this.searchIndexClient = searchIndexClient;
// TODO - figure out how to do this cleanly
// this.searchClient = searchIndexClient.getSearchClient("PLACEHOLDER");
this.request = requestParam; this.request = requestParam;
this.apiCall = this::applyOp; this.apiCall = this::applyOp;
} }
public AzureAISearchBaseOp(SearchIndexClient searchIndexClient, T requestParam, LongFunction<Object> call) { public AzureAISearchBaseOp(SearchIndexClient searchIndexClient, T requestParam, LongFunction<Object> call) {
this.searchIndexClient = searchIndexClient; this.searchIndexClient = searchIndexClient;
// TODO - figure out how to do this cleanly
// this.searchClient = searchIndexClient.getSearchClient("PLACEHOLDER");
this.request = requestParam; this.request = requestParam;
this.apiCall = call; this.apiCall = call;
} }
@SuppressWarnings("unchecked")
@Override @Override
public final Object apply(long value) { public final Object apply(long value) {
logger.trace("applying op: " + this); logger.trace(() -> "applying op: " + this);
try { try {
Object result = applyOp(value); Object result = applyOp(value);

View File

@ -28,16 +28,8 @@ public class AzureAISearchCreateOrUpdateIndexOp extends AzureAISearchBaseOp<Sear
public Object applyOp(long value) { public Object applyOp(long value) {
SearchIndex createResponse = null; SearchIndex createResponse = null;
try { try {
if (logger.isDebugEnabled()) {
request.getFields().forEach((field) -> {
logger.debug(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>SearchIndex: Name:{}-ProfileName:{}-Type:{}-Dimension:{}",
field.getName(), field.getVectorSearchProfileName(), field.getType().toString(),
field.getVectorSearchDimensions());
});
}
createResponse = searchIndexClient.createOrUpdateIndex(request); createResponse = searchIndexClient.createOrUpdateIndex(request);
logger.debug("Successfully created the collection with return code of {}", createResponse.toString()); logger.debug("Successfully created the collection with return response: {}", createResponse.toString());
} catch (RuntimeException rte) { } catch (RuntimeException rte) {
throw rte; throw rte;
} }

View File

@ -30,12 +30,11 @@ public class AzureAISearchListIndexesOp extends AzureAISearchBaseOp<String> {
try { try {
PagedIterable<SearchIndex> response = searchIndexClient.listIndexes(); PagedIterable<SearchIndex> response = searchIndexClient.listIndexes();
response.forEach((index) -> { response.forEach((index) -> {
logger.info("Indexes available are: Name: {}, ETag: {}", index.getName(), index.getETag()); logger.info(() -> "Indexes available are: Name: " + index.getName() + ", ETag: " + index.getETag());
index.getFields().forEach(field -> { index.getFields().forEach(field -> {
logger.info( logger.info(() -> "Field Name: " + field.getName() + ", Field isKey?: " + field.isKey()
"Field Name: {}, Field isKey?: {}, Field Dimension: {}, Field Vector Search Profile: {}", + ", Field Dimension: " + field.getVectorSearchDimensions()
field.getName(), field.isKey(), field.getVectorSearchDimensions(), + ", Field Vector Search Profile: " + field.getVectorSearchProfileName());
field.getVectorSearchProfileName());
}); });
}); });
} catch (RuntimeException rte) { } catch (RuntimeException rte) {

View File

@ -17,7 +17,6 @@ package io.nosqlbench.adapter.azureaisearch.ops;
import com.azure.core.util.Context; import com.azure.core.util.Context;
import com.azure.search.documents.SearchClient; import com.azure.search.documents.SearchClient;
import com.azure.search.documents.SearchDocument;
import com.azure.search.documents.indexes.SearchIndexClient; import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.models.SearchOptions; import com.azure.search.documents.models.SearchOptions;
import com.azure.search.documents.util.SearchPagedIterable; import com.azure.search.documents.util.SearchPagedIterable;
@ -36,14 +35,6 @@ public class AzureAISearchSearchDocumentsOp extends AzureAISearchClientBaseOp<Se
searchDocsResponse = searchClient.search(null, // we've not implemented other complex searches yet here. searchDocsResponse = searchClient.search(null, // we've not implemented other complex searches yet here.
request, request,
Context.NONE); Context.NONE);
if (logger.isInfoEnabled()) {
searchDocsResponse.forEach((r) -> {
SearchDocument doc = r.getDocument(SearchDocument.class);
logger.debug(
"Successfully searched the index and returned id: {}, score: {}, vector embedding: {}",
doc.get("id"), r.getScore(), doc.get("value"));
});
}
} catch (RuntimeException rte) { } catch (RuntimeException rte) {
throw rte; throw rte;
} }

View File

@ -33,18 +33,12 @@ public class AzureAISearchUploadDocumentsOp extends AzureAISearchClientBaseOp<Se
public Object applyOp(long value) { public Object applyOp(long value) {
IndexDocumentsResult uploadDocsResponse = null; IndexDocumentsResult uploadDocsResponse = null;
try { try {
// request.getFields().forEach((field) -> {
// logger.info(
// ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>[AzureAISearchUploadDocumentsOp] SearchIndex: Name:{}-ProfileName:{}-Type:{}-Dimension:{}",
// field.getName(), field.getVectorSearchProfileName(), field.getType().toString(),
// field.getVectorSearchDimensions());
// });
uploadDocsResponse = searchClient.uploadDocuments(List.of(request)); uploadDocsResponse = searchClient.uploadDocuments(List.of(request));
if (logger.isDebugEnabled()) { if (logger.isDebugEnabled()) {
uploadDocsResponse.getResults().forEach((r) -> { uploadDocsResponse.getResults().forEach((r) -> {
logger.debug( logger.debug(() -> "Successfully created the collection with return status code: "
"Successfully created the collection with return status code: {}, key: {}, succeeded?: {}, error message: {}", + r.getStatusCode() + ", key: " + r.getKey() + ", succeeded?: " + r.isSucceeded()
r.getStatusCode(), r.getKey(), r.isSucceeded(), r.getErrorMessage()); + ", error message: " + r.getErrorMessage());
}); });
} }
} catch (RuntimeException rte) { } catch (RuntimeException rte) {

View File

@ -66,7 +66,6 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
@Override @Override
public LongFunction<SearchIndex> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op, public LongFunction<SearchIndex> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
LongFunction<String> targetF) { LongFunction<String> targetF) {
logger.debug(">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>getParamFunc");
LongFunction<SearchIndex> ebF = l -> new SearchIndex(targetF.apply(l)); LongFunction<SearchIndex> ebF = l -> new SearchIndex(targetF.apply(l));
Optional<LongFunction<Map>> fieldsMapF = op.getAsOptionalFunction("fields", Map.class); Optional<LongFunction<Map>> fieldsMapF = op.getAsOptionalFunction("fields", Map.class);
@ -94,28 +93,16 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
Map<String, Object> vsMap = mapLongFunc.apply(l); Map<String, Object> vsMap = mapLongFunc.apply(l);
VectorSearch vectorSearch = new VectorSearch(); VectorSearch vectorSearch = new VectorSearch();
vsMap.forEach((vsField, vsValue) -> { vsMap.forEach((vsField, vsValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>vsField:{} vsValue:{}",
vsField, vsValue);
if (vsValue instanceof Map) { if (vsValue instanceof Map) {
((Map<String, Object>) vsValue).forEach((innerKey, innerValue) -> { ((Map<String, Object>) vsValue).forEach((innerKey, innerValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>innerKey:{} innerValue:{}",
innerKey, innerValue);
if ("compressions".equals(vsField)) { if ("compressions".equals(vsField)) {
List<VectorSearchCompression> vsCompList = new ArrayList<>(); List<VectorSearchCompression> vsCompList = new ArrayList<>();
String kind; String kind;
if (((Map<String, Object>) innerValue).containsKey("kind")) { if (((Map<String, Object>) innerValue).containsKey("kind")) {
kind = (String) ((Map<String, Object>) innerValue).get("kind"); kind = (String) ((Map<String, Object>) innerValue).get("kind");
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>compressions>>>>kind:{}",
kind);
if (kind.equals("scalarQuantization")) { if (kind.equals("scalarQuantization")) {
ScalarQuantizationCompression sqComp = new ScalarQuantizationCompression(innerKey); ScalarQuantizationCompression sqComp = new ScalarQuantizationCompression(innerKey);
((Map<String, Object>) innerValue).forEach((compressKey, compressValue) -> { ((Map<String, Object>) innerValue).forEach((compressKey, compressValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>kind:{} compressKey:{} compressValue:{}",
kind, compressKey, compressValue);
if (compressKey.equals("kind")) { if (compressKey.equals("kind")) {
sqComp.getKind().fromString((String) compressValue); sqComp.getKind().fromString((String) compressValue);
} }
@ -137,15 +124,11 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
} }
}); });
vsCompList.add(sqComp); vsCompList.add(sqComp);
// vsCompList.add(buildVectorSearchCompression(bqComp, compressKey, compressValue, true));
} else { } else {
// BinaryQuantization is assumed here // BinaryQuantization is assumed here
BinaryQuantizationCompression bqComp = new BinaryQuantizationCompression(innerKey); BinaryQuantizationCompression bqComp = new BinaryQuantizationCompression(innerKey);
((Map<String, Object>) innerValue).forEach((compressKey, compressValue) -> { ((Map<String, Object>) innerValue).forEach((compressKey, compressValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>kind:{} compressKey:{} compressValue:{}",
kind, compressKey, compressValue);
if (compressKey.equals("kind")) { if (compressKey.equals("kind")) {
bqComp.getKind().fromString((String) compressValue); bqComp.getKind().fromString((String) compressValue);
} }
@ -157,15 +140,10 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
} }
}); });
vsCompList.add(bqComp); vsCompList.add(bqComp);
// vsCompList.add(
// buildVectorSearchCompression(bqComp, compressKey, compressValue, false));
} }
} else { } else {
VectorSearchCompression vsComp = new VectorSearchCompression(innerKey); VectorSearchCompression vsComp = new VectorSearchCompression(innerKey);
((Map<String, Object>) innerValue).forEach((compressKey, compressValue) -> { ((Map<String, Object>) innerValue).forEach((compressKey, compressValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>kind:{} compressKey:{} compressValue:{}",
null, compressKey, compressValue);
if (compressKey.equals("kind")) { if (compressKey.equals("kind")) {
vsComp.getKind().fromString((String) compressValue); vsComp.getKind().fromString((String) compressValue);
} }
@ -179,33 +157,18 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
vsCompList.add(vsComp); vsCompList.add(vsComp);
} }
vectorSearch.setCompressions(vsCompList); vectorSearch.setCompressions(vsCompList);
vectorSearch.getCompressions().forEach((comp) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>compressions FINAL: Name:{}",
comp.getCompressionName());
});
} }
if ("algorithms".equals(vsField)) { if ("algorithms".equals(vsField)) {
List<VectorSearchAlgorithmConfiguration> vsAlgoList = new ArrayList<>(); List<VectorSearchAlgorithmConfiguration> vsAlgoList = new ArrayList<>();
String kind; String kind;
if (((Map<String, Object>) innerValue).containsKey("kind")) { if (((Map<String, Object>) innerValue).containsKey("kind")) {
kind = (String) ((Map<String, Object>) innerValue).get("kind"); kind = (String) ((Map<String, Object>) innerValue).get("kind");
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms>>>>kind:{}",
kind);
if("hnsw".equals(kind)) { if("hnsw".equals(kind)) {
HnswAlgorithmConfiguration hnswAlgoConf = new HnswAlgorithmConfiguration(innerKey); HnswAlgorithmConfiguration hnswAlgoConf = new HnswAlgorithmConfiguration(innerKey);
((Map<String, Object>) innerValue).forEach((hnswKey, hnswValue) -> { ((Map<String, Object>) innerValue).forEach((hnswKey, hnswValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms>>>>kind:{} hnswKey:{} hnswValue:{}",
kind, hnswKey, hnswValue);
if ("hnswParameters".equals(hnswKey)) { if ("hnswParameters".equals(hnswKey)) {
((Map<String, Object>) innerValue) ((Map<String, Object>) innerValue)
.forEach((hnswParamsKey, hnswParamsValue) -> { .forEach((hnswParamsKey, hnswParamsValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms>>>>kind:{} hnswKey:{} hnswValue:{} hnswParamsKey:{} hnswParamsValue:{}",
kind, hnswKey, hnswValue, hnswParamsKey,
hnswParamsValue);
HnswParameters hnswParams = new HnswParameters(); HnswParameters hnswParams = new HnswParameters();
if ("m".equals(hnswParamsKey)) { if ("m".equals(hnswParamsKey)) {
hnswParams.setM(((Number) hnswParamsValue).intValue()); hnswParams.setM(((Number) hnswParamsValue).intValue());
@ -232,15 +195,9 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
ExhaustiveKnnAlgorithmConfiguration exhausKnnAlgoConf = new ExhaustiveKnnAlgorithmConfiguration( ExhaustiveKnnAlgorithmConfiguration exhausKnnAlgoConf = new ExhaustiveKnnAlgorithmConfiguration(
innerKey); innerKey);
((Map<String, Object>) innerValue).forEach((algoKey, algoValue) -> { ((Map<String, Object>) innerValue).forEach((algoKey, algoValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms>>>>kind:{} algoKey:{} algoValue:{}",
kind, algoKey, algoValue);
if (algoKey.equals("exhaustiveKnnParameters")) { if (algoKey.equals("exhaustiveKnnParameters")) {
ExhaustiveKnnParameters eKnnParms = new ExhaustiveKnnParameters(); ExhaustiveKnnParameters eKnnParms = new ExhaustiveKnnParameters();
((Map<String, Object>) algoValue).forEach((ekpKey, ekpVal) -> { ((Map<String, Object>) algoValue).forEach((ekpKey, ekpVal) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms>>>>kind:{} algoKey:{} algoValue:{} ekpKey:{} ekpVal:{}",
kind, algoKey, algoValue, ekpKey, ekpVal);
if (ekpKey.equals("quantizedDataType")) { if (ekpKey.equals("quantizedDataType")) {
eKnnParms.setMetric( eKnnParms.setMetric(
VectorSearchAlgorithmMetric.fromString((String) ekpVal)); VectorSearchAlgorithmMetric.fromString((String) ekpVal));
@ -253,45 +210,21 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
} }
} }
vectorSearch.setAlgorithms(vsAlgoList); vectorSearch.setAlgorithms(vsAlgoList);
vectorSearch.getAlgorithms().forEach((algo) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms FINAL: Name:{}",
algo.getName());
});
} }
if ("profiles".equals(vsField)) { if ("profiles".equals(vsField)) {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles");
List<VectorSearchProfile> vsProfileList = new ArrayList<>(); List<VectorSearchProfile> vsProfileList = new ArrayList<>();
// VectorSearchProfile vsProfile = new VectorSearchProfile(innerKey, null);
((Map<String, Object>) vsValue).forEach((profKey, profVal) -> { ((Map<String, Object>) vsValue).forEach((profKey, profVal) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles: profKey:{} profVal:{}",
profKey, profVal);
((Map<String, Object>) profVal).forEach((pK, pV) -> { ((Map<String, Object>) profVal).forEach((pK, pV) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles: profKey:{} profVal:{} pK:{} pV:{}",
profKey, profVal, pK, pV);
if ("algorithm".equals(pK)) { if ("algorithm".equals(pK)) {
vsProfile = new VectorSearchProfile(profKey, (String) pV); vsProfile = new VectorSearchProfile(profKey, (String) pV);
} }
if ("compression".equals(pK)) { if ("compression".equals(pK)) {
vsProfile.setCompressionName((String) pV); vsProfile.setCompressionName((String) pV);
} }
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles: Name:{}>>>AlgoName:{}>>>CompressionName:{}",
vsProfile.getName(), vsProfile.getAlgorithmConfigurationName(),
vsProfile.getCompressionName());
}); });
vsProfileList.add(vsProfile); vsProfileList.add(vsProfile);
}); });
vectorSearch.setProfiles(vsProfileList); vectorSearch.setProfiles(vsProfileList);
vectorSearch.getProfiles().forEach((profile) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles FINAL: Name:{} AlgorithmConfName:{}",
profile.getName(), profile.getAlgorithmConfigurationName());
});
} }
}); });
} else { } else {
@ -304,48 +237,15 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
}).orElse(null); }).orElse(null);
} }
@SuppressWarnings({ "unchecked", "static-access" })
private VectorSearchCompression buildVectorSearchCompression(VectorSearchCompression vsComp, String key, Object val,
boolean isSQ) {
if (key.equals("kind")) {
vsComp.getKind().fromString((String) val);
}
if (key.equals("rerankWithOriginalVectors")) {
vsComp.setRerankWithOriginalVectors((Boolean) val);
}
if (key.equals("defaultOversampling")) {
vsComp.setDefaultOversampling(((Number) val).doubleValue());
}
if (isSQ) {
if (key.equals("scalarQuantizationParameters")) {
ScalarQuantizationParameters sqParams = new ScalarQuantizationParameters();
((Map<String, Object>) val).forEach((sqKey, sqVal) -> {
if (sqKey.equals("quantizedDataType")) {
sqParams.setQuantizedDataType(VectorSearchCompressionTarget.fromString((String) sqVal));
}
});
((ScalarQuantizationCompression) vsComp).setParameters(sqParams);
}
}
return vsComp;
}
@SuppressWarnings({ "unchecked", "rawtypes" }) @SuppressWarnings({ "unchecked", "rawtypes" })
private LongFunction<List<SearchField>> buildFieldsStruct(ParsedOp op) { private LongFunction<List<SearchField>> buildFieldsStruct(ParsedOp op) {
logger.debug(">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>buildFieldsStruct");
Optional<LongFunction<Map>> baseFunc = op.getAsOptionalFunction("fields", Map.class); Optional<LongFunction<Map>> baseFunc = op.getAsOptionalFunction("fields", Map.class);
return baseFunc.<LongFunction<List<SearchField>>>map(mapLongFunc -> l -> { return baseFunc.<LongFunction<List<SearchField>>>map(mapLongFunc -> l -> {
Map<String, Object> fMap = mapLongFunc.apply(l); Map<String, Object> fMap = mapLongFunc.apply(l);
List<SearchField> fieldsList = new ArrayList<>(); List<SearchField> fieldsList = new ArrayList<>();
fMap.forEach((fName, fValue) -> { fMap.forEach((fName, fValue) -> {
if (fValue instanceof Map) { if (fValue instanceof Map) {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>buildFieldsStruct>>>>fName:{} fValue:{}",
fName, fValue);
((Map<String, Object>) fValue).forEach((innerKey, innerValue) -> { ((Map<String, Object>) fValue).forEach((innerKey, innerValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>buildFieldsStruct>>>>fName:{} fValue:{} fName:{} fValue:{}",
fName, fValue, innerKey, innerValue);
if (innerKey.equals("type")) { if (innerKey.equals("type")) {
searchField = new SearchField(fName, SearchFieldDataType.fromString((String) innerValue)); searchField = new SearchField(fName, SearchFieldDataType.fromString((String) innerValue));
} }
@ -357,8 +257,6 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
} }
if (innerKey.equals("vectorSearchProfile")) { if (innerKey.equals("vectorSearchProfile")) {
searchField.setVectorSearchProfileName((String) innerValue); searchField.setVectorSearchProfileName((String) innerValue);
logger.debug("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% {} %n",
searchField.getVectorSearchProfileName());
} }
if (innerKey.equals("filterable")) { if (innerKey.equals("filterable")) {
searchField.setFilterable((Boolean) innerValue); searchField.setFilterable((Boolean) innerValue);
@ -386,13 +284,6 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
+ fValue.getClass().getSimpleName() + " instead for the inner value"); + fValue.getClass().getSimpleName() + " instead for the inner value");
} }
fieldsList.add(searchField); fieldsList.add(searchField);
if (logger.isDebugEnabled()) {
fieldsList.forEach((field) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>buildFieldsStruct>>>> fields FINAL: Name:{} VSProfileName:{}",
field.getName(), field.getVectorSearchProfileName());
});
}
}); });
return fieldsList; return fieldsList;
}).orElse(null); }).orElse(null);

View File

@ -24,6 +24,9 @@ import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchListIndexesOp; import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchListIndexesOp;
import io.nosqlbench.adapters.api.templating.ParsedOp; import io.nosqlbench.adapters.api.templating.ParsedOp;
/**
* Lists the indexes available.
*/
public class AzureAISearchListIndexesOpDispenser extends AzureAISearchBaseOpDispenser<String> { public class AzureAISearchListIndexesOpDispenser extends AzureAISearchBaseOpDispenser<String> {
public AzureAISearchListIndexesOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op, public AzureAISearchListIndexesOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
LongFunction<String> targetF) { LongFunction<String> targetF) {

View File

@ -33,8 +33,12 @@ import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.nosqlbench.nb.api.errors.OpConfigError; import io.nosqlbench.nb.api.errors.OpConfigError;
/** /**
* @see https://learn.microsoft.com/en-us/rest/api/searchservice/documents/search-get?view=rest-searchservice-2024-07-01&tabs=HTTP#rawvectorquery * @see <a href=
* @see https://learn.microsoft.com/en-us/azure/search/vector-search-how-to-query?tabs=query-2024-07-01%2Cfilter-2024-07-01%2Cbuiltin-portal#vector-query-request * "https://learn.microsoft.com/en-us/rest/api/searchservice/documents/search-get?view=rest-searchservice-2024-07-01&tabs=HTTP#rawvectorquery">
* Search GET API</a>
* @see <a href=
* "https://learn.microsoft.com/en-us/azure/search/vector-search-how-to-query?tabs=query-2024-07-01%2Cfilter-2024-07-01%2Cbuiltin-portal#vector-query-request">How
* to query/vector search</a>
*/ */
public class AzureAISearchSearchDocumentsOpDispenser extends AzureAISearchBaseOpDispenser<SearchOptions> { public class AzureAISearchSearchDocumentsOpDispenser extends AzureAISearchBaseOpDispenser<SearchOptions> {
public AzureAISearchSearchDocumentsOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op, public AzureAISearchSearchDocumentsOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,

View File

@ -37,7 +37,8 @@ scenarios:
run tags==block:upload_documents run tags==block:upload_documents
errors===warn,counter errors===warn,counter
cycles===TEMPLATE(train_cycles,TEMPLATE(trainsize,1000)) threads===TEMPLATE(train_threads,AUTO) cycles===TEMPLATE(train_cycles,TEMPLATE(trainsize,1000)) threads===TEMPLATE(train_threads,AUTO)
token_file===TEMPLATE(token_file) endpoint===TEMPLATE(azureaisearchhost) endpoint=TEMPLATE(azureaisearchhost) token_file=TEMPLATE(token_file)
#token_file===TEMPLATE(token_file) endpoint===TEMPLATE(azureaisearchhost)
search_documents: >- search_documents: >-
run tags==block:search_documents run tags==block:search_documents
@ -94,6 +95,18 @@ blocks:
facetable: false facetable: false
retrievable: true retrievable: true
hidden: false hidden: false
# Caused by: com.azure.core.exception.HttpResponseException: Status code 400, "{"error":{"code":"OperationNotAllowed","message":"The request is invalid. Details: definition : The searchable field 'id' must be of type Edm.String or Collection(Edm.String) or Collection(Edm.Single).","details":[{"code":"CannotEnableFieldForSearching","message":"The searchable field 'id' must be of type Edm.String or Collection(Edm.String) or Collection(Edm.Single). Parameters: definition"}]}}"
# Caused by: com.azure.core.exception.HttpResponseException: Status code 400, "{"error":{"code":"InvalidRequestParameter","message":"The request is invalid. Details: definition : The key field 'id' must be of type Edm.String.","details":[{"code":"InvalidKeyField","message":"The key field 'id' must be of type Edm.String. Parameters: definition"}]}}"
# Caused by: com.azure.core.exception.HttpResponseException: Status code 400, "{"error":{"code":"InvalidRequestParameter","message":"The request is invalid. Details: definition : The key field 'id' is marked as non-retrievable. Please set the 'retrievable' property for this field to 'true' or leave it unset.","details":[{"code":"InvalidKeyField","message":"The key field 'id' is marked as non-retrievable. Please set the 'retrievable' property for this field to 'true' or leave it unset. Parameters: definition"}]}}"
# dummy_key:
# type: "Edm.Int32" # Data types - https://learn.microsoft.com/en-us/rest/api/searchservice/supported-data-types#edm-data-types-for-vector-fields
# key: false
# filterable: true
# sortable: true
# searchable: true
# facetable: false
# retrievable: true
# hidden: false
value: value:
type: "Collection(Edm.Single)" type: "Collection(Edm.Single)"
dimensions: TEMPLATE(dimensions) dimensions: TEMPLATE(dimensions)
@ -139,6 +152,7 @@ blocks:
upload_documents: "TEMPLATE(collection)" upload_documents: "TEMPLATE(collection)"
fields: fields:
id: "{row_key}" id: "{row_key}"
# dummy_key: "{id_val}"
value: "{train_floatlist_TEMPLATE(filetype)}" value: "{train_floatlist_TEMPLATE(filetype)}"
search_documents: search_documents:
@ -165,8 +179,8 @@ blocks:
} }
verifier: | verifier: |
// driver-specific function // driver-specific function
actual_indices=io.nosqlbench.adapter.azureaisearch.AzureAISearchAdapterUtils.responseFieldToStringArray("id",result) actual_indices=io.nosqlbench.adapter.azureaisearch.AzureAISearchAdapterUtils.searchDocumentsResponseIdToIntArray("id",result)
System.out.println("actual_indices ------>>>>: " + actual_indices); // System.out.println("actual_indices ------>>>>: " + actual_indices);
// driver-agnostic function // driver-agnostic function
relevancy.accept({relevant_indices_TEMPLATE(filetype)},actual_indices); relevancy.accept({relevant_indices_TEMPLATE(filetype)},actual_indices);
// because we are "verifying" although this needs to be reorganized // because we are "verifying" although this needs to be reorganized