mirror of https://github.com/nosqlbench/nosqlbench.git
synced 2025-02-25 18:55:28 -06:00

Merge pull request #2012 from nosqlbench/driver/weaviate

Code cleanups in Azure AI Search driver adapter

This commit is contained in:
commit 30783a385f
@@ -24,13 +24,12 @@ import com.azure.search.documents.SearchDocument;
import com.azure.search.documents.util.SearchPagedIterable;

public class AzureAISearchAdapterUtils {

public static final String AZURE_AI_SEARCH = "azure_aisearch";

public static List<String> splitNames(String input) {
assert StringUtils.isNotBlank(input) && StringUtils.isNotEmpty(input);
return Arrays.stream(input.split("( +| *, *)")).filter(StringUtils::isNotBlank).toList();
}
}

public static List<Long> splitLongs(String input) {
assert StringUtils.isNotBlank(input) && StringUtils.isNotEmpty(input);

@@ -57,9 +56,18 @@ public class AzureAISearchAdapterUtils {
return masked.toString();
}

public String[] responseFieldToStringArray(String fieldName, SearchPagedIterable response) {
return response.stream()
.map(searchResult -> searchResult.getDocument(SearchDocument.class).get(fieldName).toString())
.toArray(String[]::new);
/**
* Prepares an integer array of the indices of keys containing the result
* vectors.
*
* @param field field to search for the index values of the vectors.
* @param response results from which we need to search for the indexes.
* @return an {@code int[]} of the indexes of the vectors.
*/
public static int[] searchDocumentsResponseIdToIntArray(String field, SearchPagedIterable response) {
return response.stream().mapToInt(r -> {
SearchDocument returnObj = r.getDocument(SearchDocument.class);
return Integer.valueOf((String) returnObj.get(field));
}).toArray();
}
}
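A quick usage sketch of the splitNames helper above; the input string and expected output are illustrative assumptions, not part of this commit:

// Hypothetical usage; the package name matches the one used by the workload verifier further down.
import java.util.List;
import io.nosqlbench.adapter.azureaisearch.AzureAISearchAdapterUtils;

List<String> names = AzureAISearchAdapterUtils.splitNames("field_a, field_b field_c");
// The regex "( +| *, *)" splits on spaces and/or commas and blank tokens are filtered out,
// so names would be ["field_a", "field_b", "field_c"].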
@@ -68,8 +68,8 @@ public class AzureAISearchOpMapper implements OpMapper<AzureAISearchBaseOp<?>> {
case upload_documents -> new AzureAISearchUploadDocumentsOpDispenser(adapter, op, typeAndTarget.targetFunction);
case search_documents -> new AzureAISearchSearchDocumentsOpDispenser(adapter, op, typeAndTarget.targetFunction);

// default -> throw new RuntimeException("Unrecognized op type '" + typeAndTarget.enumId.name() + "' while " +
// "mapping parsed op " + op);
// default -> throw new RuntimeException(
// "Unrecognized op type '" + typeAndTarget.enumId.name() + "' while " + "mapping parsed op " + op);
};
}
}
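The commented-out default branch above could be reinstated roughly as follows; this is only a sketch reusing the names visible in the mapper, not something this commit adds:

// Hypothetical default arm for the op-type switch, mirroring the commented-out lines above.
default -> throw new RuntimeException(
"Unrecognized op type '" + typeAndTarget.enumId.name() + "' while mapping parsed op " + op);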
@@ -56,7 +56,6 @@ public class AzureAISearchSpace implements AutoCloseable {
private final NBConfiguration cfg;

protected SearchIndexClient searchIndexClient;
// protected SearchClient searchClient;

/**
* Create a new {@code AzureAISearchSpace} Object which stores all stateful

@@ -78,13 +77,6 @@ public class AzureAISearchSpace implements AutoCloseable {
return searchIndexClient;
}

// public synchronized SearchClient getSearchClient() {
// if (searchClient == null) {
// createSearchClients();
// }
// return searchClient;
// }

private SearchIndexClient createSearchClients() {
String uri = cfg.get("endpoint");
var requiredToken = cfg.getOptional("token_file").map(Paths::get).map(tokenFilePath -> {

@@ -98,27 +90,21 @@ public class AzureAISearchSpace implements AutoCloseable {
}).orElseGet(() -> cfg.getOptional("token").orElseThrow(() -> new RuntimeException(
"You must provide either a 'token_file' or a 'token' to configure a Azure AI Search client")));

logger.info("{}: Creating new Azure AI Search Client with (masked) token/key [{}], uri/endpoint [{}]",
this.name, AzureAISearchAdapterUtils.maskDigits(requiredToken), uri);
logger.info(() -> "Creating new Azure AI Search Client with (masked) token/key ["
+ AzureAISearchAdapterUtils.maskDigits(requiredToken) + "], uri/endpoint [" + uri + "]");

var searchIndexClientBuilder = new SearchIndexClientBuilder().endpoint(uri);
// var searchClientBuilder = new SearchClientBuilder().endpoint(uri);
if (!requiredToken.isBlank()) {
searchIndexClientBuilder = searchIndexClientBuilder.credential(new AzureKeyCredential(requiredToken));
// searchClientBuilder = searchClientBuilder.credential(new AzureKeyCredential(requiredToken));
} else {
TokenCredential tokenCredential = new DefaultAzureCredentialBuilder().build();
searchIndexClientBuilder = searchIndexClientBuilder.credential(tokenCredential);
// searchClientBuilder = searchClientBuilder.credential(tokenCredential);
}
// Should we leave these below to leverage the SearchServiceVersion.getLatest()?
String apiVersion = cfg.getOptional("api_version").orElse(SearchServiceVersion.V2024_07_01.name());
logger.warn(
"Latest search service version supported by this client is '{}', but we're using '{}' version. Ignore this warning if both are same.",
SearchServiceVersion.getLatest(), apiVersion);
// TODO - try to find a way to get rid of placeholder
// this.searchClient = searchClientBuilder.serviceVersion(SearchServiceVersion.valueOf(apiVersion))
// .indexName("PLACEHOLDER").buildClient();
() -> "Latest search service version supported by this client is '" + SearchServiceVersion.getLatest()
+ "', but we're using '" + apiVersion + "' version. Ignore this warning if both are same.");
return searchIndexClientBuilder.serviceVersion(SearchServiceVersion.valueOf(apiVersion)).buildClient();
}
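Stripped of the configuration lookups, the client construction above amounts to a few builder calls; a minimal standalone sketch with placeholder values follows. Only builder methods already shown in the diff are used; the import paths are my assumption about the Azure SDK for Java packages:

import com.azure.core.credential.AzureKeyCredential;
import com.azure.search.documents.SearchServiceVersion;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.indexes.SearchIndexClientBuilder;

// Placeholder endpoint/key; in the adapter these come from the 'endpoint', 'token'/'token_file',
// and 'api_version' configuration keys read above.
SearchIndexClient client = new SearchIndexClientBuilder()
    .endpoint("https://<service>.search.windows.net")
    .credential(new AzureKeyCredential("<api-key>"))
    .serviceVersion(SearchServiceVersion.V2024_07_01)
    .buildClient();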
@@ -31,30 +31,24 @@ public abstract class AzureAISearchBaseOp<T> implements CycleOp<Object> {
protected final static Logger logger = LogManager.getLogger(AzureAISearchBaseOp.class);

protected final SearchIndexClient searchIndexClient;
// protected final SearchClient searchClient;
protected final T request;
protected final LongFunction<Object> apiCall;

public AzureAISearchBaseOp(SearchIndexClient searchIndexClient, T requestParam) {
this.searchIndexClient = searchIndexClient;
// TODO - figure out how to do this cleanly
// this.searchClient = searchIndexClient.getSearchClient("PLACEHOLDER");
this.request = requestParam;
this.apiCall = this::applyOp;
}

public AzureAISearchBaseOp(SearchIndexClient searchIndexClient, T requestParam, LongFunction<Object> call) {
this.searchIndexClient = searchIndexClient;
// TODO - figure out how to do this cleanly
// this.searchClient = searchIndexClient.getSearchClient("PLACEHOLDER");
this.request = requestParam;
this.apiCall = call;
}

@SuppressWarnings("unchecked")
@Override
public final Object apply(long value) {
logger.trace("applying op: " + this);
logger.trace(() -> "applying op: " + this);

try {
Object result = applyOp(value);
@@ -28,16 +28,8 @@ public class AzureAISearchCreateOrUpdateIndexOp extends AzureAISearchBaseOp<Sear
public Object applyOp(long value) {
SearchIndex createResponse = null;
try {
if (logger.isDebugEnabled()) {
request.getFields().forEach((field) -> {
logger.debug(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>SearchIndex: Name:{}-ProfileName:{}-Type:{}-Dimension:{}",
field.getName(), field.getVectorSearchProfileName(), field.getType().toString(),
field.getVectorSearchDimensions());
});
}
createResponse = searchIndexClient.createOrUpdateIndex(request);
logger.debug("Successfully created the collection with return code of {}", createResponse.toString());
logger.debug("Successfully created the collection with return response: {}", createResponse.toString());
} catch (RuntimeException rte) {
throw rte;
}

@@ -30,12 +30,11 @@ public class AzureAISearchListIndexesOp extends AzureAISearchBaseOp<String> {
try {
PagedIterable<SearchIndex> response = searchIndexClient.listIndexes();
response.forEach((index) -> {
logger.info("Indexes available are: Name: {}, ETag: {}", index.getName(), index.getETag());
logger.info(() -> "Indexes available are: Name: " + index.getName() + ", ETag: " + index.getETag());
index.getFields().forEach(field -> {
logger.info(
"Field Name: {}, Field isKey?: {}, Field Dimension: {}, Field Vector Search Profile: {}",
field.getName(), field.isKey(), field.getVectorSearchDimensions(),
field.getVectorSearchProfileName());
logger.info(() -> "Field Name: " + field.getName() + ", Field isKey?: " + field.isKey()
+ ", Field Dimension: " + field.getVectorSearchDimensions()
+ ", Field Vector Search Profile: " + field.getVectorSearchProfileName());
});
});
} catch (RuntimeException rte) {
@@ -17,7 +17,6 @@ package io.nosqlbench.adapter.azureaisearch.ops;

import com.azure.core.util.Context;
import com.azure.search.documents.SearchClient;
import com.azure.search.documents.SearchDocument;
import com.azure.search.documents.indexes.SearchIndexClient;
import com.azure.search.documents.models.SearchOptions;
import com.azure.search.documents.util.SearchPagedIterable;

@@ -36,14 +35,6 @@ public class AzureAISearchSearchDocumentsOp extends AzureAISearchClientBaseOp<Se
searchDocsResponse = searchClient.search(null, // we've not implemented other complex searches yet here.
request,
Context.NONE);
if (logger.isInfoEnabled()) {
searchDocsResponse.forEach((r) -> {
SearchDocument doc = r.getDocument(SearchDocument.class);
logger.debug(
"Successfully searched the index and returned id: {}, score: {}, vector embedding: {}",
doc.get("id"), r.getScore(), doc.get("value"));
});
}
} catch (RuntimeException rte) {
throw rte;
}
@@ -33,18 +33,12 @@ public class AzureAISearchUploadDocumentsOp extends AzureAISearchClientBaseOp<Se
public Object applyOp(long value) {
IndexDocumentsResult uploadDocsResponse = null;
try {
// request.getFields().forEach((field) -> {
// logger.info(
// ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>[AzureAISearchUploadDocumentsOp] SearchIndex: Name:{}-ProfileName:{}-Type:{}-Dimension:{}",
// field.getName(), field.getVectorSearchProfileName(), field.getType().toString(),
// field.getVectorSearchDimensions());
// });
uploadDocsResponse = searchClient.uploadDocuments(List.of(request));
if (logger.isDebugEnabled()) {
uploadDocsResponse.getResults().forEach((r) -> {
logger.debug(
"Successfully created the collection with return status code: {}, key: {}, succeeded?: {}, error message: {}",
r.getStatusCode(), r.getKey(), r.isSucceeded(), r.getErrorMessage());
logger.debug(() -> "Successfully created the collection with return status code: "
+ r.getStatusCode() + ", key: " + r.getKey() + ", succeeded?: " + r.isSucceeded()
+ ", error message: " + r.getErrorMessage());
});
}
} catch (RuntimeException rte) {
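For orientation, the request passed to uploadDocuments above is a SearchDocument shaped like the workload template further down (an id plus a float vector under value); a hypothetical payload could be assembled along these lines, assuming SearchDocument's map-based constructor:

import java.util.List;
import java.util.Map;
import com.azure.search.documents.SearchDocument;

// Hypothetical document mirroring the workload fields 'id' and 'value'; not part of this commit.
SearchDocument doc = new SearchDocument(Map.of(
    "id", "0",
    "value", List.of(0.12f, 0.34f, 0.56f)));
// uploadDocuments(List.of(doc)) returns an IndexDocumentsResult whose per-document results carry
// the status code, key, success flag, and error message logged above.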
@@ -66,7 +66,6 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
@Override
public LongFunction<SearchIndex> getParamFunc(LongFunction<SearchIndexClient> clientF, ParsedOp op,
LongFunction<String> targetF) {
logger.debug(">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>getParamFunc");
LongFunction<SearchIndex> ebF = l -> new SearchIndex(targetF.apply(l));

Optional<LongFunction<Map>> fieldsMapF = op.getAsOptionalFunction("fields", Map.class);

@@ -94,28 +93,16 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
Map<String, Object> vsMap = mapLongFunc.apply(l);
VectorSearch vectorSearch = new VectorSearch();
vsMap.forEach((vsField, vsValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>vsField:{} vsValue:{}",
vsField, vsValue);
if (vsValue instanceof Map) {
((Map<String, Object>) vsValue).forEach((innerKey, innerValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>innerKey:{} innerValue:{}",
innerKey, innerValue);
if ("compressions".equals(vsField)) {
List<VectorSearchCompression> vsCompList = new ArrayList<>();
String kind;
if (((Map<String, Object>) innerValue).containsKey("kind")) {
kind = (String) ((Map<String, Object>) innerValue).get("kind");
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>compressions>>>>kind:{}",
kind);
if (kind.equals("scalarQuantization")) {
ScalarQuantizationCompression sqComp = new ScalarQuantizationCompression(innerKey);
((Map<String, Object>) innerValue).forEach((compressKey, compressValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>kind:{} compressKey:{} compressValue:{}",
kind, compressKey, compressValue);
if (compressKey.equals("kind")) {
sqComp.getKind().fromString((String) compressValue);
}

@@ -137,15 +124,11 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
}
});
vsCompList.add(sqComp);
// vsCompList.add(buildVectorSearchCompression(bqComp, compressKey, compressValue, true));
} else {
// BinaryQuantization is assumed here

BinaryQuantizationCompression bqComp = new BinaryQuantizationCompression(innerKey);
((Map<String, Object>) innerValue).forEach((compressKey, compressValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>kind:{} compressKey:{} compressValue:{}",
kind, compressKey, compressValue);
if (compressKey.equals("kind")) {
bqComp.getKind().fromString((String) compressValue);
}

@@ -157,15 +140,10 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
}
});
vsCompList.add(bqComp);
// vsCompList.add(
// buildVectorSearchCompression(bqComp, compressKey, compressValue, false));
}
} else {
VectorSearchCompression vsComp = new VectorSearchCompression(innerKey);
((Map<String, Object>) innerValue).forEach((compressKey, compressValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>kind:{} compressKey:{} compressValue:{}",
null, compressKey, compressValue);
if (compressKey.equals("kind")) {
vsComp.getKind().fromString((String) compressValue);
}

@@ -179,33 +157,18 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
vsCompList.add(vsComp);
}
vectorSearch.setCompressions(vsCompList);
vectorSearch.getCompressions().forEach((comp) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>compressions FINAL: Name:{}",
comp.getCompressionName());
});
}
if ("algorithms".equals(vsField)) {
List<VectorSearchAlgorithmConfiguration> vsAlgoList = new ArrayList<>();
String kind;
if (((Map<String, Object>) innerValue).containsKey("kind")) {
kind = (String) ((Map<String, Object>) innerValue).get("kind");
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms>>>>kind:{}",
kind);
if("hnsw".equals(kind)) {
HnswAlgorithmConfiguration hnswAlgoConf = new HnswAlgorithmConfiguration(innerKey);
((Map<String, Object>) innerValue).forEach((hnswKey, hnswValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms>>>>kind:{} hnswKey:{} hnswValue:{}",
kind, hnswKey, hnswValue);
if ("hnswParameters".equals(hnswKey)) {
((Map<String, Object>) innerValue)
.forEach((hnswParamsKey, hnswParamsValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms>>>>kind:{} hnswKey:{} hnswValue:{} hnswParamsKey:{} hnswParamsValue:{}",
kind, hnswKey, hnswValue, hnswParamsKey,
hnswParamsValue);
HnswParameters hnswParams = new HnswParameters();
if ("m".equals(hnswParamsKey)) {
hnswParams.setM(((Number) hnswParamsValue).intValue());
@@ -232,15 +195,9 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
ExhaustiveKnnAlgorithmConfiguration exhausKnnAlgoConf = new ExhaustiveKnnAlgorithmConfiguration(
innerKey);
((Map<String, Object>) innerValue).forEach((algoKey, algoValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms>>>>kind:{} algoKey:{} algoValue:{}",
kind, algoKey, algoValue);
if (algoKey.equals("exhaustiveKnnParameters")) {
ExhaustiveKnnParameters eKnnParms = new ExhaustiveKnnParameters();
((Map<String, Object>) algoValue).forEach((ekpKey, ekpVal) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms>>>>kind:{} algoKey:{} algoValue:{} ekpKey:{} ekpVal:{}",
kind, algoKey, algoValue, ekpKey, ekpVal);
if (ekpKey.equals("quantizedDataType")) {
eKnnParms.setMetric(
VectorSearchAlgorithmMetric.fromString((String) ekpVal));

@@ -253,45 +210,21 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
}
}
vectorSearch.setAlgorithms(vsAlgoList);
vectorSearch.getAlgorithms().forEach((algo) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>algorithms FINAL: Name:{}",
algo.getName());
});
}
if ("profiles".equals(vsField)) {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles");
List<VectorSearchProfile> vsProfileList = new ArrayList<>();
// VectorSearchProfile vsProfile = new VectorSearchProfile(innerKey, null);
((Map<String, Object>) vsValue).forEach((profKey, profVal) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles: profKey:{} profVal:{}",
profKey, profVal);
((Map<String, Object>) profVal).forEach((pK, pV) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles: profKey:{} profVal:{} pK:{} pV:{}",
profKey, profVal, pK, pV);
if ("algorithm".equals(pK)) {
vsProfile = new VectorSearchProfile(profKey, (String) pV);
}
if ("compression".equals(pK)) {
vsProfile.setCompressionName((String) pV);
}

logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles: Name:{}>>>AlgoName:{}>>>CompressionName:{}",
vsProfile.getName(), vsProfile.getAlgorithmConfigurationName(),
vsProfile.getCompressionName());
});
vsProfileList.add(vsProfile);
});
vectorSearch.setProfiles(vsProfileList);
vectorSearch.getProfiles().forEach((profile) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>VectorSearch>>>>buildVectorSearchStruct>>>>profiles FINAL: Name:{} AlgorithmConfName:{}",
profile.getName(), profile.getAlgorithmConfigurationName());
});
}
});
} else {

@@ -304,48 +237,15 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
}).orElse(null);
}

@SuppressWarnings({ "unchecked", "static-access" })
private VectorSearchCompression buildVectorSearchCompression(VectorSearchCompression vsComp, String key, Object val,
boolean isSQ) {
if (key.equals("kind")) {
vsComp.getKind().fromString((String) val);
}
if (key.equals("rerankWithOriginalVectors")) {
vsComp.setRerankWithOriginalVectors((Boolean) val);
}
if (key.equals("defaultOversampling")) {
vsComp.setDefaultOversampling(((Number) val).doubleValue());
}
if (isSQ) {
if (key.equals("scalarQuantizationParameters")) {
ScalarQuantizationParameters sqParams = new ScalarQuantizationParameters();
((Map<String, Object>) val).forEach((sqKey, sqVal) -> {
if (sqKey.equals("quantizedDataType")) {
sqParams.setQuantizedDataType(VectorSearchCompressionTarget.fromString((String) sqVal));
}
});
((ScalarQuantizationCompression) vsComp).setParameters(sqParams);
}
}
return vsComp;
}

@SuppressWarnings({ "unchecked", "rawtypes" })
private LongFunction<List<SearchField>> buildFieldsStruct(ParsedOp op) {
logger.debug(">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>buildFieldsStruct");
Optional<LongFunction<Map>> baseFunc = op.getAsOptionalFunction("fields", Map.class);
return baseFunc.<LongFunction<List<SearchField>>>map(mapLongFunc -> l -> {
Map<String, Object> fMap = mapLongFunc.apply(l);
List<SearchField> fieldsList = new ArrayList<>();
fMap.forEach((fName, fValue) -> {
if (fValue instanceof Map) {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>buildFieldsStruct>>>>fName:{} fValue:{}",
fName, fValue);
((Map<String, Object>) fValue).forEach((innerKey, innerValue) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>buildFieldsStruct>>>>fName:{} fValue:{} fName:{} fValue:{}",
fName, fValue, innerKey, innerValue);
if (innerKey.equals("type")) {
searchField = new SearchField(fName, SearchFieldDataType.fromString((String) innerValue));
}

@@ -357,8 +257,6 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
}
if (innerKey.equals("vectorSearchProfile")) {
searchField.setVectorSearchProfileName((String) innerValue);
logger.debug("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% {} %n",
searchField.getVectorSearchProfileName());
}
if (innerKey.equals("filterable")) {
searchField.setFilterable((Boolean) innerValue);

@@ -386,13 +284,6 @@ public class AzureAISearchCreateOrUpdateIndexOpDispenser extends AzureAISearchBa
+ fValue.getClass().getSimpleName() + " instead for the inner value");
}
fieldsList.add(searchField);
if (logger.isDebugEnabled()) {
fieldsList.forEach((field) -> {
logger.debug(
">>>>>>>>>>>>AzureAISearchCreateOrUpdateIndexOpDispenser>>>>>>>>>>>>buildFieldsStruct>>>> fields FINAL: Name:{} VSProfileName:{}",
field.getName(), field.getVectorSearchProfileName());
});
}
});
return fieldsList;
}).orElse(null);
@@ -24,6 +24,9 @@ import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchBaseOp;
import io.nosqlbench.adapter.azureaisearch.ops.AzureAISearchListIndexesOp;
import io.nosqlbench.adapters.api.templating.ParsedOp;

/**
* Lists the indexes available.
*/
public class AzureAISearchListIndexesOpDispenser extends AzureAISearchBaseOpDispenser<String> {
public AzureAISearchListIndexesOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
LongFunction<String> targetF) {

@@ -33,8 +33,12 @@ import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.nosqlbench.nb.api.errors.OpConfigError;

/**
* @see https://learn.microsoft.com/en-us/rest/api/searchservice/documents/search-get?view=rest-searchservice-2024-07-01&tabs=HTTP#rawvectorquery
* @see https://learn.microsoft.com/en-us/azure/search/vector-search-how-to-query?tabs=query-2024-07-01%2Cfilter-2024-07-01%2Cbuiltin-portal#vector-query-request
* @see <a href=
* "https://learn.microsoft.com/en-us/rest/api/searchservice/documents/search-get?view=rest-searchservice-2024-07-01&tabs=HTTP#rawvectorquery">
* Search GET API<a/>
* @see <a href=
* "https://learn.microsoft.com/en-us/azure/search/vector-search-how-to-query?tabs=query-2024-07-01%2Cfilter-2024-07-01%2Cbuiltin-portal#vector-query-request">How
* to query/vector search</a>
*/
public class AzureAISearchSearchDocumentsOpDispenser extends AzureAISearchBaseOpDispenser<SearchOptions> {
public AzureAISearchSearchDocumentsOpDispenser(AzureAISearchDriverAdapter adapter, ParsedOp op,
@@ -37,7 +37,8 @@ scenarios:
run tags==block:upload_documents
errors===warn,counter
cycles===TEMPLATE(train_cycles,TEMPLATE(trainsize,1000)) threads===TEMPLATE(train_threads,AUTO)
token_file===TEMPLATE(token_file) endpoint===TEMPLATE(azureaisearchhost)
endpoint=TEMPLATE(azureaisearchhost) token_file=TEMPLATE(token_file)
#token_file===TEMPLATE(token_file) endpoint===TEMPLATE(azureaisearchhost)

search_documents: >-
run tags==block:search_documents

@@ -94,6 +95,18 @@ blocks:
facetable: false
retrievable: true
hidden: false
# Caused by: com.azure.core.exception.HttpResponseException: Status code 400, "{"error":{"code":"OperationNotAllowed","message":"The request is invalid. Details: definition : The searchable field 'id' must be of type Edm.String or Collection(Edm.String) or Collection(Edm.Single).","details":[{"code":"CannotEnableFieldForSearching","message":"The searchable field 'id' must be of type Edm.String or Collection(Edm.String) or Collection(Edm.Single). Parameters: definition"}]}}"
# Caused by: com.azure.core.exception.HttpResponseException: Status code 400, "{"error":{"code":"InvalidRequestParameter","message":"The request is invalid. Details: definition : The key field 'id' must be of type Edm.String.","details":[{"code":"InvalidKeyField","message":"The key field 'id' must be of type Edm.String. Parameters: definition"}]}}"
# Caused by: com.azure.core.exception.HttpResponseException: Status code 400, "{"error":{"code":"InvalidRequestParameter","message":"The request is invalid. Details: definition : The key field 'id' is marked as non-retrievable. Please set the 'retrievable' property for this field to 'true' or leave it unset.","details":[{"code":"InvalidKeyField","message":"The key field 'id' is marked as non-retrievable. Please set the 'retrievable' property for this field to 'true' or leave it unset. Parameters: definition"}]}}"
# dummy_key:
# type: "Edm.Int32" # Data types - https://learn.microsoft.com/en-us/rest/api/searchservice/supported-data-types#edm-data-types-for-vector-fields
# key: false
# filterable: true
# sortable: true
# searchable: true
# facetable: false
# retrievable: true
# hidden: false
value:
type: "Collection(Edm.Single)"
dimensions: TEMPLATE(dimensions)

@@ -139,6 +152,7 @@ blocks:
upload_documents: "TEMPLATE(collection)"
fields:
id: "{row_key}"
# dummy_key: "{id_val}"
value: "{train_floatlist_TEMPLATE(filetype)}"

search_documents:

@@ -154,21 +168,21 @@ blocks:
fields: "value"
weight: 1.0
k: TEMPLATE(select_limit,100)
verifier-init: |
relevancy= new io.nosqlbench.nb.api.engine.metrics.wrappers.RelevancyMeasures(_parsed_op);
for (int k in List.of(100)) {
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.recall("recall",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.precision("precision",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.F1("F1",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.reciprocal_rank("RR",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.average_precision("AP",k));
}
verifier: |
// driver-specific function
actual_indices=io.nosqlbench.adapter.azureaisearch.AzureAISearchAdapterUtils.responseFieldToStringArray("id",result)
System.out.println("actual_indices ------>>>>: " + actual_indices);
// driver-agnostic function
relevancy.accept({relevant_indices_TEMPLATE(filetype)},actual_indices);
// because we are "verifying" although this needs to be reorganized
return true;
verifier-init: |
relevancy= new io.nosqlbench.nb.api.engine.metrics.wrappers.RelevancyMeasures(_parsed_op);
for (int k in List.of(100)) {
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.recall("recall",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.precision("precision",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.F1("F1",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.reciprocal_rank("RR",k));
relevancy.addFunction(io.nosqlbench.engine.extensions.computefunctions.RelevancyFunctions.average_precision("AP",k));
}
verifier: |
// driver-specific function
actual_indices=io.nosqlbench.adapter.azureaisearch.AzureAISearchAdapterUtils.searchDocumentsResponseIdToIntArray("id",result)
// System.out.println("actual_indices ------>>>>: " + actual_indices);
// driver-agnostic function
relevancy.accept({relevant_indices_TEMPLATE(filetype)},actual_indices);
// because we are "verifying" although this needs to be reorganized
return true;