initial commit

This commit is contained in:
Mark Wolters 2024-07-03 01:08:17 -04:00
parent 9e3b6241f1
commit 6ce0ead9f7
13 changed files with 547 additions and 0 deletions

View File

@ -0,0 +1,66 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Copyright (c) 2024 nosqlbench
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
~
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Build descriptor for the adapter-ragstack module (the "ragstack" driver adapter). -->
<modelVersion>4.0.0</modelVersion>
<!-- Inherits from the nosqlbench parent POM located two directories up. -->
<parent>
<groupId>io.nosqlbench</groupId>
<artifactId>nosqlbench</artifactId>
<version>5.21.1-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<artifactId>adapter-ragstack</artifactId>
<properties>
<!-- Sources are compiled at, and target, Java 21. -->
<maven.compiler.source>21</maven.compiler.source>
<maven.compiler.target>21</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- Provides the @Service annotation used to register the adapter and its loader. -->
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>nb-annotations</artifactId>
<version>${revision}</version>
<scope>compile</scope>
</dependency>
<!-- Driver adapter API: BaseDriverAdapter, OpMapper, OpDispenser, ParsedOp. -->
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>adapters-api</artifactId>
<version>${revision}</version>
<scope>compile</scope>
</dependency>
<!-- Astra DB Java client; RagstackOpDispenser imports com.datastax.astra.client.model.*. -->
<dependency>
<groupId>com.datastax.astra</groupId>
<artifactId>astra-db-java</artifactId>
<version>1.1.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.python/jython -->
<!-- Embedded Python interpreter used by RagstackSpace to build the vector store. -->
<!-- NOTE(review): Jython 2.7.x implements the Python 2.7 language; confirm that the -->
<!-- langchain / datasets packages imported by RagstackSpace can be loaded by it. -->
<dependency>
<groupId>org.python</groupId>
<artifactId>jython</artifactId>
<version>2.7.3</version>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.ragstack;
import io.nosqlbench.adapter.ragstack.ops.RagstackBaseOp;
import io.nosqlbench.adapters.api.activityimpl.OpMapper;
import io.nosqlbench.adapters.api.activityimpl.uniform.BaseDriverAdapter;
import io.nosqlbench.adapters.api.activityimpl.uniform.DriverAdapter;
import io.nosqlbench.nb.annotations.Service;
import io.nosqlbench.nb.api.components.core.NBComponent;
import io.nosqlbench.nb.api.config.standard.NBConfigModel;
import io.nosqlbench.nb.api.config.standard.NBConfiguration;
import io.nosqlbench.nb.api.labels.NBLabels;
import java.util.function.Function;
/**
 * Driver adapter registered under the {@code ragstack} selector.
 *
 * <p>It wires together the three pieces this module provides: the op mapper
 * ({@link RagstackOpMapper}), the per-space state ({@link RagstackSpace}) and the
 * configuration model contributed by {@link RagstackSpace#getConfigModel()}.</p>
 */
@Service(value = DriverAdapter.class, selector = "ragstack")
public class RagstackDriverAdapter extends BaseDriverAdapter<RagstackBaseOp, RagstackSpace> {

    /**
     * @param parent      the component this adapter is attached to
     * @param childLabels the labels to apply to this adapter
     */
    public RagstackDriverAdapter(NBComponent parent, NBLabels childLabels) {
        super(parent, childLabels);
    }

    /**
     * Creates the mapper which turns parsed op templates into op dispensers.
     * The return type is parameterized (rather than raw) so callers keep
     * compile-time type checking on the produced ops.
     *
     * @return a new {@link RagstackOpMapper} bound to this adapter
     */
    @Override
    public OpMapper<RagstackBaseOp> getOpMapper() {
        return new RagstackOpMapper(this);
    }

    /**
     * @param cfg the adapter configuration handed to every created space
     * @return a function which builds a {@link RagstackSpace} for a given space name
     */
    @Override
    public Function<String, ? extends RagstackSpace> getSpaceInitializer(NBConfiguration cfg) {
        return spaceName -> new RagstackSpace(spaceName, cfg);
    }

    /**
     * @return the base adapter config model extended with the {@link RagstackSpace} parameters
     */
    @Override
    public NBConfigModel getConfigModel() {
        return super.getConfigModel().add(RagstackSpace.getConfigModel());
    }
}

View File

@ -0,0 +1,30 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.ragstack;
import io.nosqlbench.adapter.diag.DriverAdapterLoader;
import io.nosqlbench.nb.annotations.Service;
import io.nosqlbench.nb.api.components.core.NBComponent;
import io.nosqlbench.nb.api.labels.NBLabels;
/**
 * Loader registered under the {@code ragstack} selector; its only job is to
 * instantiate {@link RagstackDriverAdapter}.
 */
@Service(value = DriverAdapterLoader.class, selector = "ragstack")
public class RagstackDriverAdapterLoader implements DriverAdapterLoader {

    /**
     * @param parent      the component the new adapter is attached to
     * @param childLabels the labels to apply to the new adapter
     * @return a freshly constructed {@link RagstackDriverAdapter}
     */
    @Override
    public RagstackDriverAdapter load(NBComponent parent, NBLabels childLabels) {
        final RagstackDriverAdapter ragstackAdapter = new RagstackDriverAdapter(parent, childLabels);
        return ragstackAdapter;
    }
}

View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.ragstack;
import io.nosqlbench.adapter.ragstack.opdispensers.RagstackLoadDatasetOpDispenser;
import io.nosqlbench.adapter.ragstack.ops.RagstackBaseOp;
import io.nosqlbench.adapter.ragstack.ops.RagstackOpType;
import io.nosqlbench.adapters.api.activityimpl.OpDispenser;
import io.nosqlbench.adapters.api.activityimpl.OpMapper;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import io.nosqlbench.engine.api.templating.TypeAndTarget;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
 * Maps a parsed op template onto the dispenser for its {@link RagstackOpType}.
 */
public class RagstackOpMapper implements OpMapper<RagstackBaseOp> {

    private static final Logger logger = LogManager.getLogger(RagstackOpMapper.class);

    private final RagstackDriverAdapter adapter;

    /**
     * @param ragstackDriverAdapter the adapter handed to every dispenser this mapper creates
     */
    public RagstackOpMapper(RagstackDriverAdapter ragstackDriverAdapter) {
        this.adapter = ragstackDriverAdapter;
    }

    /**
     * Resolves the op type and its String target from the op template, then
     * builds the dispenser for that op type.
     *
     * @param op the parsed op template
     * @return the dispenser which will produce ops for this template
     */
    @Override
    public OpDispenser<? extends RagstackBaseOp> apply(ParsedOp op) {
        // "type" and "collection" are passed through to getTypeAndTarget unchanged;
        // presumably they name the fallback type/target fields - verify against ParsedOp.
        final TypeAndTarget<RagstackOpType, String> resolved =
            op.getTypeAndTarget(RagstackOpType.class, String.class, "type", "collection");
        final RagstackOpType opType = resolved.enumId;

        logger.debug(() -> "Using '" + opType + "' op type for op template '" + op.getName() + "'");

        // Exhaustive switch expression: adding a constant to RagstackOpType
        // without a matching case here is a compile error.
        return switch (opType) {
            case load_dataset -> new RagstackLoadDatasetOpDispenser(adapter, op, resolved.targetFunction);
        };
    }
}

View File

@ -0,0 +1,197 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.ragstack;
import io.nosqlbench.nb.api.config.standard.ConfigModel;
import io.nosqlbench.nb.api.config.standard.NBConfigModel;
import io.nosqlbench.nb.api.config.standard.NBConfiguration;
import io.nosqlbench.nb.api.config.standard.Param;
import io.nosqlbench.nb.api.errors.BasicError;
import lombok.Getter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.python.core.PyObject;
import org.python.util.PythonInterpreter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Optional;
/**
 * Per-space state for the ragstack adapter.
 *
 * <p>On construction the space resolves its settings from the supplied
 * {@link NBConfiguration} (each secret may be given inline or via a file) and
 * then builds a Python {@code AstraDBVectorStore} through an embedded
 * {@link PythonInterpreter}.</p>
 *
 * <p>NOTE(review): this module depends on Jython 2.7.3, which implements the
 * Python 2.7 language; confirm that the Python packages imported in
 * {@link #createVectorStore()} can actually be loaded in that runtime.</p>
 */
public class RagstackSpace {
    private static final Logger logger = LogManager.getLogger(RagstackSpace.class);

    private final NBConfiguration config;
    // Space name as handed over by the adapter; stored but not read within this class.
    private final String name;
    @Getter
    private final String astraToken;
    @Getter
    private final String astraApiEndpoint;
    @Getter
    private final String openApiKey;
    @Getter
    private final String namespace;
    @Getter
    private final String collection;
    private final PyObject vstore;

    /**
     * Resolves all settings and creates the vector store.
     *
     * @param name the name of this space
     * @param cfg  the adapter configuration to read settings from
     * @throws BasicError       if a required setting is missing, is configured both
     *                          inline and via file, or the token file is empty
     * @throws RuntimeException if a configured file cannot be read
     */
    public RagstackSpace(String name, NBConfiguration cfg) {
        this.config = cfg;
        this.name = name;
        // Resolution order is kept stable so configuration errors surface in a fixed order.
        this.astraToken = resolveToken();
        this.astraApiEndpoint = resolveValueOrFile("astraApiEndpoint", "astraApiEndpointFile");
        this.openApiKey = resolveValueOrFile("openApiKey", "openApiKeyFile");
        this.namespace = config.getOptional("namespace").orElse(null);
        this.collection = config.getOptional("collection").orElse(null);
        this.vstore = createVectorStore();
    }

    /**
     * @return the Python vector store object created during construction
     */
    public PyObject getVstore() {
        return vstore;
    }

    /**
     * Builds the Python {@code AstraDBVectorStore}.
     *
     * <p>Configuration values are bound as interpreter variables instead of being
     * spliced into the Python source. Splicing made the collection name evaluate
     * as a Python identifier and used the token and endpoint <em>values</em> as
     * {@code os.environ} keys; it also allowed arbitrary configuration text to be
     * executed as code.</p>
     *
     * @return the {@code vstore} object defined by the Python snippet
     * @throws BasicError if no collection is configured
     */
    private PyObject createVectorStore() {
        if (collection == null) {
            throw new BasicError("You must configure collection to create the ragstack vector store");
        }
        // NOTE(review): the interpreter is closed when this block exits while the returned
        // object is used afterwards - confirm the object remains usable after close().
        try (PythonInterpreter pyInterp = new PythonInterpreter()) {
            pyInterp.set("nb_collection_name", collection);
            pyInterp.set("nb_astra_token", astraToken);
            pyInterp.set("nb_astra_api_endpoint", astraApiEndpoint);

            pyInterp.exec("from datasets import load_dataset");
            pyInterp.exec("from dotenv import load_dotenv");
            pyInterp.exec("from langchain_community.document_loaders import PyPDFDirectoryLoader");
            pyInterp.exec("from langchain_astradb import AstraDBVectorStore");
            pyInterp.exec("from langchain_openai import OpenAIEmbeddings");
            pyInterp.exec("from langchain_core.documents import Document");
            // NOTE(review): openApiKey is resolved from config but not handed to
            // OpenAIEmbeddings here (the original did not either) - confirm how the
            // key is expected to reach the embeddings client.
            pyInterp.exec("vstore = AstraDBVectorStore(\n" +
                "    embedding=OpenAIEmbeddings(),\n" +
                "    collection_name=nb_collection_name,\n" +
                "    token=nb_astra_token,\n" +
                "    api_endpoint=nb_astra_api_endpoint,\n" +
                ")");
            return pyInterp.get("vstore");
        }
    }

    /**
     * Resolves a setting that must be supplied exactly once, either inline or
     * through a file. A value read from a file is trimmed; an inline value is
     * used as given.
     *
     * @param valueKey the config parameter holding the inline value
     * @param fileKey  the config parameter holding the path of a file containing the value
     * @return the resolved value
     * @throws BasicError if both or neither of the two parameters are configured
     */
    private String resolveValueOrFile(String valueKey, String fileKey) {
        Optional<String> inlineValue = config.getOptional(valueKey);
        Optional<String> filePath = config.getOptional(fileKey);
        if (inlineValue.isPresent() && filePath.isPresent()) {
            throw new BasicError("You can only configure one of " + valueKey + " or " + fileKey);
        }
        if (inlineValue.isEmpty() && filePath.isEmpty()) {
            throw new BasicError("You must configure one of " + valueKey + " or " + fileKey);
        }
        // Exactly one of the two is present at this point.
        return inlineValue.orElseGet(() -> readTrimmed(filePath.get()));
    }

    /**
     * Reads a whole file and strips leading and trailing whitespace.
     *
     * @param filePath the path of the file to read
     * @return the trimmed file contents
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be read
     */
    private static String readTrimmed(String filePath) {
        try {
            return Files.readString(Path.of(filePath)).trim();
        } catch (IOException e) {
            throw new RuntimeException("Error while reading configuration from file:" + filePath, e);
        }
    }

    /**
     * Resolves the Astra token. When {@code astraTokenFile} is configured it
     * takes precedence; otherwise the {@code astraToken} parameter is read.
     *
     * @return the Astra token
     */
    private String resolveToken() {
        Optional<String> tokenFilePath = config.getOptional("astraTokenFile");
        if (tokenFilePath.isPresent()) {
            return readTokenFile(tokenFilePath.get());
        }
        return config.get("astraToken");
    }

    /**
     * Reads the token from the first line of the given file.
     *
     * @param filePath the path of the token file
     * @return the first line of the file, trimmed
     * @throws BasicError       if the file has no first line or that line is blank
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be read
     */
    private String readTokenFile(String filePath) {
        Path path = Path.of(filePath);
        try {
            // findFirst() instead of getFirst(): an empty file yields a clear
            // configuration error rather than a bare NoSuchElementException.
            return Files.readAllLines(path).stream()
                .findFirst()
                .map(String::trim)
                .filter(line -> !line.isEmpty())
                .orElseThrow(() -> new BasicError("Token file is empty:" + path));
        } catch (IOException e) {
            String error = "Error while reading token from file:" + path;
            logger.error(error, e);
            throw new RuntimeException(error, e);
        }
    }

    /**
     * @return the read-only configuration model describing every parameter this space reads
     */
    public static NBConfigModel getConfigModel() {
        return ConfigModel.of(RagstackSpace.class)
            .add(
                Param.optional("astraTokenFile", String.class)
                    .setDescription("file to load the Astra token from")
            )
            .add(
                Param.optional("astraToken", String.class)
                    .setDescription("the Astra token used to connect to the database")
            )
            .add(
                Param.optional("astraApiEndpoint", String.class)
                    .setDescription("the API endpoint for the Astra database")
            )
            .add(
                Param.optional("astraApiEndpointFile", String.class)
                    .setDescription("file to load the API endpoint for the Astra database")
            )
            .add(
                Param.optional("openApiKeyFile", String.class)
                    .setDescription("file to load the OpenAI API key from")
            )
            .add(
                Param.optional("openApiKey", String.class)
                    .setDescription("the OpenAI API key")
            )
            .add(
                Param.defaultTo("namespace", "default_namespace")
                    .setDescription("The Astra namespace to use")
            )
            .add(
                Param.optional("collection", String.class)
                    .setDescription("optional collection to use")
            )
            .asReadOnly();
    }
}

View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.adapter.ragstack.opdispensers;
import io.nosqlbench.adapter.ragstack.RagstackDriverAdapter;
import io.nosqlbench.adapter.ragstack.ops.RagstackBaseOp;
import io.nosqlbench.adapters.api.activityimpl.OpDispenser;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import java.util.function.LongFunction;
/**
 * Dispenser for the {@code load_dataset} op type.
 *
 * <p>The op itself is not implemented yet: {@link RagstackBaseOp} is abstract and
 * this dispenser has no concrete op to hand out.</p>
 */
public class RagstackLoadDatasetOpDispenser extends RagstackOpDispenser {

    /**
     * @param adapter        the adapter this dispenser belongs to
     * @param op             the parsed op template
     * @param targetFunction maps a cycle value to the op target
     */
    public RagstackLoadDatasetOpDispenser(RagstackDriverAdapter adapter, ParsedOp op, LongFunction<String> targetFunction) {
        super(adapter, op, targetFunction);
    }

    /**
     * Always fails, because no {@code load_dataset} op exists yet. Failing here
     * with an explicit message replaces the previous {@code null} return, which
     * only surfaced later as an error unrelated to its real cause.
     *
     * @param value the cycle value
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override
    public RagstackBaseOp getOp(long value) {
        throw new UnsupportedOperationException(
            "The load_dataset op is not implemented yet (requested for cycle " + value + ")");
    }
}

View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.ragstack.opdispensers;
import com.datastax.astra.client.model.*;
import io.nosqlbench.adapter.ragstack.RagstackSpace;
import io.nosqlbench.adapter.ragstack.ops.RagstackBaseOp;
import io.nosqlbench.adapters.api.activityimpl.BaseOpDispenser;
import io.nosqlbench.adapters.api.activityimpl.uniform.DriverAdapter;
import io.nosqlbench.adapters.api.templating.ParsedOp;
import java.util.*;
import java.util.function.LongFunction;
public abstract class RagstackOpDispenser extends BaseOpDispenser<RagstackBaseOp, RagstackSpace> {
protected final LongFunction<String> targetFunction;
protected final LongFunction<RagstackSpace> spaceFunction;
protected RagstackOpDispenser(DriverAdapter<? extends RagstackBaseOp, RagstackSpace> adapter, ParsedOp op,
LongFunction<String> targetFunction) {
super(adapter, op);
this.targetFunction = targetFunction;
this.spaceFunction = adapter.getSpaceFunc(op);
}
}

View File

@ -0,0 +1,23 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.adapter.ragstack.ops;
import io.nosqlbench.adapters.api.activityimpl.uniform.flowtypes.CycleOp;
/**
 * Base type for all ragstack ops.
 *
 * <p>{@link CycleOp} is parameterized with {@code Object} instead of being used
 * as a raw type, so subclasses and callers keep generic type checking while the
 * result type stays as general as it was.</p>
 */
public abstract class RagstackBaseOp implements CycleOp<Object> {
}

View File

@ -0,0 +1,21 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.nosqlbench.adapter.ragstack.ops;
/**
 * The op types understood by the ragstack adapter. Constant names are matched
 * against op templates, which is why they are lower case with underscores.
 */
public enum RagstackOpType {
    /** The only op type defined so far. */
    load_dataset
}

View File

@ -0,0 +1,9 @@
# Minimal ragstack workload: a single scenario step that runs the load_dataset block once.
scenarios:
  default:
    # The tag filter key is "block" (singular); "blocks:" selects no ops.
    create_collection: run driver=ragstack tags==block:load_dataset cycles=1
blocks:
  load_dataset:
    ops:
      op1:
        # The op field name must be a RagstackOpType constant (load_dataset) so that
        # RagstackOpMapper can resolve the op type; the value is the op target.
        load_dataset: "loadDataset"

View File

@ -267,6 +267,20 @@
</dependencies>
</profile>
<profile>
<!-- Adds the adapter-ragstack artifact as a dependency of this build. Active by default. -->
<!-- NOTE(review): the companion adapter-ragstack-module profile is declared with -->
<!-- activeByDefault=false while this one is true - confirm the combination is intended. -->
<id>adapter-ragstack-include</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
<groupId>io.nosqlbench</groupId>
<artifactId>adapter-ragstack</artifactId>
<version>${revision}</version>
</dependency>
</dependencies>
</profile>
</profiles>
</project>

View File

@ -193,6 +193,15 @@
<module>adapter-qdrant</module>
</modules>
</profile>
<profile>
<!-- Adds the adapter-ragstack module to the build when this profile is activated. -->
<!-- Not active by default. -->
<!-- NOTE(review): the same commit also lists nb-adapters/adapter-ragstack directly in -->
<!-- the root POM's modules - confirm which of the two is meant to build the module. -->
<id>adapter-ragstack-module</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<modules>
<module>adapter-ragstack</module>
</modules>
</profile>
</profiles>
</project>

View File

@ -62,6 +62,7 @@
<!-- Documentation -->
<module>nb-docsys</module>
<module>nb-adapters/adapter-ragstack</module>
</modules>