code cleanup

This commit is contained in:
Mark Wolters 2024-03-06 17:46:29 -04:00
parent efb624d595
commit 01d84be8d4
7 changed files with 0 additions and 582 deletions

View File

@ -1,69 +0,0 @@
/*
* Copyright (c) 2023-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.adapter.pinecone.datamappers.functions.hdf_to_pcfilter;
import io.jhdf.HdfFile;
import io.jhdf.api.Dataset;
import io.nosqlbench.nb.api.nbio.NBIO;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.DatasetFilter;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.DatasetParser;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.FilteredDatasetParser;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.from_json.SingleConditionFilterByKeyword;
import java.util.function.LongFunction;
/**
 * Long-to-String binding which, for each cycle, reads one record from an HDF5
 * dataset and yields the portion of the Pinecone filter predicate selected by
 * keyword from that record.
 */
@ThreadSafeMapper
@Categories(Category.experimental)
public class HdfToPcPredicatesByKeyword implements LongFunction<String> {
    private final HdfFile hdfFile;
    private final Dataset dataset;
    private final int recordCount;
    private final FilteredDatasetParser parser;
    private final DatasetFilter filter;

    /**
     * Create the binding function.
     *
     * @param filename      HDF5 file, resolved through the NBIO search paths
     * @param datasetname   path of the dataset inside the HDF5 file
     * @param parsername    name of the FilteredDatasetParser implementation to use
     * @param filterportion keyword selecting which portion of the predicate to extract
     */
    public HdfToPcPredicatesByKeyword(String filename, String datasetname, String parsername, String filterportion) {
        this.hdfFile = new HdfFile(NBIO.all().search(filename).one().asPath());
        this.dataset = hdfFile.getDatasetByPath(datasetname);
        this.recordCount = dataset.getDimensions()[0];
        this.filter = new SingleConditionFilterByKeyword(filterportion);
        this.parser = DatasetParser.filteredParserFactory(parsername);
        this.parser.setFilter(filter);
    }

    @Override
    public String apply(long cycle) {
        // Wrap the cycle onto the dataset's first dimension and read a 1-record slice.
        long[] offset = new long[]{cycle % recordCount};
        int[] shape = new int[]{1};
        String[] slice = (String[]) dataset.getData(offset, shape);
        // Pinecone's predicate grammar uses EQ where the source records say "match".
        return parser.parse(slice[0]).replace("match", "EQ");
    }
}

View File

@ -1,70 +0,0 @@
/*
* Copyright (c) 2023-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.adapter.pinecone.datamappers.functions.hdf_to_pcfilter;
import io.jhdf.HdfFile;
import io.jhdf.api.Dataset;
import io.nosqlbench.nb.api.nbio.NBIO;
import io.nosqlbench.virtdata.api.annotations.Categories;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.annotations.ThreadSafeMapper;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.DatasetFilter;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.DatasetParser;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.FilteredDatasetParser;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.from_json.MultiConditionFilterByLevel;
import java.util.function.LongFunction;
/**
 * Long-to-String binding which, for each cycle, reads one record from an HDF5
 * dataset and yields the portion of the Pinecone filter predicate selected by
 * nesting level from that record.
 */
@ThreadSafeMapper
@Categories(Category.experimental)
public class HdfToPcPredicatesByLevel implements LongFunction<String> {
    private final HdfFile hdfFile;
    private final Dataset dataset;
    private final int recordCount;
    private final FilteredDatasetParser parser;
    private final DatasetFilter filter;

    /**
     * Create the binding function.
     *
     * @param filename    HDF5 file, resolved through the NBIO search paths
     * @param datasetname path of the dataset inside the HDF5 file
     * @param parsername  name of the FilteredDatasetParser implementation to use
     * @param level       nesting level of the predicate portion to extract
     * @param isValue     when true extract the values at that level, otherwise the keys
     */
    public HdfToPcPredicatesByLevel(String filename, String datasetname, String parsername, int level, boolean isValue) {
        this.hdfFile = new HdfFile(NBIO.all().search(filename).one().asPath());
        this.dataset = hdfFile.getDatasetByPath(datasetname);
        this.recordCount = dataset.getDimensions()[0];
        this.filter = new MultiConditionFilterByLevel(level, isValue);
        this.parser = DatasetParser.filteredParserFactory(parsername);
        this.parser.setFilter(filter);
    }

    @Override
    public String apply(long cycle) {
        // Wrap the cycle onto the dataset's first dimension and read a 1-record slice.
        long[] offset = new long[]{cycle % recordCount};
        int[] shape = new int[]{1};
        String[] slice = (String[]) dataset.getData(offset, shape);
        return parser.parse(slice[0]);
    }
}

View File

@ -1,78 +0,0 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.from_json;
import com.google.gson.JsonObject;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.DatasetFilter;
/**
 * DatasetFilter which extracts, from a multi-condition ("and"/"or") JSON predicate,
 * a comma-joined list of one component per condition: the field names, the operator
 * names, or the comparator values, chosen by the keyword given at construction.
 */
public class MultiConditionFilterByKeyword implements DatasetFilter {
    private final String filterString;
    private static final String AND = "and";
    private static final String OR = "or";
    private static final String FIELD = "field";
    private static final String OPERATOR = "operator";
    private static final String COMPARATOR = "comparator";
    private static final String COMMA = ",";
    private static final String VALUE = "value";

    /**
     * @param filterString one of "field", "operator", or "comparator"
     */
    public MultiConditionFilterByKeyword(String filterString) {
        this.filterString = filterString;
    }

    /**
     * Dispatch on the top-level condition type of the predicate.
     *
     * @throws RuntimeException if the predicate is neither "and" nor "or"
     */
    @Override
    public String applyFilter(JsonObject json) {
        if (json.has(AND)) {
            return parseConditions(json, AND);
        } else if (json.has(OR)) {
            return parseConditions(json, OR);
        } else {
            throw new RuntimeException("Unknown predicate type: " + json.keySet());
        }
    }

    /**
     * Walk each condition in the array and collect the requested component.
     *
     * @throws RuntimeException if the keyword given at construction is unknown
     */
    private String parseConditions(JsonObject json, String conditionType) {
        StringBuilder sb = new StringBuilder();
        switch (filterString) {
            case FIELD: {
                // One key per condition object: the field name.
                json.get(conditionType).getAsJsonArray().forEach(condition -> condition.getAsJsonObject().keySet()
                    .forEach(field -> sb.append(field).append(COMMA)));
                break;
            }
            case OPERATOR: {
                // One key per field object: the operator name (e.g. "match").
                json.get(conditionType).getAsJsonArray().forEach(condition -> condition.getAsJsonObject().keySet()
                    .forEach(field -> condition.getAsJsonObject().get(field).getAsJsonObject().keySet()
                        .forEach(operator -> sb.append(operator).append(COMMA))));
                break;
            }
            case COMPARATOR: {
                // The "value" entry nested under each operator object.
                json.get(conditionType).getAsJsonArray().forEach(condition -> condition.getAsJsonObject().keySet().forEach(field -> {
                    JsonObject p = condition.getAsJsonObject().get(field).getAsJsonObject();
                    p.keySet().forEach(operator -> sb.append(p.get(operator).getAsJsonObject().get(VALUE)).append(COMMA));
                }));
                break;
            }
            default: {
                throw new RuntimeException("Unknown filter string: " + filterString);
            }
        }
        return finish(sb);
    }

    /**
     * Drop the trailing comma (guarding against an empty condition list, which
     * previously threw StringIndexOutOfBoundsException) and strip the JSON quotes.
     */
    private static String finish(StringBuilder sb) {
        if (sb.length() > 0) {
            sb.deleteCharAt(sb.length() - 1);
        }
        return sb.toString().replace("\"", "");
    }
}

View File

@ -1,95 +0,0 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.from_json;
import com.google.gson.JsonObject;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.DatasetFilter;
/**
 * DatasetFilter which extracts, from a multi-condition ("and"/"or") JSON predicate,
 * a comma-joined list of either the keys or the "value" entries found at a given
 * nesting level (1 = field, 2 = operator, 3 = comparator).
 */
public class MultiConditionFilterByLevel implements DatasetFilter {
    private final int filterLevel;
    private final boolean isValue;
    private static final String AND = "and";
    private static final String OR = "or";
    private static final String COMMA = ",";
    private static final String VALUE = "value";

    /**
     * @param filterLevel nesting level to extract from: 1, 2, or 3
     * @param isValue     when true collect the "value" entries at that level, otherwise the keys
     */
    public MultiConditionFilterByLevel(int filterLevel, boolean isValue) {
        this.filterLevel = filterLevel;
        this.isValue = isValue;
    }

    /**
     * Dispatch on the top-level condition type of the predicate.
     *
     * @throws RuntimeException if the predicate is neither "and" nor "or"
     */
    @Override
    public String applyFilter(JsonObject json) {
        if (json.has(AND)) {
            return parseConditions(json, AND);
        } else if (json.has(OR)) {
            return parseConditions(json, OR);
        } else {
            throw new RuntimeException("Unknown predicate type: " + json.keySet());
        }
    }

    /**
     * Walk each condition in the array and collect keys or values at the configured level.
     *
     * @throws RuntimeException if the configured level is not 1, 2, or 3
     */
    private String parseConditions(JsonObject json, String conditionType) {
        StringBuilder sb = new StringBuilder();
        switch (filterLevel) {
            case 1: {
                // Field level: one key per condition object.
                json.get(conditionType).getAsJsonArray().forEach(condition -> condition.getAsJsonObject().keySet()
                    .forEach(field -> {
                        if (isValue) {
                            sb.append(condition.getAsJsonObject().get(field).getAsJsonObject().get(VALUE)).append(COMMA);
                        } else {
                            sb.append(field).append(COMMA);
                        }
                    }));
                break;
            }
            case 2: {
                // Operator level: one key per field object.
                json.get(conditionType).getAsJsonArray().forEach(condition -> condition.getAsJsonObject().keySet()
                    .forEach(field -> condition.getAsJsonObject().get(field).getAsJsonObject().keySet()
                        .forEach(operator -> {
                            if (isValue) {
                                sb.append(condition.getAsJsonObject().get(field).getAsJsonObject().get(operator).getAsJsonObject().get(VALUE)).append(COMMA);
                            } else {
                                sb.append(operator).append(COMMA);
                            }
                        })));
                break;
            }
            case 3: {
                // Comparator level: the "value" entry (or key) under each operator object.
                json.get(conditionType).getAsJsonArray().forEach(condition -> condition.getAsJsonObject().keySet().forEach(field -> {
                    JsonObject p = condition.getAsJsonObject().get(field).getAsJsonObject();
                    p.keySet().forEach(operator -> {
                        if (isValue) {
                            sb.append(p.get(operator).getAsJsonObject().get(VALUE)).append(COMMA);
                        } else {
                            sb.append(operator).append(COMMA);
                        }
                    });
                }));
                break;
            }
            default: {
                throw new RuntimeException("Unsupported Filter Level: " + filterLevel);
            }
        }
        return finish(sb);
    }

    /**
     * Drop the trailing comma (guarding against an empty condition list, which
     * previously threw StringIndexOutOfBoundsException) and strip the JSON quotes.
     */
    private static String finish(StringBuilder sb) {
        if (sb.length() > 0) {
            sb.deleteCharAt(sb.length() - 1);
        }
        return sb.toString().replace("\"", "");
    }
}

View File

@ -1,64 +0,0 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.from_json;
import com.google.gson.JsonObject;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.DatasetFilter;
/**
 * DatasetFilter which extracts a single component — the field name, the operator
 * name, or the comparator value, chosen by the keyword given at construction —
 * from the FIRST condition of an "and"/"or" JSON predicate.
 */
public class SingleConditionFilterByKeyword implements DatasetFilter {
    private final String filterString;
    private static final String AND = "and";
    private static final String OR = "or";
    private static final String FIELD = "field";
    private static final String OPERATOR = "operator";
    private static final String COMPARATOR = "comparator";
    private static final String VALUE = "value";

    /**
     * @param filterString one of "field", "operator", or "comparator"
     */
    public SingleConditionFilterByKeyword(String filterString) {
        this.filterString = filterString;
    }

    /**
     * Dispatch on the top-level condition type of the predicate.
     *
     * @throws RuntimeException if the predicate is neither "and" nor "or"
     */
    @Override
    public String applyFilter(JsonObject json) {
        if (json.has(AND)) {
            return parseConditions(json, AND);
        } else if (json.has(OR)) {
            return parseConditions(json, OR);
        } else {
            throw new RuntimeException("Unknown predicate type: " + json.keySet());
        }
    }

    /**
     * Extract the requested component from the first condition in the array.
     *
     * @throws RuntimeException if the keyword given at construction is unknown
     */
    private String parseConditions(JsonObject json, String conditionType) {
        // All three cases inspect only the first condition object.
        JsonObject first = json.get(conditionType).getAsJsonArray().get(0).getAsJsonObject();
        return switch (filterString) {
            case FIELD ->
                first.keySet().stream().findFirst().orElseThrow().replace("\"", "");
            case OPERATOR ->
                first.entrySet().stream().findFirst().orElseThrow()
                    .getValue().getAsJsonObject().keySet().stream().findFirst().orElseThrow().replace("\"", "");
            case COMPARATOR ->
                first.entrySet().stream().findFirst().orElseThrow()
                    .getValue().getAsJsonObject().entrySet().stream().findFirst().orElseThrow().getValue().getAsJsonObject()
                    .get(VALUE).toString().replace("\"", "");
            default -> throw new RuntimeException("Unknown filter string: " + filterString);
        };
    }
}

View File

@ -1,55 +0,0 @@
/*
* Copyright (c) 2023-2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.from_json.to_cql;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Exercises CqlDatasetParser against single- and multi-condition "and"/"or"
 * predicates, checking the generated CQL WHERE clauses.
 */
public class CqlDatasetParserTest {
    String test1 = "{\"conditions\": {\"and\": [{\"a\": {\"match\": {\"value\": 53}}}]}}";
    String test2 = "{\"conditions\": {\"and\": [{\"a\": {\"match\": {\"value\": \"thirteen\"}}}, {\"b\": {\"match\": {\"value\": \"fifty-four\"}}}]}}";
    String test3 = "{\"conditions\": {\"and\": [{\"a\": {\"match\": {\"value\": 13}}}, {\"b\": {\"match\": {\"value\": 54}}}, {\"a\": {\"match\": {\"value\": 154}}}]}}";
    String test4 = "{\"conditions\": {\"or\": [{\"a\": {\"match\": {\"value\": 9}}}, {\"a\": {\"match\": {\"value\": 71}}}]}}";
    String test5 = "{\"conditions\": {\"or\": [{\"a\": {\"match\": {\"value\": 9}}}, {\"a\": {\"match\": {\"value\": 71}}}, {\"a\": {\"match\": {\"value\": 7}}}]}}";
    String test6 = "{\"conditions\": {\"or\": [{\"b\": {\"match\": {\"value\": \"foo\"}}}, {\"b\": {\"match\": {\"value\": \"bar\"}}}]}}";

    @Test
    public void testParse() {
        CqlDatasetParser parser = new CqlDatasetParser();
        // Inputs and their expected WHERE clauses, checked in order.
        String[] inputs = {test1, test2, test3, test4, test5, test6};
        String[] expected = {
            "WHERE a=53",
            "WHERE a='thirteen' and b='fifty-four'",
            "WHERE a=13 and b=54 and a=154",
            "WHERE a IN(9,71)",
            "WHERE a IN(9,71,7)",
            "WHERE b IN('foo','bar')"
        };
        for (int i = 0; i < inputs.length; i++) {
            assertEquals(expected[i], parser.parse(inputs[i]));
        }
    }
}

View File

@ -1,151 +0,0 @@
/*
* Copyright (c) 2024 nosqlbench
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.from_json.to_pineconefilter;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.from_json.MultiConditionFilterByKeyword;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.from_json.MultiConditionFilterByLevel;
import io.nosqlbench.virtdata.library.hdf5.from_long.to_string.predicate_parser.from_json.SingleConditionFilterByKeyword;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Exercises PineconeFilterParser with each of the three filter strategies
 * (single-condition by keyword, multi-condition by keyword, multi-condition by
 * level), checking the extracted field / operator / comparator components.
 */
public class PineconeFilterParserTest {
    private static final String test1 = """
        {
            "conditions": {
                "and": [
                    {
                        "department_name": {
                            "match": {
                                "value": "Divided Shoes"
                            }
                        }
                    },
                    {
                        "department_type": {
                            "match": {
                                "value": "Footwear"
                            }
                        }
                    }
                ]
            }
        }""";
    // NOTE(review): test2 is not referenced by any test yet; its JSON was also
    // missing the closing brace for the root object, which is fixed here.
    private final static String test2 = """
        {
            "conditions": {
                "or": [
                    {
                        "a": {
                            "range": {
                                "gt": 0.30328462495055897, "lt": 0.5387830095849688
                            }
                        }
                    },
                    {
                        "a": {
                            "range": {
                                "gt": 0.20352843950042498, "lt": 0.8888583828498567
                            }
                        }
                    }
                ]
            }
        }""";

    @Test
    public void testComparatorParseByLevel() {
        PineconeFilterParser parser = new PineconeFilterParser();
        MultiConditionFilterByLevel mcf = new MultiConditionFilterByLevel(3, true);
        parser.setFilter(mcf);
        String parsed = parser.parse(test1);
        assertEquals("Divided Shoes,Footwear", parsed);
    }

    @Test
    public void testComparatorParseByKeyword() {
        PineconeFilterParser parser = new PineconeFilterParser();
        MultiConditionFilterByKeyword mcf = new MultiConditionFilterByKeyword("comparator");
        parser.setFilter(mcf);
        String parsed = parser.parse(test1);
        assertEquals("Divided Shoes,Footwear", parsed);
    }

    @Test
    public void testComparatorSingleParseByKeyword() {
        PineconeFilterParser parser = new PineconeFilterParser();
        SingleConditionFilterByKeyword scf = new SingleConditionFilterByKeyword("comparator");
        parser.setFilter(scf);
        String parsed = parser.parse(test1);
        assertEquals("Divided Shoes", parsed);
    }

    @Test
    public void testFieldParseByLevel() {
        PineconeFilterParser parser = new PineconeFilterParser();
        MultiConditionFilterByLevel mcf = new MultiConditionFilterByLevel(1, false);
        parser.setFilter(mcf);
        String parsed = parser.parse(test1);
        assertEquals("department_name,department_type", parsed);
    }

    @Test
    public void testFieldParseByKeyword() {
        PineconeFilterParser parser = new PineconeFilterParser();
        MultiConditionFilterByKeyword mcf = new MultiConditionFilterByKeyword("field");
        parser.setFilter(mcf);
        String parsed = parser.parse(test1);
        assertEquals("department_name,department_type", parsed);
    }

    @Test
    public void testFieldSingleParseByKeyword() {
        PineconeFilterParser parser = new PineconeFilterParser();
        SingleConditionFilterByKeyword scf = new SingleConditionFilterByKeyword("field");
        parser.setFilter(scf);
        String parsed = parser.parse(test1);
        assertEquals("department_name", parsed);
    }

    @Test
    public void testOperatorParseByLevel() {
        PineconeFilterParser parser = new PineconeFilterParser();
        MultiConditionFilterByLevel mcf = new MultiConditionFilterByLevel(2, false);
        parser.setFilter(mcf);
        String parsed = parser.parse(test1);
        assertEquals("match,match", parsed);
    }

    @Test
    public void testOperatorParseByKeyword() {
        PineconeFilterParser parser = new PineconeFilterParser();
        MultiConditionFilterByKeyword mcf = new MultiConditionFilterByKeyword("operator");
        parser.setFilter(mcf);
        String parsed = parser.parse(test1);
        assertEquals("match,match", parsed);
    }

    @Test
    public void testOperatorSingleParseByKeyword() {
        PineconeFilterParser parser = new PineconeFilterParser();
        SingleConditionFilterByKeyword mcf = new SingleConditionFilterByKeyword("operator");
        parser.setFilter(mcf);
        String parsed = parser.parse(test1);
        assertEquals("match", parsed);
    }
}