allow all/any/none semantics on tag matching

This commit is contained in:
Jonathan Shook 2020-07-17 17:30:14 -05:00
parent adc4bdbdc2
commit e5d02dae56
2 changed files with 132 additions and 84 deletions

View File

@ -18,77 +18,112 @@
package io.nosqlbench.engine.api.util;
import java.util.*;
import java.util.function.BiFunction;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
* This class makes it easy to associate tags and tag values with {@link Tagged}
* items, filtering matching Tagged items from a set of candidates.
* <H2>TagFilter Synopsis</H2>
* <p>
* This class makes it easy to associate tags and tag values with {@link Tagged} items, filtering matching Tagged items
* from a set of candidates.</p>
*
* <ul>
* <li><em>tags</em> are the actual tags attached to a {@link Tagged} item.</li>
* <li><em>filters</em> are the names and values used to filter the tag sets.</li>
* <li><em>tags</em> are the actual tags attached to a {@link Tagged} item.</li>
* <li><em>filters</em> are the names and values used to filter the tag sets.</li>
* </ul>
* Tag names and filter names must be simple words. Filter values can have regex expressions, however.
*
* <H2>Tag Names and Values</H2>
* <p>
* Any type which implements the Tagged interface can provide a set of tags in the form of a map. These are free-form,
* although tag names must be simple words.
* </p>
*
* <H2>Tag Filters</H2>
* <p>Tag names and filter names must be simple words. Filter values can have regex expressions, however.
* When a filter value starts and ends with a single quote, the quotes are removed as a convenience
* for dealing with shell escapes, etc. This means that value <strong>'five-oh.*five'</strong>
* is the same as <strong>five-oh.*five</strong>, except that the former will not cause undesirable
* shell expansion on command lines.
* <p>
* When a Tagged item is filtered, the following checks are made for each
* tag specified in the filter:
* shell expansion on command lines.</p>
*
* <p>When a Tagged item is filtered, the following checks are made for each tag specified in the filter:</p>
*
* <ol>
* <li>The Tagged item must have a tag with the same name as a filter.</li>
* <li>If the filter has a value in addition to the tag name, then the Tagged item
* must also have a value for that tag name. Furthermore, the value has to match.</li>
* <li>If the filter value, converted to a Regex, matches the tag value,
* it is deemed to be a match.</li>
* <li>The Tagged item must have a tag with the same name as a filter.</li>
* <li>If the filter has a value in addition to the tag name, then the Tagged item must also have a value
* for that tag name. Furthermore, the value has to match.</li>
* <li>If the filter value, converted to a Regex, matches the tag value, it is deemed to be a match.</li>
* </ol>
* <p>
* Because advanced tag usage can sometimes be unintuitive, the tag filtering logic has
*
* <p>Because advanced tag usage can sometimes be unintuitive, the tag filtering logic has
* a built-in log which can explain why a candidate item did or did not match a particular
* set of filters.
* set of filters.</p>
*
* <h2>Filter Spec Formats</h2>
* <p>
* All of the following forms are acceptable for a filter spec:
* <UL>
* <li>name1=value1 name2=value2</li>
* <li>name1:value1, name2=value2</li>
* <li>name1=value1 name2=value2,name3:value3</li>
* <li>name1='.*fast.*', name2=1+</li>
* </UL>
*
* <p>
* That is, you can use spaces or commas between tag (name,value) pairs, and you can also use colons or equals
* between the actual tag names and values. This is not to support mixed formatting, but it does allow for some
* flexibility when integrating with other formats. Extra spaces between (name,value) pairs are ignored.</p>
*
* <p>As well, you can include regex patterns in your tag filter values. You can also use single quotes to
* guard against shell expansion of internal characters or spaces. However, the following forms are not acceptable
* for a tag spec:
*
* <dl>
* <dt>name1: value1</dt>
* <dd>no extra spaces between the key and value</dd>
* <dt>name-foo__bar:value1</dt>
* <dd>No non-word characters in tag names</dd>
* <dt>name1: value two</dt>
* <dd>no spaces in tag values</dd>
* <dt>name1: 'value two'</dt>
* <dd>no spaces in tag values, even with single-quotes</dd>
* </dl>
*/
public class TagFilter {
// Shared filter built from an empty spec, so it carries no filter entries.
// NOTE(review): this public static field is not final and can be reassigned by any caller — consider making it final.
public static TagFilter MATCH_ALL = new TagFilter("");
// Filter entries keyed by tag name; LinkedHashMap keeps them in insertion order.
private Map<String, String> filter = new LinkedHashMap<>();
// How the per-key results are combined into the overall result; the default requires every filter key to match.
private Conjugate conjugate = Conjugate.all;
// Recognizes the wrapped form word(spec): the leading word is captured as "conjugate", the inner spec as "filter".
private final static Pattern conjugateForm = Pattern.compile("^(?<conjugate>\\w+)\\((?<filter>.+)\\)$",Pattern.DOTALL|Pattern.MULTILINE);
/**
 * Selects how the per-key filter results are combined into a single verdict.
 * Each constant's function is applied to the number of filter keys and the number of
 * filter keys that matched, in that order.
 */
private enum Conjugate {
    /** Passes when at least one filter key matched. */
    any((total, hits) -> hits > 0),
    /** Passes when the number of matched keys equals the number of filter keys. */
    all((total, hits) -> total.intValue() == hits.intValue()),
    /** Passes when no filter key matched. */
    none((total, hits) -> hits == 0);

    private final BiFunction<Integer, Integer, Boolean> matchfunc;

    Conjugate(BiFunction<Integer, Integer, Boolean> combiner) {
        this.matchfunc = combiner;
    }
}
/**
* Create a new tag filter. A tag filter is comprised of zero or more tag names, each with an
* optional value. The tag spec is a simple string format that contains zero or
* more tag names with optional values.
* <p>
* All of the following forms are acceptable for a filter spec:
* <UL>
* <li>name1=value1 name2=value2</li>
* <li>name1:value1, name2=value2</li>
* <li>name1=value1 name2=value2,name3:value3</li>
* <li>name1='.*fast.*', name2=1+</li>
* </UL>
* <p>
* That is, you can use spaces or commas between tag (name,value) pairs, and you can also use
* colons or equals between the actual tag names and values. This is not to support mixed formatting, but it
* does allow for some flexibility when integrating with other formats. Extra spaces between (name,value)
* pairs are ignored.</p>
* <p>As well, you can include regex patterns in your tag filter values. You can also use single quotes to
* guard against </p>
* <p>
* However, the following forms are not acceptable for a tag spec:
* <dl>
* <dt>name1: value1</dt>
* <dd>no extra spaces between the key and value</dd>
* <dt>name-foo__bar:value1</dt>
* <dd>No non-word characters in tag names</dd>
* <dt>name1: value two</dt>
* <dd>no spaces in tag values</dd>
* <dt>name1: 'value two'</dt>
* <dd>no spaces in tag values, even with single-quotes</dd>
* </dl>
* <p>Create a new tag filter. A tag filter is comprised of zero or more tag names, each with an optional value.
* The tag spec is a simple string format that contains zero or more tag names with optional values.</p>
*
* @param filterSpec a filter spec as explained in the javadoc
* @param filterSpec
* a filter spec as explained in the javadoc
*/
public TagFilter(String filterSpec) {
if ((filterSpec != null) && (!filterSpec.isEmpty())) {
filterSpec=unquote(filterSpec);
filterSpec = unquote(filterSpec);
Matcher cmatcher = conjugateForm.matcher(filterSpec);
if (cmatcher.matches()) {
filterSpec=cmatcher.group("filter");
conjugate = Conjugate.valueOf(cmatcher.group("conjugate").toLowerCase());
}
String[] keyvalues = filterSpec.split("[,] *");
for (String assignment : keyvalues) {
@ -105,28 +140,30 @@ public class TagFilter {
}
/**
 * Strips one enclosing pair of single quotes, and then one enclosing pair of double quotes, from the spec.
 * A pair is only removed when the quote character first appears at index 0 and its next occurrence is the
 * final character; a spec that also contains that quote character in its interior is left as-is for that
 * quote type.
 *
 * @param filterSpec
 *         the raw filter spec text
 * @return the spec with any enclosing quote pairs removed
 */
private static String unquote(String filterSpec) {
for (String s : new String[]{"'","\""}) {
if (filterSpec.indexOf(s)==0 && filterSpec.indexOf(s,1)==filterSpec.length()-1) {
filterSpec=filterSpec.substring(1,filterSpec.length()-1);
for (String s : new String[]{"'", "\""}) {
if (filterSpec.indexOf(s) == 0 && filterSpec.indexOf(s, 1) == filterSpec.length() - 1) {
filterSpec = filterSpec.substring(1, filterSpec.length() - 1);
}
}
return filterSpec;
}
/**
* Although this method could early-exit for certain conditions, the full tag matching logic
* is allowed to complete in order to present more complete diagnostic information back
* to the user.
* Although this method could early-exit for certain conditions, the full tag matching logic is allowed to complete
* in order to present more complete diagnostic information back to the user.
*
* @param tags
* The tags associated with a Tagged item.
*
* @param tags The tags associated with a Tagged item.
* @return a Result telling whether the tags matched and why or why not
*/
protected Result matches(Map<String, String> tags) {
List<String> log = new ArrayList<>();
boolean matched = true;
int totalKeyMatches=0;
for (String filterkey : filter.keySet()) {
boolean matchedKey = true;
String filterval = filter.get(filterkey);
String itemval = tags.get(filterkey);
@ -142,22 +179,24 @@ public class TagFilter {
log.add("(☑, ) " + detail + ": matched names");
} else {
log.add("(☐, ) " + detail + ": did not match)");
matched = false;
matchedKey = false;
}
} else {
Pattern filterpattern = Pattern.compile("^" + filterval + "$");
if (itemval == null) {
log.add("(☑,☐) " + detail + ": null tag value did not match '" + filterpattern + "'");
matched = false;
matchedKey = false;
} else if (filterpattern.matcher(itemval).matches()) {
log.add("(☑,☑) " + detail + ": matched pattern '" + filterpattern + "'");
} else {
log.add("(☑,☐) " + detail + ": did not match '" + filterpattern + "'");
matched = false;
matchedKey = false;
}
}
totalKeyMatches += matchedKey ? 1 : 0;
}
boolean matched = conjugate.matchfunc.apply(filter.size(),totalKeyMatches);
return new Result(matched, log);
}

View File

@ -36,8 +36,8 @@ public class TagFilterTest {
@Test
public void testEmptyTagFilterDoesMatch() {
Map<String,String> itemtags = new HashMap<>() {{
put("a","tag");
Map<String, String> itemtags = new HashMap<>() {{
put("a", "tag");
}};
TagFilter tf = new TagFilter("");
assertThat(tf.matches(itemtags).matched()).isTrue();
@ -45,7 +45,7 @@ public class TagFilterTest {
@Test
public void testSomeFilterTagsNoItemTagsDoesNotMatch() {
Map<String,String> itemtags = new HashMap<>() {{
Map<String, String> itemtags = new HashMap<>() {{
}};
TagFilter tf = new TagFilter("tag=foo");
assertThat(tf.matches(itemtags).matched()).isFalse();
@ -54,8 +54,8 @@ public class TagFilterTest {
@Test
public void testEmptyTagFilterValueDoesMatch() {
Map<String,String> itemtags = new HashMap<>() {{
put("one","two");
Map<String, String> itemtags = new HashMap<>() {{
put("one", "two");
}};
TagFilter tf = new TagFilter("");
assertThat(tf.matches(itemtags).matched()).isTrue();
@ -64,15 +64,15 @@ public class TagFilterTest {
@Test
public void testMatchingTagKeyValueDoesMatch() {
Map<String,String> itemtags = new HashMap<>() {{
put("one","two");
Map<String, String> itemtags = new HashMap<>() {{
put("one", "two");
}};
TagFilter tf = new TagFilter("one");
TagFilter.Result result = tf.matches(itemtags);
assertThat(result.matched()).isTrue();
Map<String,String> itemtags2 = new HashMap<>() {{
put("one",null);
Map<String, String> itemtags2 = new HashMap<>() {{
put("one", null);
}};
assertThat(tf.matches(itemtags2).matched()).isTrue();
}
@ -80,8 +80,8 @@ public class TagFilterTest {
@Test
public void testMatchingKeyMismatchingValueDoesNotMatch() {
Map<String,String> itemtags = new HashMap<>() {{
put("one","four");
Map<String, String> itemtags = new HashMap<>() {{
put("one", "four");
}};
TagFilter tf = new TagFilter("one:two");
TagFilter.Result result = tf.matches(itemtags);
@ -90,8 +90,8 @@ public class TagFilterTest {
@Test
public void testMatchingKeyAndValueDoesMatch() {
Map<String,String> itemtags = new HashMap<>() {{
put("one","four");
Map<String, String> itemtags = new HashMap<>() {{
put("one", "four");
}};
TagFilter tf = new TagFilter("one:four");
assertThat(tf.matches(itemtags).matched()).isTrue();
@ -99,8 +99,8 @@ public class TagFilterTest {
@Test
public void testMatchingKeyAndValueRegexDoesMatch() {
Map<String,String> itemtags = new HashMap<>() {{
put("one","four-five-six");
Map<String, String> itemtags = new HashMap<>() {{
put("one", "four-five-six");
}};
TagFilter tfLeft = new TagFilter("one:'four-.*'");
assertThat(tfLeft.matches(itemtags).matched()).isTrue();
@ -115,9 +115,9 @@ public class TagFilterTest {
@Override
public Map<String, String> getTags() {
return new HashMap<>() {{
put("one","four-five-six");
put("two","three-seven-nine");
put("five",null);
put("one", "four-five-six");
put("two", "three-seven-nine");
put("five", null);
put("six", null);
}};
}
@ -136,8 +136,8 @@ public class TagFilterTest {
@Test
public void testRawSubstringDoesNotMatchRegex() {
Map<String,String> itemtags = new HashMap<>() {{
put("one","four-five-six");
Map<String, String> itemtags = new HashMap<>() {{
put("one", "four-five-six");
}};
TagFilter tf = new TagFilter("one:'five'");
assertThat(tf.matches(itemtags).matched()).isFalse();
@ -145,8 +145,8 @@ public class TagFilterTest {
@Test
public void testAlternation() {
Map<String,String> itemtags = new HashMap<>() {{
put("one","four-five-six");
Map<String, String> itemtags = new HashMap<>() {{
put("one", "four-five-six");
}};
TagFilter tf = new TagFilter("one:'four.*|seven'");
assertThat(tf.matches(itemtags).matched()).isTrue();
@ -155,12 +155,21 @@ public class TagFilterTest {
@Test
public void testLeadingSpaceTrimmedInQuotedTag() {
Map<String,String> itemtags = new HashMap<>() {{
put("phase","main");
Map<String, String> itemtags = new HashMap<>() {{
put("phase", "main");
}};
TagFilter tf = new TagFilter("\"phase: main\"");
assertThat(tf.matches(itemtags).matched()).isTrue();
}
@Test
public void testAnyCondition() {
    // The candidate item carries exactly two tags.
    Map<String, String> candidateTags = Map.of("phase", "main", "truck", "car");

    // One of the two entries (truck:car) matches, which is enough for any(...).
    TagFilter.Result oneEntryMatches = new TagFilter("any(truck:car,phase:moon)").matches(candidateTags);
    assertThat(oneEntryMatches.matched()).isTrue();

    // Neither entry matches, so any(...) must reject the item.
    TagFilter.Result noEntryMatches = new TagFilter("any(car:truck,phase:moon)").matches(candidateTags);
    assertThat(noEntryMatches.matched()).isFalse();
}
}