doc system service layer improvements

This commit is contained in:
Jonathan Shook
2020-08-06 09:50:59 -05:00
parent 459b84c8c0
commit b0d587814b
14 changed files with 518 additions and 96 deletions

View File

@@ -0,0 +1,35 @@
// Two-step illustration of topic-glob resolution for the doc system.
// cluster0: records A/B/C with their raw topic assignments (parenthesized
// entries like "(b)" appear to denote glob/expression topics — TODO confirm).
// cluster1: the expanded relationships after expression matching.
digraph {
newrank=true
compound=true
node [fontsize = 8,shape = record]
rankdir = LR;
subgraph cluster0 {
rankdir = LR;
step0[shape=none]
node [fontsize = 8, shape = record]
A0 [label="A|topic:a,(b)"]
B0 [label="B|topic:b,(c)"]
C0 [label="C|topic:c"]
}
subgraph cluster1 {
node [fontsize = 8, shape = record]
step1[shape=none]
a1 [label="a",shape=oval]
A1 -> a1 [label="topic of"]
A1 -> b1 [label="topic of"]
expr_b1 -> b1 [label="match"]
expr_b1[label="(b)",shape=oval]
// NOTE(review): B1 is declared twice (here and below with a different label);
// in DOT the later attribute assignment wins — verify which label is intended.
B1 [label="match"]
b1 [label="b", shape=oval]
B1 -> b1 [label="topic of"]
A1 [label="A"]
B1 [label="B|topic:a,(b)"]
C1 [label="C|topic:b,(c)"]
}
// Cross-cluster edge drawn between the two step markers.
step0 -> step1[ltail=cluster0,lhead=cluster1]
}

View File

@@ -0,0 +1,41 @@
// Before/after view of topic mapping.
// clusterA ("before"): node A carries glob topics ".*,all-topics" and B carries
// the expression topic "(cli)".
// clusterB ("after"): globs have been replaced by the concrete topics they
// matched, recorded under "included:".
digraph {
node [fontsize = 8,shape = record]
rankdir = LR;
subgraph clusterB {
label = "after topic mapping"
node [fontsize = 8,shape = record]
rankdir = LR;
ap; bp; cp; dp; ep;
ap [label = "A|topics: all-topics|included: cli,time,temp"]
ap -> {bp; cp; dp; ep}
bp [label = "B|topics:|included: cli"]
cp [label = "C|topics:cli"]
bp -> cp
dp [label = "D|topics:temp"]
ep [label = "E|topics:time"]
}
subgraph clusterA {
label = "before topic mapping"
node [fontsize = 8,shape = record]
rankdir = LR;
a; b; c; d; e;
a [label = "A|topics: .*,all-topics"]
a -> {b; c; d; e}
b [label = "B|topics:(cli)"]
c [label = "C|topics:cli"]
b -> c
d [label = "D|topics:temp"]
e [label = "E|topics:time"]
}
}

View File

@@ -4,11 +4,13 @@ import io.nosqlbench.nb.api.markdown.types.FrontMatterInfo;
import io.nosqlbench.nb.api.markdown.types.MarkdownInfo;
import java.nio.file.Path;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
public class CompositeMarkdownInfo implements MarkdownInfo {
private List<MarkdownInfo> elements = new LinkedList<>();
private boolean isSorted=false;
@Override
public Path getPath() {
@@ -18,6 +20,10 @@ public class CompositeMarkdownInfo implements MarkdownInfo {
@Override
public String getBody() {
StringBuilder sb = new StringBuilder();
if (!isSorted) {
Collections.sort(elements);
isSorted=true;
}
for (MarkdownInfo element : elements) {
sb.append(element.getBody());
}
@@ -35,15 +41,31 @@ public class CompositeMarkdownInfo implements MarkdownInfo {
}
@Override
public MarkdownInfo withTopics(List<String> assigning) {
public CompositeMarkdownInfo withTopics(List<String> assigning) {
MarkdownInfo leader = elements.get(0);
leader = leader.withTopics(assigning);
elements.set(0,leader);
return this;
}
public <T extends MarkdownInfo> CompositeMarkdownInfo add(T element) {
elements.add(element);
/**
 * Apply the included topic names to the lead element of this composite,
 * replacing it in place and returning this composite for chaining.
 *
 * NOTE(review): throws IndexOutOfBoundsException when the composite is
 * empty — TODO confirm callers always add at least one element first.
 *
 * @param included topic names under which extra content was folded in
 * @return this composite, with its lead element updated
 */
@Override
public CompositeMarkdownInfo withIncluded(List<String> included) {
    MarkdownInfo leader = elements.get(0);
    leader = leader.withIncluded(included);
    elements.set(0, leader);
    return this;
}
public <T extends MarkdownInfo> CompositeMarkdownInfo add(T element) {
elements.add(element);
isSorted=false;
return this;
}
/**
 * Diagnostic rendering listing the aggregated elements and sort state.
 */
@Override
public String toString() {
    StringBuilder rendered = new StringBuilder("CompositeMarkdownInfo{");
    rendered.append("elements=").append(elements);
    rendered.append(", isSorted=").append(isSorted);
    rendered.append('}');
    return rendered.toString();
}
}

View File

@@ -0,0 +1,121 @@
package io.nosqlbench.nb.api.markdown.aggregator;
import io.nosqlbench.nb.api.markdown.types.MarkdownInfo;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
 * An in-memory graph of markdown elements, indexed by concrete topic and by
 * topic-glob pattern. {@link #processed()} resolves glob-bearing elements into
 * composites of the leaf content whose topics their globs match.
 *
 * NOTE(review): no synchronization anywhere — assumes single-threaded use.
 */
public class MDGraph {
    // Lazily built in processed(): pattern string -> concrete topics it matches.
    private Map<String, List<String>> topicsByPattern;
    // pattern string -> elements that declared that glob among their topics.
    private final Map<String, List<Edge<MarkdownInfo>>> elementsByPattern = new HashMap<>();
    // concrete topic -> elements that declared that topic.
    private final Map<String, List<Edge<MarkdownInfo>>> elementsByTopic = new HashMap<>();
    // All added elements, in insertion order.
    private final List<Edge<MarkdownInfo>> elements = new LinkedList<>();

    /**
     * Index an element under each of its concrete topics (or under "none" when
     * it has no topics) and under each of its glob patterns.
     *
     * @param addingElem the markdown element to register
     */
    public void add(MarkdownInfo addingElem) {
        Edge<MarkdownInfo> edge = new Edge<>(addingElem);
        elements.add(edge);
        for (String topic : addingElem.getTopics()) {
            elementsByTopic.computeIfAbsent(topic, t -> new ArrayList<>()).add(edge);
        }
        // Always add elements to the "none" at a minimum
        if (addingElem.getTopics().size() == 0) {
            elementsByTopic.computeIfAbsent("none", t -> new ArrayList<>()).add(edge);
        }
        for (Pattern pattern : addingElem.getTopicGlobs()) {
            elementsByPattern.computeIfAbsent(pattern.pattern(),
                p -> new ArrayList<>()).add(edge);
        }
    }

    /**
     * Resolve glob-bearing elements: when every element matched by a glob is a
     * leaf (has no globs of its own), fold the matches into a
     * CompositeMarkdownInfo carrying the original element's topics; otherwise
     * defer the element to the end of the list for a later pass.
     *
     * NOTE(review): resolved.addLast(...) structurally modifies the LinkedList
     * outside its ListIterator, so the next iter.next() should throw
     * ConcurrentModificationException whenever a non-leaf element is deferred —
     * TODO confirm and restructure with an explicit work queue.
     * NOTE(review): the if(leafnodes) block is inside the per-glob loop, so an
     * element with several globs is composited or deferred once per glob rather
     * than once after all globs are examined — verify this is intended.
     *
     * @return the resolved elements, composites substituted in place
     */
    public List<MarkdownInfo> processed() {
        if (topicsByPattern == null) {
            topicsByPattern = topicsByPattern();
        }
        LinkedList<Edge<MarkdownInfo>> resolved = new LinkedList<>(elements);
        ListIterator<Edge<MarkdownInfo>> iter = resolved.listIterator();
        while (iter.hasNext()) {
            Edge<MarkdownInfo> elementEdge = iter.next();
            MarkdownInfo element = elementEdge.get();
            List<Pattern> topicGlobs = element.getTopicGlobs();
            if (topicGlobs.size() != 0) {
                List<Edge<MarkdownInfo>> included = new ArrayList<>();
                boolean leafnodes=true;
                for (Pattern topicGlob : topicGlobs) {
                    // add() registered every glob in elementsByPattern, so the
                    // topicsByPattern lookup cannot miss for this element.
                    for (String matchedTopic : topicsByPattern.get(topicGlob.pattern())) {
                        List<Edge<MarkdownInfo>> edges = elementsByTopic.get(matchedTopic);
                        for (Edge<MarkdownInfo> edge : edges) {
                            if (edge.get().getTopicGlobs().size()!=0) {
                                leafnodes=false;
                            }
                            included.add(edge);
                        }
                    }
                    if (leafnodes) {
                        CompositeMarkdownInfo mdinfo =
                            new CompositeMarkdownInfo();
                        mdinfo.add(element);
                        for (Edge<MarkdownInfo> tEdge : included) {
                            mdinfo.add(tEdge.get());
                        }
                        // TODO: Add included
                        MarkdownInfo withTopics = mdinfo.withTopics(element.getTopics());
                        elementEdge.set(withTopics);
                    } else {
                        // Move this to the end of the list.
                        iter.remove();
                        resolved.addLast(elementEdge);
                    }
                }
            }
        }
        return resolved.stream().map(Edge::get).collect(Collectors.toList());
    }

    /**
     * For each registered glob pattern, collect every concrete topic (across
     * all added elements) which the compiled pattern fully matches.
     *
     * @return map of pattern string to the topics it matches (possibly empty)
     */
    private Map<String, List<String>> topicsByPattern() {
        Map<String, List<String>> tbp = new HashMap<>();
        for (String pattern : this.elementsByPattern.keySet()) {
            List<String> matchingTopics = tbp.computeIfAbsent(pattern, p -> new ArrayList<>());
            for (String topic : this.elementsByTopic.keySet()) {
                if (Pattern.compile(pattern).matcher(topic).matches()) {
                    matchingTopics.add(topic);
                }
            }
        }
        return tbp;
    }

    /**
     * Allow edges to point to mutable vertices, so a vertex can be replaced
     * (e.g. by a composite) without rewiring the containers that hold it.
     * @param <T> the markdown element type held by this edge
     */
    private final static class Edge<T extends MarkdownInfo> {
        private T element;
        public Edge(T element) {
            this.element = element;
        }
        public T get() {
            return element;
        }
        public void set(T element) {
            this.element = element;
        }
    }
}

View File

@@ -72,95 +72,122 @@ public class MarkdownDocs {
ordered.addAll(markdownWithTopicGlobs);
ordered.addAll(markdownInfos);
List<Edge<List<String>>> edges = new ArrayList<>();
List<String> assigning = null;
MDGraph mdgraph = new MDGraph();
ordered.forEach(mdgraph::add);
for (int i = 0; i < ordered.size()-1; i++) {
MarkdownInfo mdHavingGlobs = ordered.get(i);
List<Pattern> topicGlobs = mdHavingGlobs.getTopicGlobs();
// TODO track and warn if a glob doesn't match anything
for (int j = i+1; j < ordered.size(); j++) {
MarkdownInfo mdHavingTopics = ordered.get(j);
List<String> topics = mdHavingTopics.getTopics();
for (Pattern topicGlob : topicGlobs) {
for (String topic : topics) {
if (topicGlob.matcher(topic).matches()) {
assigning=assigning==null ? new ArrayList<>() : assigning;
assigning.add(topic);
logger.debug("added topic=" + topic + " to " + i + "->" + j + " with " + topicGlob);
}
}
if (assigning!=null) {
assigning.addAll(mdHavingGlobs.getTopics());
ordered.set(i,mdHavingGlobs.withTopics(assigning));
logger.debug("assigned new mdinfo");
}
}
}
}
int loopsremaining=100;
// Assign glob topics to non-glob topics that match
// for (MarkdownInfo parsedMarkdown : markdownInfos) {
// FrontMatterInfo fm = parsedMarkdown.getFrontmatter();
// Set<String> topics = fm.getTopics();
// Set<String> newTopics = new HashSet<>();
// for (String topic : topics) {
// if (isPattern(topic)) {
// Pattern p = Pattern.compile(topic);
// for (String nonGlobTopic : nonGlobTopics) {
// if (p.matcher(nonGlobTopic).matches()) {
// newTopics.add(topic);
return mdgraph.processed();
//
//
//
// List<Edge<List<String>>> edges = new ArrayList<>();
// List<String> matchedtopics = null;
//
// for (int i = 0; i < ordered.size()-1; i++) {
// MarkdownInfo mdHavingGlobs = ordered.get(i);
// List<Pattern> topicGlobs = mdHavingGlobs.getTopicGlobs();
//
// for (Pattern topicGlob : topicGlobs) {
// for (int matchidx = i+1; matchidx < ordered.size(); matchidx++) {
// MarkdownInfo matchableContent = ordered.get(matchidx);
// List<String> matchableTopics = matchableContent.getTopics();
// for (String matchableTopic : matchableTopics) {
// if (topicGlob.matcher(matchableTopic).matches()) {
// matchedtopics=matchedtopics==null ? new ArrayList<>() : matchedtopics;
// matchedtopics.add(matchableTopic);
// logger.debug("added topic=" + matchableTopic + " to " + i + "->" + matchidx + " with " + topicGlob);
// }
// }
// } else {
// newTopics.add(topic);
// if (matchedtopics!=null) {
// matchedtopics.addAll(mdHavingGlobs.getTopics());
// ordered.set(i,mdHavingGlobs.withTopics(matchedtopics));
// logger.debug("assigned new mdinfo");
// matchedtopics=null;
// }
// }
// }
// fm.setTopics(newTopics);
// }
//
// // create topic to content map
// HashMap<String,List<ParsedMarkdown>> contentByTopic = new HashMap<>();
// for (ParsedMarkdown parsedMarkdown : markdownInfos) {
// for (String topic : parsedMarkdown.getFrontmatter().getTopics()) {
// contentByTopic.computeIfAbsent(topic, t -> new ArrayList<>()).add(parsedMarkdown);
// }
// }
// // TODO track and warn if a glob doesn't match anything
// for (int j = i+1; j < ordered.size(); j++) {
//
// ListIterator<? extends MarkdownInfo> lit = markdownInfos.listIterator();
// while (lit.hasNext()) {
// MarkdownInfo mif = lit.next();
// if (mif.hasAggregations()) {
// lit.remove();
// mif = new CompositeMarkdownInfo().add(mif);
// lit.add(mif);
// }
// }
// MarkdownInfo mdHavingTopics = ordered.get(j);
// List<String> topics = mdHavingTopics.getTopics();
//
// // combine aggregate targets
// for (ParsedMarkdown parsedMarkdown : markdownInfos) {
// List<Pattern> aggregations = parsedMarkdown.getFrontmatter().getAggregations();
// if (aggregations.size()>0) {
// for (Pattern aggregation : aggregations) {
// for (Pattern topicGlob : topicGlobs) {
//
// for (String topic : topics) {
// if (topicGlob.matcher(topic).matches()) {
// matchedtopics=matchedtopics==null ? new ArrayList<>() : matchedtopics;
// matchedtopics.add(topic);
// logger.debug("added topic=" + topic + " to " + i + "->" + j + " with " + topicGlob);
// }
// }
// if (matchedtopics!=null) {
// matchedtopics.addAll(mdHavingGlobs.getTopics());
// ordered.set(i,mdHavingGlobs.withTopics(matchedtopics));
// logger.debug("assigned new mdinfo");
// }
// }
// }
// }
//
// // Assign glob topics
// int loopsremaining=100;
//
// // Assign content aggregates
// System.out.println("topics: " + topicSets);
// // Assign glob topics to non-glob topics that match
//
// aggregated.addAll(markdownInfos);
return aggregated;
//// for (MarkdownInfo parsedMarkdown : markdownInfos) {
//// FrontMatterInfo fm = parsedMarkdown.getFrontmatter();
//// Set<String> topics = fm.getTopics();
//// Set<String> newTopics = new HashSet<>();
//// for (String topic : topics) {
//// if (isPattern(topic)) {
//// Pattern p = Pattern.compile(topic);
//// for (String nonGlobTopic : nonGlobTopics) {
//// if (p.matcher(nonGlobTopic).matches()) {
//// newTopics.add(topic);
//// }
//// }
//// } else {
//// newTopics.add(topic);
//// }
//// }
//// fm.setTopics(newTopics);
//// }
////
//// // create topic to content map
//// HashMap<String,List<ParsedMarkdown>> contentByTopic = new HashMap<>();
//// for (ParsedMarkdown parsedMarkdown : markdownInfos) {
//// for (String topic : parsedMarkdown.getFrontmatter().getTopics()) {
//// contentByTopic.computeIfAbsent(topic, t -> new ArrayList<>()).add(parsedMarkdown);
//// }
//// }
////
//// ListIterator<? extends MarkdownInfo> lit = markdownInfos.listIterator();
//// while (lit.hasNext()) {
//// MarkdownInfo mif = lit.next();
//// if (mif.hasAggregations()) {
//// lit.remove();
//// mif = new CompositeMarkdownInfo().add(mif);
//// lit.add(mif);
//// }
//// }
////
//// // combine aggregate targets
//// for (ParsedMarkdown parsedMarkdown : markdownInfos) {
//// List<Pattern> aggregations = parsedMarkdown.getFrontmatter().getAggregations();
//// if (aggregations.size()>0) {
//// for (Pattern aggregation : aggregations) {
////
//// }
//// }
//// }
////
//// // Assign glob topics
////
//// // Assign content aggregates
//// System.out.println("topics: " + topicSets);
////
//// aggregated.addAll(markdownInfos);
// return aggregated;
}

View File

@@ -52,11 +52,23 @@ public class ParsedFrontMatter implements FrontMatterInfo {
for (String topic : topics) {
Collections.addAll(topicSet, topic.split(", *"));
}
topicSet.addAll(topics);
// topicSet.addAll(topics);
}
return topicSet;
}
/**
 * Split the comma-separated 'included' front matter values into a flat list
 * of topic names. Returns an empty list when the key is absent.
 */
@Override
public List<String> getIncluded() {
    List<String> names = new ArrayList<>();
    List<String> rawEntries = data.get(FrontMatterInfo.INCLUDED);
    if (rawEntries == null) {
        return names;
    }
    for (String entry : rawEntries) {
        Collections.addAll(names, entry.split(", *"));
    }
    return names;
}
@Override
public List<Pattern> getAggregations() {
if (!data.containsKey(FrontMatterInfo.AGGREGATE)) {
@@ -104,10 +116,30 @@ public class ParsedFrontMatter implements FrontMatterInfo {
return new ParsedFrontMatter(newmap);
}
/**
 * Return a copy of this front matter with the 'included' entry replaced by
 * the given list; this instance is left unmodified.
 */
public ParsedFrontMatter withIncluded(List<String> included) {
    HashMap<String, List<String>> copied = new HashMap<>(this.data);
    copied.put(FrontMatterInfo.INCLUDED, included);
    return new ParsedFrontMatter(copied);
}
/**
 * Diagnostic rendering of the raw front matter data map.
 */
@Override
public String toString() {
    StringBuilder rendered = new StringBuilder("ParsedFrontMatter{");
    rendered.append("data=").append(data).append('}');
    return rendered.toString();
}
/**
 * Equality is based solely on the underlying front matter data map; the
 * exact-class check preserves symmetry against subclasses.
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    if (o == null || o.getClass() != getClass()) {
        return false;
    }
    return Objects.equals(data, ((ParsedFrontMatter) o).data);
}
// Derived from the same data map equals() compares, keeping the
// equals/hashCode contract consistent.
@Override
public int hashCode() {
    return Objects.hash(data);
}
}

View File

@@ -11,10 +11,7 @@ import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
* TODO: Make this a value type
@@ -93,9 +90,27 @@ public class ParsedMarkdown implements MarkdownInfo, HasDiagnostics {
return new ParsedMarkdown(frontMatter.withTopics(assigning), this.content);
}
/**
 * Return a new ParsedMarkdown whose front matter carries the given included
 * topic names; this instance and its content are left unmodified.
 *
 * @param included topic names under which extra content was folded in
 * @return a new ParsedMarkdown with updated front matter and the same content
 */
@Override
public MarkdownInfo withIncluded(List<String> included) {
    return new ParsedMarkdown(frontMatter.withIncluded(included), this.content);
}
/**
 * Diagnostic rendering: "ParsedMarkdown/" followed by the front matter.
 */
@Override
public String toString() {
    StringBuilder rendered = new StringBuilder("ParsedMarkdown/");
    rendered.append(frontMatter.toString());
    return rendered.toString();
}
/**
 * Equality is based on both the parsed front matter and the content body;
 * the exact-class check preserves symmetry against subclasses.
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    if (o == null || o.getClass() != getClass()) {
        return false;
    }
    ParsedMarkdown other = (ParsedMarkdown) o;
    return Objects.equals(frontMatter, other.frontMatter)
        && Objects.equals(content, other.content);
}
// Combines the same fields equals() compares, keeping the equals/hashCode
// contract consistent.
@Override
public int hashCode() {
    return Objects.hash(frontMatter, content);
}
}

View File

@@ -16,7 +16,10 @@ public interface FrontMatterInfo {
String TOPICS = "topics";
String WEIGHT = "weight";
String TITLE = "title";
Set<String> FrontMatterKeyWords = Set.of(SCOPES, AGGREGATE,TOPICS,WEIGHT,TITLE);
String INCLUDED = "included";
Set<String> FrontMatterKeyWords =
Set.of(SCOPES, AGGREGATE,TOPICS,WEIGHT, TITLE,INCLUDED);
/**
@@ -50,6 +53,18 @@ public interface FrontMatterInfo {
*/
Set<String> getTopics();
/**
* <p>If content is included in an item from another topic, then the
* topic name with which the additional content was added is in the
included list of topics.</p>
*
* <p>This is distinct from {@link #getTopics()}, which is not modified
* by the included topic names.</p>
*
* @return A list of included topics.
*/
List<String> getIncluded();
/**
* <p>
* Aggregation patterns coalesce all the topics that they match into a seamless logical

View File

@@ -1,13 +1,15 @@
package io.nosqlbench.nb.api.markdown.types;
import io.nosqlbench.nb.api.markdown.aggregator.CompositeMarkdownInfo;
import io.nosqlbench.nb.api.markdown.types.FrontMatterInfo;
import org.jetbrains.annotations.NotNull;
import java.nio.file.Path;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
public interface MarkdownInfo {
public interface MarkdownInfo extends Comparable<MarkdownInfo> {
Path getPath();
@@ -18,27 +20,53 @@ public interface MarkdownInfo {
boolean hasAggregations();
default boolean hasTopicGlobs() {
return getTopicGlobs().size()>0;
return getTopicGlobs().size() > 0;
}
default List<Pattern> getTopicGlobs() {
return getFrontmatter().getTopics().stream()
.filter(t -> t.startsWith("^") || t.endsWith("$") || t.contains(".*") || t.contains(".+"))
.map(Pattern::compile)
.collect(Collectors.toList());
List<Pattern> pattern = getFrontmatter().getTopics().stream()
.filter(t -> t.startsWith("^") || t.endsWith("$") || t.contains(".*") || t.contains(".+"))
.map(Pattern::compile)
.collect(Collectors.toList());
return pattern;
}
default List<String> getTopics() {
return getFrontmatter().getTopics().stream()
.filter(t -> !t.startsWith("^") && !t.endsWith("$") && !t.contains(".*") && !t.contains(".+"))
.collect(Collectors.toList());
.filter(t -> !t.startsWith("^") && !t.endsWith("$") && !t.contains(".*") && !t.contains(".+"))
.collect(Collectors.toList());
}
default boolean hasAggregators() {
return getFrontmatter().getAggregations().size()>0;
default List<String> getIncluded() {
return getFrontmatter().getIncluded();
}
default boolean hasAggregators() {
return getFrontmatter().getAggregations().size() > 0;
}
default List<Pattern> getAggregators() {
return getFrontmatter().getAggregations();
}
MarkdownInfo withTopics(List<String> assigning);
/**
 * Order MarkdownInfo items by front matter weight, then title, then body
 * text, so aggregated content renders in a stable, author-controlled order.
 *
 * @param o the other item to compare against
 * @return negative, zero, or positive per the Comparable contract
 */
default int compareTo(@NotNull MarkdownInfo o) {
    // Integer.compare avoids the overflow that raw subtraction can hit for
    // extreme weight values.
    int diff = Integer.compare(getFrontmatter().getWeight(), o.getFrontmatter().getWeight());
    if (diff != 0) return diff;
    diff = getFrontmatter().getTitle().compareTo(o.getFrontmatter().getTitle());
    if (diff != 0) return diff;
    return getBody().compareTo(o.getBody());
}
/**
 * True when at least one of this item's literal (non-glob) topics fully
 * matches the given pattern.
 */
default boolean matchesTopicPattern(Pattern pattern) {
    for (String topic : getTopics()) {
        if (pattern.matcher(topic).matches()) {
            return true;
        }
    }
    return false;
}
MarkdownInfo withIncluded(List<String> included);
}

View File

@@ -0,0 +1,10 @@
---
title: srcmain-Entry 1-1
weight: 37
topics: entries/entry2-1, related-topic-for-entry1-1
aggregate: topic
---
# Title Heading for srcmain-Entry 1-1

View File

@@ -0,0 +1,8 @@
---
title: srcmain-Entry 2-1-L
weight: 39
---
# Title Heading for srcmain-Entry 2-1-L

View File

@@ -1,19 +1,87 @@
package io.nosqlbench.nb.api.markdown.aggregator;
import io.nosqlbench.nb.api.content.PathContent;
import io.nosqlbench.nb.api.markdown.types.MarkdownInfo;
import org.junit.Test;
import java.util.List;
import java.net.URL;
import java.nio.file.*;
import java.util.*;
import java.util.stream.Collectors;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.from;
public class MarkdownDocsTest {
/**
 * Verify that the processed doc set contains exactly the same file paths as
 * the raw "docs-for-testing-logical" fixture tree: nothing missing from
 * processing, and nothing extra introduced by it.
 */
@Test
public void testLoadMarkdown() {
    // NOTE(review): 'all' is never used and duplicates 'processed' below —
    // looks like leftover from an earlier revision; consider removing.
    List<MarkdownInfo> all = MarkdownDocs.findAll();
    // assertThat(all).hasSizeGreaterThan(0);
    List<MarkdownInfo> processed = MarkdownDocs.findAll();
    List<MarkdownInfo> expected = fromRaw("docs-for-testing-logical");
    // Index both sides by path for set-difference comparison.
    Map<Path, MarkdownInfo> processedPaths = processed.stream().collect(Collectors.toMap(MarkdownInfo::getPath, v -> v));
    Map<Path, MarkdownInfo> expectedPaths = expected.stream().collect(Collectors.toMap(MarkdownInfo::getPath, v -> v));
    for (Path path : expectedPaths.keySet()) {
        System.out.println("expected path:" + path.toString());
    }
    // Paths expected but absent from the processed output.
    Set<Path> missingPaths = new HashSet<>();
    for (Path path : expectedPaths.keySet()) {
        if (!processedPaths.containsKey(path)) {
            missingPaths.add(path);
        }
    }
    // Paths present in the processed output but not expected.
    Set<Path> extraPaths = new HashSet<>();
    for (Path path : processedPaths.keySet()) {
        if (!expectedPaths.containsKey(path)) {
            extraPaths.add(path);
        }
    }
    // NOTE(review): this loop has no effect — likely debugging residue.
    for (MarkdownInfo markdownInfo : processed) {
        Path path = markdownInfo.getPath();
    }
    assertThat(missingPaths).isEmpty();
    assertThat(extraPaths).isEmpty();
}
/**
 * Load and parse every markdown file found under the named classpath
 * directory, returning the entries in their natural (weight/title/body)
 * sorted order.
 *
 * Fixes a latent bug: the parentPath parameter was previously ignored in
 * favor of a hard-coded "docs-for-testing-logical" resource path.
 *
 * @param parentPath classpath-relative directory to scan
 * @return parsed markdown entries, sorted
 */
private List<MarkdownInfo> fromRaw(String parentPath) {
    List<MarkdownInfo> fromraw = new ArrayList<>();
    List<Path> postpaths = getSubPaths(parentPath); // was hard-coded, ignoring the parameter
    for (Path postpath : postpaths) {
        PathContent content = new PathContent(postpath);
        ParsedMarkdown parsedMarkdown = new ParsedMarkdown(content);
        fromraw.add(parsedMarkdown);
    }
    Collections.sort(fromraw);
    return fromraw;
}
/**
 * Enumerate every regular file (following symlinks) under each classpath
 * resource directory matching resourcePath.
 *
 * Fixes a resource leak: the Files.walk stream was never closed, leaving
 * directory handles open; it is now managed with try-with-resources.
 *
 * @param resourcePath classpath-relative directory name to resolve
 * @return all non-directory paths found under the matching resources
 * @throws RuntimeException wrapping any I/O or URI resolution failure
 */
private static List<Path> getSubPaths(String resourcePath) {
    List<Path> subpaths = new ArrayList<>();
    try {
        Enumeration<URL> resources =
            MarkdownDocsTest.class.getClassLoader().getResources(resourcePath);
        while (resources.hasMoreElements()) {
            URL url = resources.nextElement();
            Path path = Paths.get(url.toURI());
            // Files.walk must be closed to release directory handles.
            try (java.util.stream.Stream<Path> walk =
                     Files.walk(path, FileVisitOption.FOLLOW_LINKS)) {
                walk.filter(p -> !Files.isDirectory(p, LinkOption.NOFOLLOW_LINKS))
                    .forEach(subpaths::add);
            }
        }
    } catch (Exception e) {
        // Preserve the cause while adding context about which path failed.
        throw new RuntimeException("Unable to enumerate subpaths of '" + resourcePath + "'", e);
    }
    return subpaths;
}
}