diff --git a/sphinx/util/__init__.py b/sphinx/util/__init__.py index ec48009f4..a434f3a82 100644 --- a/sphinx/util/__init__.py +++ b/sphinx/util/__init__.py @@ -19,6 +19,7 @@ import posixpath import traceback from os import path from codecs import open +from collections import deque import docutils from docutils.utils import relative_path @@ -297,3 +298,38 @@ def format_exception_cut_frames(x=1): res += tbres[-x:] res += traceback.format_exception_only(typ, val) return ''.join(res) + +class PeekableIterator(object): + """ + An iterator which wraps any iterable and makes it possible to peek to see + what's the next item. + """ + def __init__(self, iterable): + self.remaining = deque() + self._iterator = iter(iterable) + + def __iter__(self): + return self + + def next(self): + """ + Returns the next item from the iterator. + """ + if self.remaining: + return self.remaining.popleft() + return self._iterator.next() + + def push(self, item): + """ + Pushes the `item` on the internal stack, it will be returned on the + next :meth:`next` call. + """ + self.remaining.append(item) + + def peek(self): + """ + Returns the next item without changing the state of the iterator. + """ + item = self.next() + self.push(item) + return item diff --git a/sphinx/versioning.py b/sphinx/versioning.py new file mode 100644 index 000000000..42b8bd515 --- /dev/null +++ b/sphinx/versioning.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- +""" + sphinx.versioning + ~~~~~~~~~~~~~~~~~ + + Implements the low-level algorithms Sphinx uses for the versioning of + doctrees. + + :copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" +from uuid import uuid4 +from itertools import izip_longest, product +from difflib import SequenceMatcher + +from sphinx.util import PeekableIterator + +def add_uids(doctree, condition): + """ + Adds a unique id to every node in the `doctree` which matches the condition + and yields it. + + :param doctree: + A :class:`docutils.nodes.document` instance. + + :param condition: + A callable which returns either ``True`` or ``False`` for a given node. + """ + for node in doctree.traverse(condition): + node.uid = uuid4().hex + yield node + +def merge_node(old, new): + """ + Merges the `old` node with the `new` one, if it's successful the `new` node + get's the unique identifier of the `new` one and ``True`` is returned. If + the merge is unsuccesful ``False`` is returned. + """ + equals, changed, replaced = make_diff(old.rawsource, + new.rawsource) + if equals or changed: + new.uid = old.uid + return True + return False + +def merge_doctrees(old, new, condition): + """ + Merges the `old` doctree with the `new` one while looking at nodes matching + the `condition`. + + Each node which replaces another one or has been added to the `new` doctree + will be yielded. + + :param condition: + A callable which returns either ``True`` or ``False`` for a given node. + """ + old_iter = PeekableIterator(old.traverse(condition)) + new_iter = PeekableIterator(new.traverse(condition)) + old_nodes = [] + new_nodes = [] + for old_node, new_node in izip_longest(old_iter, new_iter): + if old_node is None: + new_nodes.append(new_node) + continue + if new_node is None: + old_nodes.append(old_node) + continue + if not merge_node(old_node, new_node): + if old_nodes: + for i, very_old_node in enumerate(old_nodes): + if merge_node(very_old_node, new_node): + del old_nodes[i] + # If the last identified node which has not matched the + # unidentified node matches the current one, we have to + # assume that the last unidentified one has been + # inserted. + # + # As the required time multiplies with each insert, we + # want to avoid that by checking if the next + # unidentified node matches the current identified one + # and if so we make a shift. + if i == len(old_nodes): + next_new_node = new_iter.next() + if not merge_node(old_node, next_new_node): + new_iter.push(next_new_node) + break + else: + old_nodes.append(old_node) + new_nodes.append(new_node) + for (i, new_node), (j, old_node) in product(enumerate(new_nodes), + enumerate(old_nodes)): + if merge_node(old_node, new_node): + del new_nodes[i] + del old_nodes[j] + for node in new_nodes: + node.uid = uuid4().hex + # Yielding the new nodes here makes it possible to use this generator + # like add_uids + yield node + +def make_diff(old, new): + """ + Takes two strings `old` and `new` and returns a :class:`tuple` of boolean + values ``(equals, changed, replaced)``. + + equals + + ``True`` if the `old` string and the `new` one are equal. + + changed + + ``True`` if the `new` string is a changed version of the `old` one. + + replaced + + ``True`` if the `new` string and the `old` string are totally + different. + + .. note:: This assumes the two strings are human readable text or at least + something very similar to that, otherwise it can not detect if + the string has been changed or replaced. In any case the + detection should not be considered reliable. + """ + if old == new: + return True, False, False + if new in old or levenshtein_distance(old, new) / (len(old) / 100.0) < 70: + return False, True, False + return False, False, True + +def levenshtein_distance(a, b): + if len(a) < len(b): + a, b = b, a + if not a: + return len(b) + previous_row = xrange(len(b) + 1) + for i, column1 in enumerate(a): + current_row = [i + 1] + for j, column2 in enumerate(b): + insertions = previous_row[j + 1] + 1 + deletions = current_row[j] + 1 + substitutions = previous_row[j] + (column1 != column2) + current_row.append(min(insertions, deletions, substitutions)) + previous_row = current_row + return previous_row[-1] diff --git a/tests/root/contents.txt b/tests/root/contents.txt index e052e04b2..280953b46 100644 --- a/tests/root/contents.txt +++ b/tests/root/contents.txt @@ -26,6 +26,7 @@ Contents: extensions doctest extensions + versioning/index Python diff --git a/tests/root/versioning/added.txt b/tests/root/versioning/added.txt new file mode 100644 index 000000000..22a70739c --- /dev/null +++ b/tests/root/versioning/added.txt @@ -0,0 +1,20 @@ +Versioning test text +==================== + +So the thing is I need some kind of text - not the lorem ipsum stuff, that +doesn't work out that well - to test :mod:`sphinx.versioning`. I couldn't find +a good text for that under public domain so I thought the easiest solution is +to write one by myself. It's not really interesting, in fact it is *really* +boring. + +Anyway I need more than one paragraph, at least three for the original +document, I think, and another one for two different ones. + +So the previous paragraph was a bit short because I don't want to test this +only on long paragraphs, I hope it was short enough to cover most stuff. +Anyway I see this lacks ``some markup`` so I have to add a **little** bit. + +Woho another paragraph, if this test fails we really have a problem because +this means the algorithm itself fails and not the diffing algorithm which is +pretty much doomed anyway as it probably fails for some kind of language +respecting certain nodes anyway but we can't work around that anyway. diff --git a/tests/root/versioning/deleted.txt b/tests/root/versioning/deleted.txt new file mode 100644 index 000000000..a1a9c4c91 --- /dev/null +++ b/tests/root/versioning/deleted.txt @@ -0,0 +1,12 @@ +Versioning test text +==================== + +So the thing is I need some kind of text - not the lorem ipsum stuff, that +doesn't work out that well - to test :mod:`sphinx.versioning`. I couldn't find +a good text for that under public domain so I thought the easiest solution is +to write one by myself. It's not really interesting, in fact it is *really* +boring. + +So the previous paragraph was a bit short because I don't want to test this +only on long paragraphs, I hope it was short enough to cover most stuff. +Anyway I see this lacks ``some markup`` so I have to add a **little** bit. diff --git a/tests/root/versioning/deleted_end.txt b/tests/root/versioning/deleted_end.txt new file mode 100644 index 000000000..f30e63007 --- /dev/null +++ b/tests/root/versioning/deleted_end.txt @@ -0,0 +1,11 @@ +Versioning test text +==================== + +So the thing is I need some kind of text - not the lorem ipsum stuff, that +doesn't work out that well - to test :mod:`sphinx.versioning`. I couldn't find +a good text for that under public domain so I thought the easiest solution is +to write one by myself. It's not really interesting, in fact it is *really* +boring. + +Anyway I need more than one paragraph, at least three for the original +document, I think, and another one for two different ones. diff --git a/tests/root/versioning/index.txt b/tests/root/versioning/index.txt new file mode 100644 index 000000000..234e223f1 --- /dev/null +++ b/tests/root/versioning/index.txt @@ -0,0 +1,11 @@ +Versioning Stuff +================ + +.. toctree:: + + original + added + insert + deleted + deleted_end + modified diff --git a/tests/root/versioning/insert.txt b/tests/root/versioning/insert.txt new file mode 100644 index 000000000..1c157cc90 --- /dev/null +++ b/tests/root/versioning/insert.txt @@ -0,0 +1,18 @@ +Versioning test text +==================== + +So the thing is I need some kind of text - not the lorem ipsum stuff, that +doesn't work out that well - to test :mod:`sphinx.versioning`. I couldn't find +a good text for that under public domain so I thought the easiest solution is +to write one by myself. It's not really interesting, in fact it is *really* +boring. + +So this paragraph is just something I inserted in this document to test if our +algorithm notices that this paragraph is not just a changed version. + +Anyway I need more than one paragraph, at least three for the original +document, I think, and another one for two different ones. + +So the previous paragraph was a bit short because I don't want to test this +only on long paragraphs, I hope it was short enough to cover most stuff. +Anyway I see this lacks ``some markup`` so I have to add a **little** bit. diff --git a/tests/root/versioning/modified.txt b/tests/root/versioning/modified.txt new file mode 100644 index 000000000..49cdad935 --- /dev/null +++ b/tests/root/versioning/modified.txt @@ -0,0 +1,17 @@ +Versioning test text +==================== + +So the thing is I need some kind of text - not the lorem ipsum stuff, that +doesn't work out that well - to test :mod:`sphinx.versioning`. I couldn't find +a good text for that under public domain so I thought the easiest solution is +to write one by myself. Inserting something silly as a modification, btw. have +you seen the typo below?. It's not really interesting, in fact it is *really* +boring. + +Anyway I need more than one paragraph, at least three for the original +document, I think, and another one for two different ones. So this is a small +modification by adding something to this paragraph. + +So the previous paragraph was a bit short because I don't want to test this +only on long paragraphs, I hoep it was short enough to cover most stuff. +Anyway I see this lacks ``some markup`` so I have to add a **little** bit. diff --git a/tests/root/versioning/original.txt b/tests/root/versioning/original.txt new file mode 100644 index 000000000..b3fe06094 --- /dev/null +++ b/tests/root/versioning/original.txt @@ -0,0 +1,15 @@ +Versioning test text +==================== + +So the thing is I need some kind of text - not the lorem ipsum stuff, that +doesn't work out that well - to test :mod:`sphinx.versioning`. I couldn't find +a good text for that under public domain so I thought the easiest solution is +to write one by myself. It's not really interesting, in fact it is *really* +boring. + +Anyway I need more than one paragraph, at least three for the original +document, I think, and another one for two different ones. + +So the previous paragraph was a bit short because I don't want to test this +only on long paragraphs, I hope it was short enough to cover most stuff. +Anyway I see this lacks ``some markup`` so I have to add a **little** bit. diff --git a/tests/test_versioning.py b/tests/test_versioning.py new file mode 100644 index 000000000..77306580c --- /dev/null +++ b/tests/test_versioning.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +""" + test_versioning + ~~~~~~~~~~~~~~~ + + Test the versioning implementation. + + :copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" +from util import * + +from docutils.statemachine import ViewList + +from sphinx.versioning import make_diff, add_uids, merge_doctrees + +def setup_module(): + global app, original, original_uids + app = TestApp() + app.builder.env.app = app + app.connect('doctree-resolved', on_doctree_resolved) + app.build() + original = doctrees['versioning/original'] + original_uids = [n.uid for n in add_uids(original, is_paragraph)] + +def teardown_module(): + app.cleanup() + (test_root / '_build').rmtree(True) + +doctrees = {} + +def on_doctree_resolved(app, doctree, docname): + doctrees[docname] = doctree + +def test_make_diff(): + tests = [ + (('aaa', 'aaa'), (True, False, False)), + (('aaa', 'aab'), (False, True, False)), + (('aaa', 'abb'), (False, True, False)), + (('aaa', 'aba'), (False, True, False)), + (('aaa', 'baa'), (False, True, False)), + (('aaa', 'bbb'), (False, False, True)) + ] + for args, result in tests: + assert make_diff(*args) == result + +def is_paragraph(node): + return node.__class__.__name__ == 'paragraph' + +def test_add_uids(): + assert len(original_uids) == 3 + +def test_modified(): + modified = doctrees['versioning/modified'] + new_nodes = list(merge_doctrees(original, modified, is_paragraph)) + uids = [n.uid for n in modified.traverse(is_paragraph)] + assert not new_nodes + assert original_uids == uids + +def test_added(): + added = doctrees['versioning/added'] + new_nodes = list(merge_doctrees(original, added, is_paragraph)) + uids = [n.uid for n in added.traverse(is_paragraph)] + assert len(new_nodes) == 1 + assert original_uids == uids[:-1] + +def test_deleted(): + deleted = doctrees['versioning/deleted'] + new_nodes = list(merge_doctrees(original, deleted, is_paragraph)) + uids = [n.uid for n in deleted.traverse(is_paragraph)] + assert not new_nodes + assert original_uids[::2] == uids + +def test_deleted_end(): + deleted_end = doctrees['versioning/deleted_end'] + new_nodes = list(merge_doctrees(original, deleted_end, is_paragraph)) + uids = [n.uid for n in deleted_end.traverse(is_paragraph)] + assert not new_nodes + assert original_uids[:-1] == uids + +def test_insert(): + insert = doctrees['versioning/insert'] + new_nodes = list(merge_doctrees(original, insert, is_paragraph)) + uids = [n.uid for n in insert.traverse(is_paragraph)] + assert len(new_nodes) == 1 + assert original_uids[0] == uids[0] + assert original_uids[1:] == uids[2:]