mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
merge with version-tracking
This commit is contained in:
@@ -10,12 +10,9 @@
|
||||
:license: BSD, see LICENSE for details.
|
||||
"""
|
||||
from uuid import uuid4
|
||||
from operator import itemgetter
|
||||
from collections import defaultdict
|
||||
from itertools import product
|
||||
try:
|
||||
from itertools import izip_longest as zip_longest
|
||||
except ImportError:
|
||||
from itertools import zip_longest
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
from sphinx.util import PeekableIterator
|
||||
|
||||
@@ -34,19 +31,6 @@ def add_uids(doctree, condition):
|
||||
node.uid = uuid4().hex
|
||||
yield node
|
||||
|
||||
def merge_node(old, new):
|
||||
"""
|
||||
Merges the `old` node with the `new` one, if it's successful the `new` node
|
||||
get's the unique identifier of the `new` one and ``True`` is returned. If
|
||||
the merge is unsuccesful ``False`` is returned.
|
||||
"""
|
||||
equals, changed, replaced = make_diff(old.rawsource,
|
||||
new.rawsource)
|
||||
if equals or changed:
|
||||
new.uid = old.uid
|
||||
return True
|
||||
return False
|
||||
|
||||
def merge_doctrees(old, new, condition):
|
||||
"""
|
||||
Merges the `old` doctree with the `new` one while looking at nodes matching
|
||||
@@ -58,78 +42,34 @@ def merge_doctrees(old, new, condition):
|
||||
:param condition:
|
||||
A callable which returns either ``True`` or ``False`` for a given node.
|
||||
"""
|
||||
old_iter = PeekableIterator(old.traverse(condition))
|
||||
new_iter = PeekableIterator(new.traverse(condition))
|
||||
old_nodes = []
|
||||
new_nodes = []
|
||||
for old_node, new_node in zip_longest(old_iter, new_iter):
|
||||
if old_node is None:
|
||||
new_nodes.append(new_node)
|
||||
old_nodes = old.traverse(condition)
|
||||
new_nodes = new.traverse(condition)
|
||||
ratios = defaultdict(list)
|
||||
for old_node, new_node in product(old_nodes, new_nodes):
|
||||
ratios[old_node, new_node] = get_ratio(old_node.rawsource,
|
||||
new_node.rawsource)
|
||||
ratios = sorted(ratios.iteritems(), key=itemgetter(1))
|
||||
seen = set()
|
||||
for (old_node, new_node), ratio in ratios:
|
||||
if new_node in seen:
|
||||
continue
|
||||
if new_node is None:
|
||||
old_nodes.append(old_node)
|
||||
continue
|
||||
if not merge_node(old_node, new_node):
|
||||
if old_nodes:
|
||||
for i, very_old_node in enumerate(old_nodes):
|
||||
if merge_node(very_old_node, new_node):
|
||||
del old_nodes[i]
|
||||
# If the last identified node which has not matched the
|
||||
# unidentified node matches the current one, we have to
|
||||
# assume that the last unidentified one has been
|
||||
# inserted.
|
||||
#
|
||||
# As the required time multiplies with each insert, we
|
||||
# want to avoid that by checking if the next
|
||||
# unidentified node matches the current identified one
|
||||
# and if so we make a shift.
|
||||
if i == len(old_nodes):
|
||||
next_new_node = new_iter.next()
|
||||
if not merge_node(old_node, next_new_node):
|
||||
new_iter.push(next_new_node)
|
||||
break
|
||||
else:
|
||||
old_nodes.append(old_node)
|
||||
new_nodes.append(new_node)
|
||||
for (i, new_node), (j, old_node) in product(enumerate(new_nodes),
|
||||
enumerate(old_nodes)):
|
||||
if merge_node(old_node, new_node):
|
||||
del new_nodes[i]
|
||||
del old_nodes[j]
|
||||
for node in new_nodes:
|
||||
node.uid = uuid4().hex
|
||||
# Yielding the new nodes here makes it possible to use this generator
|
||||
# like add_uids
|
||||
yield node
|
||||
else:
|
||||
seen.add(new_node)
|
||||
if ratio < 65:
|
||||
new_node.uid = old_node.uid
|
||||
else:
|
||||
new_node.uid = uuid4().hex
|
||||
yield new_node
|
||||
|
||||
def make_diff(old, new):
|
||||
def get_ratio(old, new):
|
||||
"""
|
||||
Takes two strings `old` and `new` and returns a :class:`tuple` of boolean
|
||||
values ``(equals, changed, replaced)``.
|
||||
|
||||
equals
|
||||
|
||||
``True`` if the `old` string and the `new` one are equal.
|
||||
|
||||
changed
|
||||
|
||||
``True`` if the `new` string is a changed version of the `old` one.
|
||||
|
||||
replaced
|
||||
|
||||
``True`` if the `new` string and the `old` string are totally
|
||||
different.
|
||||
|
||||
.. note:: This assumes the two strings are human readable text or at least
|
||||
something very similar to that, otherwise it can not detect if
|
||||
the string has been changed or replaced. In any case the
|
||||
detection should not be considered reliable.
|
||||
Returns a "similiarity ratio" representing the similarity between the two
|
||||
strings where 0 is equal and anything above less than equal.
|
||||
"""
|
||||
if old == new:
|
||||
return True, False, False
|
||||
if new in old or levenshtein_distance(old, new) / (len(old) / 100.0) < 70:
|
||||
return False, True, False
|
||||
return False, False, True
|
||||
return 0
|
||||
ratio = levenshtein_distance(old, new) / (len(old) / 100.0)
|
||||
return ratio
|
||||
|
||||
def levenshtein_distance(a, b):
|
||||
if len(a) < len(b):
|
||||
|
||||
@@ -88,7 +88,7 @@ class path(str):
|
||||
"""
|
||||
shutil.rmtree(self, ignore_errors=ignore_errors, onerror=onerror)
|
||||
|
||||
def copytree(self, destination, symlinks=False, ignore=None):
|
||||
def copytree(self, destination, symlinks=False):
|
||||
"""
|
||||
Recursively copy a directory to the given `destination`. If the given
|
||||
`destination` does not exist it will be created.
|
||||
@@ -102,7 +102,7 @@ class path(str):
|
||||
A callback which gets called with the path of the directory being
|
||||
copied and a list of paths as returned by :func:`os.listdir`.
|
||||
"""
|
||||
shutil.copytree(self, destination, symlinks=symlinks, ignore=ignore)
|
||||
shutil.copytree(self, destination, symlinks=symlinks)
|
||||
|
||||
def movetree(self, destination):
|
||||
"""
|
||||
|
||||
@@ -10,3 +10,4 @@ Versioning Stuff
|
||||
deleted_end
|
||||
modified
|
||||
insert_beginning
|
||||
insert_similar
|
||||
|
||||
17
tests/root/versioning/insert_similar.txt
Normal file
17
tests/root/versioning/insert_similar.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
Versioning test text
|
||||
====================
|
||||
|
||||
So the thing is I need some kind of text - not the lorem ipsum stuff, that
|
||||
doesn't work out that well - to test :mod:`sphinx.versioning`. I couldn't find
|
||||
a good text for that under public domain so I thought the easiest solution is
|
||||
to write one by myself. It's not really interesting, in fact it is *really*
|
||||
boring.
|
||||
|
||||
Anyway I need more
|
||||
|
||||
Anyway I need more than one paragraph, at least three for the original
|
||||
document, I think, and another one for two different ones.
|
||||
|
||||
So the previous paragraph was a bit short because I don't want to test this
|
||||
only on long paragraphs, I hope it was short enough to cover most stuff.
|
||||
Anyway I see this lacks ``some markup`` so I have to add a **little** bit.
|
||||
@@ -111,3 +111,12 @@ def test_insert_beginning():
|
||||
assert len(uids) == 4
|
||||
assert original_uids == uids[1:]
|
||||
assert original_uids[0] != uids[0]
|
||||
|
||||
def test_insert_similar():
|
||||
insert_similar = doctrees['versioning/insert_similar']
|
||||
new_nodes = list(merge_doctrees(original, insert_similar, is_paragraph))
|
||||
uids = [n.uid for n in insert_similar.traverse(is_paragraph)]
|
||||
assert len(new_nodes) == 1
|
||||
assert new_nodes[0].rawsource == u'Anyway I need more'
|
||||
assert original_uids[0] == uids[0]
|
||||
assert original_uids[1:] == uids[2:]
|
||||
|
||||
Reference in New Issue
Block a user