Fix: the text builder did not respect wide/fullwidth characters when wrapping text with textwrap.

This commit is contained in:
Takayuki Shimizukawa 2013-02-07 03:34:51 +00:00
parent b8296ad11e
commit 00fa1b2505
3 changed files with 116 additions and 1 deletions

View File

@ -1,7 +1,8 @@
Release 1.2 (in development)
============================
* Fix text builder did not respect wide/fullwidth charactors.
* Fix: the text builder did not respect wide/fullwidth characters when computing
  the title underline width, the table layout width and the text wrap width.
* #1062: sphinx.ext.autodoc use __init__ method signature for class signature.

View File

@ -11,6 +11,7 @@
import os
import re
import textwrap
from itertools import groupby
from docutils import nodes, writers
from docutils.utils import column_width
@ -28,6 +29,98 @@ class TextWrapper(textwrap.TextWrapper):
r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
def _wrap_chunks(self, chunks):
    """_wrap_chunks(chunks : [string]) -> [string]

    Assemble ``chunks`` into output lines that fit within ``self.width``.

    The stock implementation measures chunks with len(); this variant
    measures them with column_width() so that wide/fullwidth characters
    are counted by the columns they occupy.  ``chunks`` is reversed and
    consumed in place.  Raises ValueError when ``self.width`` is not
    positive.
    """
    # py25 compat: older TextWrapper objects have no drop_whitespace.
    strip_blank = getattr(self, 'drop_whitespace', True)
    wrapped = []
    if self.width <= 0:
        raise ValueError("invalid width %r (must be > 0)" % self.width)

    # Work on a stack: the next chunk to place is always chunks[-1].
    chunks.reverse()

    while chunks:
        pieces = []
        used = 0

        # The first line gets the initial indent, later ones the
        # subsequent indent; the indent eats into the usable width.
        prefix = self.subsequent_indent if wrapped else self.initial_indent
        room = self.width - column_width(prefix)

        # Never start a continuation line with whitespace.
        if strip_blank and wrapped and chunks[-1].strip() == '':
            del chunks[-1]

        # Take chunks for as long as they fit on the current line.
        while chunks:
            piece_width = column_width(chunks[-1])
            if used + piece_width > room:
                break
            pieces.append(chunks.pop())
            used += piece_width

        # The next chunk would not fit on any line: let the long-word
        # handler decide how much of it goes onto this one.
        if chunks and column_width(chunks[-1]) > room:
            self._handle_long_word(chunks, pieces, used, room)

        # Do not end a line with whitespace either.
        if strip_blank and pieces and pieces[-1].strip() == '':
            del pieces[-1]

        if pieces:
            wrapped.append(prefix + ''.join(pieces))

    return wrapped
def _break_word(self, word, space_left):
    """_break_word(word : string, space_left : int) -> (string, string)

    Split ``word`` into ``(head, tail)`` by display width instead of
    len(word): ``head`` is the longest prefix whose column_width() total
    does not exceed ``space_left`` and ``tail`` is the rest.  ``head`` is
    empty when not even the first character fits; ``tail`` is empty when
    the whole word fits.
    """
    total = 0
    for i, c in enumerate(word):
        total += column_width(c)
        if total > space_left:
            # word[i] is the first character that no longer fits, so
            # everything before index i stays on the current line.
            return word[:i], word[i:]
    return word, ''


def _split(self, text):
    """_split(text : string) -> [string]

    Split ``text`` into wrappable chunks.

    The base class splits only by 'wordsep_re'.  Here each chunk it
    produces is grouped into runs of characters with equal column width:
    runs of width-1 characters are passed through the base splitter
    again, and every character of any other width becomes a chunk of
    its own.
    """
    base_split = textwrap.TextWrapper._split
    chunks = []
    for chunk in base_split(self, text):
        for w, g in groupby(chunk, column_width):
            if w == 1:
                chunks.extend(base_split(self, ''.join(g)))
            else:
                # One chunk per character.
                chunks.extend(g)
    return chunks


def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
    """_handle_long_word(chunks : [string],
                         cur_line : [string],
                         cur_len : int, width : int)

    Deal with a chunk that is wider than a whole line, using
    self._break_word() (display width) instead of a plain slice.

    With ``break_long_words`` set, as much of the word as fits in the
    remaining columns is moved onto ``cur_line``; if the line is already
    full the word is left for the next line.  On an empty line at least
    one character is always consumed, so the caller's loop cannot stall.
    Without ``break_long_words`` the whole word is placed on the line,
    but only when the line is still empty.
    """
    # Columns still free on this line; zero or negative when it is full.
    space_left = width - cur_len

    if self.break_long_words:
        if space_left < 1 and cur_line:
            # Nothing more fits here; the caller emits the current line
            # and retries the word at the start of the next one.
            return

        word = reversed_chunks[-1]
        head, tail = self._break_word(word, max(space_left, 1))
        if not head and not cur_line:
            # Even a single character is too wide for an empty line.
            # Emit it anyway: overflowing is better than never finishing.
            head, tail = word[:1], word[1:]

        if head:
            cur_line.append(head)
        if tail:
            reversed_chunks[-1] = tail
        else:
            # The word was consumed completely; do not leave an empty
            # chunk behind on the stack.
            reversed_chunks.pop()
    elif not cur_line:
        cur_line.append(reversed_chunks.pop())
# Upper bound, in display columns, that the test suite checks built text
# lines against.  NOTE(review): presumably the wrap width handed to the
# text wrapper -- confirm against the writer code outside this view.
MAXWIDTH = 70
# NOTE(review): presumably the standard indentation step, in columns; its
# use is not visible here -- confirm.
STDINDENT = 3

View File

@ -12,6 +12,7 @@
from textwrap import dedent
from docutils.utils import column_width
from sphinx.writers.text import MAXWIDTH
from util import *
@ -63,3 +64,23 @@ def test_multibyte_table(app):
lines = [line.strip() for line in result.splitlines() if line.strip()]
line_widths = [column_width(line) for line in lines]
assert len(set(line_widths)) == 1 # same widths
@with_text_app()
def test_multibyte_maxwidth(app):
    """Built text lines stay below MAXWIDTH columns for single-byte,
    multibyte and mixed paragraphs."""
    narrow_word = u'abc' #length=3
    wide_word = u'\u65e5\u672c\u8a9e' #length=3

    # Three paragraphs, each with enough words to need wrapping.
    narrow_para = ' '.join([narrow_word] * (MAXWIDTH // 3))
    wide_para = ' '.join([wide_word] * (MAXWIDTH // 3))
    mixed_para = ' '.join([narrow_word, wide_word] * (MAXWIDTH // 6))
    source = u'\n\n'.join((narrow_para, wide_para, mixed_para))

    (app.srcdir / 'contents.rst').write_text(source, encoding='utf-8')
    app.builder.build_all()
    built = (app.outdir / 'contents.txt').text(encoding='utf-8')

    # Measure every non-blank output line by display width, not len().
    widths = []
    for raw in built.splitlines():
        stripped = raw.strip()
        if stripped:
            widths.append(column_width(stripped))
    assert max(widths) < MAXWIDTH