From 00fa1b2505adbaec66496ec20fa5952da976496d Mon Sep 17 00:00:00 2001
From: Takayuki Shimizukawa
Date: Thu, 7 Feb 2013 03:34:51 +0000
Subject: [PATCH] Fix text builder did not respect wide/fullwidth characters for textwrap.

---
Review notes (text between "---" and the first "diff --git" is not part of
the commit):

* TextWrapper._break_word: the split index was `i-1`.  That left every
  broken long word one column short of the available space, returned an
  empty head when the overflow was detected at index 1 (so _wrap_chunks()
  never consumed the chunk and looped forever, e.g. width=1 with the word
  'ab'), and sliced with -1 when the overflow was detected at index 0.
  It now splits at `max(i, 1)`: the head is the longest prefix whose column
  width fits in space_left, but it is never empty, which keeps textwrap's
  rule that _handle_long_word() strips at least one character per pass.
* Docstring grammar in the added methods was tidied.  No hunk line counts
  changed, so the hunk headers and the diffstat remain accurate; the blob
  id on the sphinx/writers/text.py "index" line predates these tweaks.

 CHANGES                  |  3 +-
 sphinx/writers/text.py   | 93 ++++++++++++++++++++++++++++++++++++++++
 tests/test_build_text.py | 21 +++++++++
 3 files changed, 116 insertions(+), 1 deletion(-)

diff --git a/CHANGES b/CHANGES
index 8538e0519..33a57e406 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,7 +1,8 @@
 Release 1.2 (in development)
 ============================
 
-* Fix text builder did not respect wide/fullwidth charactors.
+* Fix text builder did not respect wide/fullwidth characters:
+  title underline width, table layout width and text wrap width.
 
 * #1062: sphinx.ext.autodoc use __init__ method signature for class signature.
 
diff --git a/sphinx/writers/text.py b/sphinx/writers/text.py
index f42d637a3..1f90497e7 100644
--- a/sphinx/writers/text.py
+++ b/sphinx/writers/text.py
@@ -11,6 +11,7 @@
 import os
 import re
 import textwrap
+from itertools import groupby
 
 from docutils import nodes, writers
 from docutils.utils import column_width
@@ -28,6 +29,98 @@ class TextWrapper(textwrap.TextWrapper):
         r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'   # hyphenated words
         r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
 
+    def _wrap_chunks(self, chunks):
+        """_wrap_chunks(chunks : [string]) -> [string]
+
+        The original _wrap_chunks uses len() to calculate width.
+        This method respects wide/fullwidth characters for width adjustment.
+        """
+        drop_whitespace = getattr(self, 'drop_whitespace', True)  # py25 compat
+        lines = []
+        if self.width <= 0:
+            raise ValueError("invalid width %r (must be > 0)" % self.width)
+
+        chunks.reverse()
+
+        while chunks:
+            cur_line = []
+            cur_len = 0
+
+            if lines:
+                indent = self.subsequent_indent
+            else:
+                indent = self.initial_indent
+
+            width = self.width - column_width(indent)
+
+            if drop_whitespace and chunks[-1].strip() == '' and lines:
+                del chunks[-1]
+
+            while chunks:
+                l = column_width(chunks[-1])
+
+                if cur_len + l <= width:
+                    cur_line.append(chunks.pop())
+                    cur_len += l
+
+                else:
+                    break
+
+            if chunks and column_width(chunks[-1]) > width:
+                self._handle_long_word(chunks, cur_line, cur_len, width)
+
+            if drop_whitespace and cur_line and cur_line[-1].strip() == '':
+                del cur_line[-1]
+
+            if cur_line:
+                lines.append(indent + ''.join(cur_line))
+
+        return lines
+
+    def _break_word(self, word, space_left):
+        """_break_word(word : string, space_left : int) -> (string, string)
+
+        Break the word by unicode column width instead of len(word).
+        """
+        total = 0
+        for i, c in enumerate(word):
+            total += column_width(c)
+            if total > space_left:
+                return word[:max(i, 1)], word[max(i, 1):]  # take >= 1 char
+        return word, ''
+
+    def _split(self, text):
+        """_split(text : string) -> [string]
+
+        Override the original method, which only splits by 'wordsep_re'.
+        This '_split' also splits wide characters into one-character chunks.
+        """
+        split = lambda t: textwrap.TextWrapper._split(self, t)
+        chunks = []
+        for chunk in split(text):
+            for w, g in groupby(chunk, column_width):
+                if w == 1:
+                    chunks.extend(split(''.join(g)))
+                else:
+                    chunks.extend(list(g))
+        return chunks
+
+    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
+        """_handle_long_word(chunks : [string],
+                             cur_line : [string],
+                             cur_len : int, width : int)
+
+        Override the original to use self._break_word() instead of a slice.
+        """
+        space_left = max(width - cur_len, 1)
+        if self.break_long_words:
+            l, r = self._break_word(reversed_chunks[-1], space_left)
+            cur_line.append(l)
+            reversed_chunks[-1] = r
+
+        elif not cur_line:
+            cur_line.append(reversed_chunks.pop())
+
 
 MAXWIDTH = 70
 STDINDENT = 3
diff --git a/tests/test_build_text.py b/tests/test_build_text.py
index 63df8ee0a..79edc6230 100644
--- a/tests/test_build_text.py
+++ b/tests/test_build_text.py
@@ -12,6 +12,7 @@
 from textwrap import dedent
 
 from docutils.utils import column_width
+from sphinx.writers.text import MAXWIDTH
 
 from util import *
 
@@ -63,3 +64,23 @@ def test_multibyte_table(app):
     lines = [line.strip() for line in result.splitlines() if line.strip()]
     line_widths = [column_width(line) for line in lines]
     assert len(set(line_widths)) == 1  # same widths
+
+
+@with_text_app()
+def test_multibyte_maxwidth(app):
+    sb_text = u'abc'  # length=3
+    mb_text = u'\u65e5\u672c\u8a9e'  # length=3
+
+    sb_line = ' '.join([sb_text] * int(MAXWIDTH / 3))
+    mb_line = ' '.join([mb_text] * int(MAXWIDTH / 3))
+    mix_line = ' '.join([sb_text, mb_text] * int(MAXWIDTH / 6))
+
+    contents = u'\n\n'.join((sb_line, mb_line, mix_line))
+
+    (app.srcdir / 'contents.rst').write_text(contents, encoding='utf-8')
+    app.builder.build_all()
+    result = (app.outdir / 'contents.txt').text(encoding='utf-8')
+
+    lines = [line.strip() for line in result.splitlines() if line.strip()]
+    line_widths = [column_width(line) for line in lines]
+    assert max(line_widths) < MAXWIDTH