Fix: the text builder did not respect wide/fullwidth characters when wrapping text (textwrap).

2025-02-25 18:55:22 -06:00 · 2013-02-07 03:34:51 +00:00 · 2013-02-07 03:34:51 +00:00 · 00fa1b2505
commit 00fa1b2505
parent b8296ad11e
3 changed files with 116 additions and 1 deletions
--- a/3
+++ b/3
@ -1,7 +1,8 @@
 Release 1.2 (in development)
 ============================

-* Fix text builder did not respect wide/fullwidth charactors.
+* Fix text builder did not respect wide/fullwidth characters:
+  title underline width, table layout width and text wrap width.

 * #1062: sphinx.ext.autodoc use __init__ method signature for class signature.

--- a/sphinx/writers/text.py
+++ b/sphinx/writers/text.py
@ -11,6 +11,7 @@
 import os
 import re
 import textwrap
+from itertools import groupby

 from docutils import nodes, writers
 from docutils.utils import column_width
@ -28,6 +29,98 @@ class TextWrapper(textwrap.TextWrapper):
        r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'   # hyphenated words
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash

+    def _wrap_chunks(self, chunks):
+        """_wrap_chunks(chunks : [string]) -> [string]
+
+        Assemble indivisible chunks into output lines.  The original
+        _wrap_chunks measures with len(); this version measures every
+        chunk and indent with column_width() so that wide/fullwidth
+        characters count for the columns they occupy.
+
+        'chunks' is reversed in place and consumed as a stack.  Raises
+        ValueError if self.width is not positive.
+        """
+        # py25 compat: fall back to True when the attribute is missing
+        strip_blank = getattr(self, 'drop_whitespace', True)
+        if self.width <= 0:
+            raise ValueError("invalid width %r (must be > 0)" % self.width)
+
+        wrapped = []
+        # Reverse once so the next chunk can always be taken from the end.
+        chunks.reverse()
+
+        while chunks:
+            # Only the very first output line gets the initial indent.
+            prefix = self.subsequent_indent if wrapped else self.initial_indent
+            avail = self.width - column_width(prefix)
+
+            # Skip whitespace that would otherwise start a continuation line.
+            if strip_blank and wrapped and not chunks[-1].strip():
+                chunks.pop()
+
+            pieces = []
+            used = 0
+            while chunks:
+                piece_width = column_width(chunks[-1])
+                if used + piece_width > avail:
+                    break
+                pieces.append(chunks.pop())
+                used += piece_width
+
+            # The next chunk is too wide to fit on any line by itself.
+            if chunks and column_width(chunks[-1]) > avail:
+                self._handle_long_word(chunks, pieces, used, avail)
+
+            # Drop whitespace left dangling at the end of the line.
+            if strip_blank and pieces and not pieces[-1].strip():
+                pieces.pop()
+
+            if pieces:
+                wrapped.append(prefix + ''.join(pieces))
+
+        return wrapped
+
+    def _break_word(self, word, space_left):
+        """_break_word(word : string, space_left : int) -> (string, string)
+
+        Split 'word' by display width instead of len(word).  Returns
+        (head, tail) where head is the longest prefix of 'word' whose
+        characters, measured one by one with column_width(), occupy at
+        most 'space_left' columns, and tail is the rest.  head is '' when
+        not even the first character fits; tail is '' when the whole
+        word fits.
+        """
+        total = 0
+        for i, c in enumerate(word):
+            total += column_width(c)
+            if total > space_left:
+                # Characters before index i fit; character i is the first
+                # one that overflows, so it starts the tail.
+                return word[:i], word[i:]
+        return word, ''
+
+    def _split(self, text):
+        """_split(text : string) -> [string]
+
+        Extend the inherited TextWrapper._split(), which only splits by
+        'wordsep_re'.  Each chunk it returns is regrouped by the
+        column_width() of its characters: runs of width-1 characters are
+        split again by the inherited method, and every other character
+        becomes a one-character chunk of its own.
+        """
+        split = lambda t: textwrap.TextWrapper._split(self, t)
+        chunks = []
+        for chunk in split(text):
+            for w, g in groupby(chunk, column_width):
+                if w == 1:
+                    chunks.extend(split(''.join(g)))
+                else:
+                    chunks.extend(list(g))
+        return chunks
+
+    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
+        """_handle_long_word(chunks : [string],
+                             cur_line : [string],
+                             cur_len : int, width : int)
+
+        Handle a chunk that is wider than 'width' and so cannot fit on
+        any line.  Overrides the original method to break by display
+        width with self._break_word() instead of slicing by len().
+
+        If self.break_long_words is true, as much of the chunk as fits in
+        the columns left on the current line is appended to 'cur_line'
+        and the remainder stays on top of 'reversed_chunks'.  Otherwise
+        the whole chunk is moved to 'cur_line', but only if that line is
+        still empty.  Both lists are modified in place.
+        """
+        # 'width' below 1 means the indent already uses up the whole line;
+        # allow one column so that each pass still consumes something.
+        # Otherwise report the real remainder, which is 0 on a full line.
+        if width < 1:
+            space_left = 1
+        else:
+            space_left = width - cur_len
+
+        if self.break_long_words:
+            word = reversed_chunks[-1]
+            head, tail = self._break_word(word, space_left)
+            if not head and not cur_line:
+                # Even the first character is too wide for an empty line;
+                # take it anyway, otherwise the word is never consumed.
+                head, tail = word[:1], word[1:]
+            if head:
+                # An empty head is not appended: it would shield trailing
+                # whitespace on the line from being dropped by the caller.
+                cur_line.append(head)
+            if tail:
+                reversed_chunks[-1] = tail
+            else:
+                # Do not leave an empty chunk behind on the stack.
+                reversed_chunks.pop()
+
+        elif not cur_line:
+            cur_line.append(reversed_chunks.pop())
+

 MAXWIDTH = 70
 STDINDENT = 3
--- a/tests/test_build_text.py
+++ b/tests/test_build_text.py
@ -12,6 +12,7 @@
 from textwrap import dedent

 from docutils.utils import column_width
+from sphinx.writers.text import MAXWIDTH

 from util import *

@ -63,3 +64,23 @@ def test_multibyte_table(app):
    lines = [line.strip() for line in result.splitlines() if line.strip()]
    line_widths = [column_width(line) for line in lines]
    assert len(set(line_widths)) == 1  # same widths
+
+
+@with_text_app()
+def test_multibyte_maxwidth(app):
+    """Build paragraphs of narrow, wide and mixed words and check that
+    every non-blank output line is narrower than MAXWIDTH columns."""
+    narrow = u'abc'  # 3 single-width characters
+    wide = u'\u65e5\u672c\u8a9e'  # 3 characters, each a wide one
+
+    per_line = int(MAXWIDTH / 3)
+    pairs = int(MAXWIDTH / 6)
+    paragraphs = (
+        ' '.join([narrow] * per_line),
+        ' '.join([wide] * per_line),
+        ' '.join([narrow, wide] * pairs),
+    )
+    source = u'\n\n'.join(paragraphs)
+
+    (app.srcdir / 'contents.rst').write_text(source, encoding='utf-8')
+    app.builder.build_all()
+    output = (app.outdir / 'contents.txt').text(encoding='utf-8')
+
+    # Measure display width of each non-blank line, ignoring indentation.
+    stripped = (raw.strip() for raw in output.splitlines())
+    widths = [column_width(text) for text in stripped if text]
+    assert max(widths) < MAXWIDTH