mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Fix text builder did not respect wide/fullwidth characters for textwrap.
This commit is contained in:
parent
b8296ad11e
commit
00fa1b2505
3
CHANGES
3
CHANGES
@ -1,7 +1,8 @@
|
||||
Release 1.2 (in development)
|
||||
============================
|
||||
|
||||
* Fix text builder did not respect wide/fullwidth charactors.
|
||||
* Fix text builder did not respect wide/fullwidth characters:
|
||||
title underline width, table layout width and text wrap width.
|
||||
|
||||
* #1062: sphinx.ext.autodoc use __init__ method signature for class signature.
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
import os
|
||||
import re
|
||||
import textwrap
|
||||
from itertools import groupby
|
||||
|
||||
from docutils import nodes, writers
|
||||
from docutils.utils import column_width
|
||||
@ -28,6 +29,98 @@ class TextWrapper(textwrap.TextWrapper):
|
||||
r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
|
||||
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
|
||||
|
||||
def _wrap_chunks(self, chunks):
    """_wrap_chunks(chunks : [string]) -> [string]

    Assemble the given chunks into a list of lines, each at most
    ``self.width`` display columns wide (indent included).

    The original _wrap_chunks measures text with len(); this version
    measures with column_width() so that wide/fullwidth characters are
    counted by the columns they occupy.

    Note that ``chunks`` is reversed and consumed in place.
    """
    if self.width <= 0:
        raise ValueError("invalid width %r (must be > 0)" % self.width)

    # py25 compat: fall back to True when the attribute is missing.
    drop_whitespace = getattr(self, 'drop_whitespace', True)
    lines = []

    # Treat the list as a stack: the next chunk to place is chunks[-1].
    chunks.reverse()

    while chunks:
        line_parts = []
        line_width = 0

        # The first line and the following lines may use different indents.
        indent = self.subsequent_indent if lines else self.initial_indent
        available = self.width - column_width(indent)

        # Do not start a continuation line with a whitespace chunk.
        if drop_whitespace and lines and chunks[-1].strip() == '':
            del chunks[-1]

        # Take chunks for as long as they fit in the available columns.
        while chunks:
            chunk_width = column_width(chunks[-1])
            if line_width + chunk_width > available:
                break
            line_parts.append(chunks.pop())
            line_width += chunk_width

        # The next chunk is too wide for any line: let the long-word
        # handler decide how much of it goes onto this line.
        if chunks and column_width(chunks[-1]) > available:
            self._handle_long_word(chunks, line_parts, line_width, available)

        # Do not end a line with a whitespace chunk.
        if drop_whitespace and line_parts and line_parts[-1].strip() == '':
            line_parts.pop()

        if line_parts:
            lines.append(indent + ''.join(line_parts))

    return lines
|
||||
|
||||
def _break_word(self, word, space_left):
    """_break_word(word : string, space_left : int) -> (string, string)

    Break ``word`` by unicode display width instead of len(word).

    Returns a ``(head, tail)`` pair where ``head`` is the longest prefix of
    ``word`` whose total column_width() does not exceed ``space_left`` and
    ``tail`` is the rest of the word.  ``head`` is empty when even the
    first character is wider than ``space_left``; ``tail`` is empty when
    the whole word fits.
    """
    total = 0
    for i, c in enumerate(word):
        total += column_width(c)
        if total > space_left:
            # word[i] is the first character that overflows, so everything
            # before it (word[:i]) is exactly what still fits.  Slicing at
            # i - 1 would drop a character that fits and, for i == 0,
            # turn into a negative index.
            return word[:i], word[i:]
    return word, ''
|
||||
|
||||
def _split(self, text):
    """_split(text : string) -> [string]

    Override of the original method, which only splits by 'wordsep_re'.

    The text is first split by the original method.  Each resulting chunk
    is then grouped into runs of characters with the same column_width():
    runs of width-1 characters are split again by the original method,
    while every character of any other width becomes a chunk of its own.
    """
    def base_split(piece):
        # Call the stdlib implementation explicitly, bypassing this override.
        return textwrap.TextWrapper._split(self, piece)

    result = []
    for chunk in base_split(text):
        for char_width, chars in groupby(chunk, column_width):
            if char_width == 1:
                result.extend(base_split(''.join(chars)))
            else:
                # One chunk per character so a line may break between them.
                result.extend(chars)
    return result
|
||||
|
||||
def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
    """_handle_long_word(chunks : [string],
                         cur_line : [string],
                         cur_len : int, width : int)

    Override original method for using self._break_word() instead of slice.

    ``reversed_chunks[-1]`` is the chunk to handle.  When
    ``self.break_long_words`` is true, the part of it that fits in the
    remaining columns is appended to ``cur_line`` and the rest is left on
    ``reversed_chunks``.  Otherwise the whole chunk is moved to
    ``cur_line`` only if that line is still empty; if not, nothing is
    changed here.
    """
    # Always offer at least one column, even if the line is already full.
    space_left = max(width - cur_len, 1)

    if self.break_long_words:
        word = reversed_chunks[-1]
        head, tail = self._break_word(word, space_left)
        if not head and not cur_line:
            # Nothing fits and the line is still empty: without consuming
            # anything the caller would see the very same chunk again and
            # never terminate.  Put one character on the line regardless,
            # as a plain slice with space_left >= 1 would have done.
            head, tail = word[:1], word[1:]
        cur_line.append(head)
        reversed_chunks[-1] = tail

    elif not cur_line:
        cur_line.append(reversed_chunks.pop())
|
||||
|
||||
|
||||
MAXWIDTH = 70
|
||||
STDINDENT = 3
|
||||
|
@ -12,6 +12,7 @@
|
||||
from textwrap import dedent
|
||||
|
||||
from docutils.utils import column_width
|
||||
from sphinx.writers.text import MAXWIDTH
|
||||
|
||||
from util import *
|
||||
|
||||
@ -63,3 +64,23 @@ def test_multibyte_table(app):
|
||||
lines = [line.strip() for line in result.splitlines() if line.strip()]
|
||||
line_widths = [column_width(line) for line in lines]
|
||||
assert len(set(line_widths)) == 1 # same widths
|
||||
|
||||
|
||||
@with_text_app()
def test_multibyte_maxwidth(app):
    """Check that the widest line of the text output stays below MAXWIDTH.

    Three paragraphs are built: single-width words only, wide-character
    words only, and both kinds alternating.  Line widths are measured with
    column_width(), not len().
    """
    narrow_word = u'abc'  # len() == 3
    wide_word = u'\u65e5\u672c\u8a9e'  # len() == 3

    # Repeat the words often enough that each paragraph has to be wrapped.
    repeat = MAXWIDTH // 3
    paragraphs = [
        ' '.join([narrow_word] * repeat),
        ' '.join([wide_word] * repeat),
        ' '.join([narrow_word, wide_word] * (MAXWIDTH // 6)),
    ]
    contents = u'\n\n'.join(paragraphs)

    (app.srcdir / 'contents.rst').write_text(contents, encoding='utf-8')
    app.builder.build_all()
    result = (app.outdir / 'contents.txt').text(encoding='utf-8')

    # Measure every non-blank line after stripping surrounding whitespace.
    stripped_lines = (line.strip() for line in result.splitlines())
    line_widths = [column_width(line) for line in stripped_lines if line]
    assert max(line_widths) < MAXWIDTH
|
||||
|
Loading…
Reference in New Issue
Block a user