mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Merge pull request #3832 from jfbu/smarty
Backport docutils.utils.smartquotes fixes as of r8097
This commit is contained in:
commit
c6699eb6b8
@ -3,8 +3,11 @@
|
|||||||
sphinx.util.smartypants
|
sphinx.util.smartypants
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
This code is copied from docutils’ docutils/utils/smartquotes.py
|
This is extracted (with minor adaptations for flake8 compliance) from
|
||||||
version 1.7.1 (from 2017-03-19). It should be removed in the future.
|
docutils’ docutils/utils/smartquotes.py as of revision 8097 (30 May 2017),
|
||||||
|
in order to backport, for Sphinx usage with Docutils < 0.14, extra language
|
||||||
|
configurations and fixes. Replaces earlier smartypants version as used up
|
||||||
|
to Sphinx 1.5.6.
|
||||||
|
|
||||||
:copyright: © 2010 Günter Milde,
|
:copyright: © 2010 Günter Milde,
|
||||||
original `SmartyPants`_: © 2003 John Gruber
|
original `SmartyPants`_: © 2003 John Gruber
|
||||||
@ -20,6 +23,7 @@
|
|||||||
.. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
|
.. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
|
||||||
|
|
||||||
See the LICENSE file and the original docutils code for details.
|
See the LICENSE file and the original docutils code for details.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from __future__ import absolute_import, unicode_literals
|
from __future__ import absolute_import, unicode_literals
|
||||||
|
|
||||||
@ -31,6 +35,96 @@ if False: # For type annotation
|
|||||||
from typing import Iterable, Iterator, Tuple # NOQA
|
from typing import Iterable, Iterator, Tuple # NOQA
|
||||||
|
|
||||||
|
|
||||||
|
# Quote characters per language tag.  On old docutils versions this table
# replaces ``smartquotes.smartchars.quotes`` (see the monkey patch at the end
# of this module).
#
# Each value is a sequence of exactly four items, in this order:
#
#   opening and closing replacement for the ASCII double quote ("),
#   opening and closing replacement for the ASCII single quote (').
#
# A plain 4-character string is used when every item is a single character;
# a 4-tuple is used when an item needs more than one character (e.g. French
# guillemets padded with a no-break space).
#
# No-break spaces are written as explicit escapes (``\u00a0`` = NO-BREAK
# SPACE, ``\u202f`` = NARROW NO-BREAK SPACE) because the literal characters
# are indistinguishable from an ordinary space and are easily lost when the
# source is copied or reformatted.
#
# Commented-out entries are documented alternatives that are deliberately not
# enabled; bracketed numbers refer to the reference list in the original
# docutils module.
langquotes = {
    'af': u'“”‘’',
    'af-x-altquot': u'„”‚’',
    'bg': u'„“‚‘',  # Bulgarian, https://bg.wikipedia.org/wiki/Кавички
    'ca': u'«»“”',
    'ca-x-altquot': u'“”‘’',
    'cs': u'„“‚‘',
    'cs-x-altquot': u'»«›‹',
    'da': u'»«›‹',
    'da-x-altquot': u'„“‚‘',
    # 'da-x-altquot2': u'””’’',
    'de': u'„“‚‘',
    'de-x-altquot': u'»«›‹',
    'de-ch': u'«»‹›',
    'el': u'«»“”',
    'en': u'“”‘’',
    'en-uk-x-altquot': u'‘’“”',  # Attention: " → ‘ and ' → “ !
    'eo': u'“”‘’',
    'es': u'«»“”',
    'es-x-altquot': u'“”‘’',
    'et': u'„“‚‘',  # no secondary quote listed in
    'et-x-altquot': u'«»‹›',  # the sources above (wikipedia.org)
    'eu': u'«»‹›',
    'fi': u'””’’',
    'fi-x-altquot': u'»»››',
    # full no-break space
    'fr': (u'«\u00a0', u'\u00a0»', u'“', u'”'),
    # narrow no-break space
    'fr-x-altquot': (u'«\u202f', u'\u202f»', u'“', u'”'),
    'fr-ch': u'«»‹›',
    # narrow no-break space, http://typoguide.ch/
    'fr-ch-x-altquot': (u'«\u202f', u'\u202f»', u'‹\u202f', u'\u202f›'),
    'gl': u'«»“”',
    'he': u'”“»«',  # Hebrew is RTL, test position:
    'he-x-altquot': u'„”‚’',  # low quotation marks are opening.
    # 'he-x-altquot': u'“„‘‚',  # RTL: low quotation marks opening
    'hr': u'„”‘’',  # http://hrvatska-tipografija.com/polunavodnici/
    'hr-x-altquot': u'»«›‹',
    'hsb': u'„“‚‘',
    'hsb-x-altquot': u'»«›‹',
    'hu': u'„”«»',
    'is': u'„“‚‘',
    'it': u'«»“”',
    'it-ch': u'«»‹›',
    'it-x-altquot': u'“”‘’',
    # 'it-x-altquot2': u'“„‘‚',  # [7] in headlines
    'ja': u'「」『』',
    'lt': u'„“‚‘',
    'lv': u'„“‚‘',
    'mk': u'„“‚‘',  # Macedonian,
    # https://mk.wikipedia.org/wiki/Правопис_и_правоговор_на_македонскиот_јазик
    'nl': u'“”‘’',
    'nl-x-altquot': u'„”‚’',
    # 'nl-x-altquot2': u'””’’',
    'nb': u'«»’’',  # Norsk bokmål (canonical form 'no')
    'nn': u'«»’’',  # Nynorsk [10]
    'nn-x-altquot': u'«»‘’',  # [8], [10]
    # 'nn-x-altquot2': u'«»«»',  # [9], [10]
    # 'nn-x-altquot3': u'„“‚‘',  # [10]
    'no': u'«»’’',  # Norsk bokmål [10]
    'no-x-altquot': u'«»‘’',  # [8], [10]
    # 'no-x-altquot2': u'«»«»',  # [9], [10]
    # 'no-x-altquot3': u'„“‚‘',  # [10]
    'pl': u'„”«»',
    'pl-x-altquot': u'«»‚’',
    # 'pl-x-altquot2': u'„”‚’',
    # https://pl.wikipedia.org/wiki/Cudzys%C5%82%C3%B3w
    'pt': u'«»“”',
    'pt-br': u'“”‘’',
    'ro': u'„”«»',
    'ru': u'«»„“',
    'sh': u'„”‚’',  # Serbo-Croatian
    'sh-x-altquot': u'»«›‹',
    'sk': u'„“‚‘',  # Slovak
    'sk-x-altquot': u'»«›‹',
    'sl': u'„“‚‘',  # Slovenian
    'sl-x-altquot': u'»«›‹',
    'sq': u'«»‹›',  # Albanian
    'sq-x-altquot': u'“„‘‚',
    'sr': u'„”’’',
    'sr-x-altquot': u'»«›‹',
    'sv': u'””’’',
    'sv-x-altquot': u'»»››',
    'tr': u'“”‘’',
    'tr-x-altquot': u'«»‹›',
    # 'tr-x-altquot2': u'“„‘‚',  # [7] antiquated?
    'uk': u'«»„“',
    'uk-x-altquot': u'„“‚‘',
    'zh-cn': u'“”‘’',
    'zh-tw': u'「」『』',
}
|
||||||
|
|
||||||
|
|
||||||
def educateQuotes(text, language='en'):
|
def educateQuotes(text, language='en'):
|
||||||
# type: (unicode, unicode) -> unicode
|
# type: (unicode, unicode) -> unicode
|
||||||
"""
|
"""
|
||||||
@ -43,7 +137,10 @@ def educateQuotes(text, language='en'):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
smart = smartquotes.smartchars(language)
|
smart = smartquotes.smartchars(language)
|
||||||
smart.apostrophe = u'’'
|
try:
|
||||||
|
apostrophe = smart.apostrophe
|
||||||
|
except:
|
||||||
|
apostrophe = u'’'
|
||||||
|
|
||||||
# oldtext = text
|
# oldtext = text
|
||||||
punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
|
punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
|
||||||
@ -60,7 +157,8 @@ def educateQuotes(text, language='en'):
|
|||||||
text = re.sub(r"""'"(?=\w)""", smart.osquote + smart.opquote, text)
|
text = re.sub(r"""'"(?=\w)""", smart.osquote + smart.opquote, text)
|
||||||
|
|
||||||
# Special case for decade abbreviations (the '80s):
|
# Special case for decade abbreviations (the '80s):
|
||||||
text = re.sub(r"""\b'(?=\d{2}s)""", smart.csquote, text)
|
if language.startswith('en'): # TODO similar cases in other languages?
|
||||||
|
text = re.sub(r"""'(?=\d{2}s)""", apostrophe, text, re.UNICODE)
|
||||||
|
|
||||||
close_class = r"""[^\ \t\r\n\[\{\(\-]"""
|
close_class = r"""[^\ \t\r\n\[\{\(\-]"""
|
||||||
dec_dashes = r"""–|—"""
|
dec_dashes = r"""–|—"""
|
||||||
@ -81,9 +179,11 @@ def educateQuotes(text, language='en'):
|
|||||||
text = opening_single_quotes_regex.sub(r'\1' + smart.osquote, text)
|
text = opening_single_quotes_regex.sub(r'\1' + smart.osquote, text)
|
||||||
|
|
||||||
# In many locales, single closing quotes are different from apostrophe:
|
# In many locales, single closing quotes are different from apostrophe:
|
||||||
if smart.csquote != smart.apostrophe:
|
if smart.csquote != apostrophe:
|
||||||
apostrophe_regex = re.compile(r"(?<=(\w|\d))'(?=\w)", re.UNICODE)
|
apostrophe_regex = re.compile(r"(?<=(\w|\d))'(?=\w)", re.UNICODE)
|
||||||
text = apostrophe_regex.sub(smart.apostrophe, text)
|
text = apostrophe_regex.sub(apostrophe, text)
|
||||||
|
# TODO: keep track of quoting level to recognize apostrophe in, e.g.,
|
||||||
|
# "Ich fass' es nicht."
|
||||||
|
|
||||||
closing_single_quotes_regex = re.compile(r"""
|
closing_single_quotes_regex = re.compile(r"""
|
||||||
(%s)
|
(%s)
|
||||||
@ -140,7 +240,7 @@ def educateQuotes(text, language='en'):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def educate_tokens(text_tokens, attr='1', language='en'):
|
def educate_tokens(text_tokens, attr=smartquotes.default_smartypants_attr, language='en'):
|
||||||
# type: (Iterable[Tuple[str, unicode]], unicode, unicode) -> Iterator
|
# type: (Iterable[Tuple[str, unicode]], unicode, unicode) -> Iterator
|
||||||
"""Return iterator that "educates" the items of `text_tokens`.
|
"""Return iterator that "educates" the items of `text_tokens`.
|
||||||
|
|
||||||
@ -178,9 +278,7 @@ def educate_tokens(text_tokens, attr='1', language='en'):
|
|||||||
do_ellipses = False
|
do_ellipses = False
|
||||||
do_stupefy = False
|
do_stupefy = False
|
||||||
|
|
||||||
if attr == "0": # Do nothing.
|
if attr == "1": # Do everything, turn all options on.
|
||||||
pass
|
|
||||||
elif attr == "1": # Do everything, turn all options on.
|
|
||||||
do_quotes = True
|
do_quotes = True
|
||||||
# do_backticks = 1
|
# do_backticks = 1
|
||||||
do_dashes = 1
|
do_dashes = 1
|
||||||
@ -277,15 +375,14 @@ def educate_tokens(text_tokens, attr='1', language='en'):
|
|||||||
|
|
||||||
|
|
||||||
if docutils_version < (0, 13, 2):
|
if docutils_version < (0, 13, 2):
|
||||||
# Monkey patch the old docutils versions to fix the issue mentioned
|
# Monkey patch the old docutils versions to fix the issues mentioned
|
||||||
# at https://sourceforge.net/p/docutils/bugs/313/
|
# at https://sourceforge.net/p/docutils/bugs/313/
|
||||||
|
# at https://sourceforge.net/p/docutils/bugs/317/
|
||||||
|
# and more
|
||||||
smartquotes.educateQuotes = educateQuotes
|
smartquotes.educateQuotes = educateQuotes
|
||||||
|
|
||||||
# And the one mentioned at https://sourceforge.net/p/docutils/bugs/317/
|
|
||||||
smartquotes.educate_tokens = educate_tokens
|
smartquotes.educate_tokens = educate_tokens
|
||||||
|
|
||||||
# Fix the issue with French quotes mentioned at
|
# Fix the issue with French quotes mentioned at
|
||||||
# https://sourceforge.net/p/docutils/mailman/message/35760696/
|
# https://sourceforge.net/p/docutils/mailman/message/35760696/
|
||||||
quotes = smartquotes.smartchars.quotes
|
# Add/fix other languages as well
|
||||||
quotes['fr'] = (u'«\u00a0', u'\u00a0»', u'“', u'”')
|
smartquotes.smartchars.quotes = langquotes
|
||||||
quotes['fr-ch'] = u'«»‹›'
|
|
||||||
|
Loading…
Reference in New Issue
Block a user