Merge pull request #3832 from jfbu/smarty

Backport docutils.utils.smartquotes fixes as of r8097
2025-02-25 18:55:22 -06:00 · 2017-06-04 18:40:59 +02:00 · 2017-06-04 18:40:59 +02:00 · c6699eb6b8
commit c6699eb6b8
parent 17c366659a 0b0bbfcb5b
1 changed files with 113 additions and 16 deletions
--- a/sphinx/util/smartypants.py
+++ b/sphinx/util/smartypants.py
@ -3,8 +3,11 @@
    sphinx.util.smartypants
    ~~~~~~~~~~~~~~~~~~~~~~~
-    This code is copied from docutils’ docutils/utils/smartquotes.py
+    This is extracted (with minor adaptations for flake8 compliance) from
-    version 1.7.1 (from 2017-03-19). It should be removed in the future.
+    docutils’ docutils/utils/smartquotes.py as of revision 8097 (30 May 2017),
    in order to backport for Sphinx usage with Docutils < 0.14 extra language
    configurations and fixes. Replaces earlier smartypants version as used up
    to Sphinx 1.5.6.
    :copyright: © 2010 Günter Milde,
                original `SmartyPants`_: © 2003 John Gruber
@ -20,6 +23,7 @@
    .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
    See the LICENSE file and the original docutils code for details.
 """
 from __future__ import absolute_import, unicode_literals
@ -31,6 +35,96 @@ if False:  # For type annotation
    from typing import Iterable, Iterator, Tuple  # NOQA
 langquotes = {'af':           u'“”‘’',
              'af-x-altquot': u'„”‚’',
              'bg':           u'„“‚‘',  # Bulgarian, https://bg.wikipedia.org/wiki/Кавички
              'ca':           u'«»“”',
              'ca-x-altquot': u'“”‘’',
              'cs':           u'„“‚‘',
              'cs-x-altquot': u'»«›‹',
              'da':           u'»«›‹',
              'da-x-altquot': u'„“‚‘',
              # 'da-x-altquot2': u'””’’',
              'de':           u'„“‚‘',
              'de-x-altquot': u'»«›‹',
              'de-ch':        u'«»‹›',
              'el':           u'«»“”',
              'en':           u'“”‘’',
              'en-uk-x-altquot': u'‘’“”',  # Attention: " → ‘ and ' → “ !
              'eo':           u'“”‘’',
              'es':           u'«»“”',
              'es-x-altquot': u'“”‘’',
              'et':           u'„“‚‘',  # no secondary quote listed in
              'et-x-altquot': u'«»‹›',  # the sources above (wikipedia.org)
              'eu':           u'«»‹›',
              'fi':           u'””’’',
              'fi-x-altquot': u'»»››',
              'fr':           (u'« ', u' »', u'“', u'”'),  # full no-break space
              'fr-x-altquot': (u'« ', u' »', u'“', u'”'),  # narrow no-break space
              'fr-ch':        u'«»‹›',
              'fr-ch-x-altquot': (u'« ',  u' »', u'‹ ', u' ›'),  # narrow no-break space
              # http://typoguide.ch/
              'gl':           u'«»“”',
              'he':           u'”“»«',  # Hebrew is RTL, test position:
              'he-x-altquot': u'„”‚’',  # low quotation marks are opening.
              # 'he-x-altquot': u'“„‘‚',  # RTL: low quotation marks opening
              'hr':           u'„”‘’',  # http://hrvatska-tipografija.com/polunavodnici/
              'hr-x-altquot': u'»«›‹',
              'hsb':          u'„“‚‘',
              'hsb-x-altquot': u'»«›‹',
              'hu':           u'„”«»',
              'is':           u'„“‚‘',
              'it':           u'«»“”',
              'it-ch':        u'«»‹›',
              'it-x-altquot': u'“”‘’',
              # 'it-x-altquot2': u'“„‘‚',  # [7] in headlines
              'ja':           u'「」『』',
              'lt':           u'„“‚‘',
              'lv':           u'„“‚‘',
              'mk':           u'„“‚‘',  # Macedonian,
              # https://mk.wikipedia.org/wiki/Правопис_и_правоговор_на_македонскиот_јазик
              'nl':           u'“”‘’',
              'nl-x-altquot': u'„”‚’',
              # 'nl-x-altquot2': u'””’’',
              'nb':           u'«»’’',  # Norsk bokmål (canonical form 'no')
              'nn':           u'«»’’',  # Nynorsk [10]
              'nn-x-altquot': u'«»‘’',  # [8], [10]
              # 'nn-x-altquot2': u'«»«»',  # [9], [10]
              # 'nn-x-altquot3': u'„“‚‘',  # [10]
              'no':           u'«»’’',  # Norsk bokmål [10]
              'no-x-altquot': u'«»‘’',  # [8], [10]
              # 'no-x-altquot2': u'«»«»',  # [9], [10]
              # 'no-x-altquot3': u'„“‚‘',  # [10]
              'pl':           u'„”«»',
              'pl-x-altquot': u'«»‚’',
              # 'pl-x-altquot2': u'„”‚’',
              # https://pl.wikipedia.org/wiki/Cudzys%C5%82%C3%B3w
              'pt':           u'«»“”',
              'pt-br':        u'“”‘’',
              'ro':           u'„”«»',
              'ru':           u'«»„“',
              'sh':           u'„”‚’',  # Serbo-Croatian
              'sh-x-altquot': u'»«›‹',
              'sk':           u'„“‚‘',  # Slovak
              'sk-x-altquot': u'»«›‹',
              'sl':           u'„“‚‘',  # Slovenian
              'sl-x-altquot': u'»«›‹',
              'sq':           u'«»‹›',  # Albanian
              'sq-x-altquot': u'“„‘‚',
              'sr':           u'„”’’',
              'sr-x-altquot': u'»«›‹',
              'sv':           u'””’’',
              'sv-x-altquot': u'»»››',
              'tr':           u'“”‘’',
              'tr-x-altquot': u'«»‹›',
              # 'tr-x-altquot2': u'“„‘‚',  # [7] antiquated?
              'uk':           u'«»„“',
              'uk-x-altquot': u'„“‚‘',
              'zh-cn':        u'“”‘’',
              'zh-tw':        u'「」『』',
              }
 def educateQuotes(text, language='en'):
    # type: (unicode, unicode) -> unicode
    """
@ -43,7 +137,10 @@ def educateQuotes(text, language='en'):
    """
    smart = smartquotes.smartchars(language)
-    smart.apostrophe = u'’'
+    try:
        apostrophe = smart.apostrophe
    except:
        apostrophe = u'’'
    # oldtext = text
    punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
@ -60,7 +157,8 @@ def educateQuotes(text, language='en'):
    text = re.sub(r"""'"(?=\w)""", smart.osquote + smart.opquote, text)
    # Special case for decade abbreviations (the '80s):
-    text = re.sub(r"""\b'(?=\d{2}s)""", smart.csquote, text)
+    if language.startswith('en'):  # TODO similar cases in other languages?
        text = re.sub(r"""'(?=\d{2}s)""", apostrophe, text, re.UNICODE)
    close_class = r"""[^\ \t\r\n\[\{\(\-]"""
    dec_dashes = r"""&#8211;|&#8212;"""
@ -81,9 +179,11 @@ def educateQuotes(text, language='en'):
    text = opening_single_quotes_regex.sub(r'\1' + smart.osquote, text)
    # In many locales, single closing quotes are different from apostrophe:
-    if smart.csquote != smart.apostrophe:
+    if smart.csquote != apostrophe:
        apostrophe_regex = re.compile(r"(?<=(\w|\d))'(?=\w)", re.UNICODE)
-        text = apostrophe_regex.sub(smart.apostrophe, text)
+        text = apostrophe_regex.sub(apostrophe, text)
    # TODO: keep track of quoting level to recognize apostrophe in, e.g.,
    # "Ich fass' es nicht."
    closing_single_quotes_regex = re.compile(r"""
                    (%s)
@ -140,7 +240,7 @@ def educateQuotes(text, language='en'):
    return text
-def educate_tokens(text_tokens, attr='1', language='en'):
+def educate_tokens(text_tokens, attr=smartquotes.default_smartypants_attr, language='en'):
    # type: (Iterable[Tuple[str, unicode]], unicode, unicode) -> Iterator
    """Return iterator that "educates" the items of `text_tokens`.
@ -178,9 +278,7 @@ def educate_tokens(text_tokens, attr='1', language='en'):
    do_ellipses = False
    do_stupefy = False
-    if attr == "0":  # Do nothing.
+    if attr == "1":  # Do everything, turn all options on.
        pass
    elif attr == "1":  # Do everything, turn all options on.
        do_quotes = True
        # do_backticks = 1
        do_dashes = 1
@ -277,15 +375,14 @@ def educate_tokens(text_tokens, attr='1', language='en'):
 if docutils_version < (0, 13, 2):
-    # Monkey patch the old docutils versions to fix the issue mentioned
+    # Monkey patch the old docutils versions to fix the issues mentioned
    # at https://sourceforge.net/p/docutils/bugs/313/
    # at https://sourceforge.net/p/docutils/bugs/317/
    # and more
    smartquotes.educateQuotes = educateQuotes
    # And the one mentioned at https://sourceforge.net/p/docutils/bugs/317/
    smartquotes.educate_tokens = educate_tokens
    # Fix the issue with French quotes mentioned at
    # https://sourceforge.net/p/docutils/mailman/message/35760696/
-    quotes = smartquotes.smartchars.quotes
+    # Add/fix other languages as well
-    quotes['fr'] = (u'«\u00a0', u'\u00a0»', u'“', u'”')
+    smartquotes.smartchars.quotes = langquotes
    quotes['fr-ch'] = u'«»‹›'