mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Backport docutils.utils.smartquotes fixes as of r8097
This incorporates fixes from 0.14rc1 which were not in the PR #3666 merged in 1.6.1, and extra language configurations and fixes for quotes. Only active if Docutils is at 0.13.1 or earlier.
This commit is contained in:
parent
075a8cfb8f
commit
0b0bbfcb5b
@ -3,8 +3,11 @@
|
||||
sphinx.util.smartypants
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
This code is copied from docutils’ docutils/utils/smartquotes.py
|
||||
version 1.7.1 (from 2017-03-19). It should be removed in the future.
|
||||
This is extracted (with minor adaptations for flake8 compliance) from
|
||||
docutils’ docutils/utils/smartquotes.py as of revision 8097 (30 May 2017),
|
||||
in order to backport extra language configurations and fixes for Sphinx
|
||||
usage with Docutils < 0.14. Replaces the earlier smartypants version used up
|
||||
to Sphinx 1.5.6.
|
||||
|
||||
:copyright: © 2010 Günter Milde,
|
||||
original `SmartyPants`_: © 2003 John Gruber
|
||||
@ -20,6 +23,7 @@
|
||||
.. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
|
||||
|
||||
See the LICENSE file and the original docutils code for details.
|
||||
|
||||
"""
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
@ -31,6 +35,96 @@ if False: # For type annotation
|
||||
from typing import Iterable, Iterator, Tuple # NOQA
|
||||
|
||||
|
||||
# Quote characters per language tag.
#
# Each value holds four items, either as a 4-character string or, when an
# item needs more than one character (e.g. a quote plus a no-break space),
# as a 4-tuple of strings.  The first pair replaces the ASCII double quote
# and the second pair replaces the ASCII single quote (see the note on
# 'en-uk-x-altquot'); within each pair the order appears to be
# (opening, closing).
#
# No-break spaces are written as explicit escapes (\u00a0 = no-break space,
# \u202f = narrow no-break space) because the literal characters are
# invisible and are easily lost or confused with an ordinary space.
langquotes = {
    'af': u'“”‘’',
    'af-x-altquot': u'„”‚’',
    'bg': u'„“‚‘',  # Bulgarian, https://bg.wikipedia.org/wiki/Кавички
    'ca': u'«»“”',
    'ca-x-altquot': u'“”‘’',
    'cs': u'„“‚‘',
    'cs-x-altquot': u'»«›‹',
    'da': u'»«›‹',
    'da-x-altquot': u'„“‚‘',
    # 'da-x-altquot2': u'””’’',
    'de': u'„“‚‘',
    'de-x-altquot': u'»«›‹',
    'de-ch': u'«»‹›',
    'el': u'«»“”',
    'en': u'“”‘’',
    'en-uk-x-altquot': u'‘’“”',  # Attention: " → ‘ and ' → “ !
    'eo': u'“”‘’',
    'es': u'«»“”',
    'es-x-altquot': u'“”‘’',
    'et': u'„“‚‘',  # no secondary quote listed in
    'et-x-altquot': u'«»‹›',  # the sources above (wikipedia.org)
    'eu': u'«»‹›',
    'fi': u'””’’',
    'fi-x-altquot': u'»»››',
    'fr': (u'«\u00a0', u'\u00a0»', u'“', u'”'),  # full no-break space
    'fr-x-altquot': (u'«\u202f', u'\u202f»', u'“', u'”'),  # narrow no-break space
    'fr-ch': u'«»‹›',
    # narrow no-break space, http://typoguide.ch/
    'fr-ch-x-altquot': (u'«\u202f', u'\u202f»', u'‹\u202f', u'\u202f›'),
    'gl': u'«»“”',
    'he': u'”“»«',  # Hebrew is RTL, test position:
    'he-x-altquot': u'„”‚’',  # low quotation marks are opening.
    # 'he-x-altquot': u'“„‘‚',  # RTL: low quotation marks opening
    'hr': u'„”‘’',  # http://hrvatska-tipografija.com/polunavodnici/
    'hr-x-altquot': u'»«›‹',
    'hsb': u'„“‚‘',
    'hsb-x-altquot': u'»«›‹',
    'hu': u'„”«»',
    'is': u'„“‚‘',
    'it': u'«»“”',
    'it-ch': u'«»‹›',
    'it-x-altquot': u'“”‘’',
    # 'it-x-altquot2': u'“„‘‚',  # [7] in headlines
    'ja': u'「」『』',
    'lt': u'„“‚‘',
    'lv': u'„“‚‘',
    'mk': u'„“‚‘',  # Macedonian,
    # https://mk.wikipedia.org/wiki/Правопис_и_правоговор_на_македонскиот_јазик
    'nl': u'“”‘’',
    'nl-x-altquot': u'„”‚’',
    # 'nl-x-altquot2': u'””’’',
    'nb': u'«»’’',  # Norsk bokmål (canonical form 'no')
    'nn': u'«»’’',  # Nynorsk [10]
    'nn-x-altquot': u'«»‘’',  # [8], [10]
    # 'nn-x-altquot2': u'«»«»',  # [9], [10]
    # 'nn-x-altquot3': u'„“‚‘',  # [10]
    'no': u'«»’’',  # Norsk bokmål [10]
    'no-x-altquot': u'«»‘’',  # [8], [10]
    # 'no-x-altquot2': u'«»«»',  # [9], [10]
    # 'no-x-altquot3': u'„“‚‘',  # [10]
    'pl': u'„”«»',
    'pl-x-altquot': u'«»‚’',
    # 'pl-x-altquot2': u'„”‚’',
    # https://pl.wikipedia.org/wiki/Cudzys%C5%82%C3%B3w
    'pt': u'«»“”',
    'pt-br': u'“”‘’',
    'ro': u'„”«»',
    'ru': u'«»„“',
    'sh': u'„”‚’',  # Serbo-Croatian
    'sh-x-altquot': u'»«›‹',
    'sk': u'„“‚‘',  # Slovak
    'sk-x-altquot': u'»«›‹',
    'sl': u'„“‚‘',  # Slovenian
    'sl-x-altquot': u'»«›‹',
    'sq': u'«»‹›',  # Albanian
    'sq-x-altquot': u'“„‘‚',
    'sr': u'„”’’',
    'sr-x-altquot': u'»«›‹',
    'sv': u'””’’',
    'sv-x-altquot': u'»»››',
    'tr': u'“”‘’',
    'tr-x-altquot': u'«»‹›',
    # 'tr-x-altquot2': u'“„‘‚',  # [7] antiquated?
    'uk': u'«»„“',
    'uk-x-altquot': u'„“‚‘',
    'zh-cn': u'“”‘’',
    'zh-tw': u'「」『』',
}
|
||||
|
||||
|
||||
def educateQuotes(text, language='en'):
|
||||
# type: (unicode, unicode) -> unicode
|
||||
"""
|
||||
@ -43,7 +137,10 @@ def educateQuotes(text, language='en'):
|
||||
"""
|
||||
|
||||
smart = smartquotes.smartchars(language)
|
||||
smart.apostrophe = u'’'
|
||||
try:
|
||||
apostrophe = smart.apostrophe
|
||||
except:
|
||||
apostrophe = u'’'
|
||||
|
||||
# oldtext = text
|
||||
punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
|
||||
@ -60,7 +157,8 @@ def educateQuotes(text, language='en'):
|
||||
text = re.sub(r"""'"(?=\w)""", smart.osquote + smart.opquote, text)
|
||||
|
||||
# Special case for decade abbreviations (the '80s):
|
||||
text = re.sub(r"""\b'(?=\d{2}s)""", smart.csquote, text)
|
||||
if language.startswith('en'): # TODO similar cases in other languages?
|
||||
text = re.sub(r"""'(?=\d{2}s)""", apostrophe, text, re.UNICODE)
|
||||
|
||||
close_class = r"""[^\ \t\r\n\[\{\(\-]"""
|
||||
dec_dashes = r"""–|—"""
|
||||
@ -81,9 +179,11 @@ def educateQuotes(text, language='en'):
|
||||
text = opening_single_quotes_regex.sub(r'\1' + smart.osquote, text)
|
||||
|
||||
# In many locales, single closing quotes are different from apostrophe:
|
||||
if smart.csquote != smart.apostrophe:
|
||||
if smart.csquote != apostrophe:
|
||||
apostrophe_regex = re.compile(r"(?<=(\w|\d))'(?=\w)", re.UNICODE)
|
||||
text = apostrophe_regex.sub(smart.apostrophe, text)
|
||||
text = apostrophe_regex.sub(apostrophe, text)
|
||||
# TODO: keep track of quoting level to recognize apostrophe in, e.g.,
|
||||
# "Ich fass' es nicht."
|
||||
|
||||
closing_single_quotes_regex = re.compile(r"""
|
||||
(%s)
|
||||
@ -140,7 +240,7 @@ def educateQuotes(text, language='en'):
|
||||
return text
|
||||
|
||||
|
||||
def educate_tokens(text_tokens, attr='1', language='en'):
|
||||
def educate_tokens(text_tokens, attr=smartquotes.default_smartypants_attr, language='en'):
|
||||
# type: (Iterable[Tuple[str, unicode]], unicode, unicode) -> Iterator
|
||||
"""Return iterator that "educates" the items of `text_tokens`.
|
||||
"""
|
||||
@ -167,9 +267,7 @@ def educate_tokens(text_tokens, attr='1', language='en'):
|
||||
do_ellipses = False
|
||||
do_stupefy = False
|
||||
|
||||
if attr == "0": # Do nothing.
|
||||
pass
|
||||
elif attr == "1": # Do everything, turn all options on.
|
||||
if attr == "1": # Do everything, turn all options on.
|
||||
do_quotes = True
|
||||
do_backticks = 1
|
||||
do_dashes = 1
|
||||
@ -266,15 +364,14 @@ def educate_tokens(text_tokens, attr='1', language='en'):
|
||||
|
||||
|
||||
if docutils_version < (0, 13, 2):
|
||||
# Monkey patch the old docutils versions to fix the issue mentioned
|
||||
# Monkey patch the old docutils versions to fix the issues mentioned
|
||||
# at https://sourceforge.net/p/docutils/bugs/313/
|
||||
# at https://sourceforge.net/p/docutils/bugs/317/
|
||||
# and more
|
||||
smartquotes.educateQuotes = educateQuotes
|
||||
|
||||
# And the one mentioned at https://sourceforge.net/p/docutils/bugs/317/
|
||||
smartquotes.educate_tokens = educate_tokens
|
||||
|
||||
# Fix the issue with French quotes mentioned at
|
||||
# https://sourceforge.net/p/docutils/mailman/message/35760696/
|
||||
quotes = smartquotes.smartchars.quotes
|
||||
quotes['fr'] = (u'«\u00a0', u'\u00a0»', u'“', u'”')
|
||||
quotes['fr-ch'] = u'«»‹›'
|
||||
# Add/fix other languages as well
|
||||
smartquotes.smartchars.quotes = langquotes
|
||||
|
Loading…
Reference in New Issue
Block a user