Merge pull request #5284 from jakobandersen/cpp_char_lit_5147

C++, add support for (most) character literals.
This commit is contained in:
Jakob Lykke Andersen 2018-08-10 22:59:22 +02:00 committed by GitHub
commit ef5448e8bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 78 additions and 3 deletions

View File

@ -171,6 +171,7 @@ Features added
* C++, add support for unions.
* C++, add support for anonymous entities using names starting with ``@``.
Fixes #3593 and #2683.
* #5147: C++, add support for (most) character literals.
* #3606: MathJax should be loaded with async attribute
* html: Output ``canonical_url`` metadata if :confval:`html_baseurl` set (refs:
#4193)

View File

@ -289,13 +289,26 @@ logger = logging.getLogger(__name__)
nested-name
"""
# TODO: support hex, oct, etc. work
# Regular expressions for the literal tokens recognised in expressions.
_integer_literal_re = re.compile(r'[1-9][0-9]*')
_octal_literal_re = re.compile(r'0[0-7]*')
# Hexadecimal digits are 0-9, a-f and A-F; the digit range must not stop at 7,
# otherwise literals such as 0x9F are not matched.
_hex_literal_re = re.compile(r'0[xX][0-9a-fA-F]+')
_binary_literal_re = re.compile(r'0[bB][01][01]*')
_integer_suffix_re = re.compile(r'')
_float_literal_re = re.compile(r'[+-]?[0-9]*\.[0-9]+')
# Group 1 is the optional encoding prefix, group 2 is the text between the
# quotes: either one plain character or one escape sequence.
_char_literal_re = re.compile(r'''(?x)
    ((?:u8)|u|U|L)?
    '(
      (?:[^\\'])
    | (\\(
        (?:['"?\\abfnrtv])
      | (?:[0-7]{1,3})
      | (?:x[0-9a-fA-F]{2})
      | (?:u[0-9a-fA-F]{4})
      | (?:U[0-9a-fA-F]{8})
      ))
    )'
''')
_anon_identifier_re = re.compile(r'(@[a-zA-Z0-9_])[a-zA-Z0-9_]*\b')
_identifier_re = re.compile(r'''(?x)
( # This 'extends' _anon_identifier_re with the ordinary identifiers,
@ -522,6 +535,10 @@ _id_operator_unary_v2 = {
'!': 'nt',
'~': 'co'
}
# Maps the encoding prefix of a character literal (None when the literal has
# no prefix) to the type code that precedes the value in a generated id.
_id_char_from_prefix = {
    None: 'c',
    'u8': 'c',
    'u': 'Ds',
    'U': 'Di',
    'L': 'w',
}  # type: Dict[unicode, unicode]
# these are ordered by precedence
_expression_bin_ops = [
['||'],
@ -780,6 +797,38 @@ class ASTNumberLiteral(ASTBase):
signode.append(nodes.Text(txt, txt))
class UnsupportedMultiCharacterCharLiteral(UnicodeMixin, Exception):
    """Signals a character literal that did not decode to exactly one character.

    :param decoded: the decoded text of the literal; it is kept on the
        ``decoded`` attribute for the handler to inspect.
    """

    def __init__(self, decoded):
        # Initialise the Exception base explicitly so that ``args`` (and with
        # it ``repr`` and pickling) carries the offending text instead of
        # being left empty.
        Exception.__init__(self, decoded)
        self.decoded = decoded
class ASTCharLiteral(ASTBase):
    """AST node for a character literal such as ``'a'`` or ``u8'\\n'``.

    Attributes set by the constructor:

    * ``prefix`` -- the encoding prefix, or None when the literal has none
    * ``data`` -- the text between the quotes, escape sequences undecoded
    * ``type`` -- the type code looked up in ``_id_char_from_prefix``
    * ``value`` -- the code point of the single decoded character
    """

    def __init__(self, prefix, data):
        # type: (unicode, unicode) -> None
        """Decode *data* and store the resulting code point.

        :raises UnicodeDecodeError: if *data* holds a malformed escape
        :raises UnsupportedMultiCharacterCharLiteral: if *data* does not
            decode to exactly one character
        """
        self.prefix = prefix  # may be None when no prefix
        self.data = data
        assert prefix in _id_char_from_prefix
        self.type = _id_char_from_prefix[prefix]
        # The 'unicode-escape' codec decodes its input as Latin-1.  Encoding
        # with UTF-8 would therefore turn one non-ASCII character into several
        # and wrongly reject it as a multi-character literal.  Latin-1 with
        # 'backslashreplace' round-trips every code point: characters up to
        # U+00FF map to one byte, anything above becomes a \uXXXX or
        # \UXXXXXXXX escape that the decoder turns back into that character.
        raw = data.encode('latin-1', 'backslashreplace')
        decoded = raw.decode('unicode-escape')
        if len(decoded) != 1:
            raise UnsupportedMultiCharacterCharLiteral(decoded)
        self.value = ord(decoded)

    def _stringify(self, transform):
        """Return the literal as source text: prefix (if any) and quoted data."""
        quoted = "'" + self.data + "'"
        if self.prefix is None:
            return quoted
        return self.prefix + quoted

    def get_id(self, version):
        """Return the id fragment: the type code followed by the code point."""
        return self.type + str(self.value)

    def describe_signature(self, signode, mode, env, symbol):
        """Append the textual form of the literal to *signode* as a text node."""
        txt = text_type(self)
        signode.append(nodes.Text(txt, txt))
class ASTStringLiteral(ASTBase):
def __init__(self, data):
# type: (unicode) -> None
@ -4177,7 +4226,19 @@ class DefinitionParser(object):
string = self._parse_string()
if string is not None:
return ASTStringLiteral(string)
# TODO: char lit
# character-literal
if self.match(_char_literal_re):
prefix = self.last_match.group(1) # may be None when no prefix
data = self.last_match.group(2)
try:
return ASTCharLiteral(prefix, data)
except UnicodeDecodeError as e:
self.fail("Can not handle character literal. Internal error was: %s" % e)
except UnsupportedMultiCharacterCharLiteral as e:
self.fail("Can not handle character literal"
" resulting in multiple decoded characters.")
# TODO: user-defined lit
return None

View File

@ -127,7 +127,20 @@ def test_expressions():
exprCheck(expr, 'L' + expr + 'E')
exprCheck('"abc\\"cba"', 'LA8_KcE') # string
exprCheck('this', 'fpT')
# TODO: test the rest
# character literals
for p, t in [('', 'c'), ('u8', 'c'), ('u', 'Ds'), ('U', 'Di'), ('L', 'w')]:
exprCheck(p + "'a'", t + "97")
exprCheck(p + "'\\n'", t + "10")
exprCheck(p + "'\\012'", t + "10")
exprCheck(p + "'\\0'", t + "0")
exprCheck(p + "'\\x0a'", t + "10")
exprCheck(p + "'\\x0A'", t + "10")
exprCheck(p + "'\\u0a42'", t + "2626")
exprCheck(p + "'\\u0A42'", t + "2626")
exprCheck(p + "'\\U0001f34c'", t + "127820")
exprCheck(p + "'\\U0001F34C'", t + "127820")
# TODO: user-defined lit
exprCheck('(... + Ns)', '(... + Ns)')
exprCheck('(5)', 'L5E')
exprCheck('C', '1C')