diff --git a/sphinx/util/jsdump.py b/sphinx/util/jsdump.py index 151d3e80a..008cb6153 100644 --- a/sphinx/util/jsdump.py +++ b/sphinx/util/jsdump.py @@ -1,15 +1,208 @@ -"""This module implements a simple JavaScript serializer.""" -import json +"""This module implements a simple JavaScript serializer. + +Uses the basestring encode function from simplejson by Bob Ippolito. +""" + +import re +from typing import IO, Any, Dict, List, Match, Union from sphinx.deprecation import RemovedInSphinx60Warning, deprecated_alias +_str_re = re.compile(r'"(\\\\|\\"|[^"])*"') +_int_re = re.compile(r'\d+') +_name_re = re.compile(r'[a-zA-Z_]\w*') +_nameonly_re = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*$') + +# escape \, ", control characters and everything outside ASCII +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') +ESCAPE_DICT = { + '\\': '\\\\', + '"': '\\"', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t', +} + +ESCAPED = re.compile(r'\\u.{4}|\\.') + + +def encode_string(s: str) -> str: + def replace(match: Match) -> str: + s = match.group(0) + try: + return ESCAPE_DICT[s] + except KeyError: + n = ord(s) + if n < 0x10000: + return '\\u%04x' % (n,) + else: + # surrogate pair + n -= 0x10000 + s1 = 0xd800 | ((n >> 10) & 0x3ff) + s2 = 0xdc00 | (n & 0x3ff) + return '\\u%04x\\u%04x' % (s1, s2) + return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' + + +def decode_string(s: str) -> str: + return ESCAPED.sub(lambda m: eval('"' + m.group() + '"'), s) + + +reswords = set("""\ +abstract else instanceof switch +boolean enum int synchronized +break export interface this +byte extends long throw +case false native throws +catch final new transient +char finally null true +class float package try +const for private typeof +continue function protected var +debugger goto public void +default if return volatile +delete implements short while +do import static with +double in super""".split()) + + +def dumps(obj: Any, key: bool = False) -> str: + if key: + if not isinstance(obj, str): + obj = str(obj) + if _nameonly_re.match(obj) and obj not in reswords: + return obj # return it as a bare word + else: + return encode_string(obj) + if obj is None: + return 'null' + elif obj is True or obj is False: + return 'true' if obj else 'false' + elif isinstance(obj, (int, float)): + return str(obj) + elif isinstance(obj, dict): + return '{%s}' % ','.join(sorted('%s:%s' % ( + dumps(key, True), + dumps(value) + ) for key, value in obj.items())) + elif isinstance(obj, set): + return '[%s]' % ','.join(sorted(dumps(x) for x in obj)) + elif isinstance(obj, (tuple, list)): + return '[%s]' % ','.join(dumps(x) for x in obj) + elif isinstance(obj, str): + return encode_string(obj) + raise TypeError(type(obj)) + + +def dump(obj: Any, f: IO) -> None: + f.write(dumps(obj)) + + +def loads(x: str) -> Any: + """Loader that can read the JS subset the indexer produces.""" + nothing = object() + i = 0 + n = len(x) + stack: List[Union[List, Dict]] = [] + obj: Any = nothing + key = False + keys = [] + while i < n: + c = x[i] + if c == '{': + obj = {} + stack.append(obj) + key = True + keys.append(nothing) + i += 1 + elif c == '[': + obj = [] + stack.append(obj) + key = False + keys.append(nothing) + i += 1 + elif c in '}]': + if key: + if keys[-1] is not nothing: + raise ValueError("unfinished dict") + # empty dict + key = False + oldobj = stack.pop() + keys.pop() + if stack: + obj = stack[-1] + if isinstance(obj, dict): + if keys[-1] is nothing: + raise ValueError("invalid key object", oldobj) + obj[keys[-1]] = oldobj + else: + obj.append(oldobj) + else: + break + i += 1 + elif c == ',': + if key: + raise ValueError("multiple keys") + if isinstance(obj, dict): + key = True + i += 1 + elif c == ':': + if not isinstance(obj, dict): + raise ValueError("colon in list") + i += 1 + if not key: + raise ValueError("multiple values") + key = False + else: + y: Any = None + m = _str_re.match(x, i) + if m: + y = decode_string(m.group()[1:-1]) + else: + m = _int_re.match(x, i) + if m: + y = int(m.group()) + else: + m = _name_re.match(x, i) + if m: + y = m.group() + if y == 'true': + y = True + elif y == 'false': + y = False + elif y == 'null': + y = None + elif not key: + raise ValueError("bareword as value") + else: + raise ValueError("read error at pos %d" % i) + i = m.end() + if isinstance(obj, dict): + if key: + keys[-1] = y + else: + obj[keys[-1]] = y + key = False + else: + obj.append(y) + if obj is nothing: + raise ValueError("nothing loaded from string") + return obj + + +def load(f: IO) -> Any: + return loads(f.read()) + + deprecated_alias( 'sphinx.util.jsdump', { - 'dumps': lambda o, _key: json.dumps(o), - 'dump': json.dump, - 'loads': json.loads, - 'load': json.load, + 'dumps': dumps, + 'dump': dump, + 'loads': loads, + 'load': load, }, RemovedInSphinx60Warning, { diff --git a/tests/test_util_jsdump.py b/tests/test_util_jsdump.py new file mode 100644 index 000000000..d93c6ecd0 --- /dev/null +++ b/tests/test_util_jsdump.py @@ -0,0 +1,19 @@ +from sphinx.util.jsdump import dumps, loads + + +def test_jsdump(): + data = {'1a': 1} + assert dumps(data) == '{"1a":1}' + assert data == loads(dumps(data)) + + data = {'a1': 1} + assert dumps(data) == '{a1:1}' + assert data == loads(dumps(data)) + + data = {'a\xe8': 1} + assert dumps(data) == '{"a\\u00e8":1}' + assert data == loads(dumps(data)) + + data = {'_foo': 1} + assert dumps(data) == '{_foo:1}' + assert data == loads(dumps(data))