From b536ffabc1ff7d8cf4c2ec60ac8b5f7b812e448a Mon Sep 17 00:00:00 2001
From: Adam Turner <9087854+aa-turner@users.noreply.github.com>
Date: Wed, 27 Apr 2022 02:53:13 +0100
Subject: [PATCH] Deprecate `sphinx.util.jsdump`

---
 doc/extdev/deprecated.rst |   5 +
 sphinx/search/__init__.py |  24 +++--
 sphinx/util/jsdump.py     | 211 ++++----------------------------------
 tests/test_search.py      |  33 +++---
 tests/test_util_jsdump.py |  19 ----
 5 files changed, 57 insertions(+), 235 deletions(-)
 delete mode 100644 tests/test_util_jsdump.py

diff --git a/doc/extdev/deprecated.rst b/doc/extdev/deprecated.rst
index 9fc1110fc..35d6eb304 100644
--- a/doc/extdev/deprecated.rst
+++ b/doc/extdev/deprecated.rst
@@ -22,6 +22,11 @@ The following is a list of deprecated interfaces.
      - (will be) Removed
      - Alternatives
 
+   * - ``sphinx.util.jsdump``
+     - 5.0
+     - 6.0
+     - The standard library ``json`` module.
+
    * - :doc:`Setuptools integration `
      - 5.0
      - 7.0
diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py
index c3e46ce22..5a5622e3a 100644
--- a/sphinx/search/__init__.py
+++ b/sphinx/search/__init__.py
@@ -1,7 +1,9 @@
 """Create a full-text search index for offline search."""
 import html
+import json
 import pickle
 import re
+import warnings
 from importlib import import_module
 from os import path
 from typing import IO, Any, Dict, Iterable, List, Optional, Set, Tuple, Type
@@ -10,8 +12,8 @@ from docutils import nodes
 from docutils.nodes import Element, Node
 
 from sphinx import addnodes, package_dir
+from sphinx.deprecation import RemovedInSphinx60Warning
 from sphinx.environment import BuildEnvironment
-from sphinx.util import jsdump
 
 
 class SearchLanguage:
@@ -154,14 +156,14 @@ class _JavaScriptIndex:
     SUFFIX = ')'
 
     def dumps(self, data: Any) -> str:
-        return self.PREFIX + jsdump.dumps(data) + self.SUFFIX
+        return self.PREFIX + json.dumps(data) + self.SUFFIX
 
     def loads(self, s: str) -> Any:
         data = s[len(self.PREFIX):-len(self.SUFFIX)]
         if not data or not s.startswith(self.PREFIX) or not \
            s.endswith(self.SUFFIX):
             raise ValueError('invalid data')
-        return jsdump.loads(data)
+        return json.loads(data)
 
     def dump(self, data: Any, f: IO) -> None:
         f.write(self.dumps(data))
@@ -224,7 +226,7 @@ class IndexBuilder:
     passed to the `feed` method.
     """
     formats = {
-        'jsdump': jsdump,
+        'json': json,
         'pickle': pickle
     }
 
@@ -265,7 +267,11 @@ class IndexBuilder:
 
     def load(self, stream: IO, format: Any) -> None:
         """Reconstruct from frozen data."""
-        if isinstance(format, str):
+        if format == "jsdump":
+            warnings.warn("format=jsdump is deprecated, use json instead",
+                          RemovedInSphinx60Warning, stacklevel=2)
+            format = self.formats["json"]
+        elif isinstance(format, str):
             format = self.formats[format]
         frozen = format.load(stream)
         # if an old index is present, we treat it as not existing.
@@ -291,7 +297,11 @@ class IndexBuilder:
 
     def dump(self, stream: IO, format: Any) -> None:
         """Dump the frozen index to a stream."""
-        if isinstance(format, str):
+        if format == "jsdump":
+            warnings.warn("format=jsdump is deprecated, use json instead",
+                          RemovedInSphinx60Warning, stacklevel=2)
+            format = self.formats["json"]
+        elif isinstance(format, str):
             format = self.formats[format]
         format.dump(self.freeze(), stream)
 
@@ -417,7 +427,7 @@ class IndexBuilder:
 
         return {
             'search_language_stemming_code': self.get_js_stemmer_code(),
-            'search_language_stop_words': jsdump.dumps(sorted(self.lang.stopwords)),
+            'search_language_stop_words': json.dumps(sorted(self.lang.stopwords)),
             'search_scorer_tool': self.js_scorer_code,
             'search_word_splitter_code': js_splitter_code,
         }
diff --git a/sphinx/util/jsdump.py b/sphinx/util/jsdump.py
index ed5aea4ba..151d3e80a 100644
--- a/sphinx/util/jsdump.py
+++ b/sphinx/util/jsdump.py
@@ -1,194 +1,21 @@
-"""This module implements a simple JavaScript serializer.
+"""Deprecated compatibility aliases for the standard library ``json`` module."""
+import json
 
-Uses the basestring encode function from simplejson by Bob Ippolito.
-"""
+from sphinx.deprecation import RemovedInSphinx60Warning, deprecated_alias
 
-import re
-from typing import IO, Any, Dict, List, Match, Union
-
-_str_re = re.compile(r'"(\\\\|\\"|[^"])*"')
-_int_re = re.compile(r'\d+')
-_name_re = re.compile(r'[a-zA-Z_]\w*')
-_nameonly_re = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*$')
-
-# escape \, ", control characters and everything outside ASCII
-ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
-ESCAPE_DICT = {
-    '\\': '\\\\',
-    '"': '\\"',
-    '\b': '\\b',
-    '\f': '\\f',
-    '\n': '\\n',
-    '\r': '\\r',
-    '\t': '\\t',
-}
-
-ESCAPED = re.compile(r'\\u.{4}|\\.')
-
-
-def encode_string(s: str) -> str:
-    def replace(match: Match) -> str:
-        s = match.group(0)
-        try:
-            return ESCAPE_DICT[s]
-        except KeyError:
-            n = ord(s)
-            if n < 0x10000:
-                return '\\u%04x' % (n,)
-            else:
-                # surrogate pair
-                n -= 0x10000
-                s1 = 0xd800 | ((n >> 10) & 0x3ff)
-                s2 = 0xdc00 | (n & 0x3ff)
-                return '\\u%04x\\u%04x' % (s1, s2)
-    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
-
-
-def decode_string(s: str) -> str:
-    return ESCAPED.sub(lambda m: eval('"' + m.group() + '"'), s)
-
-
-reswords = set("""\
-abstract else instanceof switch
-boolean enum int synchronized
-break export interface this
-byte extends long throw
-case false native throws
-catch final new transient
-char finally null true
-class float package try
-const for private typeof
-continue function protected var
-debugger goto public void
-default if return volatile
-delete implements short while
-do import static with
-double in super""".split())
-
-
-def dumps(obj: Any, key: bool = False) -> str:
-    if key:
-        if not isinstance(obj, str):
-            obj = str(obj)
-        if _nameonly_re.match(obj) and obj not in reswords:
-            return obj  # return it as a bare word
-        else:
-            return encode_string(obj)
-    if obj is None:
-        return 'null'
-    elif obj is True or obj is False:
-        return 'true' if obj else 'false'
-    elif isinstance(obj, (int, float)):
-        return str(obj)
-    elif isinstance(obj, dict):
-        return '{%s}' % ','.join(sorted('%s:%s' % (
-            dumps(key, True),
-            dumps(value)
-        ) for key, value in obj.items()))
-    elif isinstance(obj, set):
-        return '[%s]' % ','.join(sorted(dumps(x) for x in obj))
-    elif isinstance(obj, (tuple, list)):
-        return '[%s]' % ','.join(dumps(x) for x in obj)
-    elif isinstance(obj, str):
-        return encode_string(obj)
-    raise TypeError(type(obj))
-
-
-def dump(obj: Any, f: IO) -> None:
-    f.write(dumps(obj))
-
-
-def loads(x: str) -> Any:
-    """Loader that can read the JS subset the indexer produces."""
-    nothing = object()
-    i = 0
-    n = len(x)
-    stack: List[Union[List, Dict]] = []
-    obj: Any = nothing
-    key = False
-    keys = []
-    while i < n:
-        c = x[i]
-        if c == '{':
-            obj = {}
-            stack.append(obj)
-            key = True
-            keys.append(nothing)
-            i += 1
-        elif c == '[':
-            obj = []
-            stack.append(obj)
-            key = False
-            keys.append(nothing)
-            i += 1
-        elif c in '}]':
-            if key:
-                if keys[-1] is not nothing:
-                    raise ValueError("unfinished dict")
-                # empty dict
-                key = False
-            oldobj = stack.pop()
-            keys.pop()
-            if stack:
-                obj = stack[-1]
-                if isinstance(obj, dict):
-                    if keys[-1] is nothing:
-                        raise ValueError("invalid key object", oldobj)
-                    obj[keys[-1]] = oldobj
-                else:
-                    obj.append(oldobj)
-            else:
-                break
-            i += 1
-        elif c == ',':
-            if key:
-                raise ValueError("multiple keys")
-            if isinstance(obj, dict):
-                key = True
-            i += 1
-        elif c == ':':
-            if not isinstance(obj, dict):
-                raise ValueError("colon in list")
-            i += 1
-            if not key:
-                raise ValueError("multiple values")
-            key = False
-        else:
-            y: Any = None
-            m = _str_re.match(x, i)
-            if m:
-                y = decode_string(m.group()[1:-1])
-            else:
-                m = _int_re.match(x, i)
-                if m:
-                    y = int(m.group())
-                else:
-                    m = _name_re.match(x, i)
-                    if m:
-                        y = m.group()
-                        if y == 'true':
-                            y = True
-                        elif y == 'false':
-                            y = False
-                        elif y == 'null':
-                            y = None
-                        elif not key:
-                            raise ValueError("bareword as value")
-                    else:
-                        raise ValueError("read error at pos %d" % i)
-            i = m.end()
-            if isinstance(obj, dict):
-                if key:
-                    keys[-1] = y
-                else:
-                    obj[keys[-1]] = y
-                    key = False
-            else:
-                obj.append(y)
-    if obj is nothing:
-        raise ValueError("nothing loaded from string")
-    return obj
-
-
-def load(f: IO) -> Any:
-    return loads(f.read())
+deprecated_alias(
+    'sphinx.util.jsdump',
+    {
+        'dumps': lambda obj, key=False: json.dumps(obj),  # ``key`` kept for compatibility
+        'dump': json.dump,
+        'loads': json.loads,
+        'load': json.load,
+    },
+    RemovedInSphinx60Warning,
+    {
+        'dumps': 'json.dumps',
+        'dump': 'json.dump',
+        'loads': 'json.loads',
+        'load': 'json.load',
+    }
+)
diff --git a/tests/test_search.py b/tests/test_search.py
index 024e6941c..540793544 100644
--- a/tests/test_search.py
+++ b/tests/test_search.py
@@ -1,5 +1,6 @@
 """Test the search index builder."""
 
+import json
 from collections import namedtuple
 from io import BytesIO
 
@@ -8,7 +9,6 @@ from docutils import frontend, utils
 from docutils.parsers import rst
 
 from sphinx.search import IndexBuilder
-from sphinx.util import jsdump
 
 DummyEnvironment = namedtuple('DummyEnvironment', ['version', 'domains'])
 
@@ -32,12 +32,12 @@ def setup_module():
     parser = rst.Parser()
 
 
-def jsload(path):
+def load_searchindex(path):
     searchindex = path.read_text()
     assert searchindex.startswith('Search.setIndex(')
     assert searchindex.endswith(')')
 
-    return jsdump.loads(searchindex[16:-1])
+    return json.loads(searchindex[16:-1])
 
 
 def is_registered_term(index, keyword):
@@ -57,7 +57,7 @@ test that non-comments are indexed: fermion
 @pytest.mark.sphinx(testroot='ext-viewcode')
 def test_objects_are_escaped(app, status, warning):
     app.builder.build_all()
-    index = jsload(app.outdir / 'searchindex.js')
+    index = load_searchindex(app.outdir / 'searchindex.js')
     for item in index.get('objects').get(''):
         if item[-1] == 'n::Array<T, d>':  # n::Array is escaped
             break
@@ -68,7 +68,7 @@ def test_objects_are_escaped(app, status, warning):
 @pytest.mark.sphinx(testroot='search')
 def test_meta_keys_are_handled_for_language_en(app, status, warning):
     app.builder.build_all()
-    searchindex = jsload(app.outdir / 'searchindex.js')
+    searchindex = load_searchindex(app.outdir / 'searchindex.js')
     assert not is_registered_term(searchindex, 'thisnoteith')
     assert is_registered_term(searchindex, 'thisonetoo')
     assert is_registered_term(searchindex, 'findthiskei')
@@ -81,7 +81,7 @@ def test_meta_keys_are_handled_for_language_en(app, status, warning):
 @pytest.mark.sphinx(testroot='search', confoverrides={'html_search_language': 'de'})
 def test_meta_keys_are_handled_for_language_de(app, status, warning):
     app.builder.build_all()
-    searchindex = jsload(app.outdir / 'searchindex.js')
+    searchindex = load_searchindex(app.outdir / 'searchindex.js')
     assert not is_registered_term(searchindex, 'thisnoteith')
     assert is_registered_term(searchindex, 'thisonetoo')
     assert not is_registered_term(searchindex, 'findthiskei')
@@ -100,7 +100,7 @@ def test_stemmer_does_not_remove_short_words(app, status, warning):
 
 @pytest.mark.sphinx(testroot='search')
 def test_stemmer(app, status, warning):
-    searchindex = jsload(app.outdir / 'searchindex.js')
+    searchindex = load_searchindex(app.outdir / 'searchindex.js')
     print(searchindex)
     assert is_registered_term(searchindex, 'findthisstemmedkei')
     assert is_registered_term(searchindex, 'intern')
@@ -112,13 +112,13 @@ def test_term_in_heading_and_section(app, status, warning):
     # if search term is in the title of one doc and in the text of another
     # both documents should be a hit in the search index as a title,
     # respectively text hit
-    assert 'textinhead:2' in searchindex
-    assert 'textinhead:0' in searchindex
+    assert '"textinhead": 2' in searchindex
+    assert '"textinhead": 0' in searchindex
 
 
 @pytest.mark.sphinx(testroot='search')
 def test_term_in_raw_directive(app, status, warning):
-    searchindex = jsload(app.outdir / 'searchindex.js')
+    searchindex = load_searchindex(app.outdir / 'searchindex.js')
     assert not is_registered_term(searchindex, 'raw')
     assert is_registered_term(searchindex, 'rawword')
     assert not is_registered_term(searchindex, 'latex_keyword')
@@ -255,18 +255,17 @@ def test_IndexBuilder_lookup():
 )
 def test_search_index_gen_zh(app, status, warning):
     app.builder.build_all()
-    # jsdump fails if search language is 'zh'; hence we just get the text:
-    searchindex = (app.outdir / 'searchindex.js').read_text()
-    assert 'chinesetest ' not in searchindex
-    assert 'chinesetest' in searchindex
-    assert 'chinesetesttwo' in searchindex
-    assert 'cas' in searchindex
+    index = load_searchindex(app.outdir / 'searchindex.js')
+    assert 'chinesetest ' not in index['terms']
+    assert 'chinesetest' in index['terms']
+    assert 'chinesetesttwo' in index['terms']
+    assert 'cas' in index['terms']
 
 
 @pytest.mark.sphinx(testroot='search')
 def test_nosearch(app):
     app.build()
-    index = jsload(app.outdir / 'searchindex.js')
+    index = load_searchindex(app.outdir / 'searchindex.js')
     assert index['docnames'] == ['index', 'nosearch', 'tocitem']
     assert 'latex' not in index['terms']
     assert 'zfs' in index['terms']
diff --git a/tests/test_util_jsdump.py b/tests/test_util_jsdump.py
deleted file mode 100644
index d93c6ecd0..000000000
--- a/tests/test_util_jsdump.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from sphinx.util.jsdump import dumps, loads
-
-
-def test_jsdump():
-    data = {'1a': 1}
-    assert dumps(data) == '{"1a":1}'
-    assert data == loads(dumps(data))
-
-    data = {'a1': 1}
-    assert dumps(data) == '{a1:1}'
-    assert data == loads(dumps(data))
-
-    data = {'a\xe8': 1}
-    assert dumps(data) == '{"a\\u00e8":1}'
-    assert data == loads(dumps(data))
-
-    data = {'_foo': 1}
-    assert dumps(data) == '{_foo:1}'
-    assert data == loads(dumps(data))