Deprecate sphinx.util.jsdump

This commit is contained in:
Adam Turner
2022-04-27 02:53:13 +01:00
parent 5eeeb9ce37
commit b536ffabc1
5 changed files with 57 additions and 235 deletions

View File

@@ -22,6 +22,11 @@ The following is a list of deprecated interfaces.
- (will be) Removed
- Alternatives
* - ``sphinx.util.jsdump``
- 5.0
- 6.0
- The standard library ``json`` module.
* - :doc:`Setuptools integration </usage/advanced/setuptools>`
- 5.0
- 7.0

View File

@@ -1,7 +1,9 @@
"""Create a full-text search index for offline search."""
import html
import json
import pickle
import re
import warnings
from importlib import import_module
from os import path
from typing import IO, Any, Dict, Iterable, List, Optional, Set, Tuple, Type
@@ -10,8 +12,8 @@ from docutils import nodes
from docutils.nodes import Element, Node
from sphinx import addnodes, package_dir
from sphinx.deprecation import RemovedInSphinx60Warning
from sphinx.environment import BuildEnvironment
from sphinx.util import jsdump
class SearchLanguage:
@@ -154,14 +156,14 @@ class _JavaScriptIndex:
SUFFIX = ')'
def dumps(self, data: Any) -> str:
return self.PREFIX + jsdump.dumps(data) + self.SUFFIX
return self.PREFIX + json.dumps(data) + self.SUFFIX
def loads(self, s: str) -> Any:
data = s[len(self.PREFIX):-len(self.SUFFIX)]
if not data or not s.startswith(self.PREFIX) or not \
s.endswith(self.SUFFIX):
raise ValueError('invalid data')
return jsdump.loads(data)
return json.loads(data)
def dump(self, data: Any, f: IO) -> None:
f.write(self.dumps(data))
@@ -224,7 +226,7 @@ class IndexBuilder:
passed to the `feed` method.
"""
formats = {
'jsdump': jsdump,
'json': json,
'pickle': pickle
}
@@ -265,7 +267,11 @@ class IndexBuilder:
def load(self, stream: IO, format: Any) -> None:
"""Reconstruct from frozen data."""
if isinstance(format, str):
if format == "jsdump":
warnings.warn("format=jsdump is deprecated, use json instead",
RemovedInSphinx60Warning, stacklevel=2)
format = self.formats["json"]
elif isinstance(format, str):
format = self.formats[format]
frozen = format.load(stream)
# if an old index is present, we treat it as not existing.
@@ -291,7 +297,11 @@ class IndexBuilder:
def dump(self, stream: IO, format: Any) -> None:
"""Dump the frozen index to a stream."""
if isinstance(format, str):
if format == "jsdump":
warnings.warn("format=jsdump is deprecated, use json instead",
RemovedInSphinx60Warning, stacklevel=2)
format = self.formats["json"]
elif isinstance(format, str):
format = self.formats[format]
format.dump(self.freeze(), stream)
@@ -417,7 +427,7 @@ class IndexBuilder:
return {
'search_language_stemming_code': self.get_js_stemmer_code(),
'search_language_stop_words': jsdump.dumps(sorted(self.lang.stopwords)),
'search_language_stop_words': json.dumps(sorted(self.lang.stopwords)),
'search_scorer_tool': self.js_scorer_code,
'search_word_splitter_code': js_splitter_code,
}

View File

@@ -1,194 +1,21 @@
"""This module implements a simple JavaScript serializer.
"""This module implements a simple JavaScript serializer."""
import json
Uses the basestring encode function from simplejson by Bob Ippolito.
"""
from sphinx.deprecation import RemovedInSphinx60Warning, deprecated_alias
import re
from typing import IO, Any, Dict, List, Match, Union
# Token patterns used by ``loads`` to scan its input at a given offset.
# Double-quoted string; ``\\`` and ``\"`` are consumed as two-character units.
_str_re = re.compile(r'"(\\\\|\\"|[^"])*"')
# Run of decimal digits (no sign, fraction or exponent).
_int_re = re.compile(r'\d+')
# Bare word: ``true``/``false``/``null`` or an unquoted dict key.
_name_re = re.compile(r'[a-zA-Z_]\w*')
# Anchored at the end; ``dumps`` uses it to decide whether a dict key may be
# written without quotes.
_nameonly_re = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*$')
# escape \, ", control characters and everything outside ASCII
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Short escapes; characters matched by ESCAPE_ASCII that are not listed here
# are written as \uXXXX by ``encode_string``.
ESCAPE_DICT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# One escape sequence as replaced by ``decode_string``: ``\u`` plus four
# characters, or a backslash plus any single character.
ESCAPED = re.compile(r'\\u.{4}|\\.')
def encode_string(s: str) -> str:
    """Return *s* as a double-quoted, ASCII-only string literal.

    Backslash, double quote and every character outside the printable ASCII
    range are escaped: the short forms from ``ESCAPE_DICT`` where one exists,
    otherwise ``\\uXXXX`` (a surrogate pair for code points above U+FFFF).
    """
    def escape_char(match: Match) -> str:
        char = match.group(0)
        short_form = ESCAPE_DICT.get(char)
        if short_form is not None:
            return short_form

        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u%04x' % (codepoint,)

        # surrogate pair
        offset = codepoint - 0x10000
        high = 0xd800 | ((offset >> 10) & 0x3ff)
        low = 0xdc00 | (offset & 0x3ff)
        return '\\u%04x\\u%04x' % (high, low)

    escaped = ESCAPE_ASCII.sub(escape_char, s)
    return '"' + str(escaped) + '"'
def decode_string(s: str) -> str:
    r"""Decode the backslash escapes in the body of a string literal.

    :param s: the text between the quotes of a string literal, as produced
              by ``encode_string`` (without the surrounding quotes).
    :return: *s* with every sequence matched by ``ESCAPED`` replaced by the
             character it denotes.

    ``encode_string`` writes a character above U+FFFF as two consecutive
    ``\uXXXX`` escapes (a UTF-16 surrogate pair).  The escapes are decoded one
    at a time, so the pair is joined afterwards; otherwise the result would
    hold two lone surrogates instead of the original character.
    """
    # NOTE(review): ``eval`` is applied to each matched escape sequence.  The
    # ``ESCAPED`` pattern restricts what reaches it to a backslash plus one
    # character or ``\u`` plus four characters, but an invalid escape surfaces
    # as SyntaxError and the construct is fragile; prefer ``json.loads`` for
    # data that is not produced by this module.
    decoded = ESCAPED.sub(lambda m: eval('"' + m.group() + '"'), s)
    # A UTF-16 round trip merges each high/low surrogate pair into the single
    # code point it encodes; 'surrogatepass' keeps unpaired surrogates as-is
    # instead of raising.
    return decoded.encode('utf-16-le', 'surrogatepass').decode(
        'utf-16-le', 'surrogatepass')
# Words that ``dumps`` never writes as a bare (unquoted) dict key even though
# they match ``_nameonly_re``; such keys go through ``encode_string`` instead.
reswords = set("""\
abstract else instanceof switch
boolean enum int synchronized
break export interface this
byte extends long throw
case false native throws
catch final new transient
char finally null true
class float package try
const for private typeof
continue function protected var
debugger goto public void
default if return volatile
delete implements short while
do import static with
double in super""".split())
def dumps(obj: Any, key: bool = False) -> str:
    """Serialize *obj* to the compact JavaScript notation read by ``loads``.

    :param obj: ``None``, a bool, int, float, str, or a dict/set/tuple/list
                of such values.
    :param key: when true, *obj* is written as a dict key: converted to a
                string and left unquoted if it is a plain identifier that is
                not listed in ``reswords``.
    :raises TypeError: if *obj* is of an unsupported type.

    Dict entries and set members are emitted in sorted order of their
    serialized text.
    """
    if key:
        name = obj if isinstance(obj, str) else str(obj)
        if _nameonly_re.match(name) and name not in reswords:
            return name  # return it as a bare word
        return encode_string(name)

    if obj is None:
        return 'null'
    if obj is True:
        return 'true'
    if obj is False:
        return 'false'
    if isinstance(obj, (int, float)):
        return str(obj)
    if isinstance(obj, dict):
        entries = ['%s:%s' % (dumps(k, True), dumps(v)) for k, v in obj.items()]
        entries.sort()
        return '{%s}' % ','.join(entries)
    if isinstance(obj, set):
        members = [dumps(member) for member in obj]
        members.sort()
        return '[%s]' % ','.join(members)
    if isinstance(obj, (tuple, list)):
        return '[%s]' % ','.join([dumps(item) for item in obj])
    if isinstance(obj, str):
        return encode_string(obj)
    raise TypeError(type(obj))
def dump(obj: Any, f: IO) -> None:
    """Serialize *obj* with ``dumps`` and write the result to *f*."""
    serialized = dumps(obj)
    f.write(serialized)
def loads(x: str) -> Any:
    """Loader that can read the JS subset the indexer produces.

    The input is scanned character by character.  Recognized tokens are
    ``{``/``}``, ``[``/``]``, ``,``, ``:``, double-quoted strings, runs of
    decimal digits, and bare words (``true``, ``false``, ``null`` or, in key
    position, an unquoted dict key).  Anything else, including whitespace,
    raises ``ValueError`` ("read error").

    :param x: the serialized text.
    :return: the outermost dict or list found in *x*.
    :raises ValueError: on malformed input or when nothing was loaded.
    """
    # Sentinel for "no value yet"; distinct from None, which is a valid value.
    nothing = object()
    i = 0
    n = len(x)
    # Containers currently open, innermost last.
    stack: List[Union[List, Dict]] = []
    # Innermost open container (or the finished result after the final pop).
    obj: Any = nothing
    # True while the next scalar token is expected to be a dict key.
    key = False
    # One slot per open container: the pending dict key, or ``nothing``.
    keys = []
    while i < n:
        c = x[i]
        if c == '{':
            obj = {}
            stack.append(obj)
            key = True
            keys.append(nothing)
            i += 1
        elif c == '[':
            obj = []
            stack.append(obj)
            key = False
            keys.append(nothing)
            i += 1
        elif c in '}]':
            if key:
                if keys[-1] is not nothing:
                    raise ValueError("unfinished dict")
                # empty dict
                key = False
            oldobj = stack.pop()
            keys.pop()
            if stack:
                # Attach the container just closed to its parent.
                obj = stack[-1]
                if isinstance(obj, dict):
                    if keys[-1] is nothing:
                        raise ValueError("invalid key object", oldobj)
                    obj[keys[-1]] = oldobj
                else:
                    obj.append(oldobj)
            else:
                # Outermost container closed: stop here; ``obj`` still refers
                # to it and any remaining input is not examined.
                break
            i += 1
        elif c == ',':
            if key:
                raise ValueError("multiple keys")
            if isinstance(obj, dict):
                key = True
            i += 1
        elif c == ':':
            if not isinstance(obj, dict):
                raise ValueError("colon in list")
            i += 1
            if not key:
                raise ValueError("multiple values")
            key = False
        else:
            # Scalar token: try a string, then an integer, then a bare word.
            y: Any = None
            m = _str_re.match(x, i)
            if m:
                # Strip the surrounding quotes before decoding the escapes.
                y = decode_string(m.group()[1:-1])
            else:
                m = _int_re.match(x, i)
                if m:
                    y = int(m.group())
                else:
                    m = _name_re.match(x, i)
                    if m:
                        y = m.group()
                        if y == 'true':
                            y = True
                        elif y == 'false':
                            y = False
                        elif y == 'null':
                            y = None
                        elif not key:
                            # Any other bare word is only allowed as a key.
                            raise ValueError("bareword as value")
                    else:
                        raise ValueError("read error at pos %d" % i)
            # ``m`` is always a match here; every failure path raised above.
            i = m.end()
            if isinstance(obj, dict):
                if key:
                    # Remember the key until its value (or container) arrives.
                    keys[-1] = y
                else:
                    obj[keys[-1]] = y
                    key = False
            else:
                obj.append(y)
    if obj is nothing:
        raise ValueError("nothing loaded from string")
    return obj
def load(f: IO) -> Any:
    """Read the whole of *f* and parse its content with ``loads``."""
    text = f.read()
    return loads(text)
# Keep the former public names of this module available until removal,
# mapping each one to its standard library ``json`` counterpart.
deprecated_alias(
    'sphinx.util.jsdump',
    {
        # The removed function was ``dumps(obj, key=False)``: the second
        # argument must stay optional (and keep its name) so that existing
        # ``jsdump.dumps(data)`` calls do not fail with a TypeError.  The
        # value is ignored because ``json.dumps`` has no equivalent.
        'dumps': lambda obj, key=False: json.dumps(obj),
        'dump': json.dump,
        'loads': json.loads,
        'load': json.load,
    },
    RemovedInSphinx60Warning,
    {
        'dumps': 'json.dumps',
        'dump': 'json.dump',
        'loads': 'json.loads',
        'load': 'json.load',
    }
)

View File

@@ -1,5 +1,6 @@
"""Test the search index builder."""
import json
from collections import namedtuple
from io import BytesIO
@@ -8,7 +9,6 @@ from docutils import frontend, utils
from docutils.parsers import rst
from sphinx.search import IndexBuilder
from sphinx.util import jsdump
DummyEnvironment = namedtuple('DummyEnvironment', ['version', 'domains'])
@@ -32,12 +32,12 @@ def setup_module():
parser = rst.Parser()
def jsload(path):
def load_searchindex(path):
searchindex = path.read_text()
assert searchindex.startswith('Search.setIndex(')
assert searchindex.endswith(')')
return jsdump.loads(searchindex[16:-1])
return json.loads(searchindex[16:-1])
def is_registered_term(index, keyword):
@@ -57,7 +57,7 @@ test that non-comments are indexed: fermion
@pytest.mark.sphinx(testroot='ext-viewcode')
def test_objects_are_escaped(app, status, warning):
app.builder.build_all()
index = jsload(app.outdir / 'searchindex.js')
index = load_searchindex(app.outdir / 'searchindex.js')
for item in index.get('objects').get(''):
if item[-1] == 'n::Array&lt;T, d&gt;': # n::Array<T,d> is escaped
break
@@ -68,7 +68,7 @@ def test_objects_are_escaped(app, status, warning):
@pytest.mark.sphinx(testroot='search')
def test_meta_keys_are_handled_for_language_en(app, status, warning):
app.builder.build_all()
searchindex = jsload(app.outdir / 'searchindex.js')
searchindex = load_searchindex(app.outdir / 'searchindex.js')
assert not is_registered_term(searchindex, 'thisnoteith')
assert is_registered_term(searchindex, 'thisonetoo')
assert is_registered_term(searchindex, 'findthiskei')
@@ -81,7 +81,7 @@ def test_meta_keys_are_handled_for_language_en(app, status, warning):
@pytest.mark.sphinx(testroot='search', confoverrides={'html_search_language': 'de'})
def test_meta_keys_are_handled_for_language_de(app, status, warning):
app.builder.build_all()
searchindex = jsload(app.outdir / 'searchindex.js')
searchindex = load_searchindex(app.outdir / 'searchindex.js')
assert not is_registered_term(searchindex, 'thisnoteith')
assert is_registered_term(searchindex, 'thisonetoo')
assert not is_registered_term(searchindex, 'findthiskei')
@@ -100,7 +100,7 @@ def test_stemmer_does_not_remove_short_words(app, status, warning):
@pytest.mark.sphinx(testroot='search')
def test_stemmer(app, status, warning):
searchindex = jsload(app.outdir / 'searchindex.js')
searchindex = load_searchindex(app.outdir / 'searchindex.js')
print(searchindex)
assert is_registered_term(searchindex, 'findthisstemmedkei')
assert is_registered_term(searchindex, 'intern')
@@ -112,13 +112,13 @@ def test_term_in_heading_and_section(app, status, warning):
# if search term is in the title of one doc and in the text of another
# both documents should be a hit in the search index as a title,
# respectively text hit
assert 'textinhead:2' in searchindex
assert 'textinhead:0' in searchindex
assert '"textinhead": 2' in searchindex
assert '"textinhead": 0' in searchindex
@pytest.mark.sphinx(testroot='search')
def test_term_in_raw_directive(app, status, warning):
searchindex = jsload(app.outdir / 'searchindex.js')
searchindex = load_searchindex(app.outdir / 'searchindex.js')
assert not is_registered_term(searchindex, 'raw')
assert is_registered_term(searchindex, 'rawword')
assert not is_registered_term(searchindex, 'latex_keyword')
@@ -255,18 +255,17 @@ def test_IndexBuilder_lookup():
)
def test_search_index_gen_zh(app, status, warning):
app.builder.build_all()
# jsdump fails if search language is 'zh'; hence we just get the text:
searchindex = (app.outdir / 'searchindex.js').read_text()
assert 'chinesetest ' not in searchindex
assert 'chinesetest' in searchindex
assert 'chinesetesttwo' in searchindex
assert 'cas' in searchindex
index = load_searchindex(app.outdir / 'searchindex.js')
assert 'chinesetest ' not in index['terms']
assert 'chinesetest' in index['terms']
assert 'chinesetesttwo' in index['terms']
assert 'cas' in index['terms']
@pytest.mark.sphinx(testroot='search')
def test_nosearch(app):
app.build()
index = jsload(app.outdir / 'searchindex.js')
index = load_searchindex(app.outdir / 'searchindex.js')
assert index['docnames'] == ['index', 'nosearch', 'tocitem']
assert 'latex' not in index['terms']
assert 'zfs' in index['terms']

View File

@@ -1,19 +0,0 @@
from sphinx.util.jsdump import dumps, loads
def test_jsdump():
    """Check the serialized form and the round trip of single-key dicts."""
    cases = [
        # key that is not a valid identifier: quoted
        ({'1a': 1}, '{"1a":1}'),
        # plain identifier: written as a bare word
        ({'a1': 1}, '{a1:1}'),
        # non-ASCII key: quoted and \u-escaped
        ({'a\xe8': 1}, '{"a\\u00e8":1}'),
        # leading underscore is still a valid bare word
        ({'_foo': 1}, '{_foo:1}'),
    ]
    for data, expected in cases:
        assert dumps(data) == expected
        assert data == loads(dumps(data))