From b536ffabc1ff7d8cf4c2ec60ac8b5f7b812e448a Mon Sep 17 00:00:00 2001
From: Adam Turner <9087854+aa-turner@users.noreply.github.com>
Date: Wed, 27 Apr 2022 02:53:13 +0100
Subject: [PATCH] Deprecate `sphinx.util.jsdump`

---
 doc/extdev/deprecated.rst |   5 +
 sphinx/search/__init__.py |  24 +++--
 sphinx/util/jsdump.py     | 211 ++++----------------------------------
 tests/test_search.py      |  33 +++---
 tests/test_util_jsdump.py |  19 ----
 5 files changed, 57 insertions(+), 235 deletions(-)
 delete mode 100644 tests/test_util_jsdump.py

diff --git a/doc/extdev/deprecated.rst b/doc/extdev/deprecated.rst
index 9fc1110fc..35d6eb304 100644
--- a/doc/extdev/deprecated.rst
+++ b/doc/extdev/deprecated.rst
@@ -22,6 +22,11 @@ The following is a list of deprecated interfaces.
      - (will be) Removed
      - Alternatives
 
+   * - ``sphinx.util.jsdump``
+     - 5.0
+     - 6.0
+     - The standard library ``json`` module.
+
    * - :doc:`Setuptools integration </usage/advanced/setuptools>`
      - 5.0
      - 7.0
diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py
index c3e46ce22..5a5622e3a 100644
--- a/sphinx/search/__init__.py
+++ b/sphinx/search/__init__.py
@@ -1,7 +1,9 @@
 """Create a full-text search index for offline search."""
 import html
+import json
 import pickle
 import re
+import warnings
 from importlib import import_module
 from os import path
 from typing import IO, Any, Dict, Iterable, List, Optional, Set, Tuple, Type
@@ -10,8 +12,8 @@ from docutils import nodes
 from docutils.nodes import Element, Node
 
 from sphinx import addnodes, package_dir
+from sphinx.deprecation import RemovedInSphinx60Warning
 from sphinx.environment import BuildEnvironment
-from sphinx.util import jsdump
 
 
 class SearchLanguage:
@@ -154,14 +156,14 @@ class _JavaScriptIndex:
     SUFFIX = ')'
 
     def dumps(self, data: Any) -> str:
-        return self.PREFIX + jsdump.dumps(data) + self.SUFFIX
+        return self.PREFIX + json.dumps(data, sort_keys=True) + self.SUFFIX  # stable output
 
     def loads(self, s: str) -> Any:
         data = s[len(self.PREFIX):-len(self.SUFFIX)]
         if not data or not s.startswith(self.PREFIX) or not \
            s.endswith(self.SUFFIX):
             raise ValueError('invalid data')
-        return jsdump.loads(data)
+        return json.loads(data)
 
     def dump(self, data: Any, f: IO) -> None:
         f.write(self.dumps(data))
@@ -224,7 +226,7 @@ class IndexBuilder:
     passed to the `feed` method.
     """
     formats = {
-        'jsdump':   jsdump,
+        'json':     json,
         'pickle':   pickle
     }
 
@@ -265,7 +267,11 @@ class IndexBuilder:
 
     def load(self, stream: IO, format: Any) -> None:
         """Reconstruct from frozen data."""
-        if isinstance(format, str):
+        if format == "jsdump":
+            warnings.warn("format=jsdump is deprecated, use json instead",
+                          RemovedInSphinx60Warning, stacklevel=2)
+            format = self.formats["json"]
+        elif isinstance(format, str):
             format = self.formats[format]
         frozen = format.load(stream)
         # if an old index is present, we treat it as not existing.
@@ -291,7 +297,11 @@ class IndexBuilder:
 
     def dump(self, stream: IO, format: Any) -> None:
         """Dump the frozen index to a stream."""
-        if isinstance(format, str):
+        if format == "jsdump":
+            warnings.warn("format=jsdump is deprecated, use json instead",
+                          RemovedInSphinx60Warning, stacklevel=2)
+            format = self.formats["json"]
+        elif isinstance(format, str):
             format = self.formats[format]
         format.dump(self.freeze(), stream)
 
@@ -417,7 +427,7 @@ class IndexBuilder:
 
         return {
             'search_language_stemming_code': self.get_js_stemmer_code(),
-            'search_language_stop_words': jsdump.dumps(sorted(self.lang.stopwords)),
+            'search_language_stop_words': json.dumps(sorted(self.lang.stopwords)),
             'search_scorer_tool': self.js_scorer_code,
             'search_word_splitter_code': js_splitter_code,
         }
diff --git a/sphinx/util/jsdump.py b/sphinx/util/jsdump.py
index ed5aea4ba..151d3e80a 100644
--- a/sphinx/util/jsdump.py
+++ b/sphinx/util/jsdump.py
@@ -1,194 +1,21 @@
-"""This module implements a simple JavaScript serializer.
+"""Deprecated shim: the old ``jsdump`` functions now alias the stdlib ``json`` module."""
+import json
 
-Uses the basestring encode function from simplejson by Bob Ippolito.
-"""
+from sphinx.deprecation import RemovedInSphinx60Warning, deprecated_alias
 
-import re
-from typing import IO, Any, Dict, List, Match, Union
-
-_str_re = re.compile(r'"(\\\\|\\"|[^"])*"')
-_int_re = re.compile(r'\d+')
-_name_re = re.compile(r'[a-zA-Z_]\w*')
-_nameonly_re = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*$')
-
-# escape \, ", control characters and everything outside ASCII
-ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
-ESCAPE_DICT = {
-    '\\': '\\\\',
-    '"': '\\"',
-    '\b': '\\b',
-    '\f': '\\f',
-    '\n': '\\n',
-    '\r': '\\r',
-    '\t': '\\t',
-}
-
-ESCAPED = re.compile(r'\\u.{4}|\\.')
-
-
-def encode_string(s: str) -> str:
-    def replace(match: Match) -> str:
-        s = match.group(0)
-        try:
-            return ESCAPE_DICT[s]
-        except KeyError:
-            n = ord(s)
-            if n < 0x10000:
-                return '\\u%04x' % (n,)
-            else:
-                # surrogate pair
-                n -= 0x10000
-                s1 = 0xd800 | ((n >> 10) & 0x3ff)
-                s2 = 0xdc00 | (n & 0x3ff)
-                return '\\u%04x\\u%04x' % (s1, s2)
-    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
-
-
-def decode_string(s: str) -> str:
-    return ESCAPED.sub(lambda m: eval('"' + m.group() + '"'), s)
-
-
-reswords = set("""\
-abstract   else   instanceof   switch
-boolean   enum   int   synchronized
-break   export   interface   this
-byte   extends   long   throw
-case   false   native   throws
-catch   final   new   transient
-char   finally   null   true
-class   float   package   try
-const   for   private   typeof
-continue   function   protected   var
-debugger   goto   public   void
-default   if   return   volatile
-delete   implements   short   while
-do   import   static   with
-double   in   super""".split())
-
-
-def dumps(obj: Any, key: bool = False) -> str:
-    if key:
-        if not isinstance(obj, str):
-            obj = str(obj)
-        if _nameonly_re.match(obj) and obj not in reswords:
-            return obj  # return it as a bare word
-        else:
-            return encode_string(obj)
-    if obj is None:
-        return 'null'
-    elif obj is True or obj is False:
-        return 'true' if obj else 'false'
-    elif isinstance(obj, (int, float)):
-        return str(obj)
-    elif isinstance(obj, dict):
-        return '{%s}' % ','.join(sorted('%s:%s' % (
-            dumps(key, True),
-            dumps(value)
-        ) for key, value in obj.items()))
-    elif isinstance(obj, set):
-        return '[%s]' % ','.join(sorted(dumps(x) for x in obj))
-    elif isinstance(obj, (tuple, list)):
-        return '[%s]' % ','.join(dumps(x) for x in obj)
-    elif isinstance(obj, str):
-        return encode_string(obj)
-    raise TypeError(type(obj))
-
-
-def dump(obj: Any, f: IO) -> None:
-    f.write(dumps(obj))
-
-
-def loads(x: str) -> Any:
-    """Loader that can read the JS subset the indexer produces."""
-    nothing = object()
-    i = 0
-    n = len(x)
-    stack: List[Union[List, Dict]] = []
-    obj: Any = nothing
-    key = False
-    keys = []
-    while i < n:
-        c = x[i]
-        if c == '{':
-            obj = {}
-            stack.append(obj)
-            key = True
-            keys.append(nothing)
-            i += 1
-        elif c == '[':
-            obj = []
-            stack.append(obj)
-            key = False
-            keys.append(nothing)
-            i += 1
-        elif c in '}]':
-            if key:
-                if keys[-1] is not nothing:
-                    raise ValueError("unfinished dict")
-                # empty dict
-                key = False
-            oldobj = stack.pop()
-            keys.pop()
-            if stack:
-                obj = stack[-1]
-                if isinstance(obj, dict):
-                    if keys[-1] is nothing:
-                        raise ValueError("invalid key object", oldobj)
-                    obj[keys[-1]] = oldobj
-                else:
-                    obj.append(oldobj)
-            else:
-                break
-            i += 1
-        elif c == ',':
-            if key:
-                raise ValueError("multiple keys")
-            if isinstance(obj, dict):
-                key = True
-            i += 1
-        elif c == ':':
-            if not isinstance(obj, dict):
-                raise ValueError("colon in list")
-            i += 1
-            if not key:
-                raise ValueError("multiple values")
-            key = False
-        else:
-            y: Any = None
-            m = _str_re.match(x, i)
-            if m:
-                y = decode_string(m.group()[1:-1])
-            else:
-                m = _int_re.match(x, i)
-                if m:
-                    y = int(m.group())
-                else:
-                    m = _name_re.match(x, i)
-                    if m:
-                        y = m.group()
-                        if y == 'true':
-                            y = True
-                        elif y == 'false':
-                            y = False
-                        elif y == 'null':
-                            y = None
-                        elif not key:
-                            raise ValueError("bareword as value")
-                    else:
-                        raise ValueError("read error at pos %d" % i)
-            i = m.end()
-            if isinstance(obj, dict):
-                if key:
-                    keys[-1] = y
-                else:
-                    obj[keys[-1]] = y
-                    key = False
-            else:
-                obj.append(y)
-    if obj is nothing:
-        raise ValueError("nothing loaded from string")
-    return obj
-
-
-def load(f: IO) -> Any:
-    return loads(f.read())
+deprecated_alias(
+    'sphinx.util.jsdump',
+    {
+        'dumps': lambda obj, key=False: json.dumps(obj),  # ``key`` optional, as before
+        'dump': json.dump,
+        'loads': json.loads,
+        'load': json.load,
+    },
+    RemovedInSphinx60Warning,
+    {
+        'dumps': 'json.dumps',
+        'dump': 'json.dump',
+        'loads': 'json.loads',
+        'load': 'json.load',
+    }
+)
diff --git a/tests/test_search.py b/tests/test_search.py
index 024e6941c..540793544 100644
--- a/tests/test_search.py
+++ b/tests/test_search.py
@@ -1,5 +1,6 @@
 """Test the search index builder."""
 
+import json
 from collections import namedtuple
 from io import BytesIO
 
@@ -8,7 +9,6 @@ from docutils import frontend, utils
 from docutils.parsers import rst
 
 from sphinx.search import IndexBuilder
-from sphinx.util import jsdump
 
 DummyEnvironment = namedtuple('DummyEnvironment', ['version', 'domains'])
 
@@ -32,12 +32,12 @@ def setup_module():
     parser = rst.Parser()
 
 
-def jsload(path):
+def load_searchindex(path):
     searchindex = path.read_text()
     assert searchindex.startswith('Search.setIndex(')
     assert searchindex.endswith(')')
 
-    return jsdump.loads(searchindex[16:-1])
+    return json.loads(searchindex[16:-1])
 
 
 def is_registered_term(index, keyword):
@@ -57,7 +57,7 @@ test that non-comments are indexed: fermion
 @pytest.mark.sphinx(testroot='ext-viewcode')
 def test_objects_are_escaped(app, status, warning):
     app.builder.build_all()
-    index = jsload(app.outdir / 'searchindex.js')
+    index = load_searchindex(app.outdir / 'searchindex.js')
     for item in index.get('objects').get(''):
         if item[-1] == 'n::Array&lt;T, d&gt;':  # n::Array<T,d> is escaped
             break
@@ -68,7 +68,7 @@ def test_objects_are_escaped(app, status, warning):
 @pytest.mark.sphinx(testroot='search')
 def test_meta_keys_are_handled_for_language_en(app, status, warning):
     app.builder.build_all()
-    searchindex = jsload(app.outdir / 'searchindex.js')
+    searchindex = load_searchindex(app.outdir / 'searchindex.js')
     assert not is_registered_term(searchindex, 'thisnoteith')
     assert is_registered_term(searchindex, 'thisonetoo')
     assert is_registered_term(searchindex, 'findthiskei')
@@ -81,7 +81,7 @@ def test_meta_keys_are_handled_for_language_en(app, status, warning):
 @pytest.mark.sphinx(testroot='search', confoverrides={'html_search_language': 'de'})
 def test_meta_keys_are_handled_for_language_de(app, status, warning):
     app.builder.build_all()
-    searchindex = jsload(app.outdir / 'searchindex.js')
+    searchindex = load_searchindex(app.outdir / 'searchindex.js')
     assert not is_registered_term(searchindex, 'thisnoteith')
     assert is_registered_term(searchindex, 'thisonetoo')
     assert not is_registered_term(searchindex, 'findthiskei')
@@ -100,7 +100,7 @@ def test_stemmer_does_not_remove_short_words(app, status, warning):
 
 @pytest.mark.sphinx(testroot='search')
 def test_stemmer(app, status, warning):
-    searchindex = jsload(app.outdir / 'searchindex.js')
+    searchindex = load_searchindex(app.outdir / 'searchindex.js')
     print(searchindex)
     assert is_registered_term(searchindex, 'findthisstemmedkei')
     assert is_registered_term(searchindex, 'intern')
@@ -112,13 +112,13 @@ def test_term_in_heading_and_section(app, status, warning):
     # if search term is in the title of one doc and in the text of another
     # both documents should be a hit in the search index as a title,
     # respectively text hit
-    assert 'textinhead:2' in searchindex
-    assert 'textinhead:0' in searchindex
+    assert '"textinhead": 2' in searchindex
+    assert '"textinhead": 0' in searchindex
 
 
 @pytest.mark.sphinx(testroot='search')
 def test_term_in_raw_directive(app, status, warning):
-    searchindex = jsload(app.outdir / 'searchindex.js')
+    searchindex = load_searchindex(app.outdir / 'searchindex.js')
     assert not is_registered_term(searchindex, 'raw')
     assert is_registered_term(searchindex, 'rawword')
     assert not is_registered_term(searchindex, 'latex_keyword')
@@ -255,18 +255,17 @@ def test_IndexBuilder_lookup():
 )
 def test_search_index_gen_zh(app, status, warning):
     app.builder.build_all()
-    # jsdump fails if search language is 'zh'; hence we just get the text:
-    searchindex = (app.outdir / 'searchindex.js').read_text()
-    assert 'chinesetest ' not in searchindex
-    assert 'chinesetest' in searchindex
-    assert 'chinesetesttwo' in searchindex
-    assert 'cas' in searchindex
+    index = load_searchindex(app.outdir / 'searchindex.js')
+    assert 'chinesetest ' not in index['terms']
+    assert 'chinesetest' in index['terms']
+    assert 'chinesetesttwo' in index['terms']
+    assert 'cas' in index['terms']
 
 
 @pytest.mark.sphinx(testroot='search')
 def test_nosearch(app):
     app.build()
-    index = jsload(app.outdir / 'searchindex.js')
+    index = load_searchindex(app.outdir / 'searchindex.js')
     assert index['docnames'] == ['index', 'nosearch', 'tocitem']
     assert 'latex' not in index['terms']
     assert 'zfs' in index['terms']
diff --git a/tests/test_util_jsdump.py b/tests/test_util_jsdump.py
deleted file mode 100644
index d93c6ecd0..000000000
--- a/tests/test_util_jsdump.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from sphinx.util.jsdump import dumps, loads
-
-
-def test_jsdump():
-    data = {'1a': 1}
-    assert dumps(data) == '{"1a":1}'
-    assert data == loads(dumps(data))
-
-    data = {'a1': 1}
-    assert dumps(data) == '{a1:1}'
-    assert data == loads(dumps(data))
-
-    data = {'a\xe8': 1}
-    assert dumps(data) == '{"a\\u00e8":1}'
-    assert data == loads(dumps(data))
-
-    data = {'_foo': 1}
-    assert dumps(data) == '{_foo:1}'
-    assert data == loads(dumps(data))