mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Update splitQuery
This commit is contained in:
@@ -1,91 +1,71 @@
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
# Find the char codes that are matched by Python's (?u)\\w
|
||||
|
||||
match = re.compile(r'(?u)\w')
|
||||
# Start of the run of non-word code points currently being collected, or -1
# when no run is open.
begin = -1

# Half-open ``(start, end)`` pairs of UTF-16 code units that split a query:
# ``start`` is the first non-word code point of a run and ``end`` is the first
# code point after it, which is what the generated JavaScript expects when it
# expands each pair with ``Array(end - start)``.
ranges = []
# NOTE(review): the loop below no longer appends to ``singles`` (a one-element
# run is just a ``(start, start + 1)`` range); the name is kept in case other
# code still references it.
singles = []

for i in range(65536):
    # Get all non 'word' codepoints. This means skipping all alphanumerics and
    # '_' (U+005F, decimal 95), matching the `\w` character class in `re`.
    # We also skip 0xd800-0xdfff, the surrogate pair area.
    if not (chr(i).isalnum() or i == 95) and not (0xd800 <= i <= 0xdfff):
        if begin == -1:
            begin = i
    elif begin != -1:
        ranges.append((begin, i))
        begin = -1

# Flush the run that is still open when the scan reaches the end of the BMP;
# without this the trailing non-word code points (the run ending at U+FFFF)
# would never be emitted as split characters.
if begin != -1:
    ranges.append((begin, 65536))
    begin = -1
|
||||
|
||||
|
||||
# fold json within almost 80 chars per line
|
||||
def fold(json_data, splitter):
    """Serialise *json_data* as JSON and fold it to almost 80 chars per line.

    The JSON text is cut just after the first occurrence of *splitter* found
    at or beyond offset 74 of the remaining text, repeatedly, until fewer
    than 75 characters are left. Every line is indented by four spaces except
    the first, whose indent is stripped so the result can be spliced in after
    existing text on the same line.

    :param json_data: any value accepted by ``json.dumps``.
    :param splitter: the token after which a line break may be inserted,
        e.g. ``'],'``.
    :returns: the folded JSON text, lines joined with ``'\\n'``.
    """
    code = json.dumps(json_data)
    lines = []
    while len(code) >= 75:
        # ``find`` instead of ``index``: a long remainder with no further
        # splitter (e.g. the tail of the JSON, which ends in ``]]``) must be
        # emitted as the final line rather than raising ValueError.
        index = code.find(splitter, 74)
        if index == -1:
            break
        cut = index + len(splitter)
        lines.append('    ' + code[:cut])
        code = code[cut:]
    lines.append('    ' + code)
    # The first line continues whatever precedes it, so drop its indent.
    lines[0] = lines[0][4:]
    return '\n'.join(lines)
|
||||
|
||||
|
||||
# JavaScript code
|
||||
js_src = '''
|
||||
var splitChars = (function() {
|
||||
var result = {};
|
||||
var singles = %s;
|
||||
var i, j, start, end;
|
||||
for (i = 0; i < singles.length; i++) {
|
||||
result[singles[i]] = true;
|
||||
}
|
||||
var ranges = %s;
|
||||
for (i = 0; i < ranges.length; i++) {
|
||||
start = ranges[i][0];
|
||||
end = ranges[i][1];
|
||||
for (j = start; j <= end; j++) {
|
||||
result[j] = true;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
})();
|
||||
js_src = '''\
|
||||
const splitChars = new Set(
|
||||
''' + fold(ranges, "],") + '''.map(
|
||||
([start, end]) => Array(end - start).fill(0).map((_, i) => start + i)
|
||||
).flat()
|
||||
)
|
||||
|
||||
function splitQuery(query) {
|
||||
var result = [];
|
||||
var start = -1;
|
||||
for (var i = 0; i < query.length; i++) {
|
||||
if (splitChars[query.charCodeAt(i)]) {
|
||||
if (start !== -1) {
|
||||
const splitQuery = (query) => {
|
||||
const result = [];
|
||||
let start = null;
|
||||
for (let i = 0; i < query.length; i++) {
|
||||
if (splitChars.has(query.charCodeAt(i))) {
|
||||
if (start !== null) {
|
||||
result.push(query.slice(start, i));
|
||||
start = -1;
|
||||
start = null;
|
||||
}
|
||||
} else {
|
||||
if (start === null) start = i;
|
||||
if (i === query.length - 1) {
|
||||
result.push(query.slice(start));
|
||||
}
|
||||
} else if (start === -1) {
|
||||
start = i;
|
||||
}
|
||||
}
|
||||
if (start !== -1) {
|
||||
result.push(query.slice(start));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
''' % (fold(singles, ','), fold(ranges, '],'))
|
||||
'''
|
||||
|
||||
js_test_src = '''
|
||||
js_test_src = f'''\
|
||||
// This is regression test for https://github.com/sphinx-doc/sphinx/issues/3150
|
||||
// generated by compat_regexp_generator.py
|
||||
// it needs node.js for testing
|
||||
var assert = require('assert');
|
||||
const assert = require('assert');
|
||||
|
||||
%s
|
||||
{js_src}
|
||||
|
||||
console.log("test splitting English words")
|
||||
assert.deepEqual(['Hello', 'World'], splitQuery(' Hello World '));
|
||||
@@ -99,7 +79,7 @@ console.log("test splitting Chinese characters")
|
||||
assert.deepEqual(['Hello', 'from', '中国', '上海'], splitQuery('Hello from 中国 上海'));
|
||||
console.log(' ... ok\\n')
|
||||
|
||||
console.log("test splitting Emoji(surrogate pair) characters. It should keep emojis.")
|
||||
console.log("test splitting Emoji (surrogate pair) characters. It should keep emojis.")
|
||||
assert.deepEqual(['😁😁'], splitQuery('😁😁'));
|
||||
console.log(' ... ok\\n')
|
||||
|
||||
@@ -109,9 +89,9 @@ assert.deepEqual(
|
||||
splitQuery('Löschen Prüfung Abändern ærlig spørsmål'));
|
||||
console.log(' ... ok\\n')
|
||||
|
||||
''' % js_src
|
||||
'''
|
||||
|
||||
python_src = '''\
|
||||
python_src = f'''\
|
||||
"""
|
||||
sphinx.search.jssplitter
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@@ -120,21 +100,20 @@ python_src = '''\
|
||||
|
||||
DO NOT EDIT. This is generated by utils/jssplitter_generator.py
|
||||
|
||||
:copyright: Copyright 2007-2019 by the Sphinx team, see AUTHORS.
|
||||
:copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
|
||||
:license: BSD, see LICENSE for details.
|
||||
"""
|
||||
|
||||
splitter_code = """
|
||||
%s
|
||||
{js_src}
|
||||
"""
|
||||
''' % js_src
|
||||
'''
|
||||
|
||||
with open('../sphinx/search/jssplitter.py', 'w') as f:
|
||||
with open('../sphinx/search/jssplitter.py', 'w', encoding="utf-8") as f:
|
||||
f.write(python_src)
|
||||
|
||||
with open('./regression_test.js', 'w') as f:
|
||||
with open('./regression_test.js', 'w', encoding="utf-8") as f:
|
||||
f.write(js_test_src)
|
||||
|
||||
print("starting test...")
|
||||
result = subprocess.call(['node', './regression_test.js'])
|
||||
sys.exit(result)
|
||||
raise SystemExit(subprocess.call(['node', './regression_test.js']))
|
||||
|
||||
Reference in New Issue
Block a user