Merge pull request #7315 from tk0miya/7293_js_splitter_code

Close #7293: html search: Allow to override JavaScript splitter
This commit is contained in:
Takeshi KOMIYA 2020-03-17 10:55:04 +09:00 committed by GitHub
commit e6ffda3848
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 17 additions and 1 deletions

View File

@ -89,6 +89,8 @@ Features added
* #7103: linkcheck: writes all links to ``output.json`` * #7103: linkcheck: writes all links to ``output.json``
* #7025: html search: full text search can be disabled for individual document * #7025: html search: full text search can be disabled for individual document
using ``:nosearch:`` file-wide metadata using ``:nosearch:`` file-wide metadata
* #7293: html search: Allow overriding the JavaScript splitter via
``SearchLanguage.js_splitter_code``
* #7142: html theme: Add a theme option: ``pygments_dark_style`` to switch the * #7142: html theme: Add a theme option: ``pygments_dark_style`` to switch the
style of code-blocks in dark mode style of code-blocks in dark mode

View File

@ -43,6 +43,14 @@ class SearchLanguage:
This is a set of stop words of the target language. Default `stopwords` This is a set of stop words of the target language. Default `stopwords`
is empty. This word is used for building index and embedded in JS. is empty. This word is used for building index and embedded in JS.
.. attribute:: js_splitter_code
Return splitter function of JavaScript version. The function should be
named ``splitQuery``. It should take a string and return a list of
strings.
.. versionadded:: 3.0
.. attribute:: js_stemmer_code .. attribute:: js_stemmer_code
Return stemmer class of JavaScript version. This class' name should be Return stemmer class of JavaScript version. This class' name should be
@ -55,6 +63,7 @@ class SearchLanguage:
lang = None # type: str lang = None # type: str
language_name = None # type: str language_name = None # type: str
stopwords = set() # type: Set[str] stopwords = set() # type: Set[str]
js_splitter_code = None # type: str
js_stemmer_rawcode = None # type: str js_stemmer_rawcode = None # type: str
js_stemmer_code = """ js_stemmer_code = """
/** /**
@ -425,11 +434,16 @@ class IndexBuilder:
self._mapping.setdefault(stemmed_word, set()).add(docname) self._mapping.setdefault(stemmed_word, set()).add(docname)
def context_for_searchtool(self) -> Dict[str, Any]: def context_for_searchtool(self) -> Dict[str, Any]:
if self.lang.js_splitter_code:
js_splitter_code = self.lang.js_splitter_code
else:
js_splitter_code = self.js_splitter_code
return { return {
'search_language_stemming_code': self.lang.js_stemmer_code, 'search_language_stemming_code': self.lang.js_stemmer_code,
'search_language_stop_words': jsdump.dumps(sorted(self.lang.stopwords)), 'search_language_stop_words': jsdump.dumps(sorted(self.lang.stopwords)),
'search_scorer_tool': self.js_scorer_code, 'search_scorer_tool': self.js_scorer_code,
'search_word_splitter_code': self.js_splitter_code, 'search_word_splitter_code': js_splitter_code,
} }
def get_js_stemmer_rawcode(self) -> str: def get_js_stemmer_rawcode(self) -> str: