From 28b3360863df43ab1279e5cc07cbebe5ff37a58a Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Sun, 15 Mar 2020 22:05:38 +0900 Subject: [PATCH] Close #7293: html search: Allow to override JavaScript splitter --- CHANGES | 2 ++ sphinx/search/__init__.py | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 4f50422e6..134395db7 100644 --- a/CHANGES +++ b/CHANGES @@ -86,6 +86,8 @@ Features added * #7103: linkcheck: writes all links to ``output.json`` * #7025: html search: full text search can be disabled for individual document using ``:nosearch:`` file-wide metadata +* #7293: html search: Allow to override JavaScript splitter via + ``SearchLanguage.js_splitter_code`` Bugs fixed ---------- diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py index aab4297a0..d9853ff06 100644 --- a/sphinx/search/__init__.py +++ b/sphinx/search/__init__.py @@ -43,6 +43,14 @@ class SearchLanguage: This is a set of stop words of the target language. Default `stopwords` is empty. This word is used for building index and embedded in JS. + .. attribute:: js_splitter_code + + Return splitter function of JavaScript version. The function should be + named as ``splitQuery``. And it should take a string and return list of + strings. + + .. versionadded:: 3.0 + .. attribute:: js_stemmer_code Return stemmer class of JavaScript version. 
This class' name should be @@ -55,6 +63,7 @@ class SearchLanguage: lang = None # type: str language_name = None # type: str stopwords = set() # type: Set[str] + js_splitter_code = None # type: str js_stemmer_rawcode = None # type: str js_stemmer_code = """ /** @@ -425,11 +434,16 @@ class IndexBuilder: self._mapping.setdefault(stemmed_word, set()).add(docname) def context_for_searchtool(self) -> Dict[str, Any]: + if self.lang.js_splitter_code: + js_splitter_code = self.lang.js_splitter_code + else: + js_splitter_code = self.js_splitter_code + return { 'search_language_stemming_code': self.lang.js_stemmer_code, 'search_language_stop_words': jsdump.dumps(sorted(self.lang.stopwords)), 'search_scorer_tool': self.js_scorer_code, - 'search_word_splitter_code': self.js_splitter_code, + 'search_word_splitter_code': js_splitter_code, } def get_js_stemmer_rawcode(self) -> str: