diff --git a/CHANGES b/CHANGES index d4e8d2ea4..720e44dfd 100644 --- a/CHANGES +++ b/CHANGES @@ -89,6 +89,8 @@ Features added * #7103: linkcheck: writes all links to ``output.json`` * #7025: html search: full text search can be disabled for individual document using ``:nosearch:`` file-wide metadata +* #7293: html search: Allow to override JavaScript splitter via + ``SearchLanguage.js_splitter_code`` * #7142: html theme: Add a theme option: ``pygments_dark_style`` to switch the style of code-blocks in dark mode diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py index aab4297a0..d9853ff06 100644 --- a/sphinx/search/__init__.py +++ b/sphinx/search/__init__.py @@ -43,6 +43,14 @@ class SearchLanguage: This is a set of stop words of the target language. Default `stopwords` is empty. This word is used for building index and embedded in JS. + .. attribute:: js_splitter_code + + Return splitter function of JavaScript version. The function should be + named as ``splitQuery``. And it should take a string and return list of + strings. + + .. versionadded:: 3.0 + .. attribute:: js_stemmer_code Return stemmer class of JavaScript version. 
This class' name should be @@ -55,6 +63,7 @@ class SearchLanguage: lang = None # type: str language_name = None # type: str stopwords = set() # type: Set[str] + js_splitter_code = None # type: str js_stemmer_rawcode = None # type: str js_stemmer_code = """ /** @@ -425,11 +434,16 @@ class IndexBuilder: self._mapping.setdefault(stemmed_word, set()).add(docname) def context_for_searchtool(self) -> Dict[str, Any]: + if self.lang.js_splitter_code: + js_splitter_code = self.lang.js_splitter_code + else: + js_splitter_code = self.js_splitter_code + return { 'search_language_stemming_code': self.lang.js_stemmer_code, 'search_language_stop_words': jsdump.dumps(sorted(self.lang.stopwords)), 'search_scorer_tool': self.js_scorer_code, - 'search_word_splitter_code': self.js_splitter_code, + 'search_word_splitter_code': js_splitter_code, } def get_js_stemmer_rawcode(self) -> str: