From b69e1a4fe7ac0de27aa5613d4db22fff28c18dda Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Wed, 10 Sep 2008 11:26:07 +0000 Subject: [PATCH] Moved searchindex to the bottom to not lock the search page, prepared index for title searching. --- sphinx/search.py | 17 +- sphinx/static/searchtools.js | 764 +++++++++++++++++------------------ sphinx/templates/search.html | 6 +- 3 files changed, 392 insertions(+), 395 deletions(-) diff --git a/sphinx/search.py b/sphinx/search.py index 342a253ac..7368f7cfe 100644 --- a/sphinx/search.py +++ b/sphinx/search.py @@ -59,6 +59,7 @@ class Stemmer(PorterStemmer): """ def stem(self, word): + word = word.lower() return PorterStemmer.stem(self, word, 0, len(word) - 1) @@ -139,9 +140,17 @@ class IndexBuilder(object): visitor = WordCollector(doctree) doctree.walk(visitor) - for word in word_re.findall(title) + visitor.found_words: - self._mapping.setdefault(self._stemmer.stem(word.lower()), - set()).add(filename) + + def add_term(word, prefix=''): + word = self._stemmer.stem(word) + self._mapping.setdefault(prefix + word, set()).add(filename) + + for word in word_re.findall(title): + add_term(word) + add_term(word, 'T') + + for word in visitor.found_words: + add_term(word) class SearchFrontend(object): @@ -177,6 +186,6 @@ class SearchFrontend(object): word = word[1:] else: storage = required - storage.add(self._stemmer.stem(word.lower())) + storage.add(self._stemmer.stem(word)) return self.query(required, excluded) diff --git a/sphinx/static/searchtools.js b/sphinx/static/searchtools.js index e3781ba2b..77d8639a2 100644 --- a/sphinx/static/searchtools.js +++ b/sphinx/static/searchtools.js @@ -5,24 +5,24 @@ * words. the first one is used to find the occurance, the * latter for highlighting it. */ + jQuery.makeSearchSummary = function(text, keywords, hlwords) { - var textLower = text.toLowerCase(); - var start = 0; - $.each(keywords, function() { - var i = textLower.indexOf(this.toLowerCase()); - if (i > -1) { - start = i; - } - }); - start = Math.max(start - 120, 0); - var excerpt = ((start > 0) ? '...' : '') + - $.trim(text.substr(start, 240)) + - ((start + 240 - text.length) ? '...' : ''); - var rv = $('
').text(excerpt); - $.each(hlwords, function() { - rv = rv.highlightText(this, 'highlight'); - }); - return rv; + var textLower = text.toLowerCase(); + var start = 0; + $.each(keywords, function() { + var i = textLower.indexOf(this.toLowerCase()); + if (i > -1) + start = i; + }); + start = Math.max(start - 120, 0); + var excerpt = ((start > 0) ? '...' : '') + + $.trim(text.substr(start, 240)) + + ((start + 240 - text.length) ? '...' : ''); + var rv = $('
').text(excerpt); + $.each(hlwords, function() { + rv = rv.highlightText(this, 'highlight'); + }); + return rv; } /** @@ -30,193 +30,182 @@ jQuery.makeSearchSummary = function(text, keywords, hlwords) { */ var PorterStemmer = function() { - var step2list = { - ational: 'ate', - tional: 'tion', - enci: 'ence', - anci: 'ance', - izer: 'ize', - bli: 'ble', - alli: 'al', - entli: 'ent', - eli: 'e', - ousli: 'ous', - ization: 'ize', - ation: 'ate', - ator: 'ate', - alism: 'al', - iveness: 'ive', - fulness: 'ful', - ousness: 'ous', - aliti: 'al', - iviti: 'ive', - biliti: 'ble', - logi: 'log' - }; + var step2list = { + ational: 'ate', + tional: 'tion', + enci: 'ence', + anci: 'ance', + izer: 'ize', + bli: 'ble', + alli: 'al', + entli: 'ent', + eli: 'e', + ousli: 'ous', + ization: 'ize', + ation: 'ate', + ator: 'ate', + alism: 'al', + iveness: 'ive', + fulness: 'ful', + ousness: 'ous', + aliti: 'al', + iviti: 'ive', + biliti: 'ble', + logi: 'log' + }; - var step3list = { - icate: 'ic', - ative: '', - alize: 'al', - iciti: 'ic', - ical: 'ic', - ful: '', - ness: '' - }; + var step3list = { + icate: 'ic', + ative: '', + alize: 'al', + iciti: 'ic', + ical: 'ic', + ful: '', + ness: '' + }; - var c = "[^aeiou]"; // consonant - var v = "[aeiouy]"; // vowel - var C = c + "[^aeiouy]*"; // consonant sequence - var V = v + "[aeiou]*"; // vowel sequence + var c = "[^aeiou]"; // consonant + var v = "[aeiouy]"; // vowel + var C = c + "[^aeiouy]*"; // consonant sequence + var V = v + "[aeiou]*"; // vowel sequence - var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 - var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 - var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 - var s_v = "^(" + C + ")?" + v; // vowel in stem + var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 + var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 + var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 + var s_v = "^(" + C + ")?" + v; // vowel in stem - this.stemWord = function (w) { - var stem; - var suffix; - var firstch; - var origword = w; + this.stemWord = function (w) { + var stem; + var suffix; + var firstch; + var origword = w; - if (w.length < 3) { - return w; - } + if (w.length < 3) + return w; - var re; - var re2; - var re3; - var re4; + var re; + var re2; + var re3; + var re4; - firstch = w.substr(0,1); - if (firstch == "y") { - w = firstch.toUpperCase() + w.substr(1); - } + firstch = w.substr(0,1); + if (firstch == "y") + w = firstch.toUpperCase() + w.substr(1); - // Step 1a - re = /^(.+?)(ss|i)es$/; - re2 = /^(.+?)([^s])s$/; + // Step 1a + re = /^(.+?)(ss|i)es$/; + re2 = /^(.+?)([^s])s$/; - if (re.test(w)) { - w = w.replace(re,"$1$2"); - } - else if (re2.test(w)) { - w = w.replace(re2,"$1$2"); - } + if (re.test(w)) + w = w.replace(re,"$1$2"); + else if (re2.test(w)) + w = w.replace(re2,"$1$2"); - // Step 1b - re = /^(.+?)eed$/; - re2 = /^(.+?)(ed|ing)$/; - if (re.test(w)) { - var fp = re.exec(w); - re = new RegExp(mgr0); - if (re.test(fp[1])) { - re = /.$/; - w = w.replace(re,""); - } - } - else if (re2.test(w)) { - var fp = re2.exec(w); - stem = fp[1]; - re2 = new RegExp(s_v); - if (re2.test(stem)) { - w = stem; - re2 = /(at|bl|iz)$/; - re3 = new RegExp("([^aeiouylsz])\\1$"); - re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); - if (re2.test(w)) { - w = w + "e"; - } - else if (re3.test(w)) { - re = /.$/; w = w.replace(re,""); - } - else if (re4.test(w)) { - w = w + "e"; - } - } - } - - // Step 1c - re = /^(.+?)y$/; - if (re.test(w)) { - var fp = re.exec(w); - stem = fp[1]; - re = new RegExp(s_v); - if (re.test(stem)) { w = stem + "i"; } - } - - // Step 2 - re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; - if (re.test(w)) { - var fp = re.exec(w); - stem = fp[1]; - suffix = fp[2]; - re = new RegExp(mgr0); - if (re.test(stem)) { - w = stem + step2list[suffix]; - } - } - - // Step 3 - re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; - if (re.test(w)) { - var fp = re.exec(w); - stem = fp[1]; - suffix = fp[2]; - re = new RegExp(mgr0); - if (re.test(stem)) { - w = stem + step3list[suffix]; - } - } - - // Step 4 - re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; - re2 = /^(.+?)(s|t)(ion)$/; - if (re.test(w)) { - var fp = re.exec(w); - stem = fp[1]; - re = new RegExp(mgr1); - if (re.test(stem)) { - w = stem; - } - } - else if (re2.test(w)) { - var fp = re2.exec(w); - stem = fp[1] + fp[2]; - re2 = new RegExp(mgr1); - if (re2.test(stem)) { - w = stem; - } - } - - // Step 5 - re = /^(.+?)e$/; - if (re.test(w)) { - var fp = re.exec(w); - stem = fp[1]; - re = new RegExp(mgr1); - re2 = new RegExp(meq1); - re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); - if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) { - w = stem; - } - } - re = /ll$/; - re2 = new RegExp(mgr1); - if (re.test(w) && re2.test(w)) { - re = /.$/; - w = w.replace(re,""); - } - - // and turn initial Y back to y - if (firstch == "y") { - w = firstch.toLowerCase() + w.substr(1); - } - return w; + // Step 1b + re = /^(.+?)eed$/; + re2 = /^(.+?)(ed|ing)$/; + if (re.test(w)) { + var fp = re.exec(w); + re = new RegExp(mgr0); + if (re.test(fp[1])) { + re = /.$/; + w = w.replace(re,""); + } + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = new RegExp(s_v); + if (re2.test(stem)) { + w = stem; + re2 = /(at|bl|iz)$/; + re3 = new RegExp("([^aeiouylsz])\\1$"); + re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re2.test(w)) + w = w + "e"; + else if (re3.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + else if (re4.test(w)) + w = w + "e"; + } } -} + // Step 1c + re = /^(.+?)y$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(s_v); + if (re.test(stem)) + w = stem + "i"; + } + + // Step 2 + re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step2list[suffix]; + } + + // Step 3 + re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step3list[suffix]; + } + + // Step 4 + re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + re2 = /^(.+?)(s|t)(ion)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + if (re.test(stem)) + w = stem; + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = new RegExp(mgr1); + if (re2.test(stem)) + w = stem; + } + + // Step 5 + re = /^(.+?)e$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + re2 = new RegExp(meq1); + re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) + w = stem; + } + re = /ll$/; + re2 = new RegExp(mgr1); + if (re.test(w) && re2.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + + // and turn initial Y back to y + if (firstch == "y") + w = firstch.toLowerCase() + w.substr(1); + return w; + } +} /** @@ -224,208 +213,203 @@ var PorterStemmer = function() { */ var Search = { - _index : null, - _queued_query : null, - _pulse_status : -1, + _index : null, + _queued_query : null, + _pulse_status : -1, - init : function() { - var params = $.getQueryParameters(); - if (params.q) { - var query = params.q[0]; - $('input[@name="q"]')[0].value = query; - this.performSearch(query); - } - }, + init : function() { + var params = $.getQueryParameters(); + if (params.q) { + var query = params.q[0]; + $('input[@name="q"]')[0].value = query; + this.performSearch(query); + } + }, - /** - * Sets the index - */ - setIndex : function(index) { - var q; - this._index = index; - if ((q = this._queued_query) !== null) { - this._queued_query = null; - Search.query(q); - } - }, - - hasIndex : function() { - return self._index !== null; - }, - - deferQuery : function(query) { - this._queued_query = query; - }, - - stopPulse : function() { - this._pulse_status = 0; - }, - - startPulse : function() { - if (this._pulse_status >= 0) - return; - function pulse() { - Search._pulse_status = (Search._pulse_status + 1) % 4; - var dotString = ''; - for (var i = 0; i < Search._pulse_status; i++) { - dotString += '.'; - } - Search.dots.text(dotString); - if (Search._pulse_status > -1) { - window.setTimeout(pulse, 500); - } - }; - pulse(); - }, - - /** - * perform a search for something - */ - performSearch : function(query) { - // create the required interface elements - this.out = $('#search-results'); - this.title = $('

' + _('Searching') + '

').appendTo(this.out); - this.dots = $('').appendTo(this.title); - this.status = $('

').appendTo(this.out); - this.output = $('