Moved searchindex to the bottom to not lock the search page, prepared index for title searching.

This commit is contained in:
Armin Ronacher 2008-09-10 11:26:07 +00:00
parent 3debdc2c2a
commit b69e1a4fe7
3 changed files with 392 additions and 395 deletions

View File

@ -59,6 +59,7 @@ class Stemmer(PorterStemmer):
""" """
def stem(self, word): def stem(self, word):
word = word.lower()
return PorterStemmer.stem(self, word, 0, len(word) - 1) return PorterStemmer.stem(self, word, 0, len(word) - 1)
@ -139,9 +140,17 @@ class IndexBuilder(object):
visitor = WordCollector(doctree) visitor = WordCollector(doctree)
doctree.walk(visitor) doctree.walk(visitor)
for word in word_re.findall(title) + visitor.found_words:
self._mapping.setdefault(self._stemmer.stem(word.lower()), def add_term(word, prefix=''):
set()).add(filename) word = self._stemmer.stem(word)
self._mapping.setdefault(prefix + word, set()).add(filename)
for word in word_re.findall(title):
add_term(word)
add_term(word, 'T')
for word in visitor.found_words:
add_term(word)
class SearchFrontend(object): class SearchFrontend(object):
@ -177,6 +186,6 @@ class SearchFrontend(object):
word = word[1:] word = word[1:]
else: else:
storage = required storage = required
storage.add(self._stemmer.stem(word.lower())) storage.add(self._stemmer.stem(word))
return self.query(required, excluded) return self.query(required, excluded)

View File

@ -5,24 +5,24 @@
* words. the first one is used to find the occurrence, the * words. the first one is used to find the occurrence, the
* latter for highlighting it. * latter for highlighting it.
*/ */
jQuery.makeSearchSummary = function(text, keywords, hlwords) { jQuery.makeSearchSummary = function(text, keywords, hlwords) {
var textLower = text.toLowerCase(); var textLower = text.toLowerCase();
var start = 0; var start = 0;
$.each(keywords, function() { $.each(keywords, function() {
var i = textLower.indexOf(this.toLowerCase()); var i = textLower.indexOf(this.toLowerCase());
if (i > -1) { if (i > -1)
start = i; start = i;
} });
}); start = Math.max(start - 120, 0);
start = Math.max(start - 120, 0); var excerpt = ((start > 0) ? '...' : '') +
var excerpt = ((start > 0) ? '...' : '') + $.trim(text.substr(start, 240)) +
$.trim(text.substr(start, 240)) + ((start + 240 - text.length) ? '...' : '');
((start + 240 - text.length) ? '...' : ''); var rv = $('<div class="context"></div>').text(excerpt);
var rv = $('<div class="context"></div>').text(excerpt); $.each(hlwords, function() {
$.each(hlwords, function() { rv = rv.highlightText(this, 'highlight');
rv = rv.highlightText(this, 'highlight'); });
}); return rv;
return rv;
} }
/** /**
@ -30,193 +30,182 @@ jQuery.makeSearchSummary = function(text, keywords, hlwords) {
*/ */
var PorterStemmer = function() { var PorterStemmer = function() {
var step2list = { var step2list = {
ational: 'ate', ational: 'ate',
tional: 'tion', tional: 'tion',
enci: 'ence', enci: 'ence',
anci: 'ance', anci: 'ance',
izer: 'ize', izer: 'ize',
bli: 'ble', bli: 'ble',
alli: 'al', alli: 'al',
entli: 'ent', entli: 'ent',
eli: 'e', eli: 'e',
ousli: 'ous', ousli: 'ous',
ization: 'ize', ization: 'ize',
ation: 'ate', ation: 'ate',
ator: 'ate', ator: 'ate',
alism: 'al', alism: 'al',
iveness: 'ive', iveness: 'ive',
fulness: 'ful', fulness: 'ful',
ousness: 'ous', ousness: 'ous',
aliti: 'al', aliti: 'al',
iviti: 'ive', iviti: 'ive',
biliti: 'ble', biliti: 'ble',
logi: 'log' logi: 'log'
}; };
var step3list = { var step3list = {
icate: 'ic', icate: 'ic',
ative: '', ative: '',
alize: 'al', alize: 'al',
iciti: 'ic', iciti: 'ic',
ical: 'ic', ical: 'ic',
ful: '', ful: '',
ness: '' ness: ''
}; };
var c = "[^aeiou]"; // consonant var c = "[^aeiou]"; // consonant
var v = "[aeiouy]"; // vowel var v = "[aeiouy]"; // vowel
var C = c + "[^aeiouy]*"; // consonant sequence var C = c + "[^aeiouy]*"; // consonant sequence
var V = v + "[aeiou]*"; // vowel sequence var V = v + "[aeiou]*"; // vowel sequence
var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0
var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1
var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1
var s_v = "^(" + C + ")?" + v; // vowel in stem var s_v = "^(" + C + ")?" + v; // vowel in stem
this.stemWord = function (w) { this.stemWord = function (w) {
var stem; var stem;
var suffix; var suffix;
var firstch; var firstch;
var origword = w; var origword = w;
if (w.length < 3) { if (w.length < 3)
return w; return w;
}
var re; var re;
var re2; var re2;
var re3; var re3;
var re4; var re4;
firstch = w.substr(0,1); firstch = w.substr(0,1);
if (firstch == "y") { if (firstch == "y")
w = firstch.toUpperCase() + w.substr(1); w = firstch.toUpperCase() + w.substr(1);
}
// Step 1a // Step 1a
re = /^(.+?)(ss|i)es$/; re = /^(.+?)(ss|i)es$/;
re2 = /^(.+?)([^s])s$/; re2 = /^(.+?)([^s])s$/;
if (re.test(w)) { if (re.test(w))
w = w.replace(re,"$1$2"); w = w.replace(re,"$1$2");
} else if (re2.test(w))
else if (re2.test(w)) { w = w.replace(re2,"$1$2");
w = w.replace(re2,"$1$2");
}
// Step 1b // Step 1b
re = /^(.+?)eed$/; re = /^(.+?)eed$/;
re2 = /^(.+?)(ed|ing)$/; re2 = /^(.+?)(ed|ing)$/;
if (re.test(w)) { if (re.test(w)) {
var fp = re.exec(w); var fp = re.exec(w);
re = new RegExp(mgr0); re = new RegExp(mgr0);
if (re.test(fp[1])) { if (re.test(fp[1])) {
re = /.$/; re = /.$/;
w = w.replace(re,""); w = w.replace(re,"");
} }
} }
else if (re2.test(w)) { else if (re2.test(w)) {
var fp = re2.exec(w); var fp = re2.exec(w);
stem = fp[1]; stem = fp[1];
re2 = new RegExp(s_v); re2 = new RegExp(s_v);
if (re2.test(stem)) { if (re2.test(stem)) {
w = stem; w = stem;
re2 = /(at|bl|iz)$/; re2 = /(at|bl|iz)$/;
re3 = new RegExp("([^aeiouylsz])\\1$"); re3 = new RegExp("([^aeiouylsz])\\1$");
re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
if (re2.test(w)) { if (re2.test(w))
w = w + "e"; w = w + "e";
} else if (re3.test(w)) {
else if (re3.test(w)) { re = /.$/;
re = /.$/; w = w.replace(re,""); w = w.replace(re,"");
} }
else if (re4.test(w)) { else if (re4.test(w))
w = w + "e"; w = w + "e";
} }
}
}
// Step 1c
re = /^(.+?)y$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = new RegExp(s_v);
if (re.test(stem)) { w = stem + "i"; }
}
// Step 2
re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
suffix = fp[2];
re = new RegExp(mgr0);
if (re.test(stem)) {
w = stem + step2list[suffix];
}
}
// Step 3
re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
suffix = fp[2];
re = new RegExp(mgr0);
if (re.test(stem)) {
w = stem + step3list[suffix];
}
}
// Step 4
re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
re2 = /^(.+?)(s|t)(ion)$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = new RegExp(mgr1);
if (re.test(stem)) {
w = stem;
}
}
else if (re2.test(w)) {
var fp = re2.exec(w);
stem = fp[1] + fp[2];
re2 = new RegExp(mgr1);
if (re2.test(stem)) {
w = stem;
}
}
// Step 5
re = /^(.+?)e$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = new RegExp(mgr1);
re2 = new RegExp(meq1);
re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {
w = stem;
}
}
re = /ll$/;
re2 = new RegExp(mgr1);
if (re.test(w) && re2.test(w)) {
re = /.$/;
w = w.replace(re,"");
}
// and turn initial Y back to y
if (firstch == "y") {
w = firstch.toLowerCase() + w.substr(1);
}
return w;
} }
}
// Step 1c
re = /^(.+?)y$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = new RegExp(s_v);
if (re.test(stem))
w = stem + "i";
}
// Step 2
re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
suffix = fp[2];
re = new RegExp(mgr0);
if (re.test(stem))
w = stem + step2list[suffix];
}
// Step 3
re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
suffix = fp[2];
re = new RegExp(mgr0);
if (re.test(stem))
w = stem + step3list[suffix];
}
// Step 4
re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
re2 = /^(.+?)(s|t)(ion)$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = new RegExp(mgr1);
if (re.test(stem))
w = stem;
}
else if (re2.test(w)) {
var fp = re2.exec(w);
stem = fp[1] + fp[2];
re2 = new RegExp(mgr1);
if (re2.test(stem))
w = stem;
}
// Step 5
re = /^(.+?)e$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = new RegExp(mgr1);
re2 = new RegExp(meq1);
re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
w = stem;
}
re = /ll$/;
re2 = new RegExp(mgr1);
if (re.test(w) && re2.test(w)) {
re = /.$/;
w = w.replace(re,"");
}
// and turn initial Y back to y
if (firstch == "y")
w = firstch.toLowerCase() + w.substr(1);
return w;
}
}
/** /**
@ -224,208 +213,203 @@ var PorterStemmer = function() {
*/ */
var Search = { var Search = {
_index : null, _index : null,
_queued_query : null, _queued_query : null,
_pulse_status : -1, _pulse_status : -1,
init : function() { init : function() {
var params = $.getQueryParameters(); var params = $.getQueryParameters();
if (params.q) { if (params.q) {
var query = params.q[0]; var query = params.q[0];
$('input[@name="q"]')[0].value = query; $('input[@name="q"]')[0].value = query;
this.performSearch(query); this.performSearch(query);
} }
}, },
/** /**
* Sets the index * Sets the index
*/ */
setIndex : function(index) { setIndex : function(index) {
var q; var q;
this._index = index; this._index = index;
if ((q = this._queued_query) !== null) { if ((q = this._queued_query) !== null) {
this._queued_query = null; this._queued_query = null;
Search.query(q); Search.query(q);
}
},
hasIndex : function() {
return self._index !== null;
},
/**
 * Remembers a query in _queued_query; setIndex() checks that field
 * and runs the stored query once an index has been set.
 */
deferQuery : function(query) {
this._queued_query = query;
},
stopPulse : function() {
this._pulse_status = 0;
},
startPulse : function() {
if (this._pulse_status >= 0)
return;
function pulse() {
Search._pulse_status = (Search._pulse_status + 1) % 4;
var dotString = '';
for (var i = 0; i < Search._pulse_status; i++) {
dotString += '.';
}
Search.dots.text(dotString);
if (Search._pulse_status > -1) {
window.setTimeout(pulse, 500);
}
};
pulse();
},
/**
* perform a search for something
*/
performSearch : function(query) {
// create the required interface elements
this.out = $('#search-results');
this.title = $('<h2>' + _('Searching') + '</h2>').appendTo(this.out);
this.dots = $('<span></span>').appendTo(this.title);
this.status = $('<p style="display: none"></p>').appendTo(this.out);
this.output = $('<ul class="search"/>').appendTo(this.out);
$('#search-progress').text(_('Preparing search...'));
this.startPulse();
// index already loaded, the browser was quick!
if (this.hasIndex())
this.query(query);
else
this.setQuery(query);
},
query : function(query) {
// stem the searchwords and add them to the
// correct list
var stemmer = new PorterStemmer();
var searchwords = [];
var excluded = [];
var hlwords = [];
var tmp = query.split(/\s+/);
for (var i = 0; i < tmp.length; i++) {
// stem the word
var word = stemmer.stemWord(tmp[i]).toLowerCase();
// select the correct list
if (word[0] == '-') {
var toAppend = excluded;
word = word.substr(1);
}
else {
var toAppend = searchwords;
hlwords.push(tmp[i].toLowerCase());
}
// only add if not already in the list
if (!$.contains(toAppend, word)) {
toAppend.push(word);
}
};
var highlightstring = '?highlight=' + $.urlencode(hlwords.join(" "));
console.debug('SEARCH: searching for:');
console.info('required: ', searchwords);
console.info('excluded: ', excluded);
// prepare search
var filenames = this._index[0];
var titles = this._index[1];
var words = this._index[2];
var fileMap = {};
var files = null;
$('#search-progress').empty();
// perform the search on the required words
for (var i = 0; i < searchwords.length; i++) {
var word = searchwords[i];
// no match but word was a required one
if ((files = words[word]) == null)
break;
// create the mapping
for (var j = 0; j < files.length; j++) {
var file = files[j];
if (file in fileMap)
fileMap[file].push(word);
else
fileMap[file] = [word];
}
}
// now check if the files are in the correct
// areas and if they don't contain excluded words
var results = [];
for (var file in fileMap) {
var valid = true;
// check if all requirements are matched
if (fileMap[file].length != searchwords.length)
continue;
// ensure that none of the excluded words is in the
// search result.
for (var i = 0; i < excluded.length; i++) {
if ($.contains(words[excluded[i]] || [], file)) {
valid = false;
break;
}
}
// if we have still a valid result we can add it
// to the result list
if (valid)
results.push([filenames[file], titles[file]]);
}
// delete unused variables in order to not waste
// memory until list is retrieved completely
delete filenames, titles, words;
// now sort the results by title
results.sort(function(a, b) {
var left = a[1].toLowerCase();
var right = b[1].toLowerCase();
return (left > right) ? -1 : ((left < right) ? 1 : 0);
});
// print the results
var resultCount = results.length;
function displayNextItem() {
// results left, load the summary and display it
if (results.length) {
var item = results.pop();
var listItem = $('<li style="display:none"></li>');
listItem.append($('<a/>').attr(
'href',
item[0] + DOCUMENTATION_OPTIONS.FILE_SUFFIX +
highlightstring).html(item[1]));
$.get('_sources/' + item[0] + '.txt', function(data) {
listItem.append($.makeSearchSummary(data, searchwords, hlwords));
Search.output.append(listItem);
listItem.slideDown(10, function() {
displayNextItem();
});
});
}
// search finished, update title and status message
else {
Search.stopPulse();
Search.title.text(_('Search Results'));
if (!resultCount) {
Search.status.text(_('Your search did not match any documents. Please make sure that all words are spelled correctly and that you\'ve selected enough categories.'));
}
else {
Search.status.text(_('Search finished, found %s page(s) matching the search query.').replace('%s', resultCount));
}
Search.status.fadeIn(500);
}
}
displayNextItem();
} }
},
hasIndex : function() {
return self._index !== null;
},
/**
 * Remembers a query in _queued_query; setIndex() checks that field
 * and runs the stored query once an index has been set.
 */
deferQuery : function(query) {
this._queued_query = query;
},
stopPulse : function() {
this._pulse_status = 0;
},
startPulse : function() {
if (this._pulse_status >= 0)
return;
function pulse() {
Search._pulse_status = (Search._pulse_status + 1) % 4;
var dotString = '';
for (var i = 0; i < Search._pulse_status; i++)
dotString += '.';
Search.dots.text(dotString);
if (Search._pulse_status > -1)
window.setTimeout(pulse, 500);
};
pulse();
},
/**
* perform a search for something
*/
performSearch : function(query) {
// create the required interface elements
this.out = $('#search-results');
this.title = $('<h2>' + _('Searching') + '</h2>').appendTo(this.out);
this.dots = $('<span></span>').appendTo(this.title);
this.status = $('<p style="display: none"></p>').appendTo(this.out);
this.output = $('<ul class="search"/>').appendTo(this.out);
$('#search-progress').text(_('Preparing search...'));
this.startPulse();
// index already loaded, the browser was quick!
if (this.hasIndex())
this.query(query);
else
this.setQuery(query);
},
query : function(query) {
// stem the searchwords and add them to the
// correct list
var stemmer = new PorterStemmer();
var searchwords = [];
var excluded = [];
var hlwords = [];
var tmp = query.split(/\s+/);
for (var i = 0; i < tmp.length; i++) {
// stem the word
var word = stemmer.stemWord(tmp[i]).toLowerCase();
// select the correct list
if (word[0] == '-') {
var toAppend = excluded;
word = word.substr(1);
}
else {
var toAppend = searchwords;
hlwords.push(tmp[i].toLowerCase());
}
// only add if not already in the list
if (!$.contains(toAppend, word))
toAppend.push(word);
};
var highlightstring = '?highlight=' + $.urlencode(hlwords.join(" "));
console.debug('SEARCH: searching for:');
console.info('required: ', searchwords);
console.info('excluded: ', excluded);
// prepare search
var filenames = this._index[0];
var titles = this._index[1];
var words = this._index[2];
var fileMap = {};
var files = null;
$('#search-progress').empty();
// perform the search on the required words
for (var i = 0; i < searchwords.length; i++) {
var word = searchwords[i];
// no match but word was a required one
if ((files = words[word]) == null)
break;
// create the mapping
for (var j = 0; j < files.length; j++) {
var file = files[j];
if (file in fileMap)
fileMap[file].push(word);
else
fileMap[file] = [word];
}
}
// now check if the files are in the correct
// areas and if the don't contain excluded words
var results = [];
for (var file in fileMap) {
var valid = true;
// check if all requirements are matched
if (fileMap[file].length != searchwords.length)
continue;
// ensure that none of the excluded words is in the
// search result.
for (var i = 0; i < excluded.length; i++) {
if ($.contains(words[excluded[i]] || [], file)) {
valid = false;
break;
}
}
// if we have still a valid result we can add it
// to the result list
if (valid)
results.push([filenames[file], titles[file]]);
}
// delete unused variables in order to not waste
// memory until list is retrieved completely
delete filenames, titles, words;
// now sort the results by title
results.sort(function(a, b) {
var left = a[1].toLowerCase();
var right = b[1].toLowerCase();
return (left > right) ? -1 : ((left < right) ? 1 : 0);
});
// print the results
var resultCount = results.length;
function displayNextItem() {
// results left, load the summary and display it
if (results.length) {
var item = results.pop();
var listItem = $('<li style="display:none"></li>');
listItem.append($('<a/>').attr(
'href',
item[0] + DOCUMENTATION_OPTIONS.FILE_SUFFIX +
highlightstring).html(item[1]));
$.get('_sources/' + item[0] + '.txt', function(data) {
listItem.append($.makeSearchSummary(data, searchwords, hlwords));
Search.output.append(listItem);
listItem.slideDown(10, function() {
displayNextItem();
});
});
}
// search finished, update title and status message
else {
Search.stopPulse();
Search.title.text(_('Search Results'));
if (!resultCount)
Search.status.text(_('Your search did not match any documents. Please make sure that all words are spelled correctly and that you\'ve selected enough categories.'));
else
Search.status.text(_('Search finished, found %s page(s) matching the search query.').replace('%s', resultCount));
Search.status.fadeIn(500);
}
}
displayNextItem();
}
} }
$(document).ready(function() { $(document).ready(function() {
Search.init(); Search.init();
}); });

View File

@ -1,6 +1,6 @@
{% extends "layout.html" %} {% extends "layout.html" %}
{% set title = _('Search') %} {% set title = _('Search') %}
{% set script_files = script_files + ['_static/searchtools.js', 'searchindex.js'] %} {% set script_files = script_files + ['_static/searchtools.js'] %}
{% block body %} {% block body %}
<h1 id="search-documentation">{{ _('Search') }}</h1> <h1 id="search-documentation">{{ _('Search') }}</h1>
<p> <p>
@ -32,3 +32,7 @@
{% endif %} {% endif %}
</div> </div>
{% endblock %} {% endblock %}
{% block footer %}
{{ super() }}
<script type="text/javascript" src="searchindex.js"></script>
{% endblock %}