mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Improved search slightly by adding keyword-based lookup
This commit is contained in:
@@ -386,7 +386,7 @@ class StandaloneHTMLBuilder(Builder):
|
|||||||
def prepare_writing(self, docnames):
|
def prepare_writing(self, docnames):
|
||||||
from sphinx.search import IndexBuilder
|
from sphinx.search import IndexBuilder
|
||||||
|
|
||||||
self.indexer = IndexBuilder()
|
self.indexer = IndexBuilder(self.env)
|
||||||
self.load_indexer(docnames)
|
self.load_indexer(docnames)
|
||||||
self.docwriter = HTMLWriter(self)
|
self.docwriter = HTMLWriter(self)
|
||||||
self.docsettings = OptionParser(
|
self.docsettings = OptionParser(
|
||||||
|
|||||||
@@ -87,7 +87,8 @@ class IndexBuilder(object):
|
|||||||
'pickle': pickle
|
'pickle': pickle
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, env):
|
||||||
|
self.env = env
|
||||||
self._stemmer = Stemmer()
|
self._stemmer = Stemmer()
|
||||||
# filename -> title
|
# filename -> title
|
||||||
self._titles = {}
|
self._titles = {}
|
||||||
@@ -110,19 +111,28 @@ class IndexBuilder(object):
|
|||||||
format = self.formats[format]
|
format = self.formats[format]
|
||||||
format.dump(self.freeze(), stream)
|
format.dump(self.freeze(), stream)
|
||||||
|
|
||||||
|
def get_keyword_map(self):
|
||||||
|
"""Return a dict of all keywords."""
|
||||||
|
rv = {}
|
||||||
|
for kw, (ref, _, _, _) in self.env.modules.iteritems():
|
||||||
|
rv[kw] = (ref, 'module', 'module-' + kw)
|
||||||
|
for kw, (ref, ref_type) in self.env.descrefs.iteritems():
|
||||||
|
rv[kw] = (ref, ref_type, kw)
|
||||||
|
return rv
|
||||||
|
|
||||||
def freeze(self):
|
def freeze(self):
|
||||||
"""
|
"""Create a useable data structure for serializing."""
|
||||||
Create a useable data structure. You can pass this output
|
filenames = self._titles.keys()
|
||||||
to the `SearchFrontend` to search the index.
|
titles = self._titles.values()
|
||||||
"""
|
fn2index = dict((f, i) for (i, f) in enumerate(filenames))
|
||||||
fns, titles = self._titles.keys(), self._titles.values()
|
return dict(
|
||||||
fn2index = dict((f, i) for (i, f) in enumerate(fns))
|
filenames=filenames,
|
||||||
return [
|
titles=titles,
|
||||||
fns,
|
terms=dict((k, [fn2index[fn] for fn in v])
|
||||||
titles,
|
|
||||||
dict((k, [fn2index[fn] for fn in v])
|
|
||||||
for (k, v) in self._mapping.iteritems()),
|
for (k, v) in self._mapping.iteritems()),
|
||||||
]
|
keywords=dict((k, (fn2index[v[0]],) + v[1:]) for k, v in
|
||||||
|
self.get_keyword_map().iteritems())
|
||||||
|
)
|
||||||
|
|
||||||
def prune(self, filenames):
|
def prune(self, filenames):
|
||||||
"""Remove data for all filenames not in the list."""
|
"""Remove data for all filenames not in the list."""
|
||||||
@@ -147,45 +157,6 @@ class IndexBuilder(object):
|
|||||||
|
|
||||||
for word in word_re.findall(title):
|
for word in word_re.findall(title):
|
||||||
add_term(word)
|
add_term(word)
|
||||||
add_term(word, 'T')
|
|
||||||
|
|
||||||
for word in visitor.found_words:
|
for word in visitor.found_words:
|
||||||
add_term(word)
|
add_term(word)
|
||||||
|
|
||||||
|
|
||||||
class SearchFrontend(object):
|
|
||||||
"""
|
|
||||||
This class acts as a frontend for the search index. It can search
|
|
||||||
a searchindex as provided by `IndexBuilder`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, index):
|
|
||||||
self.filenames, self.titles, self.words = index
|
|
||||||
self._stemmer = Stemmer()
|
|
||||||
|
|
||||||
def query(self, required, excluded):
|
|
||||||
file_map = {}
|
|
||||||
for word in required:
|
|
||||||
if word not in self.words:
|
|
||||||
break
|
|
||||||
for fid in self.words[word]:
|
|
||||||
file_map.setdefault(fid, set()).add(word)
|
|
||||||
|
|
||||||
return sorted(((self.filenames[fid], self.titles[fid])
|
|
||||||
for fid, words in file_map.iteritems()
|
|
||||||
if len(words) == len(required) and not
|
|
||||||
any(fid in self.words.get(word, ()) for word in excluded)
|
|
||||||
), key=lambda x: x[1].lower())
|
|
||||||
|
|
||||||
def search(self, searchstring):
|
|
||||||
required = set()
|
|
||||||
excluded = set()
|
|
||||||
for word in searchstring.split():
|
|
||||||
if word.startswith('-'):
|
|
||||||
storage = excluded
|
|
||||||
word = word[1:]
|
|
||||||
else:
|
|
||||||
storage = required
|
|
||||||
storage.add(self._stemmer.stem(word))
|
|
||||||
|
|
||||||
return self.query(required, excluded)
|
|
||||||
|
|||||||
@@ -294,6 +294,7 @@ var Search = {
|
|||||||
var excluded = [];
|
var excluded = [];
|
||||||
var hlwords = [];
|
var hlwords = [];
|
||||||
var tmp = query.split(/\s+/);
|
var tmp = query.split(/\s+/);
|
||||||
|
var keyword = (tmp.length == 1) ? tmp[0] : null;
|
||||||
for (var i = 0; i < tmp.length; i++) {
|
for (var i = 0; i < tmp.length; i++) {
|
||||||
// stem the word
|
// stem the word
|
||||||
var word = stemmer.stemWord(tmp[i]).toLowerCase();
|
var word = stemmer.stemWord(tmp[i]).toLowerCase();
|
||||||
@@ -317,13 +318,22 @@ var Search = {
|
|||||||
console.info('excluded: ', excluded);
|
console.info('excluded: ', excluded);
|
||||||
|
|
||||||
// prepare search
|
// prepare search
|
||||||
var filenames = this._index[0];
|
var filenames = this._index.filenames;
|
||||||
var titles = this._index[1];
|
var titles = this._index.titles;
|
||||||
var words = this._index[2];
|
var words = this._index.terms;
|
||||||
var fileMap = {};
|
var fileMap = {};
|
||||||
var files = null;
|
var files = null;
|
||||||
|
var results = [];
|
||||||
|
var regularResults = [];
|
||||||
$('#search-progress').empty();
|
$('#search-progress').empty();
|
||||||
|
|
||||||
|
// lookup the keyword
|
||||||
|
if (keyword != null) {
|
||||||
|
var match = this._index.keywords[keyword];
|
||||||
|
if (match)
|
||||||
|
results.push([filenames[match[0]], titles[match[0]], match[2]]);
|
||||||
|
}
|
||||||
|
|
||||||
// perform the search on the required words
|
// perform the search on the required words
|
||||||
for (var i = 0; i < searchwords.length; i++) {
|
for (var i = 0; i < searchwords.length; i++) {
|
||||||
var word = searchwords[i];
|
var word = searchwords[i];
|
||||||
@@ -342,7 +352,6 @@ var Search = {
|
|||||||
|
|
||||||
// now check if the files are in the correct
|
// now check if the files are in the correct
|
||||||
// areas and if they don't contain excluded words
|
// areas and if they don't contain excluded words
|
||||||
var results = [];
|
|
||||||
for (var file in fileMap) {
|
for (var file in fileMap) {
|
||||||
var valid = true;
|
var valid = true;
|
||||||
|
|
||||||
@@ -362,20 +371,23 @@ var Search = {
|
|||||||
// if we have still a valid result we can add it
|
// if we have still a valid result we can add it
|
||||||
// to the result list
|
// to the result list
|
||||||
if (valid)
|
if (valid)
|
||||||
results.push([filenames[file], titles[file]]);
|
results.push([filenames[file], titles[file], null]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// delete unused variables in order to not waste
|
// delete unused variables in order to not waste
|
||||||
// memory until list is retrieved completely
|
// memory until list is retrieved completely
|
||||||
delete filenames, titles, words;
|
delete filenames, titles, words;
|
||||||
|
|
||||||
// now sort the results by title
|
// now sort the regular results by title
|
||||||
results.sort(function(a, b) {
|
regularResults.sort(function(a, b) {
|
||||||
var left = a[1].toLowerCase();
|
var left = a[1].toLowerCase();
|
||||||
var right = b[1].toLowerCase();
|
var right = b[1].toLowerCase();
|
||||||
return (left > right) ? -1 : ((left < right) ? 1 : 0);
|
return (left > right) ? -1 : ((left < right) ? 1 : 0);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// combine both
|
||||||
|
results = results.concat(regularResults);
|
||||||
|
|
||||||
// print the results
|
// print the results
|
||||||
var resultCount = results.length;
|
var resultCount = results.length;
|
||||||
function displayNextItem() {
|
function displayNextItem() {
|
||||||
@@ -386,7 +398,8 @@ var Search = {
|
|||||||
listItem.append($('<a/>').attr(
|
listItem.append($('<a/>').attr(
|
||||||
'href',
|
'href',
|
||||||
item[0] + DOCUMENTATION_OPTIONS.FILE_SUFFIX +
|
item[0] + DOCUMENTATION_OPTIONS.FILE_SUFFIX +
|
||||||
highlightstring).html(item[1]));
|
highlightstring +
|
||||||
|
(item[2] ? '#' + item[2] : '')).html(item[1]));
|
||||||
$.get('_sources/' + item[0] + '.txt', function(data) {
|
$.get('_sources/' + item[0] + '.txt', function(data) {
|
||||||
listItem.append($.makeSearchSummary(data, searchwords, hlwords));
|
listItem.append($.makeSearchSummary(data, searchwords, hlwords));
|
||||||
Search.output.append(listItem);
|
Search.output.append(listItem);
|
||||||
|
|||||||
Reference in New Issue
Block a user