Pass original filenames to search indices (ref: #2399)

* Not tested carefully
* Always adds .txt to source filename even if html_sourcelink_txt is False
This commit is contained in:
Takeshi KOMIYA
2016-05-16 21:11:17 +09:00
committed by Matthias Geier
parent 86615e1f09
commit b5ae1b0846
3 changed files with 22 additions and 14 deletions

View File

@@ -718,7 +718,8 @@ class StandaloneHTMLBuilder(Builder):
def index_page(self, pagename, doctree, title): def index_page(self, pagename, doctree, title):
# only index pages with title # only index pages with title
if self.indexer is not None and title: if self.indexer is not None and title:
self.indexer.feed(pagename, title, doctree) filename = self.env.doc2path(pagename, base=None)
self.indexer.feed(pagename, filename, title, doctree)
def _get_local_toctree(self, docname, collapse=True, **kwds): def _get_local_toctree(self, docname, collapse=True, **kwds):
if 'includehidden' not in kwds: if 'includehidden' not in kwds:

View File

@@ -226,11 +226,13 @@ class IndexBuilder(object):
def __init__(self, env, lang, options, scoring): def __init__(self, env, lang, options, scoring):
self.env = env self.env = env
# filename -> title # docname -> title
self._titles = {} self._titles = {}
# stemmed word -> set(filenames) # docname -> filename
self._filenames = {}
# stemmed word -> set(docname)
self._mapping = {} self._mapping = {}
# stemmed words in titles -> set(filenames) # stemmed words in titles -> set(docname)
self._title_mapping = {} self._title_mapping = {}
# word -> stemmed word # word -> stemmed word
self._stem_cache = {} self._stem_cache = {}
@@ -338,15 +340,16 @@ class IndexBuilder(object):
def freeze(self): def freeze(self):
"""Create a usable data structure for serializing.""" """Create a usable data structure for serializing."""
filenames, titles = zip(*sorted(self._titles.items())) docnames, titles = zip(*sorted(self._titles.items()))
fn2index = dict((f, i) for (i, f) in enumerate(filenames)) filenames = [self._filenames.get(docname) for docname in docnames]
fn2index = dict((f, i) for (i, f) in enumerate(docnames))
terms, title_terms = self.get_terms(fn2index) terms, title_terms = self.get_terms(fn2index)
objects = self.get_objects(fn2index) # populates _objtypes objects = self.get_objects(fn2index) # populates _objtypes
objtypes = dict((v, k[0] + ':' + k[1]) objtypes = dict((v, k[0] + ':' + k[1])
for (k, v) in iteritems(self._objtypes)) for (k, v) in iteritems(self._objtypes))
objnames = self._objnames objnames = self._objnames
return dict(filenames=filenames, titles=titles, terms=terms, return dict(docnames=docnames, filenames=filenames, titles=titles, terms=terms,
objects=objects, objtypes=objtypes, objnames=objnames, objects=objects, objtypes=objtypes, objnames=objnames,
titleterms=title_terms, envversion=self.env.version) titleterms=title_terms, envversion=self.env.version)
@@ -365,9 +368,11 @@ class IndexBuilder(object):
for wordnames in itervalues(self._title_mapping): for wordnames in itervalues(self._title_mapping):
wordnames.intersection_update(filenames) wordnames.intersection_update(filenames)
def feed(self, filename, title, doctree): def feed(self, docname, filename, title, doctree):
"""Feed a doctree to the index.""" """Feed a doctree to the index."""
self._titles[filename] = title self._titles[docname] = title
self._filenames[docname] = filename
visitor = WordCollector(doctree, self.lang) visitor = WordCollector(doctree, self.lang)
doctree.walk(visitor) doctree.walk(visitor)
@@ -383,12 +388,12 @@ class IndexBuilder(object):
for word in visitor.found_title_words: for word in visitor.found_title_words:
word = stem(word) word = stem(word)
if _filter(word): if _filter(word):
self._title_mapping.setdefault(word, set()).add(filename) self._title_mapping.setdefault(word, set()).add(docname)
for word in visitor.found_words: for word in visitor.found_words:
word = stem(word) word = stem(word)
if word not in self._title_mapping and _filter(word): if word not in self._title_mapping and _filter(word):
self._mapping.setdefault(word, set()).add(filename) self._mapping.setdefault(word, set()).add(docname)
def context_for_searchtool(self): def context_for_searchtool(self):
return dict( return dict(

View File

@@ -256,7 +256,7 @@ var Search = {
displayNextItem(); displayNextItem();
}); });
} else if (DOCUMENTATION_OPTIONS.HAS_SOURCE) { } else if (DOCUMENTATION_OPTIONS.HAS_SOURCE) {
$.ajax({url: DOCUMENTATION_OPTIONS.URL_ROOT + '_sources/' + item[0] + '.txt', $.ajax({url: DOCUMENTATION_OPTIONS.URL_ROOT + '_sources/' + item[5] + '.txt',
dataType: "text", dataType: "text",
complete: function(jqxhr, textstatus) { complete: function(jqxhr, textstatus) {
var data = jqxhr.responseText; var data = jqxhr.responseText;
@@ -295,6 +295,7 @@ var Search = {
*/ */
performObjectSearch : function(object, otherterms) { performObjectSearch : function(object, otherterms) {
var filenames = this._index.filenames; var filenames = this._index.filenames;
var docnames = this._index.docnames;
var objects = this._index.objects; var objects = this._index.objects;
var objnames = this._index.objnames; var objnames = this._index.objnames;
var titles = this._index.titles; var titles = this._index.titles;
@@ -348,7 +349,7 @@ var Search = {
} else { } else {
score += Scorer.objPrioDefault; score += Scorer.objPrioDefault;
} }
results.push([filenames[match[0]], fullname, '#'+anchor, descr, score]); results.push([docnames[match[0]], fullname, '#'+anchor, descr, score, filenames[match[0]]]);
} }
} }
} }
@@ -360,6 +361,7 @@ var Search = {
* search for full-text terms in the index * search for full-text terms in the index
*/ */
performTermsSearch : function(searchterms, excluded, terms, titleterms) { performTermsSearch : function(searchterms, excluded, terms, titleterms) {
var docnames = this._index.docnames;
var filenames = this._index.filenames; var filenames = this._index.filenames;
var titles = this._index.titles; var titles = this._index.titles;
@@ -434,7 +436,7 @@ var Search = {
// select one (max) score for the file. // select one (max) score for the file.
// for better ranking, we should calculate ranking by using words statistics like basic tf-idf... // for better ranking, we should calculate ranking by using words statistics like basic tf-idf...
var score = $u.max($u.map(fileMap[file], function(w){return scoreMap[file][w]})); var score = $u.max($u.map(fileMap[file], function(w){return scoreMap[file][w]}));
results.push([filenames[file], titles[file], '', null, score]); results.push([docnames[file], titles[file], '', null, score, filenames[file]]);
} }
} }
return results; return results;