mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Closes #1067: implement pluggable search scorer and tweak scoring to give good results. Patch by Hernan Grecco.
This commit is contained in:
4
CHANGES
4
CHANGES
@@ -1,6 +1,10 @@
|
||||
Release 1.2 (in development)
|
||||
============================
|
||||
|
||||
* #1067: Improve the ordering of the JavaScript search results: matches in titles
|
||||
come before matches in full text, and object results are better categorized.
|
||||
Also implement a pluggable search scorer.
|
||||
|
||||
* PR#72: #975: Fix gettext does not extract definition terms before docutils 0.10.0
|
||||
|
||||
* PR#25: In inheritance diagrams, the first line of the class docstring
|
||||
|
||||
@@ -760,6 +760,15 @@ that use Sphinx' HTMLWriter class.
|
||||
|
||||
.. versionadded:: 1.1
|
||||
|
||||
.. confval:: html_search_scorer
|
||||
|
||||
The name of a javascript file (relative to the configuration directory) that
|
||||
implements a search results scorer. If empty, the default will be used.
|
||||
|
||||
.. XXX describe interface for scorer here
|
||||
|
||||
.. versionadded:: 1.2
|
||||
|
||||
.. confval:: htmlhelp_basename
|
||||
|
||||
Output file base name for HTML help builder. Default is ``'pydoc'``.
|
||||
|
||||
@@ -240,7 +240,8 @@ class StandaloneHTMLBuilder(Builder):
|
||||
if not lang or lang not in languages:
|
||||
lang = 'en'
|
||||
self.indexer = IndexBuilder(self.env, lang,
|
||||
self.config.html_search_options)
|
||||
self.config.html_search_options,
|
||||
self.config.html_search_scorer)
|
||||
self.load_indexer(docnames)
|
||||
|
||||
self.docwriter = HTMLWriter(self)
|
||||
|
||||
@@ -110,6 +110,7 @@ class Config(object):
|
||||
html_secnumber_suffix = ('. ', 'html'),
|
||||
html_search_language = (None, 'html'),
|
||||
html_search_options = ({}, 'html'),
|
||||
html_search_scorer = ('', None),
|
||||
|
||||
# HTML help only options
|
||||
htmlhelp_basename = (lambda self: make_filename(self.project), None),
|
||||
|
||||
@@ -161,7 +161,7 @@ class IndexBuilder(object):
|
||||
'pickle': pickle
|
||||
}
|
||||
|
||||
def __init__(self, env, lang, options):
|
||||
def __init__(self, env, lang, options, scoring):
|
||||
self.env = env
|
||||
# filename -> title
|
||||
self._titles = {}
|
||||
@@ -176,6 +176,12 @@ class IndexBuilder(object):
|
||||
# add language-specific SearchLanguage instance
|
||||
self.lang = languages[lang](options)
|
||||
|
||||
if scoring:
|
||||
with open(scoring, 'rb') as fp:
|
||||
self.js_scorer_code = fp.read().decode('utf-8')
|
||||
else:
|
||||
self.js_scorer_code = u''
|
||||
|
||||
def load(self, stream, format):
|
||||
"""Reconstruct from frozen data."""
|
||||
if isinstance(format, basestring):
|
||||
@@ -305,4 +311,5 @@ class IndexBuilder(object):
|
||||
return dict(
|
||||
search_language_stemming_code = self.lang.js_stemmer_code,
|
||||
search_language_stop_words = jsdump.dumps(sorted(self.lang.stopwords)),
|
||||
search_scorer_tool = self.js_scorer_code,
|
||||
)
|
||||
|
||||
@@ -9,34 +9,41 @@
|
||||
*
|
||||
*/
|
||||
|
||||
{{ search_language_stemming_code|safe }}
|
||||
|
||||
{% if search_scorer_tool %}
|
||||
{{ search_scorer_tool|safe }}
|
||||
{% else %}
|
||||
/**
|
||||
* helper function to return a node containing the
|
||||
* search summary for a given text. keywords is a list
|
||||
* of stemmed words, hlwords is the list of normal, unstemmed
|
||||
* words. the first one is used to find the occurance, the
|
||||
* latter for highlighting it.
|
||||
* Simple result scoring code.
|
||||
*/
|
||||
var Scorer = {
|
||||
// Implement the following function to further tweak the score for each result
|
||||
// The function takes a result array [filename, title, anchor, descr, score]
|
||||
// and returns the new score.
|
||||
/*
|
||||
score: function(result) {
|
||||
return result[4];
|
||||
},
|
||||
*/
|
||||
|
||||
jQuery.makeSearchSummary = function(text, keywords, hlwords) {
|
||||
var textLower = text.toLowerCase();
|
||||
var start = 0;
|
||||
$.each(keywords, function() {
|
||||
var i = textLower.indexOf(this.toLowerCase());
|
||||
if (i > -1)
|
||||
start = i;
|
||||
});
|
||||
start = Math.max(start - 120, 0);
|
||||
var excerpt = ((start > 0) ? '...' : '') +
|
||||
$.trim(text.substr(start, 240)) +
|
||||
((start + 240 - text.length) ? '...' : '');
|
||||
var rv = $('<div class="context"></div>').text(excerpt);
|
||||
$.each(hlwords, function() {
|
||||
rv = rv.highlightText(this, 'highlighted');
|
||||
});
|
||||
return rv;
|
||||
};
|
||||
// query matches the full name of an object
|
||||
objNameMatch: 11,
|
||||
// or matches in the last dotted part of the object name
|
||||
objPartialMatch: 6,
|
||||
// Additive scores depending on the priority of the object
|
||||
objPrio: {0: 15, // used to be importantResults
|
||||
1: 5, // used to be objectResults
|
||||
2: -5}, // used to be unimportantResults
|
||||
// Used when the priority is not in the mapping.
|
||||
objPrioDefault: 0,
|
||||
|
||||
{{ search_language_stemming_code|safe }}
|
||||
// query found in title
|
||||
title: 15,
|
||||
// query found in terms
|
||||
term: 5
|
||||
};
|
||||
{% endif %}
|
||||
|
||||
/**
|
||||
* Search Module
|
||||
@@ -184,23 +191,40 @@ var Search = {
|
||||
}
|
||||
|
||||
// lookup as search terms in fulltext
|
||||
results = results.concat(this.performTermsSearch(searchterms, excluded, terms, 0))
|
||||
.concat(this.performTermsSearch(searchterms, excluded, titleterms, 20));
|
||||
results = results.concat(this.performTermsSearch(searchterms, excluded, terms, Scorer.term))
|
||||
.concat(this.performTermsSearch(searchterms, excluded, titleterms, Scorer.title));
|
||||
|
||||
// delete unused variables in order to not waste memory until list is
|
||||
// retrieved completely
|
||||
delete filenames, titles, terms, titleterms;
|
||||
|
||||
// now sort the regular results by score (in opposite order of appearance,
|
||||
// since the display function below uses pop() to retrieve items)
|
||||
// let the scorer override scores with a custom scoring function
|
||||
if (Scorer.score) {
|
||||
for (i = 0; i < results.length; i++)
|
||||
results[i][4] = Scorer.score(results[i]);
|
||||
}
|
||||
|
||||
// now sort the results by score (in opposite order of appearance, since the
|
||||
// display function below uses pop() to retrieve items) and then
|
||||
// alphabetically
|
||||
results.sort(function(a, b) {
|
||||
var left = a[4];
|
||||
var right = b[4];
|
||||
return (left > right) ? 1 : ((left < right) ? -1 : 0);
|
||||
if (left > right) {
|
||||
return 1;
|
||||
} else if (left < right) {
|
||||
return -1;
|
||||
} else {
|
||||
// same score: sort alphabetically
|
||||
left = a[1].toLowerCase();
|
||||
right = b[1].toLowerCase();
|
||||
return (left > right) ? -1 : ((left < right) ? 1 : 0);
|
||||
}
|
||||
});
|
||||
|
||||
console.info('search results:', results);
|
||||
Search.lastresults = results.slice(); // a copy
|
||||
// for debugging
|
||||
//Search.lastresults = results.slice(); // a copy
|
||||
//console.info('search results:', Search.lastresults);
|
||||
|
||||
// print the results
|
||||
var resultCount = results.length;
|
||||
@@ -236,7 +260,7 @@ var Search = {
|
||||
$.get(DOCUMENTATION_OPTIONS.URL_ROOT + '_sources/' +
|
||||
item[0] + '.txt', function(data) {
|
||||
if (data != '') {
|
||||
listItem.append($.makeSearchSummary(data, searchterms, hlterms));
|
||||
listItem.append(Search.makeSearchSummary(data, searchterms, hlterms));
|
||||
Search.output.append(listItem);
|
||||
}
|
||||
listItem.slideDown(5, function() {
|
||||
@@ -281,6 +305,16 @@ var Search = {
|
||||
for (var name in objects[prefix]) {
|
||||
var fullname = (prefix ? prefix + '.' : '') + name;
|
||||
if (fullname.toLowerCase().indexOf(object) > -1) {
|
||||
var score = 0;
|
||||
var parts = fullname.split('.');
|
||||
// check for different match types: exact matches of full name or
|
||||
// "last name" (i.e. last dotted part)
|
||||
if (fullname == object || parts[parts.length - 1] == object) {
|
||||
score += Scorer.objNameMatch;
|
||||
// matches in last name
|
||||
} else if (parts[parts.length - 1].indexOf(object) > -1) {
|
||||
score += Scorer.objPartialMatch;
|
||||
}
|
||||
var match = objects[prefix][name];
|
||||
var objname = objnames[match[1]][2];
|
||||
var title = titles[match[0]];
|
||||
@@ -301,20 +335,17 @@ var Search = {
|
||||
}
|
||||
}
|
||||
var descr = objname + _(', in ') + title;
|
||||
|
||||
anchor = match[3];
|
||||
if (anchor == '')
|
||||
anchor = fullname;
|
||||
else if (anchor == '-')
|
||||
anchor = objnames[match[1]][1] + '-' + fullname;
|
||||
result = [filenames[match[0]], fullname, '#'+anchor, descr, 0];
|
||||
var score;
|
||||
switch (match[2]) {
|
||||
case 1: // normal results -- display between important and fulltext
|
||||
score = 5; break;
|
||||
case 0: // "important" results -- show directly after title results
|
||||
score = 10; break;
|
||||
case 2: // "unimportant" results -- show after fulltext results
|
||||
score = -10; break;
|
||||
// add custom score for some objects according to scorer
|
||||
if (Scorer.objPrio.hasOwnProperty(match[2])) {
|
||||
score += Scorer.objPrio[match[2]];
|
||||
} else {
|
||||
score += Scorer.objPrioDefault;
|
||||
}
|
||||
results.push([filenames[match[0]], fullname, '#'+anchor, descr, score]);
|
||||
}
|
||||
@@ -372,11 +403,38 @@ var Search = {
|
||||
}
|
||||
|
||||
// if we have still a valid result we can add it to the result list
|
||||
if (valid)
|
||||
if (valid) {
|
||||
results.push([filenames[file], titles[file], '', null, score]);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
},
|
||||
|
||||
/**
|
||||
* helper function to return a node containing the
|
||||
* search summary for a given text. keywords is a list
|
||||
* of stemmed words, hlwords is the list of normal, unstemmed
|
||||
* words. the first one is used to find the occurance, the
|
||||
* latter for highlighting it.
|
||||
*/
|
||||
makeSearchSummary : function(text, keywords, hlwords) {
|
||||
var textLower = text.toLowerCase();
|
||||
var start = 0;
|
||||
$.each(keywords, function() {
|
||||
var i = textLower.indexOf(this.toLowerCase());
|
||||
if (i > -1)
|
||||
start = i;
|
||||
});
|
||||
start = Math.max(start - 120, 0);
|
||||
var excerpt = ((start > 0) ? '...' : '') +
|
||||
$.trim(text.substr(start, 240)) +
|
||||
((start + 240 - text.length) ? '...' : '');
|
||||
var rv = $('<div class="context"></div>').text(excerpt);
|
||||
$.each(hlwords, function() {
|
||||
rv = rv.highlightText(this, 'highlighted');
|
||||
});
|
||||
return rv;
|
||||
}
|
||||
};
|
||||
|
||||
$(document).ready(function() {
|
||||
|
||||
Reference in New Issue
Block a user