Added xapian search

This commit is contained in:
jacob 2010-06-24 14:30:02 -05:00
parent 00f841be2a
commit 8c4e535170
5 changed files with 204 additions and 6 deletions

View File

@ -23,12 +23,26 @@ class WebSupportBuilder(StandaloneHTMLBuilder):
name = 'websupport'
out_suffix = '.fpickle'
def init(self):
    """Prepare the search index, then run the standard HTML builder setup."""
    # Search is set up first, before the base class initialisation runs.
    self.init_search()
    StandaloneHTMLBuilder.init(self)
def init_search(self):
    """Pick up the search adapter from the application and open its index.

    ``self.search`` is set to the application's ``search`` attribute, or to
    ``None`` when the application does not define one.  When an adapter is
    present its ``create_index()`` method is called so documents can be
    added while pages are written.
    """
    # Not every application object carries a ``search`` attribute (in this
    # package only WebSupportApp sets it), so fall back to "search disabled"
    # instead of failing with AttributeError.
    self.search = getattr(self.app, 'search', None)
    if self.search is not None:
        self.search.create_index()
def init_translator_class(self):
    """Select WebSupportTranslator as the translator class for this builder."""
    translator = WebSupportTranslator
    self.translator_class = translator
def write_doc(self, docname, doctree):
    """Index the document (when search is enabled) and write it out.

    :param docname: name of the document being written
    :param doctree: the document tree for *docname*
    """
    # The translator needs the docname to generate ids.
    self.docname = docname

    # Index the page if search is enabled.
    search = self.search
    if search is not None:
        text = doctree.astext()
        # The first 20 characters of the plain text stand in for the title.
        search.add_document(docname, text[:20], text)

    StandaloneHTMLBuilder.write_doc(self, docname, doctree)
def get_target_uri(self, docname, typ=None):
@ -59,7 +73,8 @@ class WebSupportBuilder(StandaloneHTMLBuilder):
ctx, event_arg)
# Create a dict that will be pickled and used by webapps.
doc_ctx = {'body': ctx.get('body', '')}
doc_ctx = {'body': ctx.get('body', ''),
'title': ctx.get('title', '')}
# Partially render the html template to provide a more useful ctx.
template = self.templates.environment.get_template(templatename)
template_module = template.make_module(ctx)
@ -86,4 +101,3 @@ class WebSupportBuilder(StandaloneHTMLBuilder):
os_path(ctx['sourcename']))
ensuredir(path.dirname(source_name))
copyfile(self.env.doc2path(pagename), source_name)

View File

@ -0,0 +1,36 @@
{#
    basic/searchresults.html
    ~~~~~~~~~~~~~~~~~~~~~~~~

    Template for the body of the search results page.

    Context variables read by this template:
    search_performed -- true when a query was run
    search_results   -- iterable of (href, caption, context) tuples

    :copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
#}
<h1 id="search-documentation">Search</h1>
<p>
  From here you can search these documents. Enter your search
  words into the box below and click "search".
</p>
<form action="" method="get">
  <input type="text" name="q" value="" />
  <input type="submit" value="search" />
  <span id="search-progress" style="padding-left: 10px"></span>
</form>
{% if search_performed %}
<h2>Search Results</h2>
{% if not search_results %}
<p>Your search did not match any results.</p>
{% endif %}
{% endif %}
<div id="search-results">
  {% if search_results %}
  <ul>
    {# href and caption come from indexed document data, so escape them like the context. #}
    {% for href, caption, context in search_results %}
    <li><a href="{{ href|e }}">{{ caption|e }}</a>
      <div class="context">{{ context|e }}</div>
    </li>
    {% endfor %}
  </ul>
  {% endif %}
</div>

View File

@ -12,20 +12,47 @@
import cPickle as pickle
from os import path
from jinja2 import Environment, FileSystemLoader
from sphinx.application import Sphinx
from sphinx.websupport.search import search_adapters
class WebSupportApp(Sphinx):
    """Sphinx application that additionally carries a search adapter.

    The optional ``search`` keyword argument is stored on the instance as
    ``self.search`` (``None`` when omitted); every other argument is handed
    to ``Sphinx.__init__`` unchanged.
    """

    def __init__(self, *args, **kwargs):
        # Take 'search' out of kwargs so it is not forwarded to Sphinx, and
        # store it before the base initialiser runs.
        # NOTE(review): presumably the builder reads app.search while Sphinx
        # initialises -- confirm before reordering.
        search = kwargs.pop('search', None)
        self.search = search
        Sphinx.__init__(self, *args, **kwargs)
class WebSupport(object):
def init(self, srcdir='', outdir='', search=None):
    """Configure source/output directories, templating and optional search.

    :param srcdir: directory containing the documentation sources
    :param outdir: output directory; defaults to
                   ``<srcdir>/_build/websupport`` when empty
    :param search: key into ``search_adapters`` naming the search adapter
                   to use, or ``None`` to leave search disabled
    """
    self.srcdir = srcdir
    self.outdir = outdir or path.join(self.srcdir, '_build',
                                      'websupport')
    self.init_templating()
    # Always define the attribute: build() passes self.search along even
    # when no adapter was requested, which otherwise raises AttributeError.
    self.search = None
    if search is not None:
        self.init_search(search)
def init_templating(self):
    """Create the Jinja2 environment used to render support templates.

    Templates are loaded from the ``themes/basic`` directory inside the
    installed ``sphinx`` package; the environment is stored as
    ``self.template_env``.
    """
    import sphinx
    sphinx_dir = path.dirname(sphinx.__file__)
    basic_theme_dir = path.join(sphinx_dir, 'themes', 'basic')
    self.template_env = Environment(
        loader=FileSystemLoader(basic_theme_dir))
def init_search(self, search):
    """Instantiate the search adapter registered under *search*.

    The adapter class is looked up in ``search_adapters``, imported from
    the ``sphinx.websupport.search`` package and constructed with
    ``<outdir>/search`` as its path.  The search results template is
    loaded at the same time.

    :param search: key into ``search_adapters``
    """
    module_name, class_name = search_adapters[search]
    # A non-empty fromlist makes __import__ return the submodule itself
    # rather than the top-level package.
    module = __import__('sphinx.websupport.search.' + module_name,
                        None, None, [class_name])
    adapter_class = getattr(module, class_name)
    self.search = adapter_class(path.join(self.outdir, 'search'))
    template_env = self.template_env
    self.results_template = template_env.get_template('searchresults.html')
def build(self, **kwargs):
    """Run a ``websupport`` build of the configured source directory.

    :param doctreedir: optional keyword argument giving the doctree cache
                       directory; defaults to ``<outdir>/doctrees``
    """
    doctreedir = kwargs.pop('doctreedir',
                            path.join(self.outdir, 'doctrees'))
    # Only one application is created: the plain Sphinx(...) construction
    # that used to precede this was immediately overwritten.  getattr keeps
    # build() usable when no search adapter was ever configured.
    app = WebSupportApp(self.srcdir, self.srcdir,
                        self.outdir, doctreedir, 'websupport',
                        search=getattr(self, 'search', None))
    app.build()
def get_document(self, docname):
@ -33,3 +60,12 @@ class WebSupport(object):
f = open(infilename, 'rb')
document = pickle.load(f)
return document
def get_search_results(self, q):
    """Run query *q* and return the 'search' document with rendered results.

    The pickled ``search`` document is loaded, its ``body`` replaced by the
    rendered results template and its ``title`` set to ``'Search Results'``.

    :param q: the query string passed to the search adapter
    :returns: the modified document dict
    """
    # query() also reports found/displayed counts; only the hits are used.
    hits, _found, _displayed = self.search.query(q)
    page = self.get_document('search')
    page['body'] = self.results_template.render({
        'search_performed': True,
        'search_results': hits,
    })
    page['title'] = 'Search Results'
    return page

View File

@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
"""
sphinx.websupport.search
~~~~~~~~~~~~~~~~~~~~~~~~
Server side search support for the web support package.
:copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import re
class BaseSearch(object):
    """Base class for server-side search adapters.

    Subclasses override :meth:`create_index`, :meth:`add_document` and
    :meth:`query`; :meth:`extract_context` is a shared helper.
    """

    def create_index(self, path):
        """Create or open the search index.  Must be overridden."""
        # NotImplemented is a comparison sentinel, not an exception;
        # raising it produces a confusing TypeError.
        raise NotImplementedError

    def add_document(self, path, title, text):
        """Add a document to the index.  Must be overridden.

        :param path: identifier of the document
        :param title: title stored with the document
        :param text: full text to index
        """
        raise NotImplementedError

    def query(self, q):
        """Run the query string *q* against the index.  Must be overridden."""
        raise NotImplementedError

    def extract_context(self, text, query_string):
        """Return the text surrounding a match of *query_string* in *text*.

        The match is case-insensitive and *query_string* is taken literally
        (regex metacharacters are escaped).

        :returns: a ``(before, match, after)`` tuple of unicode strings with
                  at most 80 characters of context on each side, or
                  ``('', '', '')`` when *query_string* does not occur
        """
        # From GSOC 2009
        with_context_re = r'([\W\w]{0,80})(%s)([\W\w]{0,80})' % (
            re.escape(query_string),)
        match = re.search(with_context_re, text, re.I | re.U)
        if match is None:
            return '', '', ''
        return tuple(self._to_unicode(part) for part in match.groups())

    def _to_unicode(self, value):
        """Return *value* as unicode, decoding byte strings as UTF-8."""
        text_type = type(u'')
        if isinstance(value, text_type):
            # Already text; decoding it again would raise TypeError.
            return value
        # Undecodable bytes (e.g. a multi-byte character cut at the
        # 80-character window edge) are dropped rather than raising.
        return value.decode('utf-8', 'ignore')
# Registry of available search adapters: maps the adapter key to a
# (module name, class name) pair.
search_adapters = dict(
    xapian=('xapiansearch', 'XapianSearch'),
    whoosh=('whooshsearch', 'WhooshSearch'),
)

View File

@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-
"""
sphinx.websupport.search.xapiansearch
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Xapian search adapter.
:copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from os import path
import xapian
from sphinx.util.osutil import ensuredir
from sphinx.websupport.search import BaseSearch
class XapianSearch(BaseSearch):
    """Search adapter backed by a Xapian database stored at ``db_path``."""
    # Adapted from the GSOC 2009 webapp project.

    # Xapian metadata constants
    # Value slots used to store each document's path and title.
    DOC_PATH = 0
    DOC_TITLE = 1

    def __init__(self, db_path):
        """Remember the database directory; nothing is opened yet.

        :param db_path: directory holding the Xapian database
        """
        self.db_path = db_path

    def create_index(self):
        """Create or open the writable database and set up the indexer."""
        ensuredir(self.db_path)
        self.database = xapian.WritableDatabase(self.db_path,
                                                xapian.DB_CREATE_OR_OPEN)
        self.indexer = xapian.TermGenerator()
        stemmer = xapian.Stem("english")
        self.indexer.set_stemmer(stemmer)

    def add_document(self, path, title, text):
        """Index one document inside its own transaction.

        :param path: stored in the ``DOC_PATH`` value slot
        :param title: stored in the ``DOC_TITLE`` value slot
        :param text: stored as the document data and indexed
        """
        self.database.begin_transaction()
        try:
            doc = xapian.Document()
            doc.set_data(text)
            doc.add_value(self.DOC_PATH, path)
            doc.add_value(self.DOC_TITLE, title)
            self.indexer.set_document(doc)
            self.indexer.index_text(text)
            # Also add every whitespace-separated word as a raw posting.
            for word in text.split():
                doc.add_posting(word, 1)
            self.database.add_document(doc)
        except Exception:
            # Do not leave the transaction open: a later add_document()
            # could not begin a new one on the same database.
            self.database.cancel_transaction()
            raise
        self.database.commit_transaction()

    def query(self, q):
        """Search the database for the query string *q*.

        :returns: ``(results, results_found, results_displayed)`` where
                  *results* is a list of ``(path, title, context)`` tuples,
                  *results_found* is the estimated number of matches and
                  *results_displayed* is the number of matches returned
        """
        database = xapian.Database(self.db_path)
        enquire = xapian.Enquire(database)
        qp = xapian.QueryParser()
        stemmer = xapian.Stem("english")
        qp.set_stemmer(stemmer)
        qp.set_database(database)
        qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
        query = qp.parse_query(q)

        # Find the top 100 results for the query.
        enquire.set_query(query)
        matches = enquire.get_mset(0, 100)

        results_found = matches.get_matches_estimated()
        results_displayed = matches.size()

        results = []
        for m in matches:
            context = self.extract_context(m.document.get_data(), q)
            results.append((m.document.get_value(self.DOC_PATH),
                            m.document.get_value(self.DOC_TITLE),
                            ''.join(context)))

        return results, results_found, results_displayed