mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Added xapian search
This commit is contained in:
parent
00f841be2a
commit
8c4e535170
@ -23,12 +23,26 @@ class WebSupportBuilder(StandaloneHTMLBuilder):
|
||||
name = 'websupport'
|
||||
out_suffix = '.fpickle'
|
||||
|
||||
def init(self):
    """Initialise the builder.

    The search adapter is set up first via :meth:`init_search`; the
    regular ``StandaloneHTMLBuilder`` initialisation runs afterwards.
    """
    self.init_search()
    StandaloneHTMLBuilder.init(self)
|
||||
|
||||
def init_search(self):
    """Adopt the application's search adapter and create its index.

    ``self.search`` is set to ``self.app.search``.  When that value is
    ``None`` search is disabled and no index is created.
    """
    adapter = self.app.search
    self.search = adapter
    if adapter is None:
        return
    adapter.create_index()
|
||||
|
||||
def init_translator_class(self):
    """Select ``WebSupportTranslator`` as this builder's translator class."""
    self.translator_class = WebSupportTranslator
|
||||
|
||||
def write_doc(self, docname, doctree):
    """Index one document for search (when enabled) and write it out.

    :param docname: name of the document being written.
    :param doctree: the document's doctree; its ``astext()`` output is
                    what gets indexed.
    """
    # The translator needs the docname to generate ids.
    self.docname = docname

    search = self.search
    if search is not None:
        # Index the page.  The first 20 characters of the page text are
        # passed along as the title.
        text = doctree.astext()
        search.add_document(docname, text[:20], text)

    StandaloneHTMLBuilder.write_doc(self, docname, doctree)
|
||||
|
||||
def get_target_uri(self, docname, typ=None):
|
||||
@ -59,7 +73,8 @@ class WebSupportBuilder(StandaloneHTMLBuilder):
|
||||
ctx, event_arg)
|
||||
|
||||
# Create a dict that will be pickled and used by webapps.
|
||||
doc_ctx = {'body': ctx.get('body', '')}
|
||||
doc_ctx = {'body': ctx.get('body', ''),
|
||||
'title': ctx.get('title', '')}
|
||||
# Partially render the html template to provide a more useful ctx.
|
||||
template = self.templates.environment.get_template(templatename)
|
||||
template_module = template.make_module(ctx)
|
||||
@ -86,4 +101,3 @@ class WebSupportBuilder(StandaloneHTMLBuilder):
|
||||
os_path(ctx['sourcename']))
|
||||
ensuredir(path.dirname(source_name))
|
||||
copyfile(self.env.doc2path(pagename), source_name)
|
||||
|
||||
|
36
sphinx/themes/basic/searchresults.html
Normal file
36
sphinx/themes/basic/searchresults.html
Normal file
@ -0,0 +1,36 @@
|
||||
{#
|
||||
basic/searchresults.html
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Template for the body of the search results page.
|
||||
|
||||
:copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS.
|
||||
:license: BSD, see LICENSE for details.
|
||||
#}
|
||||
<h1 id="search-documentation">Search</h1>
|
||||
<p>
|
||||
From here you can search these documents. Enter your search
|
||||
words into the box below and click "search".
|
||||
</p>
|
||||
<form action="" method="get">
|
||||
<input type="text" name="q" value="" />
|
||||
<input type="submit" value="search" />
|
||||
<span id="search-progress" style="padding-left: 10px"></span>
|
||||
</form>
|
||||
{% if search_performed %}
|
||||
<h2>Search Results</h2>
|
||||
{% if not search_results %}
|
||||
<p>Your search did not match any results.</p>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
<div id="search-results">
|
||||
{% if search_results %}
|
||||
<ul>
|
||||
{% for href, caption, context in search_results %}
|
||||
<li><a href="{{ href }}">{{ caption }}</a>
|
||||
<div class="context">{{ context|e }}</div>
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
</div>
|
@ -12,20 +12,47 @@
|
||||
import cPickle as pickle
|
||||
from os import path
|
||||
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
from sphinx.application import Sphinx
|
||||
from sphinx.websupport.search import search_adapters
|
||||
|
||||
class WebSupportApp(Sphinx):
    """A ``Sphinx`` application that additionally carries a search adapter.

    The optional ``search`` keyword argument is stored as ``self.search``
    (``None`` when omitted).  It is removed from the keyword arguments
    before the remaining arguments are handed to ``Sphinx.__init__``.
    """

    def __init__(self, *args, **kwargs):
        search = kwargs.pop('search', None)
        self.search = search
        Sphinx.__init__(self, *args, **kwargs)
|
||||
|
||||
class WebSupport(object):
|
||||
|
||||
def init(self, srcdir='', outdir='', search=None):
    """Configure source/output directories, templating and search.

    :param srcdir: directory containing the documentation sources.
    :param outdir: output directory; when empty it defaults to
                   ``<srcdir>/_build/websupport``.
    :param search: key into ``search_adapters`` naming the search adapter
                   to use, or ``None`` to leave search disabled.
    """
    self.srcdir = srcdir
    self.outdir = outdir or path.join(self.srcdir, '_build',
                                      'websupport')
    # Always define the attribute: build() reads ``self.search`` even when
    # no adapter was requested, which used to raise AttributeError.
    self.search = None
    self.init_templating()
    if search is not None:
        self.init_search(search)
|
||||
|
||||
def init_templating(self):
    """Create ``self.template_env``, a Jinja2 environment whose loader
    reads templates from the ``themes/basic`` directory that sits next to
    the installed ``sphinx`` package.
    """
    import sphinx
    package_dir = path.dirname(sphinx.__file__)
    basic_theme_dir = path.join(package_dir, 'themes', 'basic')
    self.template_env = Environment(
        loader=FileSystemLoader(basic_theme_dir))
|
||||
|
||||
def init_search(self, search):
    """Instantiate the search adapter registered under *search*.

    The adapter's module and class names are looked up in
    ``search_adapters``; the class is imported from
    ``sphinx.websupport.search.<module>`` and instantiated with
    ``<outdir>/search`` as its only argument.  The ``searchresults.html``
    template is loaded as ``self.results_template``.

    :param search: key into ``search_adapters``.
    """
    mod, cls = search_adapters[search]
    module = __import__('sphinx.websupport.search.' + mod,
                        None, None, [cls])
    search_class = getattr(module, cls)
    self.search = search_class(path.join(self.outdir, 'search'))
    template_env = self.template_env
    self.results_template = template_env.get_template('searchresults.html')
|
||||
|
||||
def build(self, **kwargs):
    """Run a build with the ``websupport`` builder.

    :param doctreedir: optional keyword argument giving the directory for
                       the doctrees; defaults to ``<outdir>/doctrees``.

    ``self.srcdir`` is passed as both the first and the second positional
    argument of the application.
    """
    doctreedir = kwargs.pop('doctreedir',
                            path.join(self.outdir, 'doctrees'))
    # Tolerate instances on which no search adapter was ever configured.
    search = getattr(self, 'search', None)
    # Only one application is created; a plain ``Sphinx`` instance built
    # here would be discarded immediately and would not carry ``search``.
    app = WebSupportApp(self.srcdir, self.srcdir,
                        self.outdir, doctreedir, 'websupport',
                        search=search)
    app.build()
|
||||
|
||||
def get_document(self, docname):
|
||||
@ -33,3 +60,12 @@ class WebSupport(object):
|
||||
f = open(infilename, 'rb')
|
||||
document = pickle.load(f)
|
||||
return document
|
||||
|
||||
def get_search_results(self, q):
    """Run the query *q* and return the search page document.

    The ``'search'`` document is fetched via :meth:`get_document`; its
    ``'body'`` is replaced with the rendered results template and its
    ``'title'`` is set to ``'Search Results'``.

    :param q: the query string handed to ``self.search.query``.
    :returns: the modified document.
    """
    # Only the result list is rendered; the two counts are not used here.
    results, _found, _displayed = self.search.query(q)
    ctx = dict(search_performed=True, search_results=results)
    document = self.get_document('search')
    rendered = self.results_template.render(ctx)
    document['body'] = rendered
    document['title'] = 'Search Results'
    return document
|
||||
|
36
sphinx/websupport/search/__init__.py
Normal file
36
sphinx/websupport/search/__init__.py
Normal file
@ -0,0 +1,36 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
sphinx.websupport.search
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Server side search support for the web support package.
|
||||
|
||||
:copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS.
|
||||
:license: BSD, see LICENSE for details.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
class BaseSearch(object):
    """Base class for the search adapters of the web support package.

    Subclasses override :meth:`create_index`, :meth:`add_document` and
    :meth:`query`; :meth:`extract_context` is a shared helper.
    """

    def create_index(self, path=None):
        """Create or open the search index.

        *path* is optional because the builder calls this method without
        arguments.

        :raises NotImplementedError: always, unless overridden.
        """
        raise NotImplementedError

    def add_document(self, path, title, text):
        """Add one document to the index.

        :param path: value identifying the document.
        :param title: title stored with the document.
        :param text: the text to index.
        :raises NotImplementedError: always, unless overridden.
        """
        raise NotImplementedError

    def query(self, q):
        """Run the query string *q* against the index.

        :raises NotImplementedError: always, unless overridden.
        """
        raise NotImplementedError

    def extract_context(self, text, query_string):
        """Return the first occurrence of *query_string* in *text* together
        with up to 80 characters on either side of it.

        The query is matched literally (regex metacharacters are escaped)
        and case-insensitively.

        :returns: a ``(before, match, after)`` tuple, or ``('', '', '')``
                  when *query_string* does not occur in *text*.
        """
        # From GSOC 2009
        with_context_re = (r'([\W\w]{0,80})(%s)([\W\w]{0,80})'
                           % re.escape(query_string))
        match = re.search(with_context_re, text, re.I | re.U)
        if match is None:
            return '', '', ''
        return tuple(self._to_text(part) for part in match.groups())

    @staticmethod
    def _to_text(value):
        """Decode a byte string to text; return text values unchanged."""
        # ``bytes`` is ``str`` on Python 2.6+.  Undecodable bytes are dropped,
        # as before, but valid UTF-8 sequences are now kept rather than
        # stripped.  NOTE(review): assumes byte input is UTF-8 -- confirm
        # against the adapters that feed this method.
        if isinstance(value, bytes):
            return value.decode('utf-8', 'ignore')
        return value
|
||||
|
||||
# Registry of the available search adapters.  Each key is an adapter name
# and each value a ``(module name, class name)`` pair.
search_adapters = dict(
    xapian=('xapiansearch', 'XapianSearch'),
    whoosh=('whooshsearch', 'WhooshSearch'),
)
|
76
sphinx/websupport/search/xapiansearch.py
Normal file
76
sphinx/websupport/search/xapiansearch.py
Normal file
@ -0,0 +1,76 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
sphinx.websupport.search.xapian
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Xapian search adapter.
|
||||
|
||||
:copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS.
|
||||
:license: BSD, see LICENSE for details.
|
||||
"""
|
||||
|
||||
from os import path
|
||||
|
||||
import xapian
|
||||
|
||||
from sphinx.util.osutil import ensuredir
|
||||
from sphinx.websupport.search import BaseSearch
|
||||
|
||||
class XapianSearch(BaseSearch):
    """Search adapter that keeps its index in a Xapian database located at
    ``db_path``.
    """
    # Adapted from the GSOC 2009 webapp project.

    # Xapian metadata constants
    DOC_PATH = 0
    DOC_TITLE = 1

    def __init__(self, db_path):
        """Remember *db_path*; the database itself is opened later."""
        self.db_path = db_path

    def create_index(self):
        """Ensure ``db_path`` exists, open a writable database there with
        ``DB_CREATE_OR_OPEN`` and set up a term generator that uses an
        English stemmer.
        """
        ensuredir(self.db_path)
        self.database = xapian.WritableDatabase(self.db_path,
                                                xapian.DB_CREATE_OR_OPEN)
        self.indexer = xapian.TermGenerator()
        stemmer = xapian.Stem("english")
        self.indexer.set_stemmer(stemmer)

    def add_document(self, path, title, text):
        """Add one document to the index inside a transaction.

        *text* is stored as the document data; *path* and *title* go into
        the value slots ``DOC_PATH`` and ``DOC_TITLE``.  If anything fails
        before the commit, the transaction is cancelled and the exception
        is re-raised.
        """
        self.database.begin_transaction()
        try:
            doc = xapian.Document()
            doc.set_data(text)
            doc.add_value(self.DOC_PATH, path)
            doc.add_value(self.DOC_TITLE, title)
            self.indexer.set_document(doc)
            self.indexer.index_text(text)
            # In addition to the term generator's output, every
            # whitespace-separated word is added as a posting at position 1.
            for word in text.split():
                doc.add_posting(word, 1)
            self.database.add_document(doc)
        except Exception:
            # Do not leave the transaction open on the writable database.
            self.database.cancel_transaction()
            raise
        self.database.commit_transaction()

    def query(self, q):
        """Run the query string *q* against the database at ``db_path``.

        :returns: ``(results, results_found, results_displayed)`` where
                  *results* is a list of ``(path, title, context)`` tuples
                  for at most 100 matches, *results_found* is the match
                  set's estimated number of matches and *results_displayed*
                  is the size of the match set.
        """
        database = xapian.Database(self.db_path)
        enquire = xapian.Enquire(database)
        qp = xapian.QueryParser()
        stemmer = xapian.Stem("english")
        qp.set_stemmer(stemmer)
        qp.set_database(database)
        qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
        query = qp.parse_query(q)

        # Find the top 100 results for the query.
        enquire.set_query(query)
        matches = enquire.get_mset(0, 100)

        results_found = matches.get_matches_estimated()
        results_displayed = matches.size()

        results = []

        for m in matches:
            # The three context pieces are joined into one display string.
            context = self.extract_context(m.document.get_data(), q)
            results.append((m.document.get_value(self.DOC_PATH),
                            m.document.get_value(self.DOC_TITLE),
                            ''.join(context)))

        return results, results_found, results_displayed
|
||||
|
Loading…
Reference in New Issue
Block a user