mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Added xapian search
This commit is contained in:
parent
00f841be2a
commit
8c4e535170
@ -23,12 +23,26 @@ class WebSupportBuilder(StandaloneHTMLBuilder):
|
|||||||
name = 'websupport'
|
name = 'websupport'
|
||||||
out_suffix = '.fpickle'
|
out_suffix = '.fpickle'
|
||||||
|
|
||||||
|
def init(self):
    """Set up the search index, then run the regular HTML builder setup.

    The search adapter is prepared first via :meth:`init_search`; the
    remaining initialisation is delegated to ``StandaloneHTMLBuilder``.
    """
    self.init_search()
    StandaloneHTMLBuilder.init(self)
|
||||||
|
|
||||||
|
def init_search(self):
    """Attach the application's search adapter and create its index.

    ``self.search`` is set to the adapter found on ``self.app.search``, or
    to ``None`` when the application does not provide one.  When an adapter
    is present its ``create_index()`` method is called once.
    """
    # Only WebSupportApp defines ``search``; a plain Sphinx application has
    # no such attribute, so fall back to "search disabled" instead of
    # failing with AttributeError.
    adapter = getattr(self.app, 'search', None)
    self.search = adapter
    if adapter is not None:
        adapter.create_index()
|
||||||
|
|
||||||
def init_translator_class(self):
    """Select ``WebSupportTranslator`` as this builder's translator class."""
    self.translator_class = WebSupportTranslator
|
||||||
|
|
||||||
def write_doc(self, docname, doctree):
    """Write one document and, if search is enabled, add it to the index.

    :param docname: name of the document being written.
    :param doctree: the document tree; its ``astext()`` output is what
                    gets indexed.
    """
    # The translator needs the docname to generate ids.
    self.docname = docname

    # Index the page if search is enabled.
    adapter = self.search
    if adapter is not None:
        page_text = doctree.astext()
        # The indexed title is simply the first 20 characters of the text.
        adapter.add_document(docname, page_text[:20], page_text)

    StandaloneHTMLBuilder.write_doc(self, docname, doctree)
|
||||||
|
|
||||||
def get_target_uri(self, docname, typ=None):
|
def get_target_uri(self, docname, typ=None):
|
||||||
@ -59,7 +73,8 @@ class WebSupportBuilder(StandaloneHTMLBuilder):
|
|||||||
ctx, event_arg)
|
ctx, event_arg)
|
||||||
|
|
||||||
# Create a dict that will be pickled and used by webapps.
|
# Create a dict that will be pickled and used by webapps.
|
||||||
doc_ctx = {'body': ctx.get('body', '')}
|
doc_ctx = {'body': ctx.get('body', ''),
|
||||||
|
'title': ctx.get('title', '')}
|
||||||
# Partially render the html template to provide a more useful ctx.
|
# Partially render the html template to provide a more useful ctx.
|
||||||
template = self.templates.environment.get_template(templatename)
|
template = self.templates.environment.get_template(templatename)
|
||||||
template_module = template.make_module(ctx)
|
template_module = template.make_module(ctx)
|
||||||
@ -86,4 +101,3 @@ class WebSupportBuilder(StandaloneHTMLBuilder):
|
|||||||
os_path(ctx['sourcename']))
|
os_path(ctx['sourcename']))
|
||||||
ensuredir(path.dirname(source_name))
|
ensuredir(path.dirname(source_name))
|
||||||
copyfile(self.env.doc2path(pagename), source_name)
|
copyfile(self.env.doc2path(pagename), source_name)
|
||||||
|
|
||||||
|
36
sphinx/themes/basic/searchresults.html
Normal file
36
sphinx/themes/basic/searchresults.html
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
{#
    basic/searchresults.html
    ~~~~~~~~~~~~~~~~~~~~~~~~

    Template for the body of the search results page.

    Context variables read by this template:

    search_performed
        true when a query was run; enables the "Search Results" heading.
    search_results
        iterable of ``(href, caption, context)`` tuples, one per hit.

    All three values of a hit are HTML-escaped on output.

    :copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
#}
<h1 id="search-documentation">Search</h1>
<p>
  From here you can search these documents. Enter your search
  words into the box below and click "search".
</p>
<form action="" method="get">
  <input type="text" name="q" value="" />
  <input type="submit" value="search" />
  <span id="search-progress" style="padding-left: 10px"></span>
</form>
{% if search_performed %}
  <h2>Search Results</h2>
  {% if not search_results %}
    <p>Your search did not match any results.</p>
  {% endif %}
{% endif %}
<div id="search-results">
  {% if search_results %}
    <ul>
      {% for href, caption, context in search_results %}
        <li><a href="{{ href|e }}">{{ caption|e }}</a>
          <div class="context">{{ context|e }}</div>
        </li>
      {% endfor %}
    </ul>
  {% endif %}
</div>
|
@ -12,20 +12,47 @@
|
|||||||
import cPickle as pickle
|
import cPickle as pickle
|
||||||
from os import path
|
from os import path
|
||||||
|
|
||||||
|
from jinja2 import Environment, FileSystemLoader
|
||||||
|
|
||||||
from sphinx.application import Sphinx
|
from sphinx.application import Sphinx
|
||||||
|
from sphinx.websupport.search import search_adapters
|
||||||
|
|
||||||
|
class WebSupportApp(Sphinx):
    """Sphinx application that additionally carries a search adapter.

    Accepts one extra keyword argument, ``search``, which is stored on the
    instance as ``self.search`` (``None`` when omitted).  All other
    arguments are passed on to ``Sphinx.__init__`` unchanged.
    """

    def __init__(self, *args, **kwargs):
        # Remove our own keyword first so that the base class never sees it.
        adapter = kwargs.pop('search', None)
        self.search = adapter
        Sphinx.__init__(self, *args, **kwargs)
|
||||||
|
|
||||||
class WebSupport(object):
|
class WebSupport(object):
|
||||||
|
def init(self, srcdir='', outdir='', search=None):
    """Configure source/output directories, templating and search.

    :param srcdir: directory containing the documentation sources.
    :param outdir: directory for the built data; defaults to
                   ``<srcdir>/_build/websupport`` when empty.
    :param search: key into ``search_adapters`` naming the search adapter
                   to use, or ``None`` to leave search disabled.
    """
    self.srcdir = srcdir
    self.outdir = outdir or path.join(self.srcdir, '_build',
                                      'websupport')
    # Always define the attribute: build() reads ``self.search`` even when
    # no adapter was requested, which used to raise AttributeError.
    self.search = None
    self.init_templating()
    if search is not None:
        self.init_search(search)
|
||||||
|
|
||||||
|
def init_templating(self):
    """Create the Jinja2 environment used for web support templates.

    Templates are loaded from the ``themes/basic`` directory located next
    to the installed ``sphinx`` package; the environment is stored as
    ``self.template_env``.
    """
    import sphinx
    package_dir = path.dirname(sphinx.__file__)
    basic_theme_dir = path.join(package_dir, 'themes', 'basic')
    self.template_env = Environment(
        loader=FileSystemLoader(basic_theme_dir))
|
||||||
|
|
||||||
|
def init_search(self, search):
    """Instantiate the named search adapter and load the results template.

    :param search: key into ``search_adapters``; an unknown key raises
                   ``KeyError``.

    The adapter class is imported from ``sphinx.websupport.search.<module>``
    and constructed with ``<outdir>/search`` as its only argument.
    """
    mod, cls = search_adapters[search]
    # A non-empty fromlist makes __import__ return the submodule itself
    # rather than the top-level ``sphinx`` package.
    adapter_module = __import__('sphinx.websupport.search.' + mod,
                                None, None, [cls])
    adapter_class = getattr(adapter_module, cls)
    self.search = adapter_class(path.join(self.outdir, 'search'))
    self.results_template = self.template_env.get_template(
        'searchresults.html')
|
||||||
|
|
||||||
def build(self, **kwargs):
    """Run a ``websupport`` build of the documentation.

    :keyword doctreedir: directory for cached doctrees; defaults to
                         ``<outdir>/doctrees``.

    Any other keyword arguments are accepted but not used.
    """
    default_doctreedir = path.join(self.outdir, 'doctrees')
    doctreedir = kwargs.pop('doctreedir', default_doctreedir)
    # srcdir doubles as the configuration directory (second argument).
    app = WebSupportApp(self.srcdir, self.srcdir, self.outdir,
                        doctreedir, 'websupport', search=self.search)
    app.build()
|
||||||
|
|
||||||
def get_document(self, docname):
|
def get_document(self, docname):
|
||||||
@ -33,3 +60,12 @@ class WebSupport(object):
|
|||||||
f = open(infilename, 'rb')
|
f = open(infilename, 'rb')
|
||||||
document = pickle.load(f)
|
document = pickle.load(f)
|
||||||
return document
|
return document
|
||||||
|
|
||||||
|
def get_search_results(self, q):
    """Run the query *q* and return the search page with rendered results.

    :param q: the query string handed to the search adapter.
    :returns: the ``'search'`` document with ``'body'`` replaced by the
              rendered results template and ``'title'`` set to
              ``'Search Results'``.
    """
    # The adapter also reports hit counts; only the hit list is rendered.
    hits, _found, _displayed = self.search.query(q)
    page = self.get_document('search')
    page['body'] = self.results_template.render({
        'search_performed': True,
        'search_results': hits,
    })
    page['title'] = 'Search Results'
    return page
|
||||||
|
36
sphinx/websupport/search/__init__.py
Normal file
36
sphinx/websupport/search/__init__.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
sphinx.websupport.search
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Server side search support for the web support package.
|
||||||
|
|
||||||
|
:copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS.
|
||||||
|
:license: BSD, see LICENSE for details.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
class BaseSearch(object):
    """Interface that search adapters implement, plus a shared helper.

    Subclasses override :meth:`create_index`, :meth:`add_document` and
    :meth:`query`; the versions here raise ``NotImplementedError``.
    """

    # The byte-string type: ``str`` on Python 2, ``bytes`` on Python 3.
    # Derived this way so no newer builtin or literal syntax is needed.
    _bytes_type = type(''.encode('ascii'))

    def create_index(self, path=None):
        """Create or open the search index.

        *path* is optional so that the no-argument call used by the
        builder, and adapters that take their location in the
        constructor, stay compatible with this signature.
        """
        raise NotImplementedError

    def add_document(self, path, title, text):
        """Add one document with the given *path*, *title* and *text*."""
        raise NotImplementedError

    def query(self, q):
        """Run the query string *q* against the index."""
        raise NotImplementedError

    def extract_context(self, text, query_string):
        """Return the text surrounding the first hit of *query_string*.

        :param text: the document text; byte strings are decoded as UTF-8
                     with undecodable bytes dropped.
        :param query_string: literal string to look for (case-insensitive).
        :returns: a 3-tuple ``(before, match, after)`` holding up to 80
                  characters on either side of the match, or
                  ``('', '', '')`` when *query_string* does not occur.
        """
        # From GSOC 2009
        text = self._to_text(text)
        query_string = self._to_text(query_string)
        # Escape the query so regex metacharacters typed by the user are
        # matched literally instead of breaking or altering the pattern.
        with_context_re = (r'([\W\w]{0,80})(%s)([\W\w]{0,80})'
                           % re.escape(query_string))
        match = re.search(with_context_re, text, re.I | re.U)
        if match is None:
            return '', '', ''
        return match.groups()

    def _to_text(self, value):
        """Decode a byte string to text; return anything else unchanged."""
        if isinstance(value, self._bytes_type):
            return value.decode('utf-8', 'ignore')
        return value
|
||||||
|
|
||||||
|
# Maps an adapter name to ``(module name, class name)``; the module name is
# relative to the ``sphinx.websupport.search`` package.
search_adapters = {
    'xapian': ('xapiansearch', 'XapianSearch'),
    'whoosh': ('whooshsearch', 'WhooshSearch'),
}
|
76
sphinx/websupport/search/xapiansearch.py
Normal file
76
sphinx/websupport/search/xapiansearch.py
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
sphinx.websupport.search.xapian
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Xapian search adapter.
|
||||||
|
|
||||||
|
:copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS.
|
||||||
|
:license: BSD, see LICENSE for details.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from os import path
|
||||||
|
|
||||||
|
import xapian
|
||||||
|
|
||||||
|
from sphinx.util.osutil import ensuredir
|
||||||
|
from sphinx.websupport.search import BaseSearch
|
||||||
|
|
||||||
|
class XapianSearch(BaseSearch):
    """Search adapter that stores and queries documents in Xapian.

    Adapted from the GSOC 2009 webapp project.
    """

    # Xapian metadata constants: value slots holding per-document data.
    DOC_PATH = 0
    DOC_TITLE = 1

    # Stemmer language, kept in one place so that indexing and querying
    # always stem with the same language.
    STEM_LANGUAGE = "english"

    # Maximum number of matches fetched for a single query.
    MAX_RESULTS = 100

    def __init__(self, db_path):
        """Remember *db_path*, the directory holding the Xapian database."""
        self.db_path = db_path

    def create_index(self):
        """Create or open the writable database and set up the indexer."""
        ensuredir(self.db_path)
        self.database = xapian.WritableDatabase(self.db_path,
                                                xapian.DB_CREATE_OR_OPEN)
        self.indexer = xapian.TermGenerator()
        self.indexer.set_stemmer(xapian.Stem(self.STEM_LANGUAGE))

    def add_document(self, path, title, text):
        """Index one document inside its own transaction.

        :param path: stored in the ``DOC_PATH`` value slot.
        :param title: stored in the ``DOC_TITLE`` value slot.
        :param text: stored as the document data and indexed.

        If building or adding the document fails, the transaction is
        cancelled before the exception propagates so that the database is
        not left with an open transaction.

        NOTE(review): every call adds a new document; indexing the same
        *path* twice presumably leaves two entries -- confirm whether
        rebuilds should replace instead.
        """
        self.database.begin_transaction()
        try:
            doc = xapian.Document()
            doc.set_data(text)
            doc.add_value(self.DOC_PATH, path)
            doc.add_value(self.DOC_TITLE, title)
            self.indexer.set_document(doc)
            self.indexer.index_text(text)
            # In addition to the generated terms, post every raw
            # whitespace-separated word (all at position 1).
            for word in text.split():
                doc.add_posting(word, 1)
            self.database.add_document(doc)
        except Exception:
            self.database.cancel_transaction()
            raise
        self.database.commit_transaction()

    def query(self, q):
        """Search the database for *q*.

        :returns: a 3-tuple ``(results, results_found, results_displayed)``
                  where ``results`` is a list of ``(path, title, context)``
                  tuples, ``results_found`` is the estimated total number
                  of matches and ``results_displayed`` is the number of
                  matches actually returned.
        """
        database = xapian.Database(self.db_path)
        enquire = xapian.Enquire(database)

        qp = xapian.QueryParser()
        qp.set_stemmer(xapian.Stem(self.STEM_LANGUAGE))
        qp.set_database(database)
        qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
        query = qp.parse_query(q)

        # Find the top MAX_RESULTS results for the query.
        enquire.set_query(query)
        matches = enquire.get_mset(0, self.MAX_RESULTS)

        results_found = matches.get_matches_estimated()
        results_displayed = matches.size()

        results = []
        for m in matches:
            context = self.extract_context(m.document.get_data(), q)
            results.append((m.document.get_value(self.DOC_PATH),
                            m.document.get_value(self.DOC_TITLE),
                            ''.join(context)))

        return results, results_found, results_displayed
|
||||||
|
|
Loading…
Reference in New Issue
Block a user