Delete replaced documents in Xapian when docs are rebuilt

This commit is contained in:
Jacob Mason 2010-06-25 18:38:40 -05:00
parent 3babf6e968
commit d9b07f4c7d
3 changed files with 21 additions and 17 deletions

View File

@ -37,10 +37,7 @@ class WebSupportBuilder(StandaloneHTMLBuilder):
# Attach the application's search adapter (self.app.search) to this builder
# as self.indexer and prepare it for an indexing run over `docnames`.
# NOTE(review): this span has lost its indentation and appears to interleave
# the old lines of a diff (create_index / prune) with the new one
# (init_indexing); confirm which statements belong to the current version
# and which of them sit under the `if`.
def load_indexer(self, docnames):
# Every known document that is NOT in `docnames`, i.e. the entries to keep.
keep = set(self.env.all_docs) - set(docnames)
self.indexer = self.app.search
# The adapter may be None; presumably that means search is disabled
# -- TODO confirm against the application setup.
if self.indexer is not None:
self.indexer.create_index()
# delete all entries for files that will be rebuilt
self.indexer.prune(keep)
self.indexer.init_indexing()
def handle_page(self, pagename, addctx, templatename='page.html',
outfilename=None, event_arg=None):
@ -97,4 +94,4 @@ class WebSupportBuilder(StandaloneHTMLBuilder):
copyfile(self.env.doc2path(pagename), source_name)
def dump_search_index(self):
    """Close the indexing run by calling ``finish_indexing`` on the indexer.

    Nothing is written by the builder itself here; the call is delegated
    to ``self.indexer``.
    """
    self.indexer.finish_indexing()

View File

@ -12,8 +12,11 @@
import re
class BaseSearch(object):
def create_index(self, path):
    """Abstract hook for creating the search index; adapters override it.

    :raises NotImplementedError: always, in this base implementation.
    """
    # ``NotImplemented`` is the binary-operator sentinel, not an exception
    # class: raising it fails with a TypeError instead of signalling an
    # unimplemented method.
    raise NotImplementedError
def init_indexing(self):
    """Hook run before documents are indexed; the base class does nothing.

    Adapters that need setup work before indexing override this method.
    """
def finish_indexing(self):
    """Hook run after documents are indexed; the base class does nothing.

    Adapters that need cleanup work after indexing override this method.
    """
def feed(self, pagename, title, doctree):
    """Index one page from its doctree.

    The doctree is flattened with ``astext()`` and the result is passed,
    together with the page name and title, to ``add_document``.
    """
    plain_text = doctree.astext()
    self.add_document(pagename, title, plain_text)
@ -21,9 +24,6 @@ class BaseSearch(object):
def add_document(self, path, title, text):
    """Abstract hook that stores one document in the index.

    ``feed`` calls it with the page name, the page title and the page's
    plain text; adapters override it.

    :raises NotImplementedError: always, in this base implementation.
    """
    # ``NotImplemented`` is the binary-operator sentinel, not an exception
    # class: raising it fails with a TypeError instead of signalling an
    # unimplemented method.
    raise NotImplementedError
def prune(self, keep):
    """Abstract hook for dropping stale index entries; adapters override it.

    The builder passes ``keep`` as the set of documents that are not being
    rebuilt.

    :raises NotImplementedError: always, in this base implementation.
    """
    # ``NotImplemented`` is the binary-operator sentinel, not an exception
    # class: raising it fails with a TypeError instead of signalling an
    # unimplemented method.
    raise NotImplementedError
def query(self, q):
    """Run a search for ``q`` and return whatever ``handle_query`` returns.

    Before delegating, ``q`` is compiled as a case-insensitive regular
    expression and stored on ``self.context_re``, where
    ``extract_context`` reads it.
    """
    pattern = re.compile(q, re.I)
    self.context_re = pattern
    return self.handle_query(q)
@ -33,12 +33,14 @@ class BaseSearch(object):
# Return a short excerpt of `text` around the first match of the current
# query pattern, or '' when the pattern does not match.
# The pattern is read from self.context_re (set by query()); the
# `query_string` argument is not read in this span.
# NOTE(review): two `return` lines follow each other at the end of this span
# (it looks like the old and the new line of a diff). If both are at the same
# level the second one is unreachable -- confirm which one is intended.
def extract_context(self, text, query_string):
res = self.context_re.search(text)
if res is None:
return ''
# Window: begin up to 120 characters before the match and span 240 characters.
start = max(res.start() - 120, 0)
end = start + 240
# Put '...' on each side where the window cuts into the surrounding text.
context = ['...' if start > 0 else '',
text[start:end],
'...' if end < len(text) else '']
return ''.join(context)
# NOTE(review): `unicode` is a Python 2 builtin and is undefined on Python 3.
return unicode(''.join(context), errors='ignore')
search_adapters = {
'xapian': ('xapiansearch', 'XapianSearch'),

View File

@ -26,30 +26,37 @@ class XapianSearch(BaseSearch):
def __init__(self, db_path):
    """Remember where the database lives; nothing is opened here.

    :param db_path: stored unchanged on ``self.db_path``.
    """
    self.db_path = db_path
# Prepare for an indexing run: make sure the directory self.db_path exists,
# open a writable Xapian database there (flag DB_CREATE_OR_OPEN) as
# self.database, and set up a TermGenerator with an English stemmer as
# self.indexer.
# NOTE(review): two `def` lines follow each other here (create_index, then
# init_indexing) and the indentation is lost; this looks like the old and new
# header of a rename in a diff -- confirm which name is current.
def create_index(self):
def init_indexing(self):
ensuredir(self.db_path)
self.database = xapian.WritableDatabase(self.db_path,
xapian.DB_CREATE_OR_OPEN)
self.indexer = xapian.TermGenerator()
stemmer = xapian.Stem("english")
self.indexer.set_stemmer(stemmer)
def finish_indexing(self):
    """Drop the ``self.database`` attribute once indexing is done."""
    # Letting go of the database object is what ensures the db lock
    # is removed.
    del self.database
# Store one page in the Xapian database, replacing any earlier copy of it.
# `path` and `title` are saved as document values, `text` as the document
# data; the whole update runs between begin_transaction and
# commit_transaction on self.database.
# NOTE(review): no cancel_transaction is visible if a call in between raises
# -- confirm whether the transaction can be left open.
# NOTE(review): this span may interleave old and new diff lines; confirm that
# both index_text and the add_posting loop belong to the current version.
def add_document(self, path, title, text):
self.database.begin_transaction()
# sphinx_page_path is used to easily retrieve documents by path.
# The term text includes literal double quotes, and '/' becomes '_'.
sphinx_page_path = '"sphinxpagepath%s"' % path.replace('/', '_')
# Delete the old document if it exists.
self.database.delete_document(sphinx_page_path)
doc = xapian.Document()
doc.set_data(text)
doc.add_value(self.DOC_PATH, path)
doc.add_value(self.DOC_TITLE, title)
# Feed the text through the TermGenerator (self.indexer) into this document.
self.indexer.set_document(doc)
self.indexer.index_text(text)
# Tag the document with its path term so delete_document above can find it.
doc.add_term(sphinx_page_path)
# Also post every whitespace-separated word as-is, all at position 1.
for word in text.split():
doc.add_posting(word, 1)
self.database.add_document(doc)
self.database.commit_transaction()
def prune(self, keep):
    """Do nothing; ``keep`` is ignored by this adapter."""
def handle_query(self, q):
database = xapian.Database(self.db_path)
enquire = xapian.Enquire(database)
@ -63,7 +70,6 @@ class XapianSearch(BaseSearch):
# Find the top 100 results for the query.
enquire.set_query(query)
matches = enquire.get_mset(0, 100)
results_found = matches.get_matches_estimated()
results_displayed = matches.size()
@ -76,4 +82,3 @@ class XapianSearch(BaseSearch):
''.join(context) ))
return results, results_found, results_displayed