Fix searching and search index creation for incremental builds.
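The index is now keyed by filename rather than by an internal file id, so entries
for unchanged files can survive between builds: IndexBuilder grows load() and
prune() methods, the formats table maps each format name to a (dumper, loader)
pair, and the HTML builders reload a previously dumped index and prune every
entry that is about to be rewritten. A new -E command line option (and matching
freshenv builder argument) replaces the old "freshenv" builder option and forces
a fresh environment instead of the pickled one.

Below is a minimal, self-contained sketch of how freeze() and the new load()
invert each other. The data is invented and nothing from sphinx is imported;
the two helpers just mirror the methods in the diff:

    def freeze(titles, categories, mapping):
        # titles: filename -> title; categories & mapping: key -> set(filenames)
        fns, titlelist = list(titles.keys()), list(titles.values())
        fn2index = dict((f, i) for (i, f) in enumerate(fns))
        return [
            fns,
            dict((k, [fn2index[fn] for fn in v]) for (k, v) in categories.items()),
            titlelist,
            dict((k, [fn2index[fn] for fn in v]) for (k, v) in mapping.items()),
        ]

    def load(frozen):
        index2fn = frozen[0]
        titles = dict(zip(frozen[0], frozen[2]))
        categories = dict((k, set(index2fn[i] for i in v))
                          for (k, v) in frozen[1].items())
        mapping = dict((k, set(index2fn[i] for i in v))
                       for (k, v) in frozen[3].items())
        return titles, categories, mapping

    titles = {'intro.rst': 'Introduction', 'api.rst': 'API Reference'}
    categories = {'tutorial': set(['intro.rst', 'api.rst'])}
    mapping = {'api': set(['api.rst'])}
    assert load(freeze(titles, categories, mapping)) == (titles, categories, mapping)
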
@@ -26,11 +26,13 @@ def usage(argv, msg=None):
     print >>sys.stderr, """\
 usage: %s [options] sourcedir outdir [filenames...]"
 options: -b <builder> -- builder to use (one of %s)
          -a -- write all files; default is to only write new and changed files
-         -d <path> -- path for the cached doctree files (default outdir/.doctrees)
+         -E -- don't use a saved environment, always read all files
+         -d <path> -- path for the cached environment and doctree files
+                      (default outdir/.doctrees)
          -O <option[=value]> -- give option to to the builder (-O help for list)
          -D <setting=value> -- override a setting in sourcedir/conf.py
          -N -- do not do colored output
 modi:
 * without -a and without filenames, write new and changed files.
 * with -a, write all files.
@@ -39,7 +41,7 @@ modi:
 
 def main(argv):
     try:
-        opts, args = getopt.getopt(argv[1:], 'ab:d:O:D:N')
+        opts, args = getopt.getopt(argv[1:], 'ab:d:O:D:NE')
         srcdirname = path.abspath(args[0])
         if not path.isdir(srcdirname):
             print >>sys.stderr, 'Error: Cannot find source directory.'
@@ -65,7 +67,7 @@ def main(argv):
         return 1
 
     builder = all_files = None
-    opt_help = False
+    opt_help = freshenv = False
    options = {}
    confoverrides = {}
    doctreedir = path.join(outdirname, '.doctrees')
@@ -102,6 +104,8 @@ def main(argv):
             confoverrides[key] = val
         elif opt == '-N':
             nocolor()
+        elif opt == '-E':
+            freshenv = True
 
     if not sys.stdout.isatty() or sys.platform == 'win32':
         # Windows' cmd box doesn't understand ANSI sequences
@@ -122,7 +126,8 @@ def main(argv):
     builderobj = builderobj(srcdirname, outdirname, doctreedir, options,
                             status_stream=sys.stdout,
                             warning_stream=sys.stderr,
-                            confoverrides=confoverrides)
+                            confoverrides=confoverrides,
+                            freshenv=freshenv)
     if all_files:
         builderobj.build_all()
     elif filenames:

@@ -72,18 +72,18 @@ class Builder(object):
     Builds target formats from the reST sources.
     """
 
-    option_spec = {
-        'freshenv': 'Don\'t use a pickled environment',
-    }
+    option_spec = {}
 
     def __init__(self, srcdirname, outdirname, doctreedirname,
                  options, confoverrides=None, env=None,
-                 status_stream=None, warning_stream=None):
+                 status_stream=None, warning_stream=None,
+                 freshenv=False):
         self.srcdir = srcdirname
         self.outdir = outdirname
         self.doctreedir = doctreedirname
         if not path.isdir(doctreedirname):
             os.mkdir(doctreedirname)
+        self.freshenv = freshenv
 
         self.options = attrdict(options)
         self.validate_options()
@@ -161,7 +161,7 @@ class Builder(object):
         successfully loaded, False if a new environment had to be created."""
         if self.env:
             return
-        if not self.options.freshenv:
+        if not self.freshenv:
             try:
                 self.msg('trying to load pickled env...', nonl=True)
                 self.env = BuildEnvironment.frompickle(
@@ -223,8 +223,6 @@ class Builder(object):
             self.msg('creating index...')
             self.env.create_index(self)
 
-        self.prepare_writing()
-
         if filenames:
             # add all TOC files that may have changed
             filenames_set = set(filenames)
@@ -236,6 +234,8 @@ class Builder(object):
             # build all
             filenames_set = set(self.env.all_files)
 
+        self.prepare_writing(filenames)
+
         # write target files
         with collect_env_warnings(self):
             self.msg('writing output...')
@@ -249,7 +249,7 @@ class Builder(object):
         self.finish()
         self.msg('done!')
 
-    def prepare_writing(self):
+    def prepare_writing(self, filenames):
         raise NotImplementedError
 
     def write_file(self, filename, doctree):
@@ -265,12 +265,6 @@ class StandaloneHTMLBuilder(Builder):
     """
     name = 'html'
 
-    option_spec = Builder.option_spec
-    option_spec.update({
-        'nostyle': 'Don\'t copy style and script files',
-        'nosearchindex': 'Don\'t create a JSON search index for offline search',
-    })
-
     copysource = True
 
     def init(self):
@@ -301,12 +295,10 @@ class StandaloneHTMLBuilder(Builder):
             settings_overrides={'output_encoding': 'unicode'}
         )
 
-    def prepare_writing(self):
-        if not self.options.nosearchindex:
-            from .search import IndexBuilder
-            self.indexer = IndexBuilder()
-        else:
-            self.indexer = None
+    def prepare_writing(self, filenames):
+        from .search import IndexBuilder
+        self.indexer = IndexBuilder()
+        self.load_indexer(filenames)
         self.docwriter = HTMLWriter(self.config)
         self.docsettings = OptionParser(
             defaults=self.env.settings,
@@ -463,20 +455,19 @@ class StandaloneHTMLBuilder(Builder):
         )
         self.handle_file('search.rst', searchcontext, 'search')
 
-        if not self.options.nostyle:
-            self.msg('copying style files...')
-            # copy style files
-            styledirname = path.join(path.dirname(__file__), 'style')
-            ensuredir(path.join(self.outdir, 'style'))
-            for filename in os.listdir(styledirname):
-                if not filename.startswith('.'):
-                    shutil.copyfile(path.join(styledirname, filename),
-                                    path.join(self.outdir, 'style', filename))
-            # add pygments style file
-            f = open(path.join(self.outdir, 'style', 'pygments.css'), 'w')
-            if pygments:
-                f.write(get_stylesheet())
-            f.close()
+        # copy style files
+        self.msg('copying style files...')
+        styledirname = path.join(path.dirname(__file__), 'style')
+        ensuredir(path.join(self.outdir, 'style'))
+        for filename in os.listdir(styledirname):
+            if not filename.startswith('.'):
+                shutil.copyfile(path.join(styledirname, filename),
+                                path.join(self.outdir, 'style', filename))
+        # add pygments style file
+        f = open(path.join(self.outdir, 'style', 'pygments.css'), 'w')
+        if pygments:
+            f.write(get_stylesheet())
+        f.close()
 
         # dump the search index
         self.handle_finish()
@@ -497,6 +488,16 @@ class StandaloneHTMLBuilder(Builder):
             if path.getmtime(path.join(self.srcdir, filename)) > targetmtime:
                 yield filename
 
+
+    def load_indexer(self, filenames):
+        try:
+            with open(path.join(self.outdir, 'searchindex.json'), 'r') as f:
+                self.indexer.load(f, 'json')
+        except (IOError, OSError):
+            pass
+        # delete all entries for files that will be rebuilt
+        self.indexer.prune(set(self.env.all_files) - set(filenames))
+
     def index_file(self, filename, doctree, title):
         # only index pages with title
         if self.indexer is not None and title:
@@ -522,11 +523,10 @@ class StandaloneHTMLBuilder(Builder):
                             path.join(self.outdir, context['sourcename']))
 
     def handle_finish(self):
-        if self.indexer is not None:
-            self.msg('dumping search index...')
-            f = open(path.join(self.outdir, 'searchindex.json'), 'w')
+        self.msg('dumping search index...')
+        self.indexer.prune([self.get_target_uri(fn)[:-5] for fn in self.env.all_files])
+        with open(path.join(self.outdir, 'searchindex.json'), 'w') as f:
             self.indexer.dump(f, 'json')
-            f.close()
 
 
 class WebHTMLBuilder(StandaloneHTMLBuilder):
@@ -535,13 +535,6 @@ class WebHTMLBuilder(StandaloneHTMLBuilder):
     """
     name = 'web'
 
-    # doesn't use the standalone specific options
-    option_spec = Builder.option_spec.copy()
-    option_spec.update({
-        'nostyle': 'Don\'t copy style and script files',
-        'nosearchindex': 'Don\'t create a search index for the online search',
-    })
-
     def init(self):
         # Nothing to do here.
         pass
@@ -564,6 +557,15 @@ class WebHTMLBuilder(StandaloneHTMLBuilder):
             return source_filename[:-9] # up to /
         return source_filename[:-4] + '/'
 
+    def load_indexer(self, filenames):
+        try:
+            with open(path.join(self.outdir, 'searchindex.pickle'), 'r') as f:
+                self.indexer.load(f, 'pickle')
+        except (IOError, OSError):
+            pass
+        # delete all entries for files that will be rebuilt
+        self.indexer.prune(set(self.env.all_files) - set(filenames))
+
     def index_file(self, filename, doctree, title):
         # only index pages with title and category
         if self.indexer is not None and title:
@@ -590,11 +592,11 @@ class WebHTMLBuilder(StandaloneHTMLBuilder):
         with file(outfilename, 'wb') as fp:
             pickle.dump(self.globalcontext, fp, 2)
 
-        if self.indexer is not None:
-            self.msg('dumping search index...')
-            f = open(path.join(self.outdir, 'searchindex.pickle'), 'w')
+        self.msg('dumping search index...')
+        self.indexer.prune(self.env.all_files)
+        with open(path.join(self.outdir, 'searchindex.pickle'), 'wb') as f:
             self.indexer.dump(f, 'pickle')
-            f.close()
 
         # touch 'last build' file, used by the web application to determine
         # when to reload its environment and clear the cache
         open(path.join(self.outdir, LAST_BUILD_FILENAME), 'w').close()
@@ -611,10 +613,9 @@ class HTMLHelpBuilder(StandaloneHTMLBuilder):
     """
     name = 'htmlhelp'
 
-    option_spec = Builder.option_spec.copy()
-    option_spec.update({
+    option_spec = {
         'outname': 'Output file base name (default "pydoc")'
-    })
+    }
 
     # don't copy the reST source
     copysource = False

@@ -14,7 +14,7 @@ import pickle
 from collections import defaultdict
 from docutils.nodes import Text, NodeVisitor
 from .util.stemmer import PorterStemmer
-from .util.json import dump_json
+from .util.json import dump_json, load_json
 
 
 word_re = re.compile(r'\w+(?u)')
@@ -50,47 +50,71 @@ class IndexBuilder(object):
     passed to the `feed` method.
     """
     formats = {
-        'json': dump_json,
-        'pickle': pickle.dumps
+        'json': (dump_json, load_json),
+        'pickle': (pickle.dumps, pickle.loads),
     }
 
     def __init__(self):
-        self._filenames = {}
-        self._mapping = {}
-        self._titles = {}
-        self._categories = {}
         self._stemmer = Stemmer()
+        # filename -> title
+        self._titles = {}
+        # stemmed word -> set(filenames)
+        self._mapping = {}
+        # category -> set(filenames)
+        self._categories = {}
+
+    def load(self, stream, format):
+        """Reconstruct from frozen data."""
+        frozen = self.formats[format][1](stream.read())
+        index2fn = frozen[0]
+        self._titles = dict(zip(frozen[0], frozen[2]))
+        self._categories = dict((k, set(index2fn[i] for i in v))
+                                for (k, v) in frozen[1].iteritems())
+        self._mapping = dict((k, set(index2fn[i] for i in v))
+                             for (k, v) in frozen[3].iteritems())
 
     def dump(self, stream, format):
-        """Dump the freezed index to a stream."""
-        stream.write(self.formats[format](self.freeze()))
+        """Dump the frozen index to a stream."""
+        stream.write(self.formats[format][0](self.freeze()))
 
     def freeze(self):
         """
         Create a useable data structure. You can pass this output
         to the `SearchFrontend` to search the index.
         """
+        fns, titles = self._titles.keys(), self._titles.values()
+        fn2index = dict((f, i) for (i, f) in enumerate(fns))
         return [
-            [k for k, v in sorted(self._filenames.items(),
-                                  key=lambda x: x[1])],
-            dict(item for item in sorted(self._categories.items(),
-                                         key=lambda x: x[0])),
-            [v for k, v in sorted(self._titles.items(),
-                                  key=lambda x: x[0])],
-            dict(item for item in sorted(self._mapping.items(),
-                                         key=lambda x: x[0])),
+            fns,
+            dict((k, [fn2index[fn] for fn in v])
+                 for (k, v) in self._categories.iteritems()),
+            titles,
+            dict((k, [fn2index[fn] for fn in v])
+                 for (k, v) in self._mapping.iteritems()),
         ]
 
+    def prune(self, filenames):
+        """Remove data for all filenames not in the list."""
+        new_titles = {}
+        for filename in filenames:
+            if filename in self._titles:
+                new_titles[filename] = self._titles[filename]
+        self._titles = new_titles
+        for wordnames in self._mapping.itervalues():
+            wordnames.intersection_update(filenames)
+        for catnames in self._categories.itervalues():
+            catnames.intersection_update(filenames)
+
     def feed(self, filename, category, title, doctree):
         """Feed a doctree to the index."""
-        file_id = self._filenames.setdefault(filename, len(self._filenames))
-        self._titles[file_id] = title
+        self._titles[filename] = title
+        self._categories.setdefault(category, set()).add(filename)
 
         visitor = WordCollector(doctree)
         doctree.walk(visitor)
-        self._categories.setdefault(category, set()).add(file_id)
         for word in word_re.findall(title) + visitor.found_words:
             self._mapping.setdefault(self._stemmer.stem(word.lower()),
-                                     set()).add(file_id)
+                                     set()).add(filename)
 
 
 class SearchFrontend(object):

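For illustration, the effect of the new prune() on the in-memory tables, as a
self-contained sketch with made-up data (not code from the diff). The builders'
load_indexer() passes set(self.env.all_files) - set(filenames), i.e. everything
that is not about to be re-fed:

    titles = {'a.rst': 'A', 'b.rst': 'B'}
    mapping = {'word': set(['a.rst', 'b.rst'])}
    keep = set(['a.rst'])   # files that will NOT be rebuilt
    titles = dict((fn, titles[fn]) for fn in keep if fn in titles)
    for fns in mapping.values():
        fns.intersection_update(keep)
    assert titles == {'a.rst': 'A'}
    assert mapping == {'word': set(['a.rst'])}
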
@@ -424,5 +424,5 @@ var Search = {
 }
 
 $(document).ready(function() {
-  Documentation.Search.init();
+  Search.init();
 });

@@ -1,6 +1,6 @@
 {% extends "layout.html" %}
 {% set title = 'Search Documentation' %}
-{% block header %}
+{% block head %}
 <script type="text/javascript" src="{{ pathto('style/searchtools.js', 1) }}"></script>
 {% endblock %}
 {% block body %}
@@ -26,13 +26,13 @@
             ('tutorial', 'Python Tutorial', true),
             ('library', 'Library Reference', true),
-            ('maclib', 'Macintosh Library Modules', false),
-            ('reference', 'Language Reference', false),
             ('extending', 'Extending and Embedding', false),
             ('c-api', 'Python/C API', false),
             ('install', 'Installing Python Modules', true),
             ('distutils', 'Distributing Python Modules', true),
             ('documenting', 'Documenting Python', false),
             ('whatsnew', 'What\'s new in Python?', false),
+            ('reference', 'Language Reference', false)
         ] -%}
         <li><input type="checkbox" name="area" id="area-{{ id }}" value="{{ id
         }}"{% if checked %} checked{% endif %}>

@@ -16,7 +16,7 @@
 
 import re
 
-ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]')
+# escape \, ", control characters and everything outside ASCII
 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
 ESCAPE_DICT = {
     '\\': '\\\\',
@@ -27,8 +27,6 @@ ESCAPE_DICT = {
     '\r': '\\r',
     '\t': '\\t',
 }
-for i in range(0x20):
-    ESCAPE_DICT.setdefault(chr(i), '\\u%04x' % (i,))
 
 
 def encode_basestring_ascii(s):
@@ -70,3 +68,11 @@ def dump_json(obj, key=False):
     elif isinstance(obj, basestring):
         return encode_basestring_ascii(obj)
     raise TypeError(type(obj))
+
+
+STRING = re.compile(r'("(\\\\|\\"|[^"])*")')
+
+def load_json(s):
+    d = {'null': None, 'true': True, 'false': False}
+    s = STRING.sub(r'u\1', s)
+    return eval(s, d)
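
A hypothetical round-trip through the two helpers, assuming dump_json also
serializes dicts, lists and ints (only its string branch is visible in this
hunk). Under Python 2, the u-prefixed strings that load_json produces compare
equal to the originals:

    from sphinx.util.json import dump_json, load_json

    data = {'titles': ['Introduction', 'API Reference'], 'version': 1}
    assert load_json(dump_json(data)) == data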