Exclude reST comment nodes from the Sphinx search index and add a regression
test (tests/test_search.py) that checks a word inside a comment is not indexed
while a word in a normal paragraph is.

NOTE(review): this patch had been flattened onto two lines; the line structure
here is rebuilt from the hunk line counts. Indentation widths and the wrap
point of the wrapped CHANGES context entry are assumed -- confirm against the
upstream tree before applying.

diff --git a/CHANGES b/CHANGES
index b018d1902..1bc081f76 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,8 @@
 Release 0.6.2 (in development)
 ==============================
 
+* Don't consider contents of source comments for the search index.
+
 * Set the default encoding to ``utf-8-sig`` to handle files with a
   UTF-8 BOM correctly.
 
diff --git a/sphinx/search.py b/sphinx/search.py
index fe20c24a5..f6e3ef3f9 100644
--- a/sphinx/search.py
+++ b/sphinx/search.py
@@ -12,7 +12,7 @@ import re
 import cPickle as pickle
 from cStringIO import StringIO
 
-from docutils.nodes import Text, NodeVisitor
+from docutils.nodes import comment, Text, NodeVisitor, SkipNode
 
 from sphinx.util.stemmer import PorterStemmer
 from sphinx.util import jsdump, rpartition
@@ -83,6 +83,8 @@ class WordCollector(NodeVisitor):
         self.found_words = []
 
     def dispatch_visit(self, node):
+        if node.__class__ is comment:
+            raise SkipNode
         if node.__class__ is Text:
             self.found_words.extend(word_re.findall(node.astext()))
 
diff --git a/tests/test_search.py b/tests/test_search.py
new file mode 100644
index 000000000..3dd043bc7
--- /dev/null
+++ b/tests/test_search.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+"""
+    test_search
+    ~~~~~~~~~~~
+
+    Test the search index builder.
+
+    :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+from docutils import frontend, utils, nodes
+from docutils.parsers import rst
+
+from sphinx.search import IndexBuilder
+
+
+def setup_module():
+    global settings, parser
+    optparser = frontend.OptionParser(components=(rst.Parser,))
+    settings = optparser.get_default_values()
+    parser = rst.Parser()
+
+
+FILE_CONTENTS = '''\
+.. test that comments are not indexed: boson
+
+test that non-comments are indexed: fermion
+'''
+
+def test_wordcollector():
+    doc = utils.new_document('test data', settings)
+    doc['file'] = 'dummy'
+    parser.parse(FILE_CONTENTS, doc)
+
+    ix = IndexBuilder(None)
+    ix.feed('filename', 'title', doc)
+    assert 'boson' not in ix._mapping
+    assert 'fermion' in ix._mapping