Don't consider contents of source comments for the search index.

This commit is contained in:
Georg Brandl 2009-05-31 19:47:42 +02:00
parent 92eef1b2e5
commit cfd0d8c42d
3 changed files with 44 additions and 1 deletions

View File

@ -1,6 +1,8 @@
Release 0.6.2 (in development)
==============================
* Don't consider contents of source comments for the search index.
* Set the default encoding to ``utf-8-sig`` to handle files with a
UTF-8 BOM correctly.

View File

@ -12,7 +12,7 @@ import re
import cPickle as pickle
from cStringIO import StringIO
from docutils.nodes import Text, NodeVisitor
from docutils.nodes import comment, Text, NodeVisitor, SkipNode
from sphinx.util.stemmer import PorterStemmer
from sphinx.util import jsdump, rpartition
@ -83,6 +83,8 @@ class WordCollector(NodeVisitor):
self.found_words = []
def dispatch_visit(self, node):
    """Collect the words of *node* into ``self.found_words``.

    Comment nodes raise ``SkipNode`` so that the contents of source
    comments do not end up in the search index.  Only nodes whose class
    is exactly ``Text`` contribute words; every other node is ignored
    here.
    """
    node_class = node.__class__
    if node_class is comment:
        # source comments must not be indexed
        raise SkipNode
    if node_class is not Text:
        return
    words = word_re.findall(node.astext())
    self.found_words.extend(words)

39
tests/test_search.py Normal file
View File

@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
"""
test_search
~~~~~~~~~~~
Test the search index builder.
:copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from docutils import frontend, utils, nodes
from docutils.parsers import rst
from sphinx.search import IndexBuilder
def setup_module():
    """Create the module-level ``settings`` and ``parser`` objects.

    ``settings`` holds the default docutils option values for the reST
    parser component; ``parser`` is a reST parser instance.  Both are
    stored as module globals and used by ``test_wordcollector``.
    """
    global settings, parser
    settings = frontend.OptionParser(
        components=(rst.Parser,)).get_default_values()
    parser = rst.Parser()
# reST input parsed by test_wordcollector: the line starting with ".."
# is a reST comment, the other line is ordinary text.  The test expects
# "fermion" to be indexed and "boson" not to be.
FILE_CONTENTS = '''\
.. test that comments are not indexed: boson
test that non-comments are indexed: fermion
'''
def test_wordcollector():
    """Check that words inside reST comments are kept out of the index.

    Parses ``FILE_CONTENTS`` into a fresh docutils document, feeds it to
    an ``IndexBuilder`` and inspects the builder's word mapping.
    """
    document = utils.new_document('test data', settings)
    document['file'] = 'dummy'
    parser.parse(FILE_CONTENTS, document)

    builder = IndexBuilder(None)
    builder.feed('filename', 'title', document)

    indexed_words = builder._mapping
    # "boson" only occurs inside the comment, "fermion" in regular text
    assert 'boson' not in indexed_words
    assert 'fermion' in indexed_words