Fix #3045: HTML search index creator should ignore "raw" content if not html

This commit is contained in:
Takeshi KOMIYA 2016-10-17 16:06:45 +09:00
parent 78d96b4abb
commit 53ea1cb280
4 changed files with 27 additions and 9 deletions

View File

@ -53,6 +53,7 @@ Bugs fixed
* #3031: incompatibility with LaTeX package ``tocloft``
* #3003: literal blocks in footnotes are not supported by Latex
* #3047: spacing before footnote in pdf output is not coherent and allows breaks
* #3045: HTML search index creator should ignore "raw" content if not html
Testing
--------

View File

@ -196,13 +196,14 @@ class WordCollector(NodeVisitor):
if issubclass(nodetype, comment):
raise SkipNode
if issubclass(nodetype, raw):
# Some people might put content in raw HTML that should be searched,
# so we just amateurishly strip HTML tags and index the remaining
# content
nodetext = re.sub(r'(?is)<style.*?</style>', '', node.astext())
nodetext = re.sub(r'(?is)<script.*?</script>', '', nodetext)
nodetext = re.sub(r'<[^<]+?>', '', nodetext)
self.found_words.extend(self.lang.split(nodetext))
if 'html' in node.get('format', '').split():
# Some people might put content in raw HTML that should be searched,
# so we just amateurishly strip HTML tags and index the remaining
# content
nodetext = re.sub(r'(?is)<style.*?</style>', '', node.astext())
nodetext = re.sub(r'(?is)<script.*?</script>', '', nodetext)
nodetext = re.sub(r'<[^<]+?>', '', nodetext)
self.found_words.extend(self.lang.split(nodetext))
raise SkipNode
if issubclass(nodetype, Text):
self.found_words.extend(self.lang.split(node.astext()))

View File

@ -17,4 +17,12 @@ textinheading
.. toctree::
tocitem
tocitem
.. raw:: html
<span class="raw">rawword"</span>
.. raw:: latex
latex_keyword

View File

@ -114,4 +114,12 @@ def test_term_in_heading_and_section(app, status, warning):
# both documents should be a hit in the search index as a title,
# respectively text hit
assert 'textinhead:1' in searchindex
assert 'textinhead:0' in searchindex
assert 'textinhead:0' in searchindex
@with_app(testroot='search')
def test_term_in_raw_directive(app, status, warning):
    """Check which words from ``raw`` directives end up in the search index.

    Loads the ``searchindex.js`` written to the app's output directory and
    asserts on three terms: ``raw`` and ``latex_keyword`` must not be
    registered, while ``rawword`` must be.
    """
    index_path = app.outdir / 'searchindex.js'
    index = jsload(index_path)

    # 'raw' must not be picked up as a searchable term.
    assert not is_registered_term(index, 'raw')
    # 'rawword' is expected to be indexed.
    assert is_registered_term(index, 'rawword')
    # 'latex_keyword' must stay out of the index.
    assert not is_registered_term(index, 'latex_keyword')