mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Closes #1308: Strip HTML tags from the content of "raw" nodes before feeding it to the search indexer.
This commit is contained in:
3
CHANGES
3
CHANGES
@@ -88,6 +88,9 @@ Bugs fixed
|
|||||||
* #1299: Make behavior of the :rst:dir:`math` directive more consistent and
|
* #1299: Make behavior of the :rst:dir:`math` directive more consistent and
|
||||||
avoid producing empty environments in LaTeX output.
|
avoid producing empty environments in LaTeX output.
|
||||||
|
|
||||||
|
* #1308: Strip HTML tags from the content of "raw" nodes before feeding it
|
||||||
|
to the search indexer.
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
|
|||||||
@@ -10,10 +10,9 @@
|
|||||||
"""
|
"""
|
||||||
from __future__ import with_statement
|
from __future__ import with_statement
|
||||||
import re
|
import re
|
||||||
import itertools
|
|
||||||
import cPickle as pickle
|
import cPickle as pickle
|
||||||
|
|
||||||
from docutils.nodes import comment, title, Text, NodeVisitor, SkipNode
|
from docutils.nodes import raw, comment, title, Text, NodeVisitor, SkipNode
|
||||||
|
|
||||||
from sphinx.util import jsdump, rpartition
|
from sphinx.util import jsdump, rpartition
|
||||||
|
|
||||||
@@ -146,7 +145,16 @@ class WordCollector(NodeVisitor):
|
|||||||
def dispatch_visit(self, node):
|
def dispatch_visit(self, node):
|
||||||
if node.__class__ is comment:
|
if node.__class__ is comment:
|
||||||
raise SkipNode
|
raise SkipNode
|
||||||
elif node.__class__ is Text:
|
if node.__class__ is raw:
|
||||||
|
# Some people might put content in raw HTML that should be searched,
|
||||||
|
# so we just amateurishly strip HTML tags and index the remaining
|
||||||
|
# content
|
||||||
|
nodetext = re.sub(r'(?is)<style.*?</style>', '', node.astext())
|
||||||
|
nodetext = re.sub(r'(?is)<script.*?</script>', '', nodetext)
|
||||||
|
nodetext = re.sub(r'<[^<]+?>', '', nodetext)
|
||||||
|
self.found_words.extend(self.lang.split(nodetext))
|
||||||
|
raise SkipNode
|
||||||
|
if node.__class__ is Text:
|
||||||
self.found_words.extend(self.lang.split(node.astext()))
|
self.found_words.extend(self.lang.split(node.astext()))
|
||||||
elif node.__class__ is title:
|
elif node.__class__ is title:
|
||||||
self.found_title_words.extend(self.lang.split(node.astext()))
|
self.found_title_words.extend(self.lang.split(node.astext()))
|
||||||
|
|||||||
Reference in New Issue
Block a user