mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Use html5lib to parse HTML in tests instead of XMLParser
This commit is contained in:
parent
9453aa542c
commit
76b92ad24f
1
setup.py
1
setup.py
@@ -64,6 +64,7 @@ extras_require = {
|
|||||||
'nose',
|
'nose',
|
||||||
'mock', # it would be better for 'test:python_version in "2.6,2.7"'
|
'mock', # it would be better for 'test:python_version in "2.6,2.7"'
|
||||||
'simplejson', # better: 'test:platform_python_implementation=="PyPy"'
|
'simplejson', # better: 'test:platform_python_implementation=="PyPy"'
|
||||||
|
'html5lib',
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -13,3 +13,4 @@ alabaster
|
|||||||
sphinx_rtd_theme
|
sphinx_rtd_theme
|
||||||
imagesize
|
imagesize
|
||||||
requests
|
requests
|
||||||
|
html5lib
|
||||||
|
@@ -23,7 +23,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(testroot, os.path.pardir)))
|
|||||||
# check dependencies before testing
|
# check dependencies before testing
|
||||||
print('Checking dependencies...')
|
print('Checking dependencies...')
|
||||||
for modname in ('nose', 'mock', 'six', 'docutils', 'jinja2', 'pygments',
|
for modname in ('nose', 'mock', 'six', 'docutils', 'jinja2', 'pygments',
|
||||||
'snowballstemmer', 'babel'):
|
'snowballstemmer', 'babel', 'html5lib'):
|
||||||
try:
|
try:
|
||||||
__import__(modname)
|
__import__(modname)
|
||||||
except ImportError as err:
|
except ImportError as err:
|
||||||
|
@@ -13,13 +13,16 @@ import os
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from six import PY3, iteritems
|
from six import PY3, iteritems
|
||||||
from six.moves import html_entities
|
|
||||||
|
|
||||||
from sphinx import __display_version__
|
from sphinx import __display_version__
|
||||||
from util import remove_unicode_literals, gen_with_app, with_app
|
from util import remove_unicode_literals, gen_with_app, with_app
|
||||||
from etree13 import ElementTree as ET
|
from etree13 import ElementTree
|
||||||
|
from html5lib import getTreeBuilder, HTMLParser
|
||||||
|
|
||||||
|
|
||||||
|
TREE_BUILDER = getTreeBuilder('etree', implementation=ElementTree)
|
||||||
|
HTML_PARSER = HTMLParser(TREE_BUILDER, namespaceHTMLElements=False)
|
||||||
|
|
||||||
ENV_WARNINGS = """\
|
ENV_WARNINGS = """\
|
||||||
(%(root)s/autodoc_fodder.py:docstring of autodoc_fodder\\.MarkupError:2: \
|
(%(root)s/autodoc_fodder.py:docstring of autodoc_fodder\\.MarkupError:2: \
|
||||||
WARNING: Explicit markup ends without a blank line; unexpected \
|
WARNING: Explicit markup ends without a blank line; unexpected \
|
||||||
@@ -174,7 +177,7 @@ HTML_XPATH = {
|
|||||||
# ``seealso`` directive
|
# ``seealso`` directive
|
||||||
(".//div/p[@class='first admonition-title']", 'See also'),
|
(".//div/p[@class='first admonition-title']", 'See also'),
|
||||||
# a ``hlist`` directive
|
# a ``hlist`` directive
|
||||||
(".//table[@class='hlist']/tr/td/ul/li", '^This$'),
|
(".//table[@class='hlist']/tbody/tr/td/ul/li", '^This$'),
|
||||||
# a ``centered`` directive
|
# a ``centered`` directive
|
||||||
(".//p[@class='centered']/strong", 'LICENSE'),
|
(".//p[@class='centered']/strong", 'LICENSE'),
|
||||||
# a glossary
|
# a glossary
|
||||||
@@ -319,21 +322,6 @@ HTML_XPATH = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class NslessParser(ET.XMLParser):
|
|
||||||
"""XMLParser that throws away namespaces in tag names."""
|
|
||||||
|
|
||||||
def _fixname(self, key):
|
|
||||||
try:
|
|
||||||
return self._names[key]
|
|
||||||
except KeyError:
|
|
||||||
name = key
|
|
||||||
br = name.find('}')
|
|
||||||
if br > 0:
|
|
||||||
name = name[br+1:]
|
|
||||||
self._names[key] = name = self._fixtext(name)
|
|
||||||
return name
|
|
||||||
|
|
||||||
|
|
||||||
def check_xpath(etree, fname, path, check, be_found=True):
|
def check_xpath(etree, fname, path, check, be_found=True):
|
||||||
nodes = list(etree.findall(path))
|
nodes = list(etree.findall(path))
|
||||||
if check is None:
|
if check is None:
|
||||||
@@ -405,10 +393,8 @@ def test_html_output(app, status, warning):
|
|||||||
'--- Got:\n' + html_warnings
|
'--- Got:\n' + html_warnings
|
||||||
|
|
||||||
for fname, paths in iteritems(HTML_XPATH):
|
for fname, paths in iteritems(HTML_XPATH):
|
||||||
parser = NslessParser()
|
|
||||||
parser.entity.update(html_entities.entitydefs)
|
|
||||||
with (app.outdir / fname).open('rb') as fp:
|
with (app.outdir / fname).open('rb') as fp:
|
||||||
etree = ET.parse(fp, parser)
|
etree = HTML_PARSER.parse(fp)
|
||||||
for path, check in paths:
|
for path, check in paths:
|
||||||
yield check_xpath, etree, fname, path, check
|
yield check_xpath, etree, fname, path, check
|
||||||
|
|
||||||
@@ -455,10 +441,8 @@ def test_tocdepth(app, status, warning):
|
|||||||
}
|
}
|
||||||
|
|
||||||
for fname, paths in iteritems(expects):
|
for fname, paths in iteritems(expects):
|
||||||
parser = NslessParser()
|
|
||||||
parser.entity.update(html_entities.entitydefs)
|
|
||||||
with (app.outdir / fname).open('rb') as fp:
|
with (app.outdir / fname).open('rb') as fp:
|
||||||
etree = ET.parse(fp, parser)
|
etree = HTML_PARSER.parse(fp)
|
||||||
|
|
||||||
for xpath, check, be_found in paths:
|
for xpath, check, be_found in paths:
|
||||||
yield check_xpath, etree, fname, xpath, check, be_found
|
yield check_xpath, etree, fname, xpath, check, be_found
|
||||||
@@ -497,10 +481,8 @@ def test_tocdepth_singlehtml(app, status, warning):
|
|||||||
}
|
}
|
||||||
|
|
||||||
for fname, paths in iteritems(expects):
|
for fname, paths in iteritems(expects):
|
||||||
parser = NslessParser()
|
|
||||||
parser.entity.update(html_entities.entitydefs)
|
|
||||||
with (app.outdir / fname).open('rb') as fp:
|
with (app.outdir / fname).open('rb') as fp:
|
||||||
etree = ET.parse(fp, parser)
|
etree = HTML_PARSER.parse(fp)
|
||||||
|
|
||||||
for xpath, check, be_found in paths:
|
for xpath, check, be_found in paths:
|
||||||
yield check_xpath, etree, fname, xpath, check, be_found
|
yield check_xpath, etree, fname, xpath, check, be_found
|
||||||
@@ -553,10 +535,8 @@ def test_numfig_disabled(app, status, warning):
|
|||||||
}
|
}
|
||||||
|
|
||||||
for fname, paths in iteritems(expects):
|
for fname, paths in iteritems(expects):
|
||||||
parser = NslessParser()
|
|
||||||
parser.entity.update(html_entities.entitydefs)
|
|
||||||
with (app.outdir / fname).open('rb') as fp:
|
with (app.outdir / fname).open('rb') as fp:
|
||||||
etree = ET.parse(fp, parser)
|
etree = HTML_PARSER.parse(fp)
|
||||||
|
|
||||||
for xpath, check, be_found in paths:
|
for xpath, check, be_found in paths:
|
||||||
yield check_xpath, etree, fname, xpath, check, be_found
|
yield check_xpath, etree, fname, xpath, check, be_found
|
||||||
@@ -654,10 +634,8 @@ def test_numfig_without_numbered_toctree(app, status, warning):
|
|||||||
}
|
}
|
||||||
|
|
||||||
for fname, paths in iteritems(expects):
|
for fname, paths in iteritems(expects):
|
||||||
parser = NslessParser()
|
|
||||||
parser.entity.update(html_entities.entitydefs)
|
|
||||||
with (app.outdir / fname).open('rb') as fp:
|
with (app.outdir / fname).open('rb') as fp:
|
||||||
etree = ET.parse(fp, parser)
|
etree = HTML_PARSER.parse(fp)
|
||||||
|
|
||||||
for xpath, check, be_found in paths:
|
for xpath, check, be_found in paths:
|
||||||
yield check_xpath, etree, fname, xpath, check, be_found
|
yield check_xpath, etree, fname, xpath, check, be_found
|
||||||
@@ -751,10 +729,8 @@ def test_numfig_with_numbered_toctree(app, status, warning):
|
|||||||
}
|
}
|
||||||
|
|
||||||
for fname, paths in iteritems(expects):
|
for fname, paths in iteritems(expects):
|
||||||
parser = NslessParser()
|
|
||||||
parser.entity.update(html_entities.entitydefs)
|
|
||||||
with (app.outdir / fname).open('rb') as fp:
|
with (app.outdir / fname).open('rb') as fp:
|
||||||
etree = ET.parse(fp, parser)
|
etree = HTML_PARSER.parse(fp)
|
||||||
|
|
||||||
for xpath, check, be_found in paths:
|
for xpath, check, be_found in paths:
|
||||||
yield check_xpath, etree, fname, xpath, check, be_found
|
yield check_xpath, etree, fname, xpath, check, be_found
|
||||||
@@ -851,10 +827,8 @@ def test_numfig_with_prefix(app, status, warning):
|
|||||||
}
|
}
|
||||||
|
|
||||||
for fname, paths in iteritems(expects):
|
for fname, paths in iteritems(expects):
|
||||||
parser = NslessParser()
|
|
||||||
parser.entity.update(html_entities.entitydefs)
|
|
||||||
with (app.outdir / fname).open('rb') as fp:
|
with (app.outdir / fname).open('rb') as fp:
|
||||||
etree = ET.parse(fp, parser)
|
etree = HTML_PARSER.parse(fp)
|
||||||
|
|
||||||
for xpath, check, be_found in paths:
|
for xpath, check, be_found in paths:
|
||||||
yield check_xpath, etree, fname, xpath, check, be_found
|
yield check_xpath, etree, fname, xpath, check, be_found
|
||||||
@@ -948,10 +922,8 @@ def test_numfig_with_secnum_depth(app, status, warning):
|
|||||||
}
|
}
|
||||||
|
|
||||||
for fname, paths in iteritems(expects):
|
for fname, paths in iteritems(expects):
|
||||||
parser = NslessParser()
|
|
||||||
parser.entity.update(html_entities.entitydefs)
|
|
||||||
with (app.outdir / fname).open('rb') as fp:
|
with (app.outdir / fname).open('rb') as fp:
|
||||||
etree = ET.parse(fp, parser)
|
etree = HTML_PARSER.parse(fp)
|
||||||
|
|
||||||
for xpath, check, be_found in paths:
|
for xpath, check, be_found in paths:
|
||||||
yield check_xpath, etree, fname, xpath, check, be_found
|
yield check_xpath, etree, fname, xpath, check, be_found
|
||||||
@@ -980,10 +952,8 @@ def test_enumerable_node(app, status, warning):
|
|||||||
}
|
}
|
||||||
|
|
||||||
for fname, paths in iteritems(expects):
|
for fname, paths in iteritems(expects):
|
||||||
parser = NslessParser()
|
|
||||||
parser.entity.update(html_entities.entitydefs)
|
|
||||||
with (app.outdir / fname).open('rb') as fp:
|
with (app.outdir / fname).open('rb') as fp:
|
||||||
etree = ET.parse(fp, parser)
|
etree = HTML_PARSER.parse(fp)
|
||||||
|
|
||||||
for xpath, check, be_found in paths:
|
for xpath, check, be_found in paths:
|
||||||
yield check_xpath, etree, fname, xpath, check, be_found
|
yield check_xpath, etree, fname, xpath, check, be_found
|
||||||
|
Loading…
Reference in New Issue
Block a user