Use html5lib to parse HTML in tests instead of XMLParser

This commit is contained in:
Avram Lubkin 2016-05-28 02:51:21 -04:00
parent 9453aa542c
commit 76b92ad24f
4 changed files with 18 additions and 46 deletions

View File

@ -64,6 +64,7 @@ extras_require = {
'nose', 'nose',
'mock', # it would be better for 'test:python_version in "2.6,2.7"' 'mock', # it would be better for 'test:python_version in "2.6,2.7"'
'simplejson', # better: 'test:platform_python_implementation=="PyPy"' 'simplejson', # better: 'test:platform_python_implementation=="PyPy"'
'html5lib',
], ],
} }

View File

@ -13,3 +13,4 @@ alabaster
sphinx_rtd_theme sphinx_rtd_theme
imagesize imagesize
requests requests
html5lib

View File

@ -23,7 +23,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(testroot, os.path.pardir)))
# check dependencies before testing # check dependencies before testing
print('Checking dependencies...') print('Checking dependencies...')
for modname in ('nose', 'mock', 'six', 'docutils', 'jinja2', 'pygments', for modname in ('nose', 'mock', 'six', 'docutils', 'jinja2', 'pygments',
'snowballstemmer', 'babel'): 'snowballstemmer', 'babel', 'html5lib'):
try: try:
__import__(modname) __import__(modname)
except ImportError as err: except ImportError as err:

View File

@ -13,13 +13,16 @@ import os
import re import re
from six import PY3, iteritems from six import PY3, iteritems
from six.moves import html_entities
from sphinx import __display_version__ from sphinx import __display_version__
from util import remove_unicode_literals, gen_with_app, with_app from util import remove_unicode_literals, gen_with_app, with_app
from etree13 import ElementTree as ET from etree13 import ElementTree
from html5lib import getTreeBuilder, HTMLParser
TREE_BUILDER = getTreeBuilder('etree', implementation=ElementTree)
HTML_PARSER = HTMLParser(TREE_BUILDER, namespaceHTMLElements=False)
ENV_WARNINGS = """\ ENV_WARNINGS = """\
(%(root)s/autodoc_fodder.py:docstring of autodoc_fodder\\.MarkupError:2: \ (%(root)s/autodoc_fodder.py:docstring of autodoc_fodder\\.MarkupError:2: \
WARNING: Explicit markup ends without a blank line; unexpected \ WARNING: Explicit markup ends without a blank line; unexpected \
@ -174,7 +177,7 @@ HTML_XPATH = {
# ``seealso`` directive # ``seealso`` directive
(".//div/p[@class='first admonition-title']", 'See also'), (".//div/p[@class='first admonition-title']", 'See also'),
# a ``hlist`` directive # a ``hlist`` directive
(".//table[@class='hlist']/tr/td/ul/li", '^This$'), (".//table[@class='hlist']/tbody/tr/td/ul/li", '^This$'),
# a ``centered`` directive # a ``centered`` directive
(".//p[@class='centered']/strong", 'LICENSE'), (".//p[@class='centered']/strong", 'LICENSE'),
# a glossary # a glossary
@ -319,21 +322,6 @@ HTML_XPATH = {
} }
class NslessParser(ET.XMLParser):
    """XMLParser variant that discards the namespace part of tag names."""

    def _fixname(self, key):
        """Return the namespace-free, text-fixed form of ``key``.

        Results are memoised in ``self._names`` so each distinct key is
        converted only once.
        """
        try:
            return self._names[key]
        except KeyError:
            pass

        # Anything up to and including a '}' found past position 0 is
        # dropped; keys without such a brace are used unchanged.
        brace_pos = key.find('}')
        local_name = key[brace_pos + 1:] if brace_pos > 0 else key

        fixed = self._fixtext(local_name)
        self._names[key] = fixed
        return fixed
def check_xpath(etree, fname, path, check, be_found=True): def check_xpath(etree, fname, path, check, be_found=True):
nodes = list(etree.findall(path)) nodes = list(etree.findall(path))
if check is None: if check is None:
@ -405,10 +393,8 @@ def test_html_output(app, status, warning):
'--- Got:\n' + html_warnings '--- Got:\n' + html_warnings
for fname, paths in iteritems(HTML_XPATH): for fname, paths in iteritems(HTML_XPATH):
parser = NslessParser()
parser.entity.update(html_entities.entitydefs)
with (app.outdir / fname).open('rb') as fp: with (app.outdir / fname).open('rb') as fp:
etree = ET.parse(fp, parser) etree = HTML_PARSER.parse(fp)
for path, check in paths: for path, check in paths:
yield check_xpath, etree, fname, path, check yield check_xpath, etree, fname, path, check
@ -455,10 +441,8 @@ def test_tocdepth(app, status, warning):
} }
for fname, paths in iteritems(expects): for fname, paths in iteritems(expects):
parser = NslessParser()
parser.entity.update(html_entities.entitydefs)
with (app.outdir / fname).open('rb') as fp: with (app.outdir / fname).open('rb') as fp:
etree = ET.parse(fp, parser) etree = HTML_PARSER.parse(fp)
for xpath, check, be_found in paths: for xpath, check, be_found in paths:
yield check_xpath, etree, fname, xpath, check, be_found yield check_xpath, etree, fname, xpath, check, be_found
@ -497,10 +481,8 @@ def test_tocdepth_singlehtml(app, status, warning):
} }
for fname, paths in iteritems(expects): for fname, paths in iteritems(expects):
parser = NslessParser()
parser.entity.update(html_entities.entitydefs)
with (app.outdir / fname).open('rb') as fp: with (app.outdir / fname).open('rb') as fp:
etree = ET.parse(fp, parser) etree = HTML_PARSER.parse(fp)
for xpath, check, be_found in paths: for xpath, check, be_found in paths:
yield check_xpath, etree, fname, xpath, check, be_found yield check_xpath, etree, fname, xpath, check, be_found
@ -553,10 +535,8 @@ def test_numfig_disabled(app, status, warning):
} }
for fname, paths in iteritems(expects): for fname, paths in iteritems(expects):
parser = NslessParser()
parser.entity.update(html_entities.entitydefs)
with (app.outdir / fname).open('rb') as fp: with (app.outdir / fname).open('rb') as fp:
etree = ET.parse(fp, parser) etree = HTML_PARSER.parse(fp)
for xpath, check, be_found in paths: for xpath, check, be_found in paths:
yield check_xpath, etree, fname, xpath, check, be_found yield check_xpath, etree, fname, xpath, check, be_found
@ -654,10 +634,8 @@ def test_numfig_without_numbered_toctree(app, status, warning):
} }
for fname, paths in iteritems(expects): for fname, paths in iteritems(expects):
parser = NslessParser()
parser.entity.update(html_entities.entitydefs)
with (app.outdir / fname).open('rb') as fp: with (app.outdir / fname).open('rb') as fp:
etree = ET.parse(fp, parser) etree = HTML_PARSER.parse(fp)
for xpath, check, be_found in paths: for xpath, check, be_found in paths:
yield check_xpath, etree, fname, xpath, check, be_found yield check_xpath, etree, fname, xpath, check, be_found
@ -751,10 +729,8 @@ def test_numfig_with_numbered_toctree(app, status, warning):
} }
for fname, paths in iteritems(expects): for fname, paths in iteritems(expects):
parser = NslessParser()
parser.entity.update(html_entities.entitydefs)
with (app.outdir / fname).open('rb') as fp: with (app.outdir / fname).open('rb') as fp:
etree = ET.parse(fp, parser) etree = HTML_PARSER.parse(fp)
for xpath, check, be_found in paths: for xpath, check, be_found in paths:
yield check_xpath, etree, fname, xpath, check, be_found yield check_xpath, etree, fname, xpath, check, be_found
@ -851,10 +827,8 @@ def test_numfig_with_prefix(app, status, warning):
} }
for fname, paths in iteritems(expects): for fname, paths in iteritems(expects):
parser = NslessParser()
parser.entity.update(html_entities.entitydefs)
with (app.outdir / fname).open('rb') as fp: with (app.outdir / fname).open('rb') as fp:
etree = ET.parse(fp, parser) etree = HTML_PARSER.parse(fp)
for xpath, check, be_found in paths: for xpath, check, be_found in paths:
yield check_xpath, etree, fname, xpath, check, be_found yield check_xpath, etree, fname, xpath, check, be_found
@ -948,10 +922,8 @@ def test_numfig_with_secnum_depth(app, status, warning):
} }
for fname, paths in iteritems(expects): for fname, paths in iteritems(expects):
parser = NslessParser()
parser.entity.update(html_entities.entitydefs)
with (app.outdir / fname).open('rb') as fp: with (app.outdir / fname).open('rb') as fp:
etree = ET.parse(fp, parser) etree = HTML_PARSER.parse(fp)
for xpath, check, be_found in paths: for xpath, check, be_found in paths:
yield check_xpath, etree, fname, xpath, check, be_found yield check_xpath, etree, fname, xpath, check, be_found
@ -980,10 +952,8 @@ def test_enumerable_node(app, status, warning):
} }
for fname, paths in iteritems(expects): for fname, paths in iteritems(expects):
parser = NslessParser()
parser.entity.update(html_entities.entitydefs)
with (app.outdir / fname).open('rb') as fp: with (app.outdir / fname).open('rb') as fp:
etree = ET.parse(fp, parser) etree = HTML_PARSER.parse(fp)
for xpath, check, be_found in paths: for xpath, check, be_found in paths:
yield check_xpath, etree, fname, xpath, check, be_found yield check_xpath, etree, fname, xpath, check, be_found