Merge pull request #6023 from tk0miya/refactor_htmlhelp

Refactor htmlhelp builder
2025-02-25 18:55:22 -06:00 · 2019-02-10 20:50:10 +09:00
parent dce1ccff67 7415f64eab
commit 7aaae7760a
10 changed files with 313 additions and 124 deletions
--- a/sphinx/builders/htmlhelp.py
+++ b/sphinx/builders/htmlhelp.py
@@ -17,13 +17,17 @@ from os import path
 from docutils import nodes

 from sphinx import addnodes
+from sphinx import package_dir
 from sphinx.builders.html import StandaloneHTMLBuilder
 from sphinx.deprecation import RemovedInSphinx40Warning
 from sphinx.environment.adapters.indexentries import IndexEntries
 from sphinx.locale import __
 from sphinx.util import logging
+from sphinx.util import progress_message
+from sphinx.util.fileutil import copy_asset_file
 from sphinx.util.nodes import NodeMatcher
-from sphinx.util.osutil import make_filename_from_project
+from sphinx.util.osutil import make_filename_from_project, relpath
+from sphinx.util.template import SphinxRenderer

 if False:
    # For type annotation
@@ -34,6 +38,8 @@ if False:

 logger = logging.getLogger(__name__)

+template_dir = path.join(package_dir, 'templates', 'htmlhelp')
+

 # Project file (*.hhp) template.  'outname' is the file basename (like
 # the pythlp in pythlp.hhp); 'version' is the doc version number (like
@@ -69,46 +75,6 @@ logger = logging.getLogger(__name__)
 #    0x200000   TOC Next
 #    0x400000   TOC Prev

-project_template = '''\
-[OPTIONS]
-Binary TOC=No
-Binary Index=No
-Compiled file=%(outname)s.chm
-Contents file=%(outname)s.hhc
-Default Window=%(outname)s
-Default topic=%(master_doc)s
-Display compile progress=No
-Full text search stop list file=%(outname)s.stp
-Full-text search=Yes
-Index file=%(outname)s.hhk
-Language=%(lcid)#x
-Title=%(title)s
-
-[WINDOWS]
-%(outname)s="%(title)s","%(outname)s.hhc","%(outname)s.hhk",\
-"%(master_doc)s","%(master_doc)s",,,,,0x63520,220,0x10384e,[0,0,1024,768],,,,,,,0
-
-[FILES]
-'''
-
-contents_header = '''\
-<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
-<HTML>
-<HEAD>
-<meta name="GENERATOR" content="Microsoft&reg; HTML Help Workshop 4.1">
-<!-- Sitemap 1.0 -->
-</HEAD><BODY>
-<OBJECT type="text/site properties">
-        <param name="Window Styles" value="0x801227">
-        <param name="ImageType" value="Folder">
-</OBJECT>
-<UL>
-'''
-
-contents_footer = '''\
-</UL></BODY></HTML>
-'''
-
 object_sitemap = '''\
 <OBJECT type="text/sitemap">
    <param name="Name" value="%s">
@@ -116,24 +82,6 @@ object_sitemap = '''\
 </OBJECT>
 '''

-# List of words the full text search facility shouldn't index.  This
-# becomes file outname.stp.  Note that this list must be pretty small!
-# Different versions of the MS docs claim the file has a maximum size of
-# 256 or 512 bytes (including \r\n at the end of each line).
-# Note that "and", "or", "not" and "near" are operators in the search
-# language, so no point indexing them even if we wanted to.
-stopwords = """
-a  and  are  as  at
-be  but  by
-for
-if  in  into  is  it
-near  no  not
-of  on  or
-such
-that  the  their  then  there  these  they  this  to
-was  will  with
-""".split()
-
 # The following list includes only languages supported by Sphinx. See
 # https://docs.microsoft.com/en-us/previous-versions/windows/embedded/ms930130(v=msdn.10)
 # for more.
@@ -185,6 +133,63 @@ def chm_htmlescape(s, quote=True):
    return s


+class ToCTreeVisitor(nodes.NodeVisitor):
+    def __init__(self, document):
+        # type: (nodes.document) -> None
+        super().__init__(document)
+        self.body = []  # type: List[str]
+        self.depth = 0
+
+    def append(self, text):
+        # type: (str) -> None
+        indent = '  ' * (self.depth - 1)
+        self.body.append(indent + text)
+
+    def astext(self):
+        # type: () -> str
+        return '\n'.join(self.body)
+
+    def unknown_visit(self, node):
+        # type: (nodes.Node) -> None
+        pass
+
+    def unknown_departure(self, node):
+        # type: (nodes.Node) -> None
+        pass
+
+    def visit_bullet_list(self, node):
+        # type: (nodes.Element) -> None
+        if self.depth > 0:
+            self.append('<UL>')
+
+        self.depth += 1
+
+    def depart_bullet_list(self, node):
+        # type: (nodes.Element) -> None
+        self.depth -= 1
+        if self.depth > 0:
+            self.append('</UL>')
+
+    def visit_list_item(self, node):
+        # type: (nodes.Element) -> None
+        self.append('<LI>')
+        self.depth += 1
+
+    def depart_list_item(self, node):
+        # type: (nodes.Element) -> None
+        self.depth -= 1
+        self.append('</LI>')
+
+    def visit_reference(self, node):
+        # type: (nodes.Element) -> None
+        title = chm_htmlescape(node.astext(), True)
+        self.append('<OBJECT type="text/sitemap">')
+        self.append('  <PARAM name="Name" value="%s" />' % title)
+        self.append('  <PARAM name="Local" value="%s" />' % node['refuri'])
+        self.append('</OBJECT>')
+        raise nodes.SkipNode
+
+
 class HTMLHelpBuilder(StandaloneHTMLBuilder):
    """
    Builder that also outputs Windows HTML help project, contents and
@@ -234,6 +239,9 @@ class HTMLHelpBuilder(StandaloneHTMLBuilder):

    def handle_finish(self):
        # type: () -> None
+        self.copy_stopword_list()
+        self.build_project_file()
+        self.build_toc_file()
        self.build_hhx(self.outdir, self.config.htmlhelp_basename)

    def write_doc(self, docname, doctree):
@@ -245,78 +253,80 @@ class HTMLHelpBuilder(StandaloneHTMLBuilder):

        super().write_doc(docname, doctree)

-    def build_hhx(self, outdir, outname):
-        # type: (str, str) -> None
-        logger.info(__('dumping stopword list...'))
-        filename = path.join(outdir, outname + '.stp')
-        with open(filename, 'w', encoding=self.encoding, errors='xmlcharrefreplace') as f:
-            for word in sorted(stopwords):
-                print(word, file=f)
+    def render(self, name, context):
+        # type: (str, Dict) -> str
+        template = SphinxRenderer(template_dir)
+        return template.render(name, context)

-        logger.info(__('writing project file...'))
-        filename = path.join(outdir, outname + '.hhp')
+    @progress_message(__('copying stopword list'))
+    def copy_stopword_list(self):
+        # type: () -> None
+        """Copy a stopword list (.stp) to outdir.
+
+        The stopword list contains a list of words the full text search facility
+        shouldn't index.  Note that this list must be pretty small.  Different
+        versions of the MS docs claim the file has a maximum size of 256 or 512
+        bytes (including \\r\\n at the end of each line).  Note that "and", "or",
+        "not" and "near" are operators in the search language, so no point
+        indexing them even if we wanted to.
+        """
+        template = path.join(template_dir, 'project.stp')
+        filename = path.join(self.outdir, self.config.htmlhelp_basename + '.stp')
+        copy_asset_file(template, filename)
+
+    @progress_message(__('writing project file'))
+    def build_project_file(self):
+        # type: () -> None
+        """Create a project file (.hhp) on outdir."""
+        # scan project files
+        project_files = []  # type: List[str]
+        for root, dirs, files in os.walk(self.outdir):
+            dirs.sort()
+            files.sort()
+            in_staticdir = root.startswith(path.join(self.outdir, '_static'))
+            for fn in sorted(files):
+                if (in_staticdir and not fn.endswith('.js')) or fn.endswith('.html'):
+                    fn = relpath(path.join(root, fn), self.outdir)
+                    project_files.append(fn.replace(os.sep, '\\'))
+
+        filename = path.join(self.outdir, self.config.htmlhelp_basename + '.hhp')
        with open(filename, 'w', encoding=self.encoding, errors='xmlcharrefreplace') as f:
-            f.write(project_template % {
-                'outname': outname,
+            context = {
+                'outname': self.config.htmlhelp_basename,
                'title': self.config.html_title,
                'version': self.config.version,
                'project': self.config.project,
                'lcid': self.lcid,
-                'master_doc': self.config.master_doc + self.out_suffix
-            })
-            if not outdir.endswith(os.sep):
-                outdir += os.sep
-            olen = len(outdir)
-            for root, dirs, files in os.walk(outdir):
-                dirs.sort()
-                files.sort()
-                staticdir = root.startswith(path.join(outdir, '_static'))
-                for fn in sorted(files):
-                    if (staticdir and not fn.endswith('.js')) or \
-                       fn.endswith('.html'):
-                        print(path.join(root, fn)[olen:].replace(os.sep, '\\'),
-                              file=f)
+                'master_doc': self.config.master_doc + self.out_suffix,
+                'files': project_files,
+            }
+            body = self.render('project.hhp', context)
+            f.write(body)

-        logger.info(__('writing TOC file...'))
-        filename = path.join(outdir, outname + '.hhc')
+    @progress_message(__('writing TOC file'))
+    def build_toc_file(self):
+        # type: () -> None
+        """Create a ToC file (.hhc) on outdir."""
+        filename = path.join(self.outdir, self.config.htmlhelp_basename + '.hhc')
        with open(filename, 'w', encoding=self.encoding, errors='xmlcharrefreplace') as f:
-            f.write(contents_header)
-            # special books
-            f.write('<LI> ' + object_sitemap % (self.config.html_short_title,
-                                                self.config.master_doc + self.out_suffix))
-            for indexname, indexcls, content, collapse in self.domain_indices:
-                f.write('<LI> ' + object_sitemap % (indexcls.localname,
-                                                    '%s.html' % indexname))
-            # the TOC
-            tocdoc = self.env.get_and_resolve_doctree(
-                self.config.master_doc, self, prune_toctrees=False)
-
-            def write_toc(node, ullevel=0):
-                # type: (nodes.Node, int) -> None
-                if isinstance(node, nodes.list_item):
-                    f.write('<LI> ')
-                    for subnode in node:
-                        write_toc(subnode, ullevel)
-                elif isinstance(node, nodes.reference):
-                    link = node['refuri']
-                    title = chm_htmlescape(node.astext(), True)
-                    f.write(object_sitemap % (title, link))
-                elif isinstance(node, nodes.bullet_list):
-                    if ullevel != 0:
-                        f.write('<UL>\n')
-                    for subnode in node:
-                        write_toc(subnode, ullevel + 1)
-                    if ullevel != 0:
-                        f.write('</UL>\n')
-                elif isinstance(node, addnodes.compact_paragraph):
-                    for subnode in node:
-                        write_toc(subnode, ullevel)
-
+            toctree = self.env.get_and_resolve_doctree(self.config.master_doc, self,
+                                                       prune_toctrees=False)
+            visitor = ToCTreeVisitor(toctree)
            matcher = NodeMatcher(addnodes.compact_paragraph, toctree=True)
-            for node in tocdoc.traverse(matcher):  # type: addnodes.compact_paragraph
-                write_toc(node)
-            f.write(contents_footer)
+            for node in toctree.traverse(matcher):  # type: addnodes.compact_paragraph
+                node.walkabout(visitor)

+            context = {
+                'body': visitor.astext(),
+                'suffix': self.out_suffix,
+                'short_title': self.config.html_short_title,
+                'master_doc': self.config.master_doc,
+                'domain_indices': self.domain_indices,
+            }
+            f.write(self.render('project.hhc', context))
+
+    def build_hhx(self, outdir, outname):
+        # type: (str, str) -> None
        logger.info(__('writing index file...'))
        index = IndexEntries(self.env).create_index(self)
        filename = path.join(outdir, outname + '.hhk')
--- a/sphinx/templates/htmlhelp/project.hhc
+++ b/sphinx/templates/htmlhelp/project.hhc
@@ -0,0 +1,31 @@
+{%- macro sitemap(name, docname) -%}
+<OBJECT type="text/sitemap">
+  <PARAM name="Name" value="{{ name|e }}" />
+  <PARAM name="Local" value="{{ docname|e }}{{ suffix }}" />
+</OBJECT>
+{%- endmacro -%}
+
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
+<HTML>
+  <HEAD>
+    <META name="GENERATOR" content="Microsoft&reg; HTML Help Workshop 4.1" />
+    <!-- Sitemap 1.0 -->
+  </HEAD>
+  <BODY>
+    <OBJECT type="text/site properties">
+      <PARAM name="Window Styles" value="0x801227" />
+      <PARAM name="ImageType" value="Folder" />
+    </OBJECT>
+    <UL>
+      <LI>
+        {{ sitemap(short_title, master_doc)|indent(8) }}
+      </LI>
+      {%- for indexname, indexcls, content, collapse in domain_indices %}
+      <LI>
+        {{ sitemap(indexcls.localname, indexname)|indent(8) }}
+      </LI>
+      {%- endfor %}
+      {{ body|indent(6) }}
+    </UL>
+  </BODY>
+</HTML>
--- a/sphinx/templates/htmlhelp/project.hhp
+++ b/sphinx/templates/htmlhelp/project.hhp
@@ -0,0 +1,21 @@
+[OPTIONS]
+Binary TOC=No
+Binary Index=No
+Compiled file={{ outname }}.chm
+Contents file={{ outname }}.hhc
+Default Window={{ outname }}
+Default topic={{ master_doc }}
+Display compile progress=No
+Full text search stop list file={{ outname }}.stp
+Full-text search=Yes
+Index file={{ outname }}.hhk
+Language={{ "%#x"|format(lcid) }}
+Title={{ title }}
+
+[WINDOWS]
+{{ outname }}="{{ title }}","{{ outname }}.hhc","{{ outname }}.hhk","{{ master_doc }}","{{ master_doc }}",,,,,0x63520,220,0x10384e,[0,0,1024,768],,,,,,,0
+
+[FILES]
+{%- for filename in files %}
+{{ filename }}
+{%- endfor %}
--- a/sphinx/templates/htmlhelp/project.stp
+++ b/sphinx/templates/htmlhelp/project.stp
@@ -0,0 +1,33 @@
+a
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+near
+no
+not
+of
+on
+or
+such
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
--- a/tests/roots/test-htmlhelp-hhc/bar.rst
+++ b/tests/roots/test-htmlhelp-hhc/bar.rst
@@ -0,0 +1,2 @@
+bar
+---
--- a/tests/roots/test-htmlhelp-hhc/baz.rst
+++ b/tests/roots/test-htmlhelp-hhc/baz.rst
@@ -0,0 +1,2 @@
+baz
+---
--- a/tests/roots/test-htmlhelp-hhc/conf.py
+++ b/tests/roots/test-htmlhelp-hhc/conf.py
@@ -0,0 +1 @@
+html_short_title = "Sphinx's documentation"
--- a/tests/roots/test-htmlhelp-hhc/foo.rst
+++ b/tests/roots/test-htmlhelp-hhc/foo.rst
@@ -0,0 +1,6 @@
+foo
+---
+
+.. toctree::
+
+   bar
--- a/tests/roots/test-htmlhelp-hhc/index.rst
+++ b/tests/roots/test-htmlhelp-hhc/index.rst
@@ -0,0 +1,15 @@
+test-htmlhelp-domain_indices
+----------------------------
+
+section
+~~~~~~~
+
+.. py:module:: sphinx
+
+subsection
+^^^^^^^^^^
+
+.. toctree::
+
+   foo
+   baz
--- a/tests/test_build_htmlhelp.py
+++ b/tests/test_build_htmlhelp.py
@@ -11,13 +11,35 @@
 import re

 import pytest
+from html5lib import HTMLParser

-from sphinx.builders.htmlhelp import chm_htmlescape
-
-from sphinx.builders.htmlhelp import default_htmlhelp_basename
+from sphinx.builders.htmlhelp import chm_htmlescape, default_htmlhelp_basename
 from sphinx.config import Config


+@pytest.mark.sphinx('htmlhelp', testroot='basic')
+def test_build_htmlhelp(app, status, warning):
+    app.build()
+
+    hhp = (app.outdir / 'pythondoc.hhp').text()
+    assert 'Compiled file=pythondoc.chm' in hhp
+    assert 'Contents file=pythondoc.hhc' in hhp
+    assert 'Default Window=pythondoc' in hhp
+    assert 'Default topic=index.html' in hhp
+    assert 'Full text search stop list file=pythondoc.stp' in hhp
+    assert 'Index file=pythondoc.hhk' in hhp
+    assert 'Language=0x409' in hhp
+    assert 'Title=Python  documentation' in hhp
+    assert ('pythondoc="Python  documentation","pythondoc.hhc",'
+            '"pythondoc.hhk","index.html","index.html",,,,,'
+            '0x63520,220,0x10384e,[0,0,1024,768],,,,,,,0' in hhp)
+
+    files = ['genindex.html', 'index.html', '_static\\alabaster.css', '_static\\basic.css',
+             '_static\\custom.css', '_static\\file.png', '_static\\minus.png',
+             '_static\\plus.png', '_static\\pygments.css']
+    assert '[FILES]\n%s' % '\n'.join(files) in hhp
+
+
 @pytest.mark.sphinx('htmlhelp', testroot='basic')
 def test_default_htmlhelp_file_suffix(app, warning):
    assert app.builder.out_suffix == '.html'
@@ -49,6 +71,52 @@ def test_chm(app):
    assert m is None, 'Hex escaping exists in .hhk file: ' + str(m.group(0))


+@pytest.mark.sphinx('htmlhelp', testroot='htmlhelp-hhc')
+def test_htmlhelp_hhc(app):
+    app.build()
+
+    def assert_sitemap(node, name, filename):
+        assert node.tag == 'object'
+        assert len(node) == 2
+        assert node[0].tag == 'param'
+        assert node[0].attrib == {'name': 'Name',  'value': name}
+        assert node[1].tag == 'param'
+        assert node[1].attrib == {'name': 'Local', 'value': filename}
+
+    # .hhc file
+    hhc = (app.outdir / 'pythondoc.hhc').text()
+    tree = HTMLParser(namespaceHTMLElements=False).parse(hhc)
+    items = tree.find('.//body/ul')
+    assert len(items) == 4
+
+    # index
+    assert items[0].tag == 'li'
+    assert len(items[0]) == 1
+    assert_sitemap(items[0][0], "Sphinx's documentation", 'index.html')
+
+    # py-modindex
+    assert items[1].tag == 'li'
+    assert len(items[1]) == 1
+    assert_sitemap(items[1][0], 'Python Module Index', 'py-modindex.html')
+
+    # toctree
+    assert items[2].tag == 'li'
+    assert len(items[2]) == 2
+    assert_sitemap(items[2][0], 'foo', 'foo.html')
+
+    assert items[2][1].tag == 'ul'
+    assert len(items[2][1]) == 1
+    assert items[2][1][0].tag == 'li'
+    assert_sitemap(items[2][1][0][0], 'bar', 'bar.html')
+
+    assert items[3].tag == 'li'
+    assert len(items[3]) == 1
+    assert_sitemap(items[3][0], 'baz', 'baz.html')
+
+    # single quotes should be escaped as decimal (&#39;)
+    assert "Sphinx&#39;s documentation" in hhc
+
+
 def test_chm_htmlescape():
    assert chm_htmlescape('Hello world') == 'Hello world'
    assert chm_htmlescape(u'Unicode 文字') == u'Unicode 文字'
				`@@ -0,0 +1 @@`
				`html_short_title = "Sphinx's documentation"`