HTML builder: compress the object inventory with zlib, which shrinks e.g. the Python inventory from 450k to 85k.

2025-02-25 18:55:22 -06:00 · 2009-09-08 00:40:06 +02:00
parent 92aa430241
commit 08586e6775
2 changed files with 48 additions and 13 deletions
--- a/sphinx/builders/html.py
+++ b/sphinx/builders/html.py
@@ -10,6 +10,7 @@
 """

 import os
+import zlib
 import codecs
 import posixpath
 import cPickle as pickle
@@ -711,15 +712,22 @@ class StandaloneHTMLBuilder(Builder):
        self.info('done')

        self.info(bold('dumping object inventory... '), nonl=True)
-        f = open(path.join(self.outdir, INVENTORY_FILENAME), 'w')
+        f = open(path.join(self.outdir, INVENTORY_FILENAME), 'wb')
        try:
            f.write('# Sphinx inventory version 2\n')
            f.write('# Project: %s\n' % self.config.project.encode('utf-8'))
            f.write('# Version: %s\n' % self.config.version)
+            f.write('# The remainder of this file is compressed using zlib.\n')
+            compressor = zlib.compressobj(9)
            for domainname, domain in self.env.domains.iteritems():
                for name, type, docname, anchor, prio in domain.get_objects():
-                    f.write('%s %s:%s %s %s\n' % (name, domainname, type, prio,
-                            self.get_target_uri(docname) + '#' + anchor))
+                    if anchor.endswith(name):
+                        # this can shorten the inventory by as much as 25%
+                        anchor = anchor[:-len(name)] + '$'
+                    f.write(compressor.compress(
+                        '%s %s:%s %s %s\n' % (name, domainname, type, prio,
+                        self.get_target_uri(docname) + '#' + anchor)))
+            f.write(compressor.flush())
        finally:
            f.close()
        self.info('done')
--- a/sphinx/ext/intersphinx.py
+++ b/sphinx/ext/intersphinx.py
@@ -25,6 +25,7 @@
 """

 import time
+import zlib
 import urllib2
 import posixpath
 from os import path
@@ -62,12 +63,35 @@ def fetch_inventory_v1(f, uri, join):

 def fetch_inventory_v2(f, uri, join):
    invdata = {}
-    line = f.next()
+    line = f.readline()
    projname = line.rstrip()[11:].decode('utf-8')
-    line = f.next()
+    line = f.readline()
    version = line.rstrip()[11:]
-    for line in f:
+    line = f.readline()
+    if 'zlib' not in line:
+        raise ValueError
+
+    def read_chunks():
+        decompressor = zlib.decompressobj()
+        for chunk in iter(lambda: f.read(16 * 1024), ''):
+            yield decompressor.decompress(chunk)
+        yield decompressor.flush()
+
+    def split_lines(iter):
+        buf = ''
+        for chunk in iter:
+            buf += chunk
+            lineend = buf.find('\n')
+            while lineend != -1:
+                yield buf[:lineend]
+                buf = buf[lineend+1:]
+                lineend = buf.find('\n')
+        assert not buf
+
+    for line in split_lines(read_chunks()):
        name, type, prio, location = line.rstrip().split(None, 3)
+        if location.endswith('$'):
+            location = location[:-1] + name
        location = join(uri, location)
        invdata.setdefault(type, {})[name] = (projname, version, location)
    return invdata
@@ -89,15 +113,18 @@ def fetch_inventory(app, uri, inv):
                 '%s: %s' % (inv, err.__class__, err))
        return
    try:
-        line = f.next().rstrip()
-        if line == '# Sphinx inventory version 1':
-            invdata = fetch_inventory_v1(f, uri, join)
-        elif line == '# Sphinx inventory version 2':
-            invdata = fetch_inventory_v2(f, uri, join)
-        else:
+        line = f.readline().rstrip()
+        try:
+            if line == '# Sphinx inventory version 1':
+                invdata = fetch_inventory_v1(f, uri, join)
+            elif line == '# Sphinx inventory version 2':
+                invdata = fetch_inventory_v2(f, uri, join)
+            else:
+                raise ValueError
+            f.close()
+        except ValueError:
            f.close()
            raise ValueError('unknown or unsupported inventory version')
-        f.close()
    except Exception, err:
        app.warn('intersphinx inventory %r not readable due to '
                 '%s: %s' % (inv, err.__class__.__name__, err))