Fixed file handling and parsing in intersphinx so that it properly handles encodings

This commit is contained in:
Daniel Neuhäuser 2010-06-07 01:34:01 +02:00
parent 5727214299
commit 817e1dd4cd
2 changed files with 29 additions and 15 deletions

View File

@ -28,12 +28,20 @@ import time
import zlib import zlib
import urllib2 import urllib2
import posixpath import posixpath
import codecs
import sys
from os import path from os import path
from docutils import nodes from docutils import nodes
from sphinx.builders.html import INVENTORY_FILENAME from sphinx.builders.html import INVENTORY_FILENAME
if sys.version_info >= (3, 0):
def b(s):
return s.encode('utf-8')
else:
b = str
handlers = [urllib2.ProxyHandler(), urllib2.HTTPRedirectHandler(), handlers = [urllib2.ProxyHandler(), urllib2.HTTPRedirectHandler(),
urllib2.HTTPHandler()] urllib2.HTTPHandler()]
try: try:
@ -43,11 +51,14 @@ except NameError:
urllib2.install_opener(urllib2.build_opener(*handlers)) urllib2.install_opener(urllib2.build_opener(*handlers))
UTF8StreamReader = codecs.lookup('utf-8')[2]
def read_inventory_v1(f, uri, join): def read_inventory_v1(f, uri, join):
f = UTF8StreamReader(f)
invdata = {} invdata = {}
line = f.next() line = f.next()
projname = line.rstrip()[11:].decode('utf-8') projname = line.rstrip()[11:]
line = f.next() line = f.next()
version = line.rstrip()[11:] version = line.rstrip()[11:]
for line in f: for line in f:
@ -70,25 +81,25 @@ def read_inventory_v2(f, uri, join, bufsize=16*1024):
projname = line.rstrip()[11:].decode('utf-8') projname = line.rstrip()[11:].decode('utf-8')
line = f.readline() line = f.readline()
version = line.rstrip()[11:].decode('utf-8') version = line.rstrip()[11:].decode('utf-8')
line = f.readline() line = f.readline().decode('utf-8')
if 'zlib' not in line: if 'zlib' not in line:
raise ValueError raise ValueError
def read_chunks(): def read_chunks():
decompressor = zlib.decompressobj() decompressor = zlib.decompressobj()
for chunk in iter(lambda: f.read(bufsize), ''): for chunk in iter(lambda: f.read(bufsize), b('')):
yield decompressor.decompress(chunk) yield decompressor.decompress(chunk)
yield decompressor.flush() yield decompressor.flush()
def split_lines(iter): def split_lines(iter):
buf = '' buf = b('')
for chunk in iter: for chunk in iter:
buf += chunk buf += chunk
lineend = buf.find('\n') lineend = buf.find(b('\n'))
while lineend != -1: while lineend != -1:
yield buf[:lineend].decode('utf-8') yield buf[:lineend].decode('utf-8')
buf = buf[lineend+1:] buf = buf[lineend+1:]
lineend = buf.find('\n') lineend = buf.find(b('\n'))
assert not buf assert not buf
for line in split_lines(read_chunks()): for line in split_lines(read_chunks()):
@ -111,13 +122,13 @@ def fetch_inventory(app, uri, inv):
if inv.find('://') != -1: if inv.find('://') != -1:
f = urllib2.urlopen(inv) f = urllib2.urlopen(inv)
else: else:
f = open(path.join(app.srcdir, inv)) f = open(path.join(app.srcdir, inv), 'rb')
except Exception, err: except Exception, err:
app.warn('intersphinx inventory %r not fetchable due to ' app.warn('intersphinx inventory %r not fetchable due to '
'%s: %s' % (inv, err.__class__, err)) '%s: %s' % (inv, err.__class__, err))
return return
try: try:
line = f.readline().rstrip() line = f.readline().rstrip().decode('utf-8')
try: try:
if line == '# Sphinx inventory version 1': if line == '# Sphinx inventory version 1':
invdata = read_inventory_v1(f, uri, join) invdata = read_inventory_v1(f, uri, join)

View File

@ -11,7 +11,10 @@
import zlib import zlib
import posixpath import posixpath
from cStringIO import StringIO try:
from io import BytesIO
except ImportError:
from cStringIO import StringIO as BytesIO
from docutils import nodes from docutils import nodes
@ -28,23 +31,23 @@ inventory_v1 = '''\
# Version: 1.0 # Version: 1.0
module mod foo.html module mod foo.html
module.cls class foo.html module.cls class foo.html
''' '''.encode('utf-8')
inventory_v2 = '''\ inventory_v2 = '''\
# Sphinx inventory version 2 # Sphinx inventory version 2
# Project: foo # Project: foo
# Version: 2.0 # Version: 2.0
# The remainder of this file is compressed with zlib. # The remainder of this file is compressed with zlib.
''' + zlib.compress('''\ '''.encode('utf-8') + zlib.compress('''\
module1 py:module 0 foo.html#module-module1 Long Module desc module1 py:module 0 foo.html#module-module1 Long Module desc
module2 py:module 0 foo.html#module-$ - module2 py:module 0 foo.html#module-$ -
module1.func py:function 1 sub/foo.html#$ - module1.func py:function 1 sub/foo.html#$ -
CFunc c:function 2 cfunc.html#CFunc - CFunc c:function 2 cfunc.html#CFunc -
''') '''.encode('utf-8'))
def test_read_inventory_v1(): def test_read_inventory_v1():
f = StringIO(inventory_v1) f = BytesIO(inventory_v1)
f.readline() f.readline()
invdata = read_inventory_v1(f, '/util', posixpath.join) invdata = read_inventory_v1(f, '/util', posixpath.join)
assert invdata['py:module']['module'] == \ assert invdata['py:module']['module'] == \
@ -54,12 +57,12 @@ def test_read_inventory_v1():
def test_read_inventory_v2(): def test_read_inventory_v2():
f = StringIO(inventory_v2) f = BytesIO(inventory_v2)
f.readline() f.readline()
invdata1 = read_inventory_v2(f, '/util', posixpath.join) invdata1 = read_inventory_v2(f, '/util', posixpath.join)
# try again with a small buffer size to test the chunking algorithm # try again with a small buffer size to test the chunking algorithm
f = StringIO(inventory_v2) f = BytesIO(inventory_v2)
f.readline() f.readline()
invdata2 = read_inventory_v2(f, '/util', posixpath.join, bufsize=5) invdata2 = read_inventory_v2(f, '/util', posixpath.join, bufsize=5)