fixed file handling and parsing in intersphinx so it properly handles encodings

This commit is contained in:
Daniel Neuhäuser 2010-06-07 01:34:01 +02:00
parent 5727214299
commit 817e1dd4cd
2 changed files with 29 additions and 15 deletions

View File

@ -28,12 +28,20 @@ import time
import zlib
import urllib2
import posixpath
import codecs
import sys
from os import path
from docutils import nodes
from sphinx.builders.html import INVENTORY_FILENAME
if sys.version_info >= (3, 0):
def b(s):
return s.encode('utf-8')
else:
b = str
handlers = [urllib2.ProxyHandler(), urllib2.HTTPRedirectHandler(),
urllib2.HTTPHandler()]
try:
@ -43,11 +51,14 @@ except NameError:
urllib2.install_opener(urllib2.build_opener(*handlers))
UTF8StreamReader = codecs.lookup('utf-8')[2]
def read_inventory_v1(f, uri, join):
f = UTF8StreamReader(f)
invdata = {}
line = f.next()
projname = line.rstrip()[11:].decode('utf-8')
projname = line.rstrip()[11:]
line = f.next()
version = line.rstrip()[11:]
for line in f:
@ -70,25 +81,25 @@ def read_inventory_v2(f, uri, join, bufsize=16*1024):
projname = line.rstrip()[11:].decode('utf-8')
line = f.readline()
version = line.rstrip()[11:].decode('utf-8')
line = f.readline()
line = f.readline().decode('utf-8')
if 'zlib' not in line:
raise ValueError
def read_chunks():
decompressor = zlib.decompressobj()
for chunk in iter(lambda: f.read(bufsize), ''):
for chunk in iter(lambda: f.read(bufsize), b('')):
yield decompressor.decompress(chunk)
yield decompressor.flush()
def split_lines(iter):
buf = ''
buf = b('')
for chunk in iter:
buf += chunk
lineend = buf.find('\n')
lineend = buf.find(b('\n'))
while lineend != -1:
yield buf[:lineend].decode('utf-8')
buf = buf[lineend+1:]
lineend = buf.find('\n')
lineend = buf.find(b('\n'))
assert not buf
for line in split_lines(read_chunks()):
@ -111,13 +122,13 @@ def fetch_inventory(app, uri, inv):
if inv.find('://') != -1:
f = urllib2.urlopen(inv)
else:
f = open(path.join(app.srcdir, inv))
f = open(path.join(app.srcdir, inv), 'rb')
except Exception, err:
app.warn('intersphinx inventory %r not fetchable due to '
'%s: %s' % (inv, err.__class__, err))
return
try:
line = f.readline().rstrip()
line = f.readline().rstrip().decode('utf-8')
try:
if line == '# Sphinx inventory version 1':
invdata = read_inventory_v1(f, uri, join)

View File

@ -11,7 +11,10 @@
import zlib
import posixpath
from cStringIO import StringIO
try:
from io import BytesIO
except ImportError:
from cStringIO import StringIO as BytesIO
from docutils import nodes
@ -28,23 +31,23 @@ inventory_v1 = '''\
# Version: 1.0
module mod foo.html
module.cls class foo.html
'''
'''.encode('utf-8')
inventory_v2 = '''\
# Sphinx inventory version 2
# Project: foo
# Version: 2.0
# The remainder of this file is compressed with zlib.
''' + zlib.compress('''\
'''.encode('utf-8') + zlib.compress('''\
module1 py:module 0 foo.html#module-module1 Long Module desc
module2 py:module 0 foo.html#module-$ -
module1.func py:function 1 sub/foo.html#$ -
CFunc c:function 2 cfunc.html#CFunc -
''')
'''.encode('utf-8'))
def test_read_inventory_v1():
f = StringIO(inventory_v1)
f = BytesIO(inventory_v1)
f.readline()
invdata = read_inventory_v1(f, '/util', posixpath.join)
assert invdata['py:module']['module'] == \
@ -54,12 +57,12 @@ def test_read_inventory_v1():
def test_read_inventory_v2():
f = StringIO(inventory_v2)
f = BytesIO(inventory_v2)
f.readline()
invdata1 = read_inventory_v2(f, '/util', posixpath.join)
# try again with a small buffer size to test the chunking algorithm
f = StringIO(inventory_v2)
f = BytesIO(inventory_v2)
f.readline()
invdata2 = read_inventory_v2(f, '/util', posixpath.join, bufsize=5)