From 5584863d18b5b0ffed9bb75a4a2cc7b11df8863d Mon Sep 17 00:00:00 2001 From: Cole Robinson Date: Fri, 18 Sep 2015 18:31:56 -0400 Subject: [PATCH] urlfetcher: Switch to requests and urllib2 instead of urlgrabber urlgrabber is largely dead upstream and isn't going to be ported to python3 AFAIK. So we will need to move off of it eventually. Use requests for http handling which is the most common library nowadays, and just plain old urllib2 for ftp fetching. --- virt-manager.spec.in | 1 + virtinst/cli.py | 2 + virtinst/urlfetcher.py | 128 +++++++++++++++++++++++++++++------------ 3 files changed, 94 insertions(+), 37 deletions(-) diff --git a/virt-manager.spec.in b/virt-manager.spec.in index 3e4ace087..e23a9d4f2 100644 --- a/virt-manager.spec.in +++ b/virt-manager.spec.in @@ -70,6 +70,7 @@ Group: Applications/Emulators # however varying amounts of functionality will not be enabled. Requires: libvirt-python >= 0.7.0 Requires: libxml2-python +Requires: python-requests Requires: python-urlgrabber Requires: python-ipaddr Requires: libosinfo >= 0.2.10 diff --git a/virtinst/cli.py b/virtinst/cli.py index c219c2705..6b18d32e7 100644 --- a/virtinst/cli.py +++ b/virtinst/cli.py @@ -250,6 +250,8 @@ def setupLogging(appname, debug_stdout, do_quiet, cli_app=True): sys.__excepthook__(typ, val, tb) sys.excepthook = exception_log + logging.getLogger("requests").setLevel(logging.ERROR) + # Log the app command string logging.debug("Launched with command line: %s", " ".join(sys.argv)) diff --git a/virtinst/urlfetcher.py b/virtinst/urlfetcher.py index b9ffeebef..afb8326e5 100644 --- a/virtinst/urlfetcher.py +++ b/virtinst/urlfetcher.py @@ -24,6 +24,7 @@ import ftplib import logging import os import re +import requests import stat import StringIO import subprocess @@ -31,8 +32,6 @@ import tempfile import urllib2 import urlparse -import urlgrabber.grabber as grabber - from .osdict import OSDB @@ -45,6 +44,8 @@ class _URLFetcher(object): This is a generic base class for fetching/extracting files from a media source, such as CD ISO, NFS server, or HTTP/FTP server """ + _block_size = 16384 + def __init__(self, location, scratchdir, meter): self.location = location self.scratchdir = scratchdir @@ -54,6 +55,7 @@ class _URLFetcher(object): logging.debug("Using scratchdir=%s", scratchdir) + #################### # Internal helpers # #################### @@ -71,33 +73,49 @@ class _URLFetcher(object): ret += "/" return ret + filename - def _writeURLToFileobj(self, urlobj, fileobj): + def _grabURL(self, filename, fileobj): """ - Write the urlobj contents into the passed python style file object - """ - block_size = 16384 - while 1: - buff = urlobj.read(block_size) - if not buff: - break - fileobj.write(buff) - - def _grabURL(self, filename): - """ - Return the urlobj from grabber.urlopen + Download the filename from self.location, and write contents to + fileobj """ url = self._make_full_url(filename) - base = os.path.basename(filename) - logging.debug("Fetching URI: %s", url) try: - return grabber.urlopen(url, - progress_obj=self.meter, - text=_("Retrieving file %s...") % base) + urlobj, size = self._grabber(url) except Exception, e: raise ValueError(_("Couldn't acquire file %s: %s") % (url, str(e))) + logging.debug("Fetching URI: %s", url) + self.meter.start( + text=_("Retrieving file %s...") % os.path.basename(filename), + size=size) + + total = self._write(urlobj, fileobj) + self.meter.end(total) + + def _write(self, urlobj, fileobj): + """ + Write the contents of urlobj to python file like object fileobj + """ + total = 0 + while 1: + buff = urlobj.read(self._block_size) + if not buff: + break + fileobj.write(buff) + total += len(buff) + self.meter.update(total) + return total + + def _grabber(self, url): + """ + Returns the urlobj, size for the passed URL. urlobj is whatever + data needs to be passed to self._write + """ + raise NotImplementedError("must be implemented in subclass") + + ############## # Public API # ############## @@ -125,43 +143,64 @@ class _URLFetcher(object): Grab the passed filename from self.location and save it to a temporary file, returning the temp filename """ - urlobj = self._grabURL(filename) prefix = "virtinst-" + os.path.basename(filename) + "." if "VIRTINST_TEST_SUITE" in os.environ: - filename = os.path.join("/tmp", prefix) - fileobj = file(filename, "w+b") + fn = os.path.join("/tmp", prefix) + fileobj = file(fn, "w") else: fileobj = tempfile.NamedTemporaryFile( dir=self.scratchdir, prefix=prefix, delete=False) - filename = fileobj.name + fn = fileobj.name - self._writeURLToFileobj(urlobj, fileobj) - logging.debug("Saved file to " + filename) - return filename + self._grabURL(filename, fileobj) + logging.debug("Saved file to " + fn) + return fn def acquireFileContent(self, filename): """ Grab the passed filename from self.location and return it as a string """ fileobj = StringIO.StringIO() - urlobj = self._grabURL(filename) - self._writeURLToFileobj(urlobj, fileobj) + self._grabURL(filename, fileobj) return fileobj.getvalue() class _HTTPURLFetcher(_URLFetcher): def hasFile(self, filename): + """ + We just do a HEAD request to see if the file exists + """ url = self._make_full_url(filename) try: - request = urllib2.Request(url) - request.get_method = lambda: "HEAD" - urllib2.urlopen(request) + response = requests.head(url) + response.raise_for_status() except Exception, e: logging.debug("HTTP hasFile: didn't find %s: %s", url, str(e)) return False return True + def _grabber(self, url): + """ + Use requests for this + """ + response = requests.get(url, stream=True) + response.raise_for_status() + size = response.headers.get('content-length') + return response, size.isdigit() and int(size) or None + + def _write(self, urlobj, fileobj): + """ + The requests object doesn't have a file-like read() option, so + we need to implemente it ourselves + """ + total = 0 + for data in urlobj.iter_content(chunk_size=self._block_size): + fileobj.write(data) + total += len(data) + self.meter.update(total) + return total + class _FTPURLFetcher(_URLFetcher): _ftp = None @@ -171,13 +210,23 @@ class _FTPURLFetcher(_URLFetcher): return try: - urlret = urlparse.urlparse(self._make_full_url("")) - self._ftp = ftplib.FTP(urlret[1]) + server = urlparse.urlparse(self.location)[1] + self._ftp = ftplib.FTP(server) self._ftp.login() except Exception, e: raise ValueError(_("Opening URL %s failed: %s.") % (self.location, str(e))) + def _grabber(self, url): + """ + Use urllib2 and ftplib to grab the file + """ + request = urllib2.Request(url) + urlobj = urllib2.urlopen(request) + size = self._ftp.size(urlparse.urlparse(url)[2]) + return urlobj, size + + def cleanupLocation(self): if not self._ftp: return @@ -191,15 +240,15 @@ class _FTPURLFetcher(_URLFetcher): def hasFile(self, filename): url = self._make_full_url(filename) - urlret = urlparse.urlparse(url) + path = urlparse.urlparse(url)[2] try: try: # If it's a file - self._ftp.size(urlret[2]) + self._ftp.size(path) except ftplib.all_errors: # If it's a dir - self._ftp.cwd(urlret[2]) + self._ftp.cwd(path) except ftplib.all_errors, e: logging.debug("FTP hasFile: couldn't access %s: %s", url, str(e)) @@ -219,6 +268,11 @@ class _LocalURLFetcher(_URLFetcher): logging.debug("local hasFile: Couldn't find %s", url) return ret + def _grabber(self, url): + urlobj = file(url, "r") + size = os.path.getsize(url) + return urlobj, size + class _MountedURLFetcher(_LocalURLFetcher): """