From fe7c30956c5cc00a90b8cf62e3caf03019efee59 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <python@rcn.com>
Date: Thu, 15 Sep 2005 05:28:25 +0000
Subject: Sync-up with patches to the head. Includes the fix for SF 1016880
 (urllib.urlretrieve silently truncates downloads) and the performance fix-ups.

---
 Lib/urllib.py | 114 +++++++++++++++++++++++++++++++---------------------------
 1 file changed, 61 insertions(+), 53 deletions(-)
diff --git a/Lib/urllib.py b/Lib/urllib.py
index 46045c3..bc16be0 100644
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -86,6 +86,11 @@ def urlcleanup():
     if _urlopener:
         _urlopener.cleanup()
 
+# exception raised when downloaded size does not match content-length
+class ContentTooShortError(IOError):
+    def __init__(self, message, content):
+        IOError.__init__(self, message)
+        self.content = content
 
 ftpcache = {}
 class URLopener:
@@ -228,6 +233,7 @@ class URLopener:
             self.tempcache[url] = result
         bs = 1024*8
         size = -1
+        read = 0
         blocknum = 0
         if reporthook:
             if "content-length" in headers:
@@ -237,6 +243,7 @@ class URLopener:
             block = fp.read(bs)
             if block == "":
                 break
+            read += len(block)
             tfp.write(block)
             blocknum += 1
             if reporthook:
@@ -245,6 +252,12 @@ class URLopener:
         tfp.close()
         del fp
         del tfp
+
+        # raise exception if actual size does not match content-length header
+        if size >= 0 and read < size:
+            raise ContentTooShortError("retrieval incomplete: got only %i out "
+                                       "of %i bytes" % (read, size), result)
+
         return result
 
     # Each method named open_<type> knows how to open that type of URL
@@ -409,7 +422,11 @@ class URLopener:
 
     def open_local_file(self, url):
         """Use local file."""
-        import mimetypes, mimetools, email.Utils, StringIO
+        import mimetypes, mimetools, email.Utils
+        try:
+            from cStringIO import StringIO
+        except ImportError:
+            from StringIO import StringIO
         host, file = splithost(url)
         localname = url2pathname(file)
         try:
@@ -419,7 +436,7 @@ class URLopener:
         size = stats.st_size
         modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
         mtype = mimetypes.guess_type(url)[0]
-        headers = mimetools.Message(StringIO.StringIO(
+        headers = mimetools.Message(StringIO(
             'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
             (mtype or 'text/plain', size, modified)))
         if not host:
@@ -440,7 +457,11 @@ class URLopener:
 
     def open_ftp(self, url):
         """Use FTP protocol."""
-        import mimetypes, mimetools, StringIO
+        import mimetypes, mimetools
+        try:
+            from cStringIO import StringIO
+        except ImportError:
+            from StringIO import StringIO
         host, path = splithost(url)
         if not host: raise IOError, ('ftp error', 'no host given')
         host, port = splitport(host)
@@ -489,7 +510,7 @@ class URLopener:
                 headers += "Content-Type: %s\n" % mtype
             if retrlen is not None and retrlen >= 0:
                 headers += "Content-Length: %d\n" % retrlen
-            headers = mimetools.Message(StringIO.StringIO(headers))
+            headers = mimetools.Message(StringIO(headers))
             return addinfourl(fp, headers, "ftp:" + url)
         except ftperrors(), msg:
             raise IOError, ('ftp error', msg), sys.exc_info()[2]
@@ -503,7 +524,11 @@ class URLopener:
         # mediatype := [ type "/" subtype ] *( ";" parameter )
         # data      := *urlchar
         # parameter := attribute "=" value
-        import StringIO, mimetools
+        import mimetools
+        try:
+            from cStringIO import StringIO
+        except ImportError:
+            from StringIO import StringIO
         try:
             [type, data] = url.split(',', 1)
         except ValueError:
@@ -529,7 +554,7 @@ class URLopener:
         msg.append('')
         msg.append(data)
         msg = '\n'.join(msg)
-        f = StringIO.StringIO(msg)
+        f = StringIO(msg)
         headers = mimetools.Message(f, 0)
         f.fileno = None     # needed for addinfourl
         return addinfourl(f, headers, url)
@@ -696,8 +721,11 @@ def noheaders():
     global _noheaders
     if _noheaders is None:
         import mimetools
-        import StringIO
-        _noheaders = mimetools.Message(StringIO.StringIO(), 0)
+        try:
+            from cStringIO import StringIO
+        except ImportError:
+            from StringIO import StringIO
+        _noheaders = mimetools.Message(StringIO(), 0)
         _noheaders.fp.close()   # Recycle file descriptor
     return _noheaders
 
@@ -1021,23 +1049,18 @@ def splitgophertype(selector):
         return selector[1], selector[2:]
     return None, selector
 
+_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
+_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
+
 def unquote(s):
     """unquote('abc%20def') -> 'abc def'."""
-    mychr = chr
-    myatoi = int
-    list = s.split('%')
-    res = [list[0]]
-    myappend = res.append
-    del list[0]
-    for item in list:
-        if item[1:2]:
-            try:
-                myappend(mychr(myatoi(item[:2], 16))
-                     + item[2:])
-            except ValueError:
-                myappend('%' + item)
-        else:
-            myappend('%' + item)
+    res = s.split('%')
+    for i in xrange(1, len(res)):
+        item = res[i]
+        try:
+            res[i] = _hextochr[item[:2]] + item[2:]
+        except KeyError:
+            res[i] = '%' + item
     return "".join(res)
 
 def unquote_plus(s):
@@ -1048,22 +1071,7 @@ def unquote_plus(s):
 always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                'abcdefghijklmnopqrstuvwxyz'
                '0123456789' '_.-')
-
-_fast_safe_test = always_safe + '/'
-_fast_safe = None
-
-def _fast_quote(s):
-    global _fast_safe
-    if _fast_safe is None:
-        _fast_safe = {}
-        for c in _fast_safe_test:
-            _fast_safe[c] = c
-    res = list(s)
-    for i in range(len(res)):
-        c = res[i]
-        if not c in _fast_safe:
-            res[i] = '%%%02X' % ord(c)
-    return ''.join(res)
+_safemaps = {}
 
 def quote(s, safe = '/'):
     """quote('abc def') -> 'abc%20def'
@@ -1086,25 +1094,25 @@ def quote(s, safe = '/'):
     called on a path where the existing slash characters are used as
     reserved characters.
     """
-    safe = always_safe + safe
-    if _fast_safe_test == safe:
-        return _fast_quote(s)
-    res = list(s)
-    for i in range(len(res)):
-        c = res[i]
-        if c not in safe:
-            res[i] = '%%%02X' % ord(c)
+    cachekey = (safe, always_safe)
+    try:
+        safe_map = _safemaps[cachekey]
+    except KeyError:
+        safe += always_safe
+        safe_map = {}
+        for i in range(256):
+            c = chr(i)
+            safe_map[c] = (c in safe) and c or ('%%%02X' % i)
+        _safemaps[cachekey] = safe_map
+    res = map(safe_map.__getitem__, s)
     return ''.join(res)
 
 def quote_plus(s, safe = ''):
     """Quote the query fragment of a URL; replacing ' ' with '+'"""
     if ' ' in s:
-        l = s.split(' ')
-        for i in range(len(l)):
-            l[i] = quote(l[i], safe)
-        return '+'.join(l)
-    else:
-        return quote(s, safe)
+        s = quote(s, safe + ' ')
+        return s.replace(' ', '+')
+    return quote(s, safe)
 
 def urlencode(query,doseq=0):
     """Encode a sequence of two-element tuples or dictionary into a URL query string.
-- 
cgit v0.12