summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeorg Brandl <georg@python.org>2005-08-24 18:46:39 (GMT)
committerGeorg Brandl <georg@python.org>2005-08-24 18:46:39 (GMT)
commitb925602f169d47270a064cf9eb03e21706ed25c3 (patch)
treeeb4e791e8253f66340e95dd7f19f57a0d9112f5c
parent568973181aa523bbcf7f827b3a2eb2affd96ea67 (diff)
downloadcpython-b925602f169d47270a064cf9eb03e21706ed25c3.zip
cpython-b925602f169d47270a064cf9eb03e21706ed25c3.tar.gz
cpython-b925602f169d47270a064cf9eb03e21706ed25c3.tar.bz2
Patch [ 1062060 ] fix for 1016880 urllib.urlretrieve silently truncates dwnld
-rw-r--r--Doc/lib/liburllib.tex30
-rw-r--r--Lib/urllib.py16
-rw-r--r--Misc/NEWS4
3 files changed, 48 insertions, 2 deletions
diff --git a/Doc/lib/liburllib.tex b/Doc/lib/liburllib.tex
index dafdd91..5e488c4 100644
--- a/Doc/lib/liburllib.tex
+++ b/Doc/lib/liburllib.tex
@@ -142,6 +142,25 @@ If the \var{url} uses the \file{http:} scheme identifier, the optional
(normally the request type is \code{GET}). The \var{data} argument
must in standard \mimetype{application/x-www-form-urlencoded} format;
see the \function{urlencode()} function below.
+
+\versionchanged[
+\function{urlretrieve()} will raise \exception{ContentTooShortError}
+when it detects that the amount of data available
+was less than the expected amount (which is the size reported by a
+\var{Content-Length} header). This can occur, for example, when the
+download is interrupted.
+
+The \var{Content-Length} is treated as a lower bound: if there's more data
+to read, urlretrieve reads more data, but if less data is available,
+it raises the exception.
+
+You can still retrieve the downloaded data in this case; it is stored
+in the \member{content} attribute of the exception instance.
+
+If no \var{Content-Length} header was supplied, urlretrieve cannot
+check the size of the data it has downloaded, and just returns it.
+In this case you just have to assume that the download was successful.]{2.5}
+
\end{funcdesc}
\begin{datadesc}{_urlopener}
@@ -283,6 +302,15 @@ subclass may override this method to support more appropriate behavior
if needed.}
\end{classdesc}
+\begin{excclassdesc}{ContentTooShortError}{msg\optional{, content}}
+This exception is raised when the \function{urlretrieve()} function
+detects that the amount of the downloaded data is less than the
+expected amount (given by the \var{Content-Length} header). The
+\member{content} attribute stores the downloaded (and supposedly
+truncated) data.
+\versionadded{2.5}
+\end{excclassdesc}
+
Restrictions:
\begin{itemize}
@@ -317,7 +345,7 @@ Web client using these functions without using threads.
\item
The data returned by \function{urlopen()} or \function{urlretrieve()}
is the raw data returned by the server. This may be binary data
-(e.g. an image), plain text or (for example) HTML\index{HTML}. The
+(such as an image), plain text or (for example) HTML\index{HTML}. The
HTTP\indexii{HTTP}{protocol} protocol provides type information in the
reply header, which can be inspected by looking at the
\mailheader{Content-Type} header. For the
diff --git a/Lib/urllib.py b/Lib/urllib.py
index 74b2aec..4f1ebdd 100644
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -86,6 +86,11 @@ def urlcleanup():
if _urlopener:
_urlopener.cleanup()
+# exception raised when the downloaded size is smaller than content-length
+class ContentTooShortError(IOError):
+ def __init__(self, message, content):
+ IOError.__init__(self, message)
+ self.content = content
ftpcache = {}
class URLopener:
@@ -228,24 +233,33 @@ class URLopener:
self.tempcache[url] = result
bs = 1024*8
size = -1
+ read = 0
blocknum = 1
if reporthook:
if "content-length" in headers:
size = int(headers["Content-Length"])
reporthook(0, bs, size)
block = fp.read(bs)
+ read += len(block)
if reporthook:
reporthook(1, bs, size)
while block:
tfp.write(block)
block = fp.read(bs)
- blocknum = blocknum + 1
+ read += len(block)
+ blocknum += 1
if reporthook:
reporthook(blocknum, bs, size)
fp.close()
tfp.close()
del fp
del tfp
+
+ # raise exception if fewer bytes were read than content-length announced
+ if size >= 0 and read < size:
+ raise ContentTooShortError("retrieval incomplete: got only %i out "
+ "of %i bytes" % (read, size), result)
+
return result
# Each method named open_<type> knows how to open that type of URL
diff --git a/Misc/NEWS b/Misc/NEWS
index 7e21b7a..fab6163 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -193,6 +193,10 @@ Extension Modules
Library
-------
+- Patch #1062060: urllib.urlretrieve() now raises a new exception, named
+ ContentTooShortError, when the actually downloaded size is less than
+ the size given by the Content-Length header.
+
- Bug #1121494: distutils.dir_utils.mkpath now accepts Unicode strings.
- Bug #1178484: Return complete lines from codec stream readers