summary | refs | log | tree | commit | diff | stats
path: root/Lib/urlopen.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/urlopen.py')
-rwxr-xr-x Lib/urlopen.py 108
1 files changed, 90 insertions, 18 deletions
diff --git a/Lib/urlopen.py b/Lib/urlopen.py
index 2b2729d..0366bda 100755
--- a/Lib/urlopen.py
+++ b/Lib/urlopen.py
@@ -32,6 +32,11 @@ def urlopen(url):
if not _urlopener:
_urlopener = URLopener()
return _urlopener.open(url)
+def urlretrieve(url):
+ global _urlopener
+ if not _urlopener:
+ _urlopener = URLopener()
+ return _urlopener.retrieve(url)
# Class to open URLs.
@@ -44,10 +49,25 @@ class URLopener:
def __init__(self):
self.addheaders = []
self.ftpcache = ftpcache
+ self.tempfiles = []
# Undocumented feature: you can use a different
# ftp cache by assigning to the .ftpcache member;
# in case you want logically independent URL openers
+ def __del__(self):
+ self.close()
+
+ def close(self):
+ self.cleanup()
+
+ def cleanup(self):
+ import os
+ for tfn in self.tempfiles:
+ try:
+ os.unlink(tfn)
+ except os.error:
+ pass
+
# Add a header to be used by the HTTP interface only
# e.g. u.addheader('Accept', 'sound/basic')
def addheader(self, *args):
@@ -56,13 +76,8 @@ class URLopener:
# External interface
# Use URLopener().open(file) instead of open(file, 'r')
def open(self, url):
- import string
- url = string.strip(url)
- if url[:1] == '<' and url[-1:] == '>':
- url = string.strip(url[1:-1])
- if url[:4] == 'URL:': url = string.strip(url[4:])
- type, url = splittype(url)
- if not type: type = 'file'
+ type, url = splittype(unwrap(url))
+ if not type: type = 'file'
name = 'open_' + type
if '-' in name:
import regsub
@@ -75,6 +90,32 @@ class URLopener:
except socket.error, msg:
raise IOError, ('socket error', msg)
+ # External interface
+ # retrieve(url) returns (filename, None) for a local object
+ # or (tempfilename, headers) for a remote object
+ def retrieve(self, url):
+ type, url1 = splittype(unwrap(url))
+ if not type or type == 'file':
+ try:
+ fp = self.open_local_file(url1)
+ return splithost(url1)[1], None
+ except IOError, msg:
+ pass
+ fp = self.open(url)
+ import tempfile
+ tfn = tempfile.mktemp()
+ self.tempfiles.append(tfn)
+ tfp = open(tfn, 'w')
+ bs = 1024*8
+ block = fp.read(bs)
+ while block:
+ tfp.write(block)
+ block = fp.read(bs)
+ headers = fp.info()
+ fp.close()
+ tfp.close()
+ return tfn, headers
+
# Each method named open_<type> knows how to open that type of URL
# Use HTTP protocol
@@ -100,15 +141,20 @@ class URLopener:
# Use local file or FTP depending on form of URL
def open_file(self, url):
+ try:
+ return self.open_local_file(url)
+ except IOError:
+ return self.open_ftp(url)
+
+ # Use local file
+ def open_local_file(self, url):
host, file = splithost(url)
if not host: return addinfo(open(file, 'r'), noheaders())
host, port = splitport(host)
if not port and socket.gethostbyname(host) in (
localhost(), thishost()):
- try: fp = open(file, 'r')
- except IOError: fp = None
- if fp: return addinfo(fp, noheaders())
- return self.open_ftp(url)
+ return addinfo(open(file, 'r'), noheaders())
+ raise IOError, ('local file error', 'not on local host')
# Use FTP protocol
def open_ftp(self, url):
@@ -199,8 +245,8 @@ class ftpwrapper:
raise IOError, ('ftp error', reason)
if not conn:
# Try a directory listing
- if file: cmd = 'NLST ' + file
- else: cmd = 'NLST'
+ if file: cmd = 'LIST ' + file
+ else: cmd = 'LIST'
conn = self.ftp.transfercmd(cmd)
return addclosehook(conn.makefile('r'), self.ftp.voidresp)
@@ -215,6 +261,11 @@ class addbase:
def __del__(self):
self.close()
def close(self):
+ self.read = None
+ self.readline = None
+ self.readlines = None
+ self.fileno = None
+ self.fp.close()
self.fp = None
# Class to add a close hook to an open file
@@ -227,7 +278,8 @@ class addclosehook(addbase):
if self.closehook:
apply(self.closehook, self.hookargs)
self.closehook = None
- self.fp = None
+ self.hookargs = None
+ addbase.close(self)
# class to add an info() method to an open file
class addinfo(addbase):
@@ -239,6 +291,7 @@ class addinfo(addbase):
# Utilities to parse URLs:
+# unwrap('<URL:type://host/path>') --> 'type://host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splitport('host:port') --> 'host', 'port'
@@ -246,6 +299,14 @@ class addinfo(addbase):
# splittag('/path#tag') --> '/path', 'tag'
# splitgophertype('/Xselector') --> 'X', 'selector'
+def unwrap(url):
+ import string
+ url = string.strip(url)
+ if url[:1] == '<' and url[-1:] == '>':
+ url = string.strip(url[1:-1])
+ if url[:4] == 'URL:': url = string.strip(url[4:])
+ return url
+
_typeprog = regex.compile('^\([^/:]+\):\(.*\)$')
def splittype(url):
if _typeprog.match(url) >= 0: return _typeprog.group(1, 2)
@@ -291,10 +352,21 @@ def test():
'gopher://gopher.cwi.nl/11/',
'http://www.cwi.nl/index.html',
]
- for arg in args:
- print '-'*10, arg, '-'*10
- print regsub.gsub('\r', '', urlopen(arg).read())
- print '-'*40
+ try:
+ for url in args:
+ print '-'*10, url, '-'*10
+ fn, h = urlretrieve(url)
+ print fn, h
+ if h:
+ print '======'
+ for k in h.keys(): print k + ':', h[k]
+ print '======'
+ fp = open(fn, 'r')
+ data = fp.read()
+ print regsub.gsub('\r', '', data)
+ print '-'*40
+ finally:
+ _urlopener.cleanup()
# Run test program when run as a script
if __name__ == '__main__':