diff options
author | Guido van Rossum <guido@python.org> | 1994-02-22 19:03:38 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1994-02-22 19:03:38 (GMT) |
commit | 749057be5afc425a7e9185062b239d44e20fc2be (patch) | |
tree | 07261d55819e4f034c2a5cea86a88e3991749f4d /Lib | |
parent | 76ca3c17f0922c17b6da43f7345b4f26cff854bc (diff) | |
download | cpython-749057be5afc425a7e9185062b239d44e20fc2be.zip cpython-749057be5afc425a7e9185062b239d44e20fc2be.tar.gz cpython-749057be5afc425a7e9185062b239d44e20fc2be.tar.bz2 |
Redesigned as a class
Diffstat (limited to 'Lib')
-rwxr-xr-x | Lib/urlopen.py | 307 |
1 files changed, 193 insertions, 114 deletions
diff --git a/Lib/urlopen.py b/Lib/urlopen.py index c24b3f9..2b2729d 100755 --- a/Lib/urlopen.py +++ b/Lib/urlopen.py @@ -6,110 +6,170 @@ # IETF URL Working Group 14 July 1993 # draft-ietf-uri-url-01.txt # -# The object returned by urlopen() will differ per protocol. -# All you know is that is has methods read(), fileno(), close() and info(). -# The read(), fileno() and close() methods work like those of open files. +# The object returned by URLopener().open(file) will differ per +# protocol. All you know is that is has methods read(), readline(), +# readlines(), fileno(), close() and info(). The read*(), fileno() +# and close() methods work like those of open files. # The info() method returns an rfc822.Message object which can be # used to query various info about the object, if available. # (rfc822.Message objects are queried with the getheader() method.) import socket import regex -import regsub -import string -import rfc822 -import ftplib -# External interface -- use urlopen(file) as if it were open(file, 'r') +# This really consists of two pieces: +# (1) a class which handles opening of all sorts of URLs +# (plus assorted utilities etc.) +# (2) a set of functions for parsing URLs +# XXX Should these be separated out into different modules? + + +# Shortcut for basic usage +_urlopener = None def urlopen(url): - url = string.strip(url) - if url[:1] == '<' and url[-1:] == '>': url = string.strip(url[1:-1]) - if url[:4] == 'URL:': url = string.strip(url[4:]) - type, url = splittype(url) - if not type: type = 'file' - type = regsub.gsub('-', '_', type) - try: - func = eval('open_' + type) - except NameError: - raise IOError, ('url error', 'unknown url type', type) - try: - return func(url) - except socket.error, msg: - raise IOError, ('socket error', msg) - - -# Each routine of the form open_<type> knows how to open that type of URL - -# Use HTTP protocol -def open_http(url): - import httplib - host, selector = splithost(url) - h = httplib.HTTP(host) - h.putrequest('GET', selector) - errcode, errmsg, headers = h.getreply() - if errcode == 200: return makefile(h.getfile(), headers) - else: raise IOError, ('http error', errcode, errmsg, headers) - -# Empty rfc822.Message object -noheaders = rfc822.Message(open('/dev/null', 'r')) -noheaders.fp.close() # Recycle file descriptor - -# Use Gopher protocol -def open_gopher(url): - import gopherlib - host, selector = splithost(url) - type, selector = splitgophertype(selector) - selector, query = splitquery(selector) - if query: fp = gopherlib.send_query(selector, query, host) - else: fp = gopherlib.send_selector(selector, host) - return makefile(fp, noheaders) - -# Use local file or FTP depending on form of URL -localhost = socket.gethostbyname('localhost') -thishost = socket.gethostbyname(socket.gethostname()) -def open_file(url): - host, file = splithost(url) - if not host: return makefile(open(file, 'r'), noheaders) - host, port = splitport(host) - if not port and socket.gethostbyname(host) in (localhost, thishost): - try: fp = open(file, 'r') - except IOError: fp = None - if fp: return makefile(fp, noheaders) - return open_ftp(url) - -# Use FTP protocol + global _urlopener + if not _urlopener: + _urlopener = URLopener() + return _urlopener.open(url) + + +# Class to open URLs. +# This is a class rather than just a subroutine because we may need +# more than one set of global protocol-specific options. ftpcache = {} -ftperrors = (ftplib.error_reply, - ftplib.error_temp, - ftplib.error_perm, - ftplib.error_proto) -def open_ftp(url): - host, file = splithost(url) - host, port = splitport(host) - host = socket.gethostbyname(host) - if not port: port = ftplib.FTP_PORT - key = (host, port) - try: - if not ftpcache.has_key(key): - ftpcache[key] = ftpwrapper(host, port) - return makefile(ftpcache[key].retrfile(file), noheaders) - except ftperrors, msg: - raise IOError, ('ftp error', msg) +class URLopener: + # Constructor + def __init__(self): + self.addheaders = [] + self.ftpcache = ftpcache + # Undocumented feature: you can use a different + # ftp cache by assigning to the .ftpcache member; + # in case you want logically independent URL openers -# Utility classes + # Add a header to be used by the HTTP interface only + # e.g. u.addheader('Accept', 'sound/basic') + def addheader(self, *args): + self.addheaders.append(args) -# Class used to add an info() method to a file object -class makefile: - def __init__(self, fp, headers): - self.fp = fp - self.headers = headers - self.read = self.fp.read - self.fileno = self.fp.fileno - self.close = self.fp.close - def info(self): - return self.headers + # External interface + # Use URLopener().open(file) instead of open(file, 'r') + def open(self, url): + import string + url = string.strip(url) + if url[:1] == '<' and url[-1:] == '>': + url = string.strip(url[1:-1]) + if url[:4] == 'URL:': url = string.strip(url[4:]) + type, url = splittype(url) + if not type: type = 'file' + name = 'open_' + type + if '-' in name: + import regsub + name = regsub.gsub('-', '_', name) + if not hasattr(self, name): + raise IOError, ('url error', 'unknown url type', type) + meth = getattr(self, name) + try: + return meth(url) + except socket.error, msg: + raise IOError, ('socket error', msg) + + # Each method named open_<type> knows how to open that type of URL + + # Use HTTP protocol + def open_http(self, url): + import httplib + host, selector = splithost(url) + h = httplib.HTTP(host) + h.putrequest('GET', selector) + for args in self.addheaders: apply(h.putheader, args) + errcode, errmsg, headers = h.getreply() + if errcode == 200: return addinfo(h.getfile(), headers) + else: raise IOError, ('http error', errcode, errmsg, headers) + + # Use Gopher protocol + def open_gopher(self, url): + import gopherlib + host, selector = splithost(url) + type, selector = splitgophertype(selector) + selector, query = splitquery(selector) + if query: fp = gopherlib.send_query(selector, query, host) + else: fp = gopherlib.send_selector(selector, host) + return addinfo(fp, noheaders()) + + # Use local file or FTP depending on form of URL + def open_file(self, url): + host, file = splithost(url) + if not host: return addinfo(open(file, 'r'), noheaders()) + host, port = splitport(host) + if not port and socket.gethostbyname(host) in ( + localhost(), thishost()): + try: fp = open(file, 'r') + except IOError: fp = None + if fp: return addinfo(fp, noheaders()) + return self.open_ftp(url) + + # Use FTP protocol + def open_ftp(self, url): + host, file = splithost(url) + host, port = splitport(host) + host = socket.gethostbyname(host) + if not port: + import ftplib + port = ftplib.FTP_PORT + key = (host, port) + try: + if not self.ftpcache.has_key(key): + self.ftpcache[key] = ftpwrapper(host, port) + return addinfo(self.ftpcache[key].retrfile(file), + noheaders()) + except ftperrors(), msg: + raise IOError, ('ftp error', msg) + + +# Utility functions + +# Return the IP address of the magic hostname 'localhost' +_localhost = None +def localhost(): + global _localhost + if not _localhost: + _localhost = socket.gethostbyname('localhost') + return _localhost + +# Return the IP address of the current host +_thishost = None +def thishost(): + global _thishost + if not _thishost: + _thishost = socket.gethostbyname(socket.gethostname()) + return _thishost + +# Return the set of errors raised by the FTP class +_ftperrors = None +def ftperrors(): + global _ftperrors + if not _ftperrors: + import ftplib + _ftperrors = (ftplib.error_reply, + ftplib.error_temp, + ftplib.error_perm, + ftplib.error_proto) + return _ftperrors + +# Return an empty rfc822.Message object +_noheaders = None +def noheaders(): + global _noheaders + if not _noheaders: + import rfc822 + _noheaders = rfc822.Message(open('/dev/null', 'r')) + _noheaders.fp.close() # Recycle file descriptor + return _noheaders + + +# Utility classes # Class used by open_ftp() for cache of open FTP connections class ftpwrapper: @@ -118,10 +178,12 @@ class ftpwrapper: self.port = port self.init() def init(self): + import ftplib self.ftp = ftplib.FTP() self.ftp.connect(self.host, self.port) self.ftp.login() def retrfile(self, file): + import ftplib try: self.ftp.voidcmd('TYPE I') except ftplib.all_errors: @@ -140,27 +202,43 @@ class ftpwrapper: if file: cmd = 'NLST ' + file else: cmd = 'NLST' conn = self.ftp.transfercmd(cmd) - return fakefile(self.ftp, conn) - -# Class used by ftpwrapper to handle response when transfer is complete -class fakefile: - def __init__(self, ftp, conn): - self.ftp = ftp - self.conn = conn - self.fp = self.conn.makefile('r') + return addclosehook(conn.makefile('r'), self.ftp.voidresp) + +# Base class for addinfo and addclosehook +class addbase: + def __init__(self, fp): + self.fp = fp self.read = self.fp.read + self.readline = self.fp.readline + self.readlines = self.fp.readlines self.fileno = self.fp.fileno def __del__(self): self.close() def close(self): - self.conn = None self.fp = None - self.read = None - if self.ftp: self.ftp.voidresp() - self.ftp = None + +# Class to add a close hook to an open file +class addclosehook(addbase): + def __init__(self, fp, closehook, *hookargs): + addbase.__init__(self, fp) + self.closehook = closehook + self.hookargs = hookargs + def close(self): + if self.closehook: + apply(self.closehook, self.hookargs) + self.closehook = None + self.fp = None + +# class to add an info() method to an open file +class addinfo(addbase): + def __init__(self, fp, headers): + addbase.__init__(self, fp) + self.headers = headers + def info(self): + return self.headers -# Utilities to split url parts into components: +# Utilities to parse URLs: # splittype('type:opaquestring') --> 'type', 'opaquestring' # splithost('//host[:port]/path') --> 'host[:port]', '/path' # splitport('host:port') --> 'host', 'port' @@ -168,29 +246,29 @@ class fakefile: # splittag('/path#tag') --> '/path', 'tag' # splitgophertype('/Xselector') --> 'X', 'selector' -typeprog = regex.compile('^\([^/:]+\):\(.*\)$') +_typeprog = regex.compile('^\([^/:]+\):\(.*\)$') def splittype(url): - if typeprog.match(url) >= 0: return typeprog.group(1, 2) + if _typeprog.match(url) >= 0: return _typeprog.group(1, 2) return None, url -hostprog = regex.compile('^//\([^/]+\)\(.*\)$') +_hostprog = regex.compile('^//\([^/]+\)\(.*\)$') def splithost(url): - if hostprog.match(url) >= 0: return hostprog.group(1, 2) + if _hostprog.match(url) >= 0: return _hostprog.group(1, 2) return None, url -portprog = regex.compile('^\(.*\):\([0-9]+\)$') +_portprog = regex.compile('^\(.*\):\([0-9]+\)$') def splitport(host): - if portprog.match(host) >= 0: return portprog.group(1, 2) + if _portprog.match(host) >= 0: return _portprog.group(1, 2) return host, None -queryprog = regex.compile('^\(.*\)\?\([^?]*\)$') +_queryprog = regex.compile('^\(.*\)\?\([^?]*\)$') def splitquery(url): - if queryprog.match(url) >= 0: return queryprog.group(1, 2) + if _queryprog.match(url) >= 0: return _queryprog.group(1, 2) return url, None -tagprog = regex.compile('^\(.*\)#\([^#]*\)$') +_tagprog = regex.compile('^\(.*\)#\([^#]*\)$') def splittag(url): - if tagprog.match(url) >= 0: return tagprog.group(1, 2) + if _tagprog.match(url) >= 0: return _tagprog.group(1, 2) return url, None def splitgophertype(selector): @@ -202,6 +280,7 @@ def splitgophertype(selector): # Test program def test(): import sys + import regsub args = sys.argv[1:] if not args: args = [ |