summaryrefslogtreecommitdiffstats
path: root/Lib/urlopen.py
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>1994-02-22 19:03:38 (GMT)
committerGuido van Rossum <guido@python.org>1994-02-22 19:03:38 (GMT)
commit749057be5afc425a7e9185062b239d44e20fc2be (patch)
tree07261d55819e4f034c2a5cea86a88e3991749f4d /Lib/urlopen.py
parent76ca3c17f0922c17b6da43f7345b4f26cff854bc (diff)
downloadcpython-749057be5afc425a7e9185062b239d44e20fc2be.zip
cpython-749057be5afc425a7e9185062b239d44e20fc2be.tar.gz
cpython-749057be5afc425a7e9185062b239d44e20fc2be.tar.bz2
Redesigned as a class
Diffstat (limited to 'Lib/urlopen.py')
-rwxr-xr-xLib/urlopen.py307
1 files changed, 193 insertions, 114 deletions
diff --git a/Lib/urlopen.py b/Lib/urlopen.py
index c24b3f9..2b2729d 100755
--- a/Lib/urlopen.py
+++ b/Lib/urlopen.py
@@ -6,110 +6,170 @@
# IETF URL Working Group 14 July 1993
# draft-ietf-uri-url-01.txt
#
-# The object returned by urlopen() will differ per protocol.
-# All you know is that is has methods read(), fileno(), close() and info().
-# The read(), fileno() and close() methods work like those of open files.
+# The object returned by URLopener().open(file) will differ per
+# protocol. All you know is that is has methods read(), readline(),
+# readlines(), fileno(), close() and info(). The read*(), fileno()
+# and close() methods work like those of open files.
# The info() method returns an rfc822.Message object which can be
# used to query various info about the object, if available.
# (rfc822.Message objects are queried with the getheader() method.)
import socket
import regex
-import regsub
-import string
-import rfc822
-import ftplib
-# External interface -- use urlopen(file) as if it were open(file, 'r')
+# This really consists of two pieces:
+# (1) a class which handles opening of all sorts of URLs
+# (plus assorted utilities etc.)
+# (2) a set of functions for parsing URLs
+# XXX Should these be separated out into different modules?
+
+
+# Shortcut for basic usage
+_urlopener = None
def urlopen(url):
- url = string.strip(url)
- if url[:1] == '<' and url[-1:] == '>': url = string.strip(url[1:-1])
- if url[:4] == 'URL:': url = string.strip(url[4:])
- type, url = splittype(url)
- if not type: type = 'file'
- type = regsub.gsub('-', '_', type)
- try:
- func = eval('open_' + type)
- except NameError:
- raise IOError, ('url error', 'unknown url type', type)
- try:
- return func(url)
- except socket.error, msg:
- raise IOError, ('socket error', msg)
-
-
-# Each routine of the form open_<type> knows how to open that type of URL
-
-# Use HTTP protocol
-def open_http(url):
- import httplib
- host, selector = splithost(url)
- h = httplib.HTTP(host)
- h.putrequest('GET', selector)
- errcode, errmsg, headers = h.getreply()
- if errcode == 200: return makefile(h.getfile(), headers)
- else: raise IOError, ('http error', errcode, errmsg, headers)
-
-# Empty rfc822.Message object
-noheaders = rfc822.Message(open('/dev/null', 'r'))
-noheaders.fp.close() # Recycle file descriptor
-
-# Use Gopher protocol
-def open_gopher(url):
- import gopherlib
- host, selector = splithost(url)
- type, selector = splitgophertype(selector)
- selector, query = splitquery(selector)
- if query: fp = gopherlib.send_query(selector, query, host)
- else: fp = gopherlib.send_selector(selector, host)
- return makefile(fp, noheaders)
-
-# Use local file or FTP depending on form of URL
-localhost = socket.gethostbyname('localhost')
-thishost = socket.gethostbyname(socket.gethostname())
-def open_file(url):
- host, file = splithost(url)
- if not host: return makefile(open(file, 'r'), noheaders)
- host, port = splitport(host)
- if not port and socket.gethostbyname(host) in (localhost, thishost):
- try: fp = open(file, 'r')
- except IOError: fp = None
- if fp: return makefile(fp, noheaders)
- return open_ftp(url)
-
-# Use FTP protocol
+ global _urlopener
+ if not _urlopener:
+ _urlopener = URLopener()
+ return _urlopener.open(url)
+
+
+# Class to open URLs.
+# This is a class rather than just a subroutine because we may need
+# more than one set of global protocol-specific options.
ftpcache = {}
-ftperrors = (ftplib.error_reply,
- ftplib.error_temp,
- ftplib.error_perm,
- ftplib.error_proto)
-def open_ftp(url):
- host, file = splithost(url)
- host, port = splitport(host)
- host = socket.gethostbyname(host)
- if not port: port = ftplib.FTP_PORT
- key = (host, port)
- try:
- if not ftpcache.has_key(key):
- ftpcache[key] = ftpwrapper(host, port)
- return makefile(ftpcache[key].retrfile(file), noheaders)
- except ftperrors, msg:
- raise IOError, ('ftp error', msg)
+class URLopener:
+ # Constructor
+ def __init__(self):
+ self.addheaders = []
+ self.ftpcache = ftpcache
+ # Undocumented feature: you can use a different
+ # ftp cache by assigning to the .ftpcache member;
+ # in case you want logically independent URL openers
-# Utility classes
+ # Add a header to be used by the HTTP interface only
+ # e.g. u.addheader('Accept', 'sound/basic')
+ def addheader(self, *args):
+ self.addheaders.append(args)
-# Class used to add an info() method to a file object
-class makefile:
- def __init__(self, fp, headers):
- self.fp = fp
- self.headers = headers
- self.read = self.fp.read
- self.fileno = self.fp.fileno
- self.close = self.fp.close
- def info(self):
- return self.headers
+ # External interface
+ # Use URLopener().open(file) instead of open(file, 'r')
+ def open(self, url):
+ import string
+ url = string.strip(url)
+ if url[:1] == '<' and url[-1:] == '>':
+ url = string.strip(url[1:-1])
+ if url[:4] == 'URL:': url = string.strip(url[4:])
+ type, url = splittype(url)
+ if not type: type = 'file'
+ name = 'open_' + type
+ if '-' in name:
+ import regsub
+ name = regsub.gsub('-', '_', name)
+ if not hasattr(self, name):
+ raise IOError, ('url error', 'unknown url type', type)
+ meth = getattr(self, name)
+ try:
+ return meth(url)
+ except socket.error, msg:
+ raise IOError, ('socket error', msg)
+
+ # Each method named open_<type> knows how to open that type of URL
+
+ # Use HTTP protocol
+ def open_http(self, url):
+ import httplib
+ host, selector = splithost(url)
+ h = httplib.HTTP(host)
+ h.putrequest('GET', selector)
+ for args in self.addheaders: apply(h.putheader, args)
+ errcode, errmsg, headers = h.getreply()
+ if errcode == 200: return addinfo(h.getfile(), headers)
+ else: raise IOError, ('http error', errcode, errmsg, headers)
+
+ # Use Gopher protocol
+ def open_gopher(self, url):
+ import gopherlib
+ host, selector = splithost(url)
+ type, selector = splitgophertype(selector)
+ selector, query = splitquery(selector)
+ if query: fp = gopherlib.send_query(selector, query, host)
+ else: fp = gopherlib.send_selector(selector, host)
+ return addinfo(fp, noheaders())
+
+ # Use local file or FTP depending on form of URL
+ def open_file(self, url):
+ host, file = splithost(url)
+ if not host: return addinfo(open(file, 'r'), noheaders())
+ host, port = splitport(host)
+ if not port and socket.gethostbyname(host) in (
+ localhost(), thishost()):
+ try: fp = open(file, 'r')
+ except IOError: fp = None
+ if fp: return addinfo(fp, noheaders())
+ return self.open_ftp(url)
+
+ # Use FTP protocol
+ def open_ftp(self, url):
+ host, file = splithost(url)
+ host, port = splitport(host)
+ host = socket.gethostbyname(host)
+ if not port:
+ import ftplib
+ port = ftplib.FTP_PORT
+ key = (host, port)
+ try:
+ if not self.ftpcache.has_key(key):
+ self.ftpcache[key] = ftpwrapper(host, port)
+ return addinfo(self.ftpcache[key].retrfile(file),
+ noheaders())
+ except ftperrors(), msg:
+ raise IOError, ('ftp error', msg)
+
+
+# Utility functions
+
+# Return the IP address of the magic hostname 'localhost'
+_localhost = None
+def localhost():
+ global _localhost
+ if not _localhost:
+ _localhost = socket.gethostbyname('localhost')
+ return _localhost
+
+# Return the IP address of the current host
+_thishost = None
+def thishost():
+ global _thishost
+ if not _thishost:
+ _thishost = socket.gethostbyname(socket.gethostname())
+ return _thishost
+
+# Return the set of errors raised by the FTP class
+_ftperrors = None
+def ftperrors():
+ global _ftperrors
+ if not _ftperrors:
+ import ftplib
+ _ftperrors = (ftplib.error_reply,
+ ftplib.error_temp,
+ ftplib.error_perm,
+ ftplib.error_proto)
+ return _ftperrors
+
+# Return an empty rfc822.Message object
+_noheaders = None
+def noheaders():
+ global _noheaders
+ if not _noheaders:
+ import rfc822
+ _noheaders = rfc822.Message(open('/dev/null', 'r'))
+ _noheaders.fp.close() # Recycle file descriptor
+ return _noheaders
+
+
+# Utility classes
# Class used by open_ftp() for cache of open FTP connections
class ftpwrapper:
@@ -118,10 +178,12 @@ class ftpwrapper:
self.port = port
self.init()
def init(self):
+ import ftplib
self.ftp = ftplib.FTP()
self.ftp.connect(self.host, self.port)
self.ftp.login()
def retrfile(self, file):
+ import ftplib
try:
self.ftp.voidcmd('TYPE I')
except ftplib.all_errors:
@@ -140,27 +202,43 @@ class ftpwrapper:
if file: cmd = 'NLST ' + file
else: cmd = 'NLST'
conn = self.ftp.transfercmd(cmd)
- return fakefile(self.ftp, conn)
-
-# Class used by ftpwrapper to handle response when transfer is complete
-class fakefile:
- def __init__(self, ftp, conn):
- self.ftp = ftp
- self.conn = conn
- self.fp = self.conn.makefile('r')
+ return addclosehook(conn.makefile('r'), self.ftp.voidresp)
+
+# Base class for addinfo and addclosehook
+class addbase:
+ def __init__(self, fp):
+ self.fp = fp
self.read = self.fp.read
+ self.readline = self.fp.readline
+ self.readlines = self.fp.readlines
self.fileno = self.fp.fileno
def __del__(self):
self.close()
def close(self):
- self.conn = None
self.fp = None
- self.read = None
- if self.ftp: self.ftp.voidresp()
- self.ftp = None
+
+# Class to add a close hook to an open file
+class addclosehook(addbase):
+ def __init__(self, fp, closehook, *hookargs):
+ addbase.__init__(self, fp)
+ self.closehook = closehook
+ self.hookargs = hookargs
+ def close(self):
+ if self.closehook:
+ apply(self.closehook, self.hookargs)
+ self.closehook = None
+ self.fp = None
+
+# class to add an info() method to an open file
+class addinfo(addbase):
+ def __init__(self, fp, headers):
+ addbase.__init__(self, fp)
+ self.headers = headers
+ def info(self):
+ return self.headers
-# Utilities to split url parts into components:
+# Utilities to parse URLs:
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splitport('host:port') --> 'host', 'port'
@@ -168,29 +246,29 @@ class fakefile:
# splittag('/path#tag') --> '/path', 'tag'
# splitgophertype('/Xselector') --> 'X', 'selector'
-typeprog = regex.compile('^\([^/:]+\):\(.*\)$')
+_typeprog = regex.compile('^\([^/:]+\):\(.*\)$')
def splittype(url):
- if typeprog.match(url) >= 0: return typeprog.group(1, 2)
+ if _typeprog.match(url) >= 0: return _typeprog.group(1, 2)
return None, url
-hostprog = regex.compile('^//\([^/]+\)\(.*\)$')
+_hostprog = regex.compile('^//\([^/]+\)\(.*\)$')
def splithost(url):
- if hostprog.match(url) >= 0: return hostprog.group(1, 2)
+ if _hostprog.match(url) >= 0: return _hostprog.group(1, 2)
return None, url
-portprog = regex.compile('^\(.*\):\([0-9]+\)$')
+_portprog = regex.compile('^\(.*\):\([0-9]+\)$')
def splitport(host):
- if portprog.match(host) >= 0: return portprog.group(1, 2)
+ if _portprog.match(host) >= 0: return _portprog.group(1, 2)
return host, None
-queryprog = regex.compile('^\(.*\)\?\([^?]*\)$')
+_queryprog = regex.compile('^\(.*\)\?\([^?]*\)$')
def splitquery(url):
- if queryprog.match(url) >= 0: return queryprog.group(1, 2)
+ if _queryprog.match(url) >= 0: return _queryprog.group(1, 2)
return url, None
-tagprog = regex.compile('^\(.*\)#\([^#]*\)$')
+_tagprog = regex.compile('^\(.*\)#\([^#]*\)$')
def splittag(url):
- if tagprog.match(url) >= 0: return tagprog.group(1, 2)
+ if _tagprog.match(url) >= 0: return _tagprog.group(1, 2)
return url, None
def splitgophertype(selector):
@@ -202,6 +280,7 @@ def splitgophertype(selector):
# Test program
def test():
import sys
+ import regsub
args = sys.argv[1:]
if not args:
args = [