Added __del__(), close(), cleanup(), retrieve() and open_local_file()

author: Guido van Rossum <guido@python.org> 1994-02-24 13:50:39 (GMT)
committer: Guido van Rossum <guido@python.org> 1994-02-24 13:50:39 (GMT)
commit: d5b9ea1c67d997e7ff24bf70aa96bec6f0bf28d9 (patch)
tree: aa76d9d3a4767a1b8c5b40bfa4bf0ca5c594d9e0 /Lib/urlopen.py
parent: 555915a90b729b8205b4068f84e8ccf741d1a644 (diff)
download: cpython-d5b9ea1c67d997e7ff24bf70aa96bec6f0bf28d9.zip
cpython-d5b9ea1c67d997e7ff24bf70aa96bec6f0bf28d9.tar.gz
cpython-d5b9ea1c67d997e7ff24bf70aa96bec6f0bf28d9.tar.bz2
1 files changed, 90 insertions, 18 deletions
diff --git a/Lib/urlopen.py b/Lib/urlopen.py
index 2b2729d..0366bda 100755
--- a/Lib/urlopen.py
+++ b/Lib/urlopen.py
@@ -32,6 +32,11 @@ def urlopen(url):
 	if not _urlopener:
 		_urlopener = URLopener()
 	return _urlopener.open(url)
+def urlretrieve(url):
+	global _urlopener
+	if not _urlopener:
+		_urlopener = URLopener()
+	return _urlopener.retrieve(url)
 
 
 # Class to open URLs.
@@ -44,10 +49,25 @@ class URLopener:
 	def __init__(self):
 		self.addheaders = []
 		self.ftpcache = ftpcache
+		self.tempfiles = []
 		# Undocumented feature: you can use a different
 		# ftp cache by assigning to the .ftpcache member;
 		# in case you want logically independent URL openers
 
+	def __del__(self):
+		self.close()
+
+	def close(self):
+		self.cleanup()
+
+	def cleanup(self):
+		import os
+		for tfn in self.tempfiles:
+			try:
+				os.unlink(tfn)
+			except os.error:
+				pass
+
 	# Add a header to be used by the HTTP interface only
 	# e.g. u.addheader('Accept', 'sound/basic')
 	def addheader(self, *args):
@@ -56,13 +76,8 @@ class URLopener:
 	# External interface
 	# Use URLopener().open(file) instead of open(file, 'r')
 	def open(self, url):
-		import string
-		url = string.strip(url)
-		if url[:1] == '<' and url[-1:] == '>':
-			url = string.strip(url[1:-1])
-		if url[:4] == 'URL:': url = string.strip(url[4:])
-		type, url = splittype(url)
-		if not type: type = 'file'
+		type, url = splittype(unwrap(url))
+ 		if not type: type = 'file'
 		name = 'open_' + type
 		if '-' in name:
 			import regsub
@@ -75,6 +90,32 @@ class URLopener:
 		except socket.error, msg:
 			raise IOError, ('socket error', msg)
 
+	# External interface
+	# retrieve(url) returns (filename, None) for a local object
+	# or (tempfilename, headers) for a remote object
+	def retrieve(self, url):
+		type, url1 = splittype(unwrap(url))
+		if not type or type == 'file':
+			try:
+				fp = self.open_local_file(url1)
+				return splithost(url1)[1], None
+			except IOError, msg:
+				pass
+		fp = self.open(url)
+		import tempfile
+		tfn = tempfile.mktemp()
+		self.tempfiles.append(tfn)
+		tfp = open(tfn, 'w')
+		bs = 1024*8
+		block = fp.read(bs)
+		while block:
+			tfp.write(block)
+			block = fp.read(bs)
+		headers = fp.info()
+		fp.close()
+		tfp.close()
+		return tfn, headers
+
 	# Each method named open_<type> knows how to open that type of URL
 
 	# Use HTTP protocol
@@ -100,15 +141,20 @@ class URLopener:
 
 	# Use local file or FTP depending on form of URL
 	def open_file(self, url):
+		try:
+			return self.open_local_file(url)
+		except IOError:
+			return self.open_ftp(url)
+
+	# Use local file
+	def open_local_file(self, url):
 		host, file = splithost(url)
 		if not host: return addinfo(open(file, 'r'), noheaders())
 		host, port = splitport(host)
 		if not port and socket.gethostbyname(host) in (
 			  localhost(), thishost()):
-			try: fp = open(file, 'r')
-			except IOError: fp = None
-			if fp: return addinfo(fp, noheaders())
-		return self.open_ftp(url)
+			return addinfo(open(file, 'r'), noheaders())
+		raise IOError, ('local file error', 'not on local host')
 
 	# Use FTP protocol
 	def open_ftp(self, url):
@@ -199,8 +245,8 @@ class ftpwrapper:
 					raise IOError, ('ftp error', reason)
 		if not conn:
 			# Try a directory listing
-			if file: cmd = 'NLST ' + file
-			else: cmd = 'NLST'
+			if file: cmd = 'LIST ' + file
+			else: cmd = 'LIST'
 			conn = self.ftp.transfercmd(cmd)
 		return addclosehook(conn.makefile('r'), self.ftp.voidresp)
 
@@ -215,6 +261,11 @@ class addbase:
 	def __del__(self):
 		self.close()
 	def close(self):
+		self.read = None
+		self.readline = None
+		self.readlines = None
+		self.fileno = None
+		self.fp.close()
 		self.fp = None
 
 # Class to add a close hook to an open file
@@ -227,7 +278,8 @@ class addclosehook(addbase):
 		if self.closehook:
 			apply(self.closehook, self.hookargs)
 			self.closehook = None
-		self.fp = None
+			self.hookargs = None
+		addbase.close(self)
 
 # class to add an info() method to an open file
 class addinfo(addbase):
@@ -239,6 +291,7 @@ class addinfo(addbase):
 
 
 # Utilities to parse URLs:
+# unwrap('<URL:type//host/path>') --> 'type//host/path'
 # splittype('type:opaquestring') --> 'type', 'opaquestring'
 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
 # splitport('host:port') --> 'host', 'port'
@@ -246,6 +299,14 @@ class addinfo(addbase):
 # splittag('/path#tag') --> '/path', 'tag'
 # splitgophertype('/Xselector') --> 'X', 'selector'
 
+def unwrap(url):
+	import string
+	url = string.strip(url)
+	if url[:1] == '<' and url[-1:] == '>':
+		url = string.strip(url[1:-1])
+	if url[:4] == 'URL:': url = string.strip(url[4:])
+	return url
+
 _typeprog = regex.compile('^\([^/:]+\):\(.*\)$')
 def splittype(url):
 	if _typeprog.match(url) >= 0: return _typeprog.group(1, 2)
@@ -291,10 +352,21 @@ def test():
 			'gopher://gopher.cwi.nl/11/',
 			'http://www.cwi.nl/index.html',
 			]
-	for arg in args:
-		print '-'*10, arg, '-'*10
-		print regsub.gsub('\r', '', urlopen(arg).read())
-	print '-'*40
+	try:
+		for url in args:
+			print '-'*10, url, '-'*10
+			fn, h = urlretrieve(url)
+			print fn, h
+			if h:
+				print '======'
+				for k in h.keys(): print k + ':', h[k]
+				print '======'
+			fp = open(fn, 'r')
+			data = fp.read()
+			print regsub.gsub('\r', '', data)
+		print '-'*40
+	finally:
+		_urlopener.cleanup()
 
 # Run test program when run as a script
 if __name__ == '__main__':
author	Guido van Rossum <guido@python.org>	1994-02-24 13:50:39 (GMT)
committer	Guido van Rossum <guido@python.org>	1994-02-24 13:50:39 (GMT)
commit	d5b9ea1c67d997e7ff24bf70aa96bec6f0bf28d9 (patch)
tree	aa76d9d3a4767a1b8c5b40bfa4bf0ca5c594d9e0 /Lib/urlopen.py
parent	555915a90b729b8205b4068f84e8ccf741d1a644 (diff)
download	cpython-d5b9ea1c67d997e7ff24bf70aa96bec6f0bf28d9.zip cpython-d5b9ea1c67d997e7ff24bf70aa96bec6f0bf28d9.tar.gz cpython-d5b9ea1c67d997e7ff24bf70aa96bec6f0bf28d9.tar.bz2