summary | refs | log | tree | commit | diff | stats
path: root/Lib/urllib.py
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 1998-12-18 15:25:22 (GMT)
committer: Guido van Rossum <guido@python.org> 1998-12-18 15:25:22 (GMT)
commit: 33add0a95aa6c5ba5dbb8cae7b51a253209ecd6f (patch)
tree: 906438c8bced8f11620b5e66117ecc9c1a5b22e8 /Lib/urllib.py
parent: dc86a4e5bb537c979377d43e7eafeb473196b5e0 (diff)
download: cpython-33add0a95aa6c5ba5dbb8cae7b51a253209ecd6f.zip
cpython-33add0a95aa6c5ba5dbb8cae7b51a253209ecd6f.tar.gz
cpython-33add0a95aa6c5ba5dbb8cae7b51a253209ecd6f.tar.bz2
Sjoerd Mullender:
File names with "funny" characters get translated wrong by pathname2url (any variety). E.g. the (Unix) file "/ufs/sjoerd/#tmp" gets translated into "/ufs/sjoerd/#tmp" which, when interpreted as a URL, is file "/ufs/sjoerd/" with fragment ID "tmp".

Here's an easy fix. (An alternative fix would be to change the various implementations of pathname2url and url2pathname to include calls to quote and unquote.)

[The main problem is with the normal use of URLs:
    url = pathname2url(file)
    transmit url
    url, tag = splittag(url)
    urlopen(url)
]

In addition, this patch fixes some uses of unquote:
- the host part of URLs should be unquoted
- the file path in the FTP URL should be unquoted before it is split into components
- because of the latter, I removed all unquoting from ftpwrapper, and moved it to the caller, but that is not essential
Diffstat (limited to 'Lib/urllib.py')
-rw-r--r--  Lib/urllib.py | 22
1 file changed, 16 insertions, 6 deletions
diff --git a/Lib/urllib.py b/Lib/urllib.py
index 62e5c7f..d175eef 100644
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -42,6 +42,13 @@ else:
def pathname2url(pathname):
return pathname
+_url2pathname = url2pathname
+def url2pathname(url):
+ return _url2pathname(unquote(url))
+_pathname2url = pathname2url
+def pathname2url(p):
+ return quote(_pathname2url(p))
+
# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
# (plus assorted utilities etc.)
@@ -228,6 +235,7 @@ class URLopener:
host, selector = splithost(url)
if host:
user_passwd, host = splituser(host)
+ host = unquote(host)
realhost = host
else:
host, selector = url
@@ -298,6 +306,7 @@ class URLopener:
import gopherlib
host, selector = splithost(url)
if not host: raise IOError, ('gopher error', 'no host given')
+ host = unquote(host)
type, selector = splitgophertype(selector)
selector, query = splitquery(selector)
selector = unquote(selector)
@@ -329,7 +338,6 @@ class URLopener:
host, port = splitport(host)
if not port and socket.gethostbyname(host) in (
localhost(), thishost()):
- file = unquote(file)
return addinfourl(
open(url2pathname(file), 'rb'),
headers, 'file:'+file)
@@ -343,6 +351,9 @@ class URLopener:
user, host = splituser(host)
if user: user, passwd = splitpasswd(user)
else: passwd = None
+ host = unquote(host)
+ user = unquote(user or '')
+ passwd = unquote(passwd or '')
host = socket.gethostbyname(host)
if not port:
import ftplib
@@ -350,6 +361,7 @@ class URLopener:
else:
port = int(port)
path, attrs = splitattr(path)
+ path = unquote(path)
dirs = string.splitfields(path, '/')
dirs, file = dirs[:-1], dirs[-1]
if dirs and not dirs[0]: dirs = dirs[1:]
@@ -548,13 +560,11 @@ def noheaders():
# Class used by open_ftp() for cache of open FTP connections
class ftpwrapper:
def __init__(self, user, passwd, host, port, dirs):
- self.user = unquote(user or '')
- self.passwd = unquote(passwd or '')
+ self.user = user
+ self.passwd = passwd
self.host = host
self.port = port
- self.dirs = []
- for dir in dirs:
- self.dirs.append(unquote(dir))
+ self.dirs = dirs
self.init()
def init(self):
import ftplib