diff options
author | Guido van Rossum <guido@python.org> | 2008-10-02 18:55:37 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2008-10-02 18:55:37 (GMT) |
commit | f0af3e30db9475ab68bcb1f1ce0b5581e214df76 (patch) | |
tree | 71efbc67686d96e8c8a81dd97c75c419adf36657 /Lib/posixpath.py | |
parent | fefeca53eebe8665c08ac0c041639ada3c9f9446 (diff) | |
download | cpython-f0af3e30db9475ab68bcb1f1ce0b5581e214df76.zip cpython-f0af3e30db9475ab68bcb1f1ce0b5581e214df76.tar.gz cpython-f0af3e30db9475ab68bcb1f1ce0b5581e214df76.tar.bz2 |
Issue #3187: Better support for "undecodable" filenames. Code by Victor
Stinner, with small tweaks by GvR.
Diffstat (limited to 'Lib/posixpath.py')
-rw-r--r-- | Lib/posixpath.py | 171 |
1 files changed, 128 insertions, 43 deletions
```diff
diff --git a/Lib/posixpath.py b/Lib/posixpath.py
index 575492f..745c920 100644
--- a/Lib/posixpath.py
+++ b/Lib/posixpath.py
@@ -11,6 +11,7 @@ for manipulation of the pathname component of URLs.
 """
 
 import os
+import sys
 import stat
 import genericpath
 from genericpath import *
@@ -23,7 +24,8 @@ __all__ = ["normcase","isabs","join","splitdrive","split","splitext",
            "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
            "devnull","realpath","supports_unicode_filenames","relpath"]
 
-# strings representing various path-related bits and pieces
+# Strings representing various path-related bits and pieces.
+# These are primarily for export; internally, they are hardcoded.
 curdir = '.'
 pardir = '..'
 extsep = '.'
@@ -33,6 +35,12 @@ defpath = ':/bin:/usr/bin'
 altsep = None
 devnull = '/dev/null'
 
+def _get_sep(path):
+    if isinstance(path, bytes):
+        return b'/'
+    else:
+        return '/'
+
 # Normalize the case of a pathname. Trivial in Posix, string.lower on Mac.
 # On MS-DOS this may also turn slashes into backslashes; however, other
 # normalizations (such as optimizing '../' away) are not allowed
@@ -40,6 +48,7 @@ devnull = '/dev/null'
 
 def normcase(s):
     """Normalize case of pathname. Has no effect under Posix"""
+    # TODO: on Mac OS X, this should really return s.lower().
     return s
 
 
@@ -48,7 +57,8 @@ def normcase(s):
 
 def isabs(s):
     """Test whether a path is absolute"""
-    return s.startswith('/')
+    sep = _get_sep(s)
+    return s.startswith(sep)
 
 
 # Join pathnames.
@@ -59,14 +69,15 @@ def join(a, *p):
     """Join two or more pathname components, inserting '/' as needed.
     If any component is an absolute path, all previous path components
     will be discarded."""
+    sep = _get_sep(a)
     path = a
     for b in p:
-        if b.startswith('/'):
+        if b.startswith(sep):
             path = b
-        elif path == '' or path.endswith('/'):
+        elif not path or path.endswith(sep):
             path += b
         else:
-            path += '/' + b
+            path += sep + b
     return path
 
 
@@ -78,10 +89,11 @@ def join(a, *p):
 def split(p):
     """Split a pathname. Returns tuple "(head, tail)" where "tail" is
     everything after the final slash. Either part may be empty."""
-    i = p.rfind('/') + 1
+    sep = _get_sep(p)
+    i = p.rfind(sep) + 1
     head, tail = p[:i], p[i:]
-    if head and head != '/'*len(head):
-        head = head.rstrip('/')
+    if head and head != sep*len(head):
+        head = head.rstrip(sep)
     return head, tail
 
 
@@ -91,7 +103,13 @@ def split(p):
 # It is always true that root + ext == p.
 
 def splitext(p):
-    return genericpath._splitext(p, sep, altsep, extsep)
+    if isinstance(p, bytes):
+        sep = b'/'
+        extsep = b'.'
+    else:
+        sep = '/'
+        extsep = '.'
+    return genericpath._splitext(p, sep, None, extsep)
 splitext.__doc__ = genericpath._splitext.__doc__
 
 # Split a pathname into a drive specification and the rest of the
@@ -100,14 +118,15 @@ splitext.__doc__ = genericpath._splitext.__doc__
 def splitdrive(p):
     """Split a pathname into drive and path. On Posix, drive is always
     empty."""
-    return '', p
+    return p[:0], p
 
 
 # Return the tail (basename) part of a path, same as split(path)[1].
 
 def basename(p):
     """Returns the final component of a pathname"""
-    i = p.rfind('/') + 1
+    sep = _get_sep(p)
+    i = p.rfind(sep) + 1
     return p[i:]
 
 
@@ -115,10 +134,11 @@ def basename(p):
 
 def dirname(p):
     """Returns the directory component of a pathname"""
-    i = p.rfind('/') + 1
+    sep = _get_sep(p)
+    i = p.rfind(sep) + 1
     head = p[:i]
-    if head and head != '/'*len(head):
-        head = head.rstrip('/')
+    if head and head != sep*len(head):
+        head = head.rstrip(sep)
     return head
 
 
@@ -179,7 +199,11 @@ def ismount(path):
     """Test whether a path is a mount point"""
     try:
         s1 = os.lstat(path)
-        s2 = os.lstat(join(path, '..'))
+        if isinstance(path, bytes):
+            parent = join(path, b'..')
+        else:
+            parent = join(path, '..')
+        s2 = os.lstat(parent)
     except os.error:
         return False # It doesn't exist -- so not a mount point :-)
     dev1 = s1.st_dev
@@ -205,9 +229,14 @@ def ismount(path):
 def expanduser(path):
     """Expand ~ and ~user constructions. If user or $HOME is unknown,
     do nothing."""
-    if not path.startswith('~'):
+    if isinstance(path, bytes):
+        tilde = b'~'
+    else:
+        tilde = '~'
+    if not path.startswith(tilde):
         return path
-    i = path.find('/', 1)
+    sep = _get_sep(path)
+    i = path.find(sep, 1)
     if i < 0:
         i = len(path)
     if i == 1:
@@ -218,12 +247,17 @@ def expanduser(path):
             userhome = os.environ['HOME']
     else:
         import pwd
+        name = path[1:i]
+        if isinstance(name, bytes):
+            name = str(name, 'ASCII')
         try:
-            pwent = pwd.getpwnam(path[1:i])
+            pwent = pwd.getpwnam(name)
         except KeyError:
             return path
         userhome = pwent.pw_dir
-    userhome = userhome.rstrip('/')
+    if isinstance(path, bytes):
+        userhome = userhome.encode(sys.getfilesystemencoding())
+    userhome = userhome.rstrip(sep)
     return userhome + path[i:]
 
 
@@ -232,28 +266,47 @@ def expanduser(path):
 # Non-existent variables are left unchanged.
 
 _varprog = None
+_varprogb = None
 
 def expandvars(path):
     """Expand shell variables of form $var and ${var}. Unknown variables
     are left unchanged."""
-    global _varprog
-    if '$' not in path:
-        return path
-    if not _varprog:
-        import re
-        _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
+    global _varprog, _varprogb
+    if isinstance(path, bytes):
+        if b'$' not in path:
+            return path
+        if not _varprogb:
+            import re
+            _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
+        search = _varprogb.search
+        start = b'{'
+        end = b'}'
+    else:
+        if '$' not in path:
+            return path
+        if not _varprog:
+            import re
+            _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
+        search = _varprog.search
+        start = '{'
+        end = '}'
     i = 0
     while True:
-        m = _varprog.search(path, i)
+        m = search(path, i)
         if not m:
             break
         i, j = m.span(0)
         name = m.group(1)
-        if name.startswith('{') and name.endswith('}'):
+        if name.startswith(start) and name.endswith(end):
             name = name[1:-1]
+        if isinstance(name, bytes):
+            name = str(name, 'ASCII')
         if name in os.environ:
             tail = path[j:]
-            path = path[:i] + os.environ[name]
+            value = os.environ[name]
+            if isinstance(path, bytes):
+                value = value.encode('ASCII')
+            path = path[:i] + value
             i = len(path)
             path += tail
         else:
@@ -267,35 +320,49 @@ def expandvars(path):
 
 def normpath(path):
     """Normalize path, eliminating double slashes, etc."""
-    if path == '':
-        return '.'
-    initial_slashes = path.startswith('/')
+    if isinstance(path, bytes):
+        sep = b'/'
+        empty = b''
+        dot = b'.'
+        dotdot = b'..'
+    else:
+        sep = '/'
+        empty = ''
+        dot = '.'
+        dotdot = '..'
+    if path == empty:
+        return dot
+    initial_slashes = path.startswith(sep)
     # POSIX allows one or two initial slashes, but treats three or more
     # as single slash.
     if (initial_slashes and
-        path.startswith('//') and not path.startswith('///')):
+        path.startswith(sep*2) and not path.startswith(sep*3)):
         initial_slashes = 2
-    comps = path.split('/')
+    comps = path.split(sep)
     new_comps = []
     for comp in comps:
-        if comp in ('', '.'):
+        if comp in (empty, dot):
             continue
-        if (comp != '..' or (not initial_slashes and not new_comps) or
-             (new_comps and new_comps[-1] == '..')):
+        if (comp != dotdot or (not initial_slashes and not new_comps) or
+             (new_comps and new_comps[-1] == dotdot)):
             new_comps.append(comp)
         elif new_comps:
             new_comps.pop()
     comps = new_comps
-    path = '/'.join(comps)
+    path = sep.join(comps)
     if initial_slashes:
-        path = '/'*initial_slashes + path
-    return path or '.'
+        path = sep*initial_slashes + path
+    return path or dot
 
 
 def abspath(path):
     """Return an absolute path."""
     if not isabs(path):
-        path = join(os.getcwd(), path)
+        if isinstance(path, bytes):
+            cwd = os.getcwdb()
+        else:
+            cwd = os.getcwd()
+        path = join(cwd, path)
     return normpath(path)
 
 
@@ -305,10 +372,16 @@ def abspath(path):
 def realpath(filename):
     """Return the canonical path of the specified filename, eliminating any
 symbolic links encountered in the path."""
+    if isinstance(filename, bytes):
+        sep = b'/'
+        empty = b''
+    else:
+        sep = '/'
+        empty = ''
     if isabs(filename):
-        bits = ['/'] + filename.split('/')[1:]
+        bits = [sep] + filename.split(sep)[1:]
     else:
-        bits = [''] + filename.split('/')
+        bits = [empty] + filename.split(sep)
 
     for i in range(2, len(bits)+1):
         component = join(*bits[0:i])
@@ -347,12 +420,24 @@ def _resolve_link(path):
 
 supports_unicode_filenames = False
 
-def relpath(path, start=curdir):
+def relpath(path, start=None):
     """Return a relative version of a path"""
 
     if not path:
         raise ValueError("no path specified")
 
+    if isinstance(path, bytes):
+        curdir = b'.'
+        sep = b'/'
+        pardir = b'..'
+    else:
+        curdir = '.'
+        sep = '/'
+        pardir = '..'
+
+    if start is None:
+        start = curdir
+
     start_list = abspath(start).split(sep)
     path_list = abspath(path).split(sep)
 
```