summary | refs | log | tree | commit | diff | stats
path: root/Lib/posixpath.py
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org>, 2008-10-02 18:55:37 (GMT)
committer: Guido van Rossum <guido@python.org>, 2008-10-02 18:55:37 (GMT)
commit: f0af3e30db9475ab68bcb1f1ce0b5581e214df76 (patch)
tree: 71efbc67686d96e8c8a81dd97c75c419adf36657 /Lib/posixpath.py
parent: fefeca53eebe8665c08ac0c041639ada3c9f9446 (diff)
download: cpython-f0af3e30db9475ab68bcb1f1ce0b5581e214df76.zip
cpython-f0af3e30db9475ab68bcb1f1ce0b5581e214df76.tar.gz
cpython-f0af3e30db9475ab68bcb1f1ce0b5581e214df76.tar.bz2
Issue #3187: Better support for "undecodable" filenames. Code by Victor
Stinner, with small tweaks by GvR.
Diffstat (limited to 'Lib/posixpath.py')
-rw-r--r-- Lib/posixpath.py 171
1 files changed, 128 insertions, 43 deletions
diff --git a/Lib/posixpath.py b/Lib/posixpath.py
index 575492f..745c920 100644
--- a/Lib/posixpath.py
+++ b/Lib/posixpath.py
@@ -11,6 +11,7 @@ for manipulation of the pathname component of URLs.
"""
import os
+import sys
import stat
import genericpath
from genericpath import *
@@ -23,7 +24,8 @@ __all__ = ["normcase","isabs","join","splitdrive","split","splitext",
"curdir","pardir","sep","pathsep","defpath","altsep","extsep",
"devnull","realpath","supports_unicode_filenames","relpath"]
-# strings representing various path-related bits and pieces
+# Strings representing various path-related bits and pieces.
+# These are primarily for export; internally, they are hardcoded.
curdir = '.'
pardir = '..'
extsep = '.'
@@ -33,6 +35,12 @@ defpath = ':/bin:/usr/bin'
altsep = None
devnull = '/dev/null'
+def _get_sep(path):
+ if isinstance(path, bytes):
+ return b'/'
+ else:
+ return '/'
+
# Normalize the case of a pathname. Trivial in Posix, string.lower on Mac.
# On MS-DOS this may also turn slashes into backslashes; however, other
# normalizations (such as optimizing '../' away) are not allowed
@@ -40,6 +48,7 @@ devnull = '/dev/null'
def normcase(s):
"""Normalize case of pathname. Has no effect under Posix"""
+ # TODO: on Mac OS X, this should really return s.lower().
return s
@@ -48,7 +57,8 @@ def normcase(s):
def isabs(s):
"""Test whether a path is absolute"""
- return s.startswith('/')
+ sep = _get_sep(s)
+ return s.startswith(sep)
# Join pathnames.
@@ -59,14 +69,15 @@ def join(a, *p):
"""Join two or more pathname components, inserting '/' as needed.
If any component is an absolute path, all previous path components
will be discarded."""
+ sep = _get_sep(a)
path = a
for b in p:
- if b.startswith('/'):
+ if b.startswith(sep):
path = b
- elif path == '' or path.endswith('/'):
+ elif not path or path.endswith(sep):
path += b
else:
- path += '/' + b
+ path += sep + b
return path
@@ -78,10 +89,11 @@ def join(a, *p):
def split(p):
"""Split a pathname. Returns tuple "(head, tail)" where "tail" is
everything after the final slash. Either part may be empty."""
- i = p.rfind('/') + 1
+ sep = _get_sep(p)
+ i = p.rfind(sep) + 1
head, tail = p[:i], p[i:]
- if head and head != '/'*len(head):
- head = head.rstrip('/')
+ if head and head != sep*len(head):
+ head = head.rstrip(sep)
return head, tail
@@ -91,7 +103,13 @@ def split(p):
# It is always true that root + ext == p.
def splitext(p):
- return genericpath._splitext(p, sep, altsep, extsep)
+ if isinstance(p, bytes):
+ sep = b'/'
+ extsep = b'.'
+ else:
+ sep = '/'
+ extsep = '.'
+ return genericpath._splitext(p, sep, None, extsep)
splitext.__doc__ = genericpath._splitext.__doc__
# Split a pathname into a drive specification and the rest of the
@@ -100,14 +118,15 @@ splitext.__doc__ = genericpath._splitext.__doc__
def splitdrive(p):
"""Split a pathname into drive and path. On Posix, drive is always
empty."""
- return '', p
+ return p[:0], p
# Return the tail (basename) part of a path, same as split(path)[1].
def basename(p):
"""Returns the final component of a pathname"""
- i = p.rfind('/') + 1
+ sep = _get_sep(p)
+ i = p.rfind(sep) + 1
return p[i:]
@@ -115,10 +134,11 @@ def basename(p):
def dirname(p):
"""Returns the directory component of a pathname"""
- i = p.rfind('/') + 1
+ sep = _get_sep(p)
+ i = p.rfind(sep) + 1
head = p[:i]
- if head and head != '/'*len(head):
- head = head.rstrip('/')
+ if head and head != sep*len(head):
+ head = head.rstrip(sep)
return head
@@ -179,7 +199,11 @@ def ismount(path):
"""Test whether a path is a mount point"""
try:
s1 = os.lstat(path)
- s2 = os.lstat(join(path, '..'))
+ if isinstance(path, bytes):
+ parent = join(path, b'..')
+ else:
+ parent = join(path, '..')
+ s2 = os.lstat(parent)
except os.error:
return False # It doesn't exist -- so not a mount point :-)
dev1 = s1.st_dev
@@ -205,9 +229,14 @@ def ismount(path):
def expanduser(path):
"""Expand ~ and ~user constructions. If user or $HOME is unknown,
do nothing."""
- if not path.startswith('~'):
+ if isinstance(path, bytes):
+ tilde = b'~'
+ else:
+ tilde = '~'
+ if not path.startswith(tilde):
return path
- i = path.find('/', 1)
+ sep = _get_sep(path)
+ i = path.find(sep, 1)
if i < 0:
i = len(path)
if i == 1:
@@ -218,12 +247,17 @@ def expanduser(path):
userhome = os.environ['HOME']
else:
import pwd
+ name = path[1:i]
+ if isinstance(name, bytes):
+ name = str(name, 'ASCII')
try:
- pwent = pwd.getpwnam(path[1:i])
+ pwent = pwd.getpwnam(name)
except KeyError:
return path
userhome = pwent.pw_dir
- userhome = userhome.rstrip('/')
+ if isinstance(path, bytes):
+ userhome = userhome.encode(sys.getfilesystemencoding())
+ userhome = userhome.rstrip(sep)
return userhome + path[i:]
@@ -232,28 +266,47 @@ def expanduser(path):
# Non-existent variables are left unchanged.
_varprog = None
+_varprogb = None
def expandvars(path):
"""Expand shell variables of form $var and ${var}. Unknown variables
are left unchanged."""
- global _varprog
- if '$' not in path:
- return path
- if not _varprog:
- import re
- _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
+ global _varprog, _varprogb
+ if isinstance(path, bytes):
+ if b'$' not in path:
+ return path
+ if not _varprogb:
+ import re
+ _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
+ search = _varprogb.search
+ start = b'{'
+ end = b'}'
+ else:
+ if '$' not in path:
+ return path
+ if not _varprog:
+ import re
+ _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
+ search = _varprog.search
+ start = '{'
+ end = '}'
i = 0
while True:
- m = _varprog.search(path, i)
+ m = search(path, i)
if not m:
break
i, j = m.span(0)
name = m.group(1)
- if name.startswith('{') and name.endswith('}'):
+ if name.startswith(start) and name.endswith(end):
name = name[1:-1]
+ if isinstance(name, bytes):
+ name = str(name, 'ASCII')
if name in os.environ:
tail = path[j:]
- path = path[:i] + os.environ[name]
+ value = os.environ[name]
+ if isinstance(path, bytes):
+ value = value.encode('ASCII')
+ path = path[:i] + value
i = len(path)
path += tail
else:
@@ -267,35 +320,49 @@ def expandvars(path):
def normpath(path):
"""Normalize path, eliminating double slashes, etc."""
- if path == '':
- return '.'
- initial_slashes = path.startswith('/')
+ if isinstance(path, bytes):
+ sep = b'/'
+ empty = b''
+ dot = b'.'
+ dotdot = b'..'
+ else:
+ sep = '/'
+ empty = ''
+ dot = '.'
+ dotdot = '..'
+ if path == empty:
+ return dot
+ initial_slashes = path.startswith(sep)
# POSIX allows one or two initial slashes, but treats three or more
# as single slash.
if (initial_slashes and
- path.startswith('//') and not path.startswith('///')):
+ path.startswith(sep*2) and not path.startswith(sep*3)):
initial_slashes = 2
- comps = path.split('/')
+ comps = path.split(sep)
new_comps = []
for comp in comps:
- if comp in ('', '.'):
+ if comp in (empty, dot):
continue
- if (comp != '..' or (not initial_slashes and not new_comps) or
- (new_comps and new_comps[-1] == '..')):
+ if (comp != dotdot or (not initial_slashes and not new_comps) or
+ (new_comps and new_comps[-1] == dotdot)):
new_comps.append(comp)
elif new_comps:
new_comps.pop()
comps = new_comps
- path = '/'.join(comps)
+ path = sep.join(comps)
if initial_slashes:
- path = '/'*initial_slashes + path
- return path or '.'
+ path = sep*initial_slashes + path
+ return path or dot
def abspath(path):
"""Return an absolute path."""
if not isabs(path):
- path = join(os.getcwd(), path)
+ if isinstance(path, bytes):
+ cwd = os.getcwdb()
+ else:
+ cwd = os.getcwd()
+ path = join(cwd, path)
return normpath(path)
@@ -305,10 +372,16 @@ def abspath(path):
def realpath(filename):
"""Return the canonical path of the specified filename, eliminating any
symbolic links encountered in the path."""
+ if isinstance(filename, bytes):
+ sep = b'/'
+ empty = b''
+ else:
+ sep = '/'
+ empty = ''
if isabs(filename):
- bits = ['/'] + filename.split('/')[1:]
+ bits = [sep] + filename.split(sep)[1:]
else:
- bits = [''] + filename.split('/')
+ bits = [empty] + filename.split(sep)
for i in range(2, len(bits)+1):
component = join(*bits[0:i])
@@ -347,12 +420,24 @@ def _resolve_link(path):
supports_unicode_filenames = False
-def relpath(path, start=curdir):
+def relpath(path, start=None):
"""Return a relative version of a path"""
if not path:
raise ValueError("no path specified")
+ if isinstance(path, bytes):
+ curdir = b'.'
+ sep = b'/'
+ pardir = b'..'
+ else:
+ curdir = '.'
+ sep = '/'
+ pardir = '..'
+
+ if start is None:
+ start = curdir
+
start_list = abspath(start).split(sep)
path_list = abspath(path).split(sep)