Issue #3187: Better support for "undecodable" filenames. Code by Victor Stinner,
with small tweaks by GvR.
author: Guido van Rossum <guido@python.org> 2008-10-02 18:55:37 (GMT)
committer: Guido van Rossum <guido@python.org> 2008-10-02 18:55:37 (GMT)
commit: f0af3e30db9475ab68bcb1f1ce0b5581e214df76 (patch)
tree: 71efbc67686d96e8c8a81dd97c75c419adf36657 /Lib/posixpath.py
parent: fefeca53eebe8665c08ac0c041639ada3c9f9446 (diff)
download: cpython-f0af3e30db9475ab68bcb1f1ce0b5581e214df76.zip
cpython-f0af3e30db9475ab68bcb1f1ce0b5581e214df76.tar.gz
cpython-f0af3e30db9475ab68bcb1f1ce0b5581e214df76.tar.bz2
1 files changed, 128 insertions, 43 deletions
diff --git a/Lib/posixpath.py b/Lib/posixpath.py
index 575492f..745c920 100644
--- a/Lib/posixpath.py
+++ b/Lib/posixpath.py
@@ -11,6 +11,7 @@ for manipulation of the pathname component of URLs.
 """
 
 import os
+import sys
 import stat
 import genericpath
 from genericpath import *
@@ -23,7 +24,8 @@ __all__ = ["normcase","isabs","join","splitdrive","split","splitext",
            "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
            "devnull","realpath","supports_unicode_filenames","relpath"]
 
-# strings representing various path-related bits and pieces
+# Strings representing various path-related bits and pieces.
+# These are primarily for export; internally, they are hardcoded.
 curdir = '.'
 pardir = '..'
 extsep = '.'
@@ -33,6 +35,12 @@ defpath = ':/bin:/usr/bin'
 altsep = None
 devnull = '/dev/null'
 
+def _get_sep(path):
+    if isinstance(path, bytes):
+        return b'/'
+    else:
+        return '/'
+
 # Normalize the case of a pathname.  Trivial in Posix, string.lower on Mac.
 # On MS-DOS this may also turn slashes into backslashes; however, other
 # normalizations (such as optimizing '../' away) are not allowed
@@ -40,6 +48,7 @@ devnull = '/dev/null'
 
 def normcase(s):
     """Normalize case of pathname.  Has no effect under Posix"""
+    # TODO: on Mac OS X, this should really return s.lower().
     return s
 
 
@@ -48,7 +57,8 @@ def normcase(s):
 
 def isabs(s):
     """Test whether a path is absolute"""
-    return s.startswith('/')
+    sep = _get_sep(s)
+    return s.startswith(sep)
 
 
 # Join pathnames.
@@ -59,14 +69,15 @@ def join(a, *p):
     """Join two or more pathname components, inserting '/' as needed.
     If any component is an absolute path, all previous path components
     will be discarded."""
+    sep = _get_sep(a)
     path = a
     for b in p:
-        if b.startswith('/'):
+        if b.startswith(sep):
             path = b
-        elif path == '' or path.endswith('/'):
+        elif not path or path.endswith(sep):
             path +=  b
         else:
-            path += '/' + b
+            path += sep + b
     return path
 
 
@@ -78,10 +89,11 @@ def join(a, *p):
 def split(p):
     """Split a pathname.  Returns tuple "(head, tail)" where "tail" is
     everything after the final slash.  Either part may be empty."""
-    i = p.rfind('/') + 1
+    sep = _get_sep(p)
+    i = p.rfind(sep) + 1
     head, tail = p[:i], p[i:]
-    if head and head != '/'*len(head):
-        head = head.rstrip('/')
+    if head and head != sep*len(head):
+        head = head.rstrip(sep)
     return head, tail
 
 
@@ -91,7 +103,13 @@ def split(p):
 # It is always true that root + ext == p.
 
 def splitext(p):
-    return genericpath._splitext(p, sep, altsep, extsep)
+    if isinstance(p, bytes):
+        sep = b'/'
+        extsep = b'.'
+    else:
+        sep = '/'
+        extsep = '.'
+    return genericpath._splitext(p, sep, None, extsep)
 splitext.__doc__ = genericpath._splitext.__doc__
 
 # Split a pathname into a drive specification and the rest of the
@@ -100,14 +118,15 @@ splitext.__doc__ = genericpath._splitext.__doc__
 def splitdrive(p):
     """Split a pathname into drive and path. On Posix, drive is always
     empty."""
-    return '', p
+    return p[:0], p
 
 
 # Return the tail (basename) part of a path, same as split(path)[1].
 
 def basename(p):
     """Returns the final component of a pathname"""
-    i = p.rfind('/') + 1
+    sep = _get_sep(p)
+    i = p.rfind(sep) + 1
     return p[i:]
 
 
@@ -115,10 +134,11 @@ def basename(p):
 
 def dirname(p):
     """Returns the directory component of a pathname"""
-    i = p.rfind('/') + 1
+    sep = _get_sep(p)
+    i = p.rfind(sep) + 1
     head = p[:i]
-    if head and head != '/'*len(head):
-        head = head.rstrip('/')
+    if head and head != sep*len(head):
+        head = head.rstrip(sep)
     return head
 
 
@@ -179,7 +199,11 @@ def ismount(path):
     """Test whether a path is a mount point"""
     try:
         s1 = os.lstat(path)
-        s2 = os.lstat(join(path, '..'))
+        if isinstance(path, bytes):
+            parent = join(path, b'..')
+        else:
+            parent = join(path, '..')
+        s2 = os.lstat(parent)
     except os.error:
         return False # It doesn't exist -- so not a mount point :-)
     dev1 = s1.st_dev
@@ -205,9 +229,14 @@ def ismount(path):
 def expanduser(path):
     """Expand ~ and ~user constructions.  If user or $HOME is unknown,
     do nothing."""
-    if not path.startswith('~'):
+    if isinstance(path, bytes):
+        tilde = b'~'
+    else:
+        tilde = '~'
+    if not path.startswith(tilde):
         return path
-    i = path.find('/', 1)
+    sep = _get_sep(path)
+    i = path.find(sep, 1)
     if i < 0:
         i = len(path)
     if i == 1:
@@ -218,12 +247,17 @@ def expanduser(path):
             userhome = os.environ['HOME']
     else:
         import pwd
+        name = path[1:i]
+        if isinstance(name, bytes):
+            name = str(name, 'ASCII')
         try:
-            pwent = pwd.getpwnam(path[1:i])
+            pwent = pwd.getpwnam(name)
         except KeyError:
             return path
         userhome = pwent.pw_dir
-    userhome = userhome.rstrip('/')
+    if isinstance(path, bytes):
+        userhome = userhome.encode(sys.getfilesystemencoding())
+    userhome = userhome.rstrip(sep)
     return userhome + path[i:]
 
 
@@ -232,28 +266,47 @@ def expanduser(path):
 # Non-existent variables are left unchanged.
 
 _varprog = None
+_varprogb = None
 
 def expandvars(path):
     """Expand shell variables of form $var and ${var}.  Unknown variables
     are left unchanged."""
-    global _varprog
-    if '$' not in path:
-        return path
-    if not _varprog:
-        import re
-        _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
+    global _varprog, _varprogb
+    if isinstance(path, bytes):
+        if b'$' not in path:
+            return path
+        if not _varprogb:
+            import re
+            _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
+        search = _varprogb.search
+        start = b'{'
+        end = b'}'
+    else:
+        if '$' not in path:
+            return path
+        if not _varprog:
+            import re
+            _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
+        search = _varprog.search
+        start = '{'
+        end = '}'
     i = 0
     while True:
-        m = _varprog.search(path, i)
+        m = search(path, i)
         if not m:
             break
         i, j = m.span(0)
         name = m.group(1)
-        if name.startswith('{') and name.endswith('}'):
+        if name.startswith(start) and name.endswith(end):
             name = name[1:-1]
+        if isinstance(name, bytes):
+            name = str(name, 'ASCII')
         if name in os.environ:
             tail = path[j:]
-            path = path[:i] + os.environ[name]
+            value = os.environ[name]
+            if isinstance(path, bytes):
+                value = value.encode('ASCII')
+            path = path[:i] + value
             i = len(path)
             path += tail
         else:
@@ -267,35 +320,49 @@ def expandvars(path):
 
 def normpath(path):
     """Normalize path, eliminating double slashes, etc."""
-    if path == '':
-        return '.'
-    initial_slashes = path.startswith('/')
+    if isinstance(path, bytes):
+        sep = b'/'
+        empty = b''
+        dot = b'.'
+        dotdot = b'..'
+    else:
+        sep = '/'
+        empty = ''
+        dot = '.'
+        dotdot = '..'
+    if path == empty:
+        return dot
+    initial_slashes = path.startswith(sep)
     # POSIX allows one or two initial slashes, but treats three or more
     # as single slash.
     if (initial_slashes and
-        path.startswith('//') and not path.startswith('///')):
+        path.startswith(sep*2) and not path.startswith(sep*3)):
         initial_slashes = 2
-    comps = path.split('/')
+    comps = path.split(sep)
     new_comps = []
     for comp in comps:
-        if comp in ('', '.'):
+        if comp in (empty, dot):
             continue
-        if (comp != '..' or (not initial_slashes and not new_comps) or
-             (new_comps and new_comps[-1] == '..')):
+        if (comp != dotdot or (not initial_slashes and not new_comps) or
+             (new_comps and new_comps[-1] == dotdot)):
             new_comps.append(comp)
         elif new_comps:
             new_comps.pop()
     comps = new_comps
-    path = '/'.join(comps)
+    path = sep.join(comps)
     if initial_slashes:
-        path = '/'*initial_slashes + path
-    return path or '.'
+        path = sep*initial_slashes + path
+    return path or dot
 
 
 def abspath(path):
     """Return an absolute path."""
     if not isabs(path):
-        path = join(os.getcwd(), path)
+        if isinstance(path, bytes):
+            cwd = os.getcwdb()
+        else:
+            cwd = os.getcwd()
+        path = join(cwd, path)
     return normpath(path)
 
 
@@ -305,10 +372,16 @@ def abspath(path):
 def realpath(filename):
     """Return the canonical path of the specified filename, eliminating any
 symbolic links encountered in the path."""
+    if isinstance(filename, bytes):
+        sep = b'/'
+        empty = b''
+    else:
+        sep = '/'
+        empty = ''
     if isabs(filename):
-        bits = ['/'] + filename.split('/')[1:]
+        bits = [sep] + filename.split(sep)[1:]
     else:
-        bits = [''] + filename.split('/')
+        bits = [empty] + filename.split(sep)
 
     for i in range(2, len(bits)+1):
         component = join(*bits[0:i])
@@ -347,12 +420,24 @@ def _resolve_link(path):
 
 supports_unicode_filenames = False
 
-def relpath(path, start=curdir):
+def relpath(path, start=None):
     """Return a relative version of a path"""
 
     if not path:
         raise ValueError("no path specified")
 
+    if isinstance(path, bytes):
+        curdir = b'.'
+        sep = b'/'
+        pardir = b'..'
+    else:
+        curdir = '.'
+        sep = '/'
+        pardir = '..'
+
+    if start is None:
+        start = curdir
+
     start_list = abspath(start).split(sep)
     path_list = abspath(path).split(sep)
author	Guido van Rossum <guido@python.org>	2008-10-02 18:55:37 (GMT)
committer	Guido van Rossum <guido@python.org>	2008-10-02 18:55:37 (GMT)
commit	f0af3e30db9475ab68bcb1f1ce0b5581e214df76 (patch)
tree	71efbc67686d96e8c8a81dd97c75c419adf36657 /Lib/posixpath.py
parent	fefeca53eebe8665c08ac0c041639ada3c9f9446 (diff)
download	cpython-f0af3e30db9475ab68bcb1f1ce0b5581e214df76.zip cpython-f0af3e30db9475ab68bcb1f1ce0b5581e214df76.tar.gz cpython-f0af3e30db9475ab68bcb1f1ce0b5581e214df76.tar.bz2