summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2014-02-19 21:27:37 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2014-02-19 21:27:37 (GMT)
commit: 2ac9d3110898a1cfc779dd436f05cd6ac231cbb3 (patch)
tree: b62d200630b8675414e833a7f1200cd424cf7ebe
parent: c77d4ba85b4d2f08b4de094bc9d9104113772374 (diff)
download: cpython-2ac9d3110898a1cfc779dd436f05cd6ac231cbb3.zip
cpython-2ac9d3110898a1cfc779dd436f05cd6ac231cbb3.tar.gz
cpython-2ac9d3110898a1cfc779dd436f05cd6ac231cbb3.tar.bz2
Issue #6815: os.path.expandvars() now supports non-ASCII Unicode environment
variable names and values.
-rw-r--r-- Lib/ntpath.py 25
-rw-r--r-- Lib/posixpath.py 27
-rw-r--r-- Lib/test/test_genericpath.py 29
-rw-r--r-- Lib/test/test_ntpath.py 36
-rw-r--r-- Lib/test/test_support.py 46
-rw-r--r-- Misc/NEWS 3
6 files changed, 145 insertions, 21 deletions
diff --git a/Lib/ntpath.py b/Lib/ntpath.py
index fc9463c..42469fe 100644
--- a/Lib/ntpath.py
+++ b/Lib/ntpath.py
@@ -294,6 +294,13 @@ def expandvars(path):
return path
import string
varchars = string.ascii_letters + string.digits + '_-'
+ if isinstance(path, unicode):
+ encoding = sys.getfilesystemencoding()
+ def getenv(var):
+ return os.environ[var.encode(encoding)].decode(encoding)
+ else:
+ def getenv(var):
+ return os.environ[var]
res = ''
index = 0
pathlen = len(path)
@@ -322,9 +329,9 @@ def expandvars(path):
index = pathlen - 1
else:
var = path[:index]
- if var in os.environ:
- res = res + os.environ[var]
- else:
+ try:
+ res = res + getenv(var)
+ except KeyError:
res = res + '%' + var + '%'
elif c == '$': # variable or '$$'
if path[index + 1:index + 2] == '$':
@@ -336,9 +343,9 @@ def expandvars(path):
try:
index = path.index('}')
var = path[:index]
- if var in os.environ:
- res = res + os.environ[var]
- else:
+ try:
+ res = res + getenv(var)
+ except KeyError:
res = res + '${' + var + '}'
except ValueError:
res = res + '${' + path
@@ -351,9 +358,9 @@ def expandvars(path):
var = var + c
index = index + 1
c = path[index:index + 1]
- if var in os.environ:
- res = res + os.environ[var]
- else:
+ try:
+ res = res + getenv(var)
+ except KeyError:
res = res + '$' + var
if c != '':
index = index - 1
diff --git a/Lib/posixpath.py b/Lib/posixpath.py
index d65dc75..0378004 100644
--- a/Lib/posixpath.py
+++ b/Lib/posixpath.py
@@ -285,28 +285,43 @@ def expanduser(path):
# Non-existent variables are left unchanged.
_varprog = None
+_uvarprog = None
def expandvars(path):
"""Expand shell variables of form $var and ${var}. Unknown variables
are left unchanged."""
- global _varprog
+ global _varprog, _uvarprog
if '$' not in path:
return path
- if not _varprog:
- import re
- _varprog = re.compile(r'\$(\w+|\{[^}]*\})')
+ if isinstance(path, _unicode):
+ if not _varprog:
+ import re
+ _varprog = re.compile(r'\$(\w+|\{[^}]*\})')
+ varprog = _varprog
+ encoding = sys.getfilesystemencoding()
+ else:
+ if not _uvarprog:
+ import re
+ _uvarprog = re.compile(_unicode(r'\$(\w+|\{[^}]*\})'), re.UNICODE)
+ varprog = _uvarprog
+ encoding = None
i = 0
while True:
- m = _varprog.search(path, i)
+ m = varprog.search(path, i)
if not m:
break
i, j = m.span(0)
name = m.group(1)
if name.startswith('{') and name.endswith('}'):
name = name[1:-1]
+ if encoding:
+ name = name.encode(encoding)
if name in os.environ:
tail = path[j:]
- path = path[:i] + os.environ[name]
+ value = os.environ[name]
+ if encoding:
+ value = value.decode(encoding)
+ path = path[:i] + value
i = len(path)
path += tail
else:
diff --git a/Lib/test/test_genericpath.py b/Lib/test/test_genericpath.py
index 3975b56..94380b1 100644
--- a/Lib/test/test_genericpath.py
+++ b/Lib/test/test_genericpath.py
@@ -199,13 +199,40 @@ class CommonTest(GenericTest):
self.assertEqual(expandvars("$[foo]bar"), "$[foo]bar")
self.assertEqual(expandvars("$bar bar"), "$bar bar")
self.assertEqual(expandvars("$?bar"), "$?bar")
- self.assertEqual(expandvars("${foo}bar"), "barbar")
self.assertEqual(expandvars("$foo}bar"), "bar}bar")
self.assertEqual(expandvars("${foo"), "${foo")
self.assertEqual(expandvars("${{foo}}"), "baz1}")
self.assertEqual(expandvars("$foo$foo"), "barbar")
self.assertEqual(expandvars("$bar$bar"), "$bar$bar")
+ @unittest.skipUnless(test_support.FS_NONASCII, 'need test_support.FS_NONASCII')
+ def test_expandvars_nonascii(self):
+ if self.pathmodule.__name__ == 'macpath':
+ self.skipTest('macpath.expandvars is a stub')
+ expandvars = self.pathmodule.expandvars
+ def check(value, expected):
+ self.assertEqual(expandvars(value), expected)
+ encoding = sys.getfilesystemencoding()
+ with test_support.EnvironmentVarGuard() as env:
+ env.clear()
+ unonascii = test_support.FS_NONASCII
+ snonascii = unonascii.encode(encoding)
+ env['spam'] = snonascii
+ env[snonascii] = 'ham' + snonascii
+ check(snonascii, snonascii)
+ check('$spam bar', '%s bar' % snonascii)
+ check('${spam}bar', '%sbar' % snonascii)
+ check('${%s}bar' % snonascii, 'ham%sbar' % snonascii)
+ check('$bar%s bar' % snonascii, '$bar%s bar' % snonascii)
+ check('$spam}bar', '%s}bar' % snonascii)
+
+ check(unonascii, unonascii)
+ check(u'$spam bar', u'%s bar' % unonascii)
+ check(u'${spam}bar', u'%sbar' % unonascii)
+ check(u'${%s}bar' % unonascii, u'ham%sbar' % unonascii)
+ check(u'$bar%s bar' % unonascii, u'$bar%s bar' % unonascii)
+ check(u'$spam}bar', u'%s}bar' % unonascii)
+
def test_abspath(self):
self.assertIn("foo", self.pathmodule.abspath("foo"))
diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py
index 11e66fb..1f1a971 100644
--- a/Lib/test/test_ntpath.py
+++ b/Lib/test/test_ntpath.py
@@ -1,16 +1,19 @@
import ntpath
import os
+import sys
from test.test_support import TestFailed
from test import test_support, test_genericpath
import unittest
+def tester0(fn, wantResult):
+ gotResult = eval(fn)
+ if wantResult != gotResult:
+ raise TestFailed, "%s should return: %r but returned: %r" \
+ %(fn, wantResult, gotResult)
def tester(fn, wantResult):
fn = fn.replace("\\", "\\\\")
- gotResult = eval(fn)
- if wantResult != gotResult:
- raise TestFailed, "%s should return: %s but returned: %s" \
- %(str(fn), str(wantResult), str(gotResult))
+ tester0(fn, wantResult)
class TestNtpath(unittest.TestCase):
@@ -173,7 +176,6 @@ class TestNtpath(unittest.TestCase):
tester('ntpath.expandvars("$[foo]bar")', "$[foo]bar")
tester('ntpath.expandvars("$bar bar")', "$bar bar")
tester('ntpath.expandvars("$?bar")', "$?bar")
- tester('ntpath.expandvars("${foo}bar")', "barbar")
tester('ntpath.expandvars("$foo}bar")', "bar}bar")
tester('ntpath.expandvars("${foo")', "${foo")
tester('ntpath.expandvars("${{foo}}")', "baz1}")
@@ -187,6 +189,30 @@ class TestNtpath(unittest.TestCase):
tester('ntpath.expandvars("%foo%%bar")', "bar%bar")
tester('ntpath.expandvars("\'%foo%\'%bar")', "\'%foo%\'%bar")
+ @unittest.skipUnless(test_support.FS_NONASCII, 'need test_support.FS_NONASCII')
+ def test_expandvars_nonascii(self):
+ encoding = sys.getfilesystemencoding()
+ def check(value, expected):
+ tester0("ntpath.expandvars(%r)" % value, expected)
+ tester0("ntpath.expandvars(%r)" % value.decode(encoding),
+ expected.decode(encoding))
+ with test_support.EnvironmentVarGuard() as env:
+ env.clear()
+ unonascii = test_support.FS_NONASCII
+ snonascii = unonascii.encode(encoding)
+ env['spam'] = snonascii
+ env[snonascii] = 'ham' + snonascii
+ check('$spam bar', '%s bar' % snonascii)
+ check('$%s bar' % snonascii, '$%s bar' % snonascii)
+ check('${spam}bar', '%sbar' % snonascii)
+ check('${%s}bar' % snonascii, 'ham%sbar' % snonascii)
+ check('$spam}bar', '%s}bar' % snonascii)
+ check('$%s}bar' % snonascii, '$%s}bar' % snonascii)
+ check('%spam% bar', '%s bar' % snonascii)
+ check('%{}% bar'.format(snonascii), 'ham%s bar' % snonascii)
+ check('%spam%bar', '%sbar' % snonascii)
+ check('%{}%bar'.format(snonascii), 'ham%sbar' % snonascii)
+
def test_abspath(self):
# ntpath.abspath() can only be used on a system with the "nt" module
# (reasonably), so we protect this test with "import nt". This allows
diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py
index a40a593..44b4224 100644
--- a/Lib/test/test_support.py
+++ b/Lib/test/test_support.py
@@ -465,6 +465,52 @@ except NameError:
is_jython = sys.platform.startswith('java')
+# FS_NONASCII: non-ASCII Unicode character encodable by
+# sys.getfilesystemencoding(), or None if there is no such character.
+FS_NONASCII = None
+if have_unicode:
+ for character in (
+ # First try printable and common characters to have a readable filename.
+ # For each character, the encoding list are just example of encodings able
+ # to encode the character (the list is not exhaustive).
+
+ # U+00E6 (Latin Small Letter Ae): cp1252, iso-8859-1
+ unichr(0x00E6),
+ # U+0130 (Latin Capital Letter I With Dot Above): cp1254, iso8859_3
+ unichr(0x0130),
+ # U+0141 (Latin Capital Letter L With Stroke): cp1250, cp1257
+ unichr(0x0141),
+ # U+03C6 (Greek Small Letter Phi): cp1253
+ unichr(0x03C6),
+ # U+041A (Cyrillic Capital Letter Ka): cp1251
+ unichr(0x041A),
+ # U+05D0 (Hebrew Letter Alef): Encodable to cp424
+ unichr(0x05D0),
+ # U+060C (Arabic Comma): cp864, cp1006, iso8859_6, mac_arabic
+ unichr(0x060C),
+ # U+062A (Arabic Letter Teh): cp720
+ unichr(0x062A),
+ # U+0E01 (Thai Character Ko Kai): cp874
+ unichr(0x0E01),
+
+ # Then try more "special" characters. "special" because they may be
+ # interpreted or displayed differently depending on the exact locale
+ # encoding and the font.
+
+ # U+00A0 (No-Break Space)
+ unichr(0x00A0),
+ # U+20AC (Euro Sign)
+ unichr(0x20AC),
+ ):
+ try:
+ character.encode(sys.getfilesystemencoding())\
+ .decode(sys.getfilesystemencoding())
+ except UnicodeError:
+ pass
+ else:
+ FS_NONASCII = character
+ break
+
# Filename used for testing
if os.name == 'java':
# Jython disallows @ in module names
diff --git a/Misc/NEWS b/Misc/NEWS
index b66f945..2593500 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -39,6 +39,9 @@ Core and Builtins
Library
-------
+- Issue #6815: os.path.expandvars() now supports non-ASCII Unicode environment
+ variables names and values.
+
- Issue #20635: Fixed grid_columnconfigure() and grid_rowconfigure() methods of
Tkinter widgets to work in wantobjects=True mode.