From 84ae1180063a6f9fc39c22a5977b49aaac8c3b3c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 6 May 2010 22:05:07 +0000 Subject: Issue #8603: Create a bytes version of os.environ for Unix Create os.environb mapping and os.getenvb() function, os.unsetenv() encodes str argument to the file system encoding with the surrogateescape error handler (instead of utf8/strict) and accepts bytes, and posix.environ keys and values are bytes. --- Doc/library/os.rst | 28 ++++++++++++++- Doc/library/posix.rst | 17 +++++---- Lib/os.py | 87 +++++++++++++++++++++++++++++++++++---------- Lib/test/test_os.py | 21 +++++++++++ Lib/test/test_subprocess.py | 8 ++--- Misc/NEWS | 6 ++++ Modules/posixmodule.c | 79 +++++++++++++++++++++++++++------------- 7 files changed, 191 insertions(+), 55 deletions(-) diff --git a/Doc/library/os.rst b/Doc/library/os.rst index c41ee1b..6ad4785 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -107,6 +107,10 @@ process and user. to modify the environment as well as query the environment. :func:`putenv` will be called automatically when the mapping is modified. + On Unix, keys and values use :func:`sys.getfilesystemencoding` and + the ``'surrogateescape'`` error handler. Use :data:`environb` if you would like + to use a different encoding. + .. note:: Calling :func:`putenv` directly does not change ``os.environ``, so it's better @@ -128,6 +132,16 @@ process and user. one of the :meth:`pop` or :meth:`clear` methods is called. +.. data:: environb + + Bytes version of :data:`environ`: a mapping object representing the + environment as byte strings. :data:`environ` and :data:`environb` are + synchronized (modifying :data:`environb` updates :data:`environ`, and vice + versa). + + Availability: Unix. + + .. function:: chdir(path) fchdir(fd) getcwd() @@ -251,7 +265,19 @@ process and user. .. function:: getenv(key, default=None) Return the value of the environment variable *key* if it exists, or - *default* if it doesn't. 
Availability: most flavors of Unix, Windows. + *default* if it doesn't. *key*, *default* and the result are str. + Availability: most flavors of Unix, Windows. + + On Unix, keys and values are decoded with :func:`sys.getfilesystemencoding` + and the ``'surrogateescape'`` error handler. Use :func:`os.getenvb` if you + would like to use a different encoding. + + +.. function:: getenvb(key, default=None) + + Return the value of the environment variable *key* if it exists, or + *default* if it doesn't. *key*, *default* and the result are bytes. + Availability: most flavors of Unix. .. function:: putenv(key, value) diff --git a/Doc/library/posix.rst b/Doc/library/posix.rst index c33d9e5..d65b999 100644 --- a/Doc/library/posix.rst +++ b/Doc/library/posix.rst @@ -69,17 +69,22 @@ In addition to many functions described in the :mod:`os` module documentation, .. data:: environ A dictionary representing the string environment at the time the interpreter - was started. For example, ``environ['HOME']`` is the pathname of your home - directory, equivalent to ``getenv("HOME")`` in C. + was started. Keys and values are bytes on Unix and str on Windows. For + example, ``environ[b'HOME']`` (``environ['HOME']`` on Windows) is the + pathname of your home directory, equivalent to ``getenv("HOME")`` in C. Modifying this dictionary does not affect the string environment passed on by :func:`execv`, :func:`popen` or :func:`system`; if you need to change the environment, pass ``environ`` to :func:`execve` or add variable assignments and export statements to the command string for :func:`system` or :func:`popen`. + .. versionchanged:: 3.2 + On Unix, keys and values are bytes. + .. note:: - The :mod:`os` module provides an alternate implementation of ``environ`` which - updates the environment on modification. Note also that updating ``os.environ`` - will render this dictionary obsolete. Use of the :mod:`os` module version of - this is recommended over direct access to the :mod:`posix` module. 
+ The :mod:`os` module provides an alternate implementation of ``environ`` + which updates the environment on modification. Note also that updating + :data:`os.environ` will render this dictionary obsolete. Use of the + :mod:`os` module version of this is recommended over direct access to the + :mod:`posix` module. diff --git a/Lib/os.py b/Lib/os.py index 7672d6f..3e2ee0d 100644 --- a/Lib/os.py +++ b/Lib/os.py @@ -387,29 +387,33 @@ def get_exec_path(env=None): from _abcoll import MutableMapping # Can't use collections (bootstrap) class _Environ(MutableMapping): - def __init__(self, environ, keymap, putenv, unsetenv): - self.keymap = keymap + def __init__(self, data, encodekey, decodekey, encodevalue, decodevalue, putenv, unsetenv): + self.encodekey = encodekey + self.decodekey = decodekey + self.encodevalue = encodevalue + self.decodevalue = decodevalue self.putenv = putenv self.unsetenv = unsetenv - self.data = data = {} - for key, value in environ.items(): - data[keymap(key)] = str(value) + self.data = data def __getitem__(self, key): - return self.data[self.keymap(key)] + value = self.data[self.encodekey(key)] + return self.decodevalue(value) def __setitem__(self, key, value): - value = str(value) + key = self.encodekey(key) + value = self.encodevalue(value) self.putenv(key, value) - self.data[self.keymap(key)] = value + self.data[key] = value def __delitem__(self, key): + key = self.encodekey(key) self.unsetenv(key) - del self.data[self.keymap(key)] + del self.data[key] def __iter__(self): for key in self.data: - yield key + yield self.decodekey(key) def __len__(self): return len(self.data) @@ -439,22 +443,67 @@ except NameError: else: __all__.append("unsetenv") -if name in ('os2', 'nt'): # Where Env Var Names Must Be UPPERCASE - _keymap = lambda key: str(key.upper()) -else: # Where Env Var Names Can Be Mixed Case - _keymap = lambda key: str(key) - -environ = _Environ(environ, _keymap, _putenv, _unsetenv) +def _createenviron(): + if name in ('os2', 'nt'): + # 
Where Env Var Names Must Be UPPERCASE + def check_str(value): + if not isinstance(value, str): + raise TypeError("str expected, not %s" % type(value).__name__) + return value + encode = check_str + decode = str + def encodekey(key): + return encode(key).upper() + data = {} + for key, value in environ.items(): + data[encodekey(key)] = value + else: + # Where Env Var Names Can Be Mixed Case + def encode(value): + if not isinstance(value, str): + raise TypeError("str expected, not %s" % type(value).__name__) + return value.encode(sys.getfilesystemencoding(), 'surrogateescape') + def decode(value): + return value.decode(sys.getfilesystemencoding(), 'surrogateescape') + encodekey = encode + data = environ + return _Environ(data, + encodekey, decode, + encode, decode, + _putenv, _unsetenv) + +# unicode environ +environ = _createenviron() +del _createenviron def getenv(key, default=None): """Get an environment variable, return None if it doesn't exist. - The optional second argument can specify an alternate default.""" - if isinstance(key, bytes): - key = key.decode(sys.getfilesystemencoding(), "surrogateescape") + The optional second argument can specify an alternate default. + key, default and the result are str.""" return environ.get(key, default) __all__.append("getenv") +if name not in ('os2', 'nt'): + def _check_bytes(value): + if not isinstance(value, bytes): + raise TypeError("bytes expected, not %s" % type(value).__name__) + return value + + # bytes environ + environb = _Environ(environ.data, + _check_bytes, bytes, + _check_bytes, bytes, + _putenv, _unsetenv) + del _check_bytes + + def getenvb(key, default=None): + """Get an environment variable, return None if it doesn't exist. + The optional second argument can specify an alternate default. 
+ key, default and the result are bytes.""" + return environb.get(key, default) + __all__.append("getenvb") + def _exists(name): return name in globals() diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index b91f97b..49c6591 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -369,12 +369,15 @@ class EnvironTests(mapping_tests.BasicTestMappingProtocol): def setUp(self): self.__save = dict(os.environ) + self.__saveb = dict(os.environb) for key, value in self._reference().items(): os.environ[key] = value def tearDown(self): os.environ.clear() os.environ.update(self.__save) + os.environb.clear() + os.environb.update(self.__saveb) def _reference(self): return {"KEY1":"VALUE1", "KEY2":"VALUE2", "KEY3":"VALUE3"} @@ -439,6 +442,24 @@ class EnvironTests(mapping_tests.BasicTestMappingProtocol): # Supplied PATH environment variable self.assertSequenceEqual(test_path, os.get_exec_path(test_env)) + @unittest.skipIf(sys.platform == "win32", "POSIX specific test") + def test_environb(self): + # os.environ -> os.environb + value = 'euro\u20ac' + try: + value_bytes = value.encode(sys.getfilesystemencoding(), 'surrogateescape') + except UnicodeEncodeError: + raise unittest.SkipTest("U+20AC character is not encodable to %s" % sys.getfilesystemencoding()) + os.environ['unicode'] = value + self.assertEquals(os.environ['unicode'], value) + self.assertEquals(os.environb[b'unicode'], value_bytes) + + # os.environb -> os.environ + value = b'\xff' + os.environb[b'bytes'] = value + self.assertEquals(os.environb[b'bytes'], value) + value_str = value.decode(sys.getfilesystemencoding(), 'surrogateescape') + self.assertEquals(os.environ['bytes'], value_str) class WalkTests(unittest.TestCase): """Tests for os.walk().""" diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index be163fc..eb96706 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -803,8 +803,6 @@ class POSIXProcessTestCase(BaseTestCase): def test_undecodable_env(self): 
for key, value in (('test', 'abc\uDCFF'), ('test\uDCFF', '42')): - value_repr = repr(value).encode("ascii") - # test str with surrogates script = "import os; print(repr(os.getenv(%s)))" % repr(key) env = os.environ.copy() @@ -813,19 +811,19 @@ class POSIXProcessTestCase(BaseTestCase): [sys.executable, "-c", script], env=env) stdout = stdout.rstrip(b'\n\r') - self.assertEquals(stdout, value_repr) + self.assertEquals(stdout.decode('ascii'), repr(value)) # test bytes key = key.encode("ascii", "surrogateescape") value = value.encode("ascii", "surrogateescape") - script = "import os; print(repr(os.getenv(%s)))" % repr(key) + script = "import os; print(repr(os.getenvb(%s)))" % repr(key) env = os.environ.copy() env[key] = value stdout = subprocess.check_output( [sys.executable, "-c", script], env=env) stdout = stdout.rstrip(b'\n\r') - self.assertEquals(stdout, value_repr) + self.assertEquals(stdout.decode('ascii'), repr(value)) @unittest.skipUnless(mswindows, "Windows specific tests") diff --git a/Misc/NEWS b/Misc/NEWS index a48f8ed..fdb9253 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -348,6 +348,12 @@ C-API Library ------- +- Issue #8603: Create a bytes version of os.environ for Unix: create + os.environb mapping and os.getenvb() function, os.unsetenv() encodes str + argument to the file system encoding with the surrogateescape error handler + (instead of utf8/strict) and accepts bytes, and posix.environ keys and values + are bytes. + - Issue #8573: asyncore _strerror() function might throw ValueError. 
- Issue #8483: asyncore.dispatcher's __getattr__ method produced confusing diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index ac866d7..0d6f8f0 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -498,14 +498,12 @@ convertenviron(void) char *p = strchr(*e, '='); if (p == NULL) continue; - k = PyUnicode_Decode(*e, (int)(p-*e), - Py_FileSystemDefaultEncoding, "surrogateescape"); + k = PyBytes_FromStringAndSize(*e, (int)(p-*e)); if (k == NULL) { PyErr_Clear(); continue; } - v = PyUnicode_Decode(p+1, strlen(p+1), - Py_FileSystemDefaultEncoding, "surrogateescape"); + v = PyBytes_FromStringAndSize(p+1, strlen(p+1)); if (v == NULL) { PyErr_Clear(); Py_DECREF(k); @@ -5301,7 +5299,7 @@ posix_putenv(PyObject *self, PyObject *args) char *s1, *s2; char *newenv; #endif - PyObject *newstr; + PyObject *newstr = NULL; size_t len; #ifdef MS_WINDOWS @@ -5324,15 +5322,19 @@ posix_putenv(PyObject *self, PyObject *args) APIRET rc; rc = DosSetExtLIBPATH(s2, BEGIN_LIBPATH); - if (rc != NO_ERROR) - return os2_error(rc); + if (rc != NO_ERROR) { + os2_error(rc); + goto error; + } } else if (stricmp(s1, "ENDLIBPATH") == 0) { APIRET rc; rc = DosSetExtLIBPATH(s2, END_LIBPATH); - if (rc != NO_ERROR) - return os2_error(rc); + if (rc != NO_ERROR) { + os2_error(rc); + goto error; + } } else { #endif /* XXX This can leak memory -- not easy to fix :-( */ @@ -5342,36 +5344,40 @@ posix_putenv(PyObject *self, PyObject *args) len = wcslen(s1) + wcslen(s2) + 2; newstr = PyUnicode_FromUnicode(NULL, (int)len - 1); #else - len = strlen(s1) + strlen(s2) + 2; + len = PyBytes_GET_SIZE(os1) + PyBytes_GET_SIZE(os2) + 2; newstr = PyBytes_FromStringAndSize(NULL, (int)len - 1); #endif - if (newstr == NULL) - return PyErr_NoMemory(); + if (newstr == NULL) { + PyErr_NoMemory(); + goto error; + } #ifdef MS_WINDOWS newenv = PyUnicode_AsUnicode(newstr); _snwprintf(newenv, len, L"%s=%s", s1, s2); if (_wputenv(newenv)) { - Py_DECREF(newstr); posix_error(); - return NULL; + goto error; } #else 
newenv = PyBytes_AS_STRING(newstr); PyOS_snprintf(newenv, len, "%s=%s", s1, s2); if (putenv(newenv)) { - Py_DECREF(newstr); - Py_DECREF(os1); - Py_DECREF(os2); posix_error(); - return NULL; + goto error; } #endif + /* Install the first arg and newstr in posix_putenv_garbage; * this will cause previous value to be collected. This has to * happen after the real putenv() call because the old value * was still accessible until then. */ if (PyDict_SetItem(posix_putenv_garbage, - PyTuple_GET_ITEM(args, 0), newstr)) { +#ifdef MS_WINDOWS + PyTuple_GET_ITEM(args, 0), +#else + os1, +#endif + newstr)) { /* really not much we can do; just leak */ PyErr_Clear(); } @@ -5382,12 +5388,20 @@ posix_putenv(PyObject *self, PyObject *args) #if defined(PYOS_OS2) } #endif + #ifndef MS_WINDOWS Py_DECREF(os1); Py_DECREF(os2); #endif - Py_INCREF(Py_None); - return Py_None; + Py_RETURN_NONE; + +error: +#ifndef MS_WINDOWS + Py_DECREF(os1); + Py_DECREF(os2); +#endif + Py_XDECREF(newstr); + return NULL; } #endif /* putenv */ @@ -5399,10 +5413,20 @@ Delete an environment variable."); static PyObject * posix_unsetenv(PyObject *self, PyObject *args) { +#ifdef MS_WINDOWS char *s1; if (!PyArg_ParseTuple(args, "s:unsetenv", &s1)) return NULL; +#else + PyObject *os1; + char *s1; + + if (!PyArg_ParseTuple(args, "O&:unsetenv", + PyUnicode_FSConverter, &os1)) + return NULL; + s1 = PyBytes_AsString(os1); +#endif unsetenv(s1); @@ -5412,13 +5436,20 @@ posix_unsetenv(PyObject *self, PyObject *args) * old value was still accessible until then. */ if (PyDict_DelItem(posix_putenv_garbage, - PyTuple_GET_ITEM(args, 0))) { +#ifdef MS_WINDOWS + PyTuple_GET_ITEM(args, 0) +#else + os1 +#endif + )) { /* really not much we can do; just leak */ PyErr_Clear(); } - Py_INCREF(Py_None); - return Py_None; +#ifndef MS_WINDOWS + Py_DECREF(os1); +#endif + Py_RETURN_NONE; } #endif /* unsetenv */ -- cgit v0.12