diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2008-06-01 07:20:46 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2008-06-01 07:20:46 (GMT) |
commit | 99815892f685e9ab20dfdade9c1e8a295139140c (patch) | |
tree | 92d2aeaee0a925df71471b296c5fa7ff28a451b8 | |
parent | 7f7ca35f5bf22b698135de62d2179a13f5c94c7f (diff) | |
download | cpython-99815892f685e9ab20dfdade9c1e8a295139140c.zip cpython-99815892f685e9ab20dfdade9c1e8a295139140c.tar.gz cpython-99815892f685e9ab20dfdade9c1e8a295139140c.tar.bz2 |
New environment variable PYTHONIOENCODING.
-rw-r--r-- | Doc/c-api/file.rst | 8 | ||||
-rw-r--r-- | Doc/library/stdtypes.rst | 7 | ||||
-rw-r--r-- | Doc/using/cmdline.rst | 7 | ||||
-rw-r--r-- | Include/fileobject.h | 2 | ||||
-rw-r--r-- | Lib/test/test_sys.py | 20 | ||||
-rw-r--r-- | Misc/NEWS | 2 | ||||
-rw-r--r-- | Modules/main.c | 1 | ||||
-rw-r--r-- | Objects/fileobject.c | 35 | ||||
-rw-r--r-- | Python/pythonrun.c | 103 | ||||
-rw-r--r-- | Python/sysmodule.c | 20 |
10 files changed, 153 insertions, 52 deletions
diff --git a/Doc/c-api/file.rst b/Doc/c-api/file.rst index e107268..aa753e5 100644 --- a/Doc/c-api/file.rst +++ b/Doc/c-api/file.rst @@ -130,6 +130,14 @@ change in future releases of Python. .. versionadded:: 2.3 +.. cfunction:: int PyFile_SetEncodingAndErrors(PyFileObject *p, const char *enc, char *errors) + + Set the file's encoding for Unicode output to *enc*, and its error + mode to *errors*. Return 1 on success and 0 on failure. + + .. versionadded:: 2.6 + + .. cfunction:: int PyFile_SoftSpace(PyObject *p, int newflag) .. index:: single: softspace (file attribute) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index c6679fd..c96ef10 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -2165,6 +2165,13 @@ the particular object. .. versionadded:: 2.3 +.. attribute:: file.errors + + The Unicode error handler used along with the encoding. + + .. versionadded:: 2.6 + + .. attribute:: file.mode The I/O mode for the file. If the file was created using the :func:`open` diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index a6ab7ad..7d9ecad 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -481,6 +481,13 @@ These environment variables influence Python's behavior. .. versionadded:: 2.6 +.. envvar:: PYTHONIOENCODING + + Overrides the encoding used for stdin/stdout/stderr, in the syntax + encodingname:errorhandler, with the :errorhandler part being optional. + + .. versionadded:: 2.6 + .. envvar:: PYTHONNOUSERSITE diff --git a/Include/fileobject.h b/Include/fileobject.h index 56fae81..56cf40a 100644 --- a/Include/fileobject.h +++ b/Include/fileobject.h @@ -24,6 +24,7 @@ typedef struct { int f_newlinetypes; /* Types of newlines seen */ int f_skipnextlf; /* Skip next \n */ PyObject *f_encoding; + PyObject *f_errors; PyObject *weakreflist; /* List of weak references */ int unlocked_count; /* Num. currently running sections of code using f_fp with the GIL released. 
*/ @@ -37,6 +38,7 @@ PyAPI_DATA(PyTypeObject) PyFile_Type; PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *); PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int); PyAPI_FUNC(int) PyFile_SetEncoding(PyObject *, const char *); +PyAPI_FUNC(int) PyFile_SetEncodingAndErrors(PyObject *, const char *, char *errors); PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *, int (*)(FILE *)); PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *); diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 975795a..a4d8a72 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -385,6 +385,26 @@ class SysModuleTest(unittest.TestCase): ## self.assert_(r[0][2] > 100, r[0][2]) ## self.assert_(r[1][2] > 100, r[1][2]) + def test_ioencoding(self): + import subprocess,os + env = dict(os.environ) + + # Test character: cent sign, encoded as 0x4A (ASCII J) in CP424, + # not representable in ASCII. + + env["PYTHONIOENCODING"] = "cp424" + p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'], + stdout = subprocess.PIPE, env=env) + out = p.stdout.read().strip() + self.assertEqual(out, unichr(0xa2).encode("cp424")) + + env["PYTHONIOENCODING"] = "ascii:replace" + p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'], + stdout = subprocess.PIPE, env=env) + out = p.stdout.read().strip() + self.assertEqual(out, '?') + + def test_main(): test.test_support.run_unittest(SysModuleTest) @@ -12,6 +12,8 @@ What's New in Python 2.6 beta 1? Core and Builtins ----------------- +- New environment variable PYTHONIOENCODING. + - Patch #2488: Add sys.maxsize. - Issue #2353: file.xreadlines() now emits a Py3k warning. 
diff --git a/Modules/main.c b/Modules/main.c index dc94a09..c1c4b45 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -99,6 +99,7 @@ static char *usage_5 = "\ PYTHONHOME : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n\ The default module search path uses %s.\n\ PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\ +PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\ "; diff --git a/Objects/fileobject.c b/Objects/fileobject.c index 86f3a14..4ea97f5 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -155,6 +155,7 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode, Py_DECREF(f->f_name); Py_DECREF(f->f_mode); Py_DECREF(f->f_encoding); + Py_DECREF(f->f_errors); Py_INCREF(name); f->f_name = name; @@ -170,6 +171,8 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode, f->f_skipnextlf = 0; Py_INCREF(Py_None); f->f_encoding = Py_None; + Py_INCREF(Py_None); + f->f_errors = Py_None; if (f->f_mode == NULL) return NULL; @@ -435,19 +438,38 @@ PyFile_SetBufSize(PyObject *f, int bufsize) } /* Set the encoding used to output Unicode strings. - Returh 1 on success, 0 on failure. */ + Return 1 on success, 0 on failure. 
*/ int PyFile_SetEncoding(PyObject *f, const char *enc) { + return PyFile_SetEncodingAndErrors(f, enc, NULL); +} + +int +PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors) +{ PyFileObject *file = (PyFileObject*)f; - PyObject *str = PyBytes_FromString(enc); + PyObject *str, *oerrors; assert(PyFile_Check(f)); + str = PyBytes_FromString(enc); if (!str) return 0; + if (errors) { + oerrors = PyString_FromString(errors); + if (!oerrors) { + Py_DECREF(str); + return 0; + } + } else { + oerrors = Py_None; + Py_INCREF(Py_None); + } Py_DECREF(file->f_encoding); file->f_encoding = str; + Py_DECREF(file->f_errors); + file->f_errors = oerrors; return 1; } @@ -491,6 +513,7 @@ file_dealloc(PyFileObject *f) Py_XDECREF(f->f_name); Py_XDECREF(f->f_mode); Py_XDECREF(f->f_encoding); + Py_XDECREF(f->f_errors); drop_readahead(f); Py_TYPE(f)->tp_free((PyObject *)f); } @@ -1879,6 +1902,8 @@ static PyMemberDef file_memberlist[] = { "file name"}, {"encoding", T_OBJECT, OFF(f_encoding), RO, "file encoding"}, + {"errors", T_OBJECT, OFF(f_errors), RO, + "Unicode error handler"}, /* getattr(f, "closed") is implemented without this table */ {NULL} /* Sentinel */ }; @@ -2093,6 +2118,8 @@ file_new(PyTypeObject *type, PyObject *args, PyObject *kwds) ((PyFileObject *)self)->f_mode = not_yet_string; Py_INCREF(Py_None); ((PyFileObject *)self)->f_encoding = Py_None; + Py_INCREF(Py_None); + ((PyFileObject *)self)->f_errors = Py_None; ((PyFileObject *)self)->weakreflist = NULL; ((PyFileObject *)self)->unlocked_count = 0; } @@ -2295,7 +2322,9 @@ PyFile_WriteObject(PyObject *v, PyObject *f, int flags) if ((flags & Py_PRINT_RAW) && PyUnicode_Check(v) && enc != Py_None) { char *cenc = PyBytes_AS_STRING(enc); - value = PyUnicode_AsEncodedString(v, cenc, "strict"); + char *errors = fobj->f_errors == Py_None ? 
+ "strict" : PyBytes_AS_STRING(fobj->f_errors); + value = PyUnicode_AsEncodedString(v, cenc, errors); if (value == NULL) return -1; } else { diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 6a9cb25..c30bf65 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -132,11 +132,20 @@ Py_InitializeEx(int install_sigs) PyThreadState *tstate; PyObject *bimod, *sysmod; char *p; + char *icodeset; /* On Windows, input codeset may theoretically + differ from output codeset. */ + char *codeset = NULL; + char *errors = NULL; + int free_codeset = 0; + int overridden = 0; #if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET) - char *codeset; - char *saved_locale; + char *saved_locale, *loc_codeset; PyObject *sys_stream, *sys_isatty; #endif +#ifdef MS_WINDOWS + char ibuf[128]; + char buf[128]; +#endif extern void _Py_ReadyTypes(void); if (initialized) @@ -238,38 +247,75 @@ Py_InitializeEx(int install_sigs) _PyGILState_Init(interp, tstate); #endif /* WITH_THREAD */ + if ((p = Py_GETENV("PYTHONIOENCODING")) && *p != '\0') { + p = icodeset = codeset = strdup(p); + free_codeset = 1; + errors = strchr(p, ':'); + if (errors) { + *errors = '\0'; + errors++; + } + overridden = 1; + } + #if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET) /* On Unix, set the file system encoding according to the user's preference, if the CODESET names a well-known Python codec, and Py_FileSystemDefaultEncoding isn't initialized by other means. Also set the encoding of - stdin and stdout if these are terminals. */ - - saved_locale = strdup(setlocale(LC_CTYPE, NULL)); - setlocale(LC_CTYPE, ""); - codeset = nl_langinfo(CODESET); - if (codeset && *codeset) { - PyObject *enc = PyCodec_Encoder(codeset); - if (enc) { - codeset = strdup(codeset); - Py_DECREF(enc); - } else { - codeset = NULL; - PyErr_Clear(); + stdin and stdout if these are terminals, unless overridden. 
*/ + + if (!overridden || !Py_FileSystemDefaultEncoding) { + saved_locale = strdup(setlocale(LC_CTYPE, NULL)); + setlocale(LC_CTYPE, ""); + loc_codeset = nl_langinfo(CODESET); + if (loc_codeset && *loc_codeset) { + PyObject *enc = PyCodec_Encoder(loc_codeset); + if (enc) { + loc_codeset = strdup(loc_codeset); + Py_DECREF(enc); + } else { + loc_codeset = NULL; + PyErr_Clear(); + } + } else + loc_codeset = NULL; + setlocale(LC_CTYPE, saved_locale); + free(saved_locale); + + if (!overridden) { + codeset = icodeset = loc_codeset; + free_codeset = 1; + } + + /* Initialize Py_FileSystemDefaultEncoding from + locale even if PYTHONIOENCODING is set. */ + if (!Py_FileSystemDefaultEncoding) { + Py_FileSystemDefaultEncoding = loc_codeset; + if (!overridden) + free_codeset = 0; } - } else - codeset = NULL; - setlocale(LC_CTYPE, saved_locale); - free(saved_locale); + } +#endif + +#ifdef MS_WINDOWS + if (!overridden) { + icodeset = ibuf; + encoding = buf; + sprintf(ibuf, "cp%d", GetConsoleCP()); + sprintf(buf, "cp%d", GetConsoleOutputCP()); + } +#endif if (codeset) { sys_stream = PySys_GetObject("stdin"); sys_isatty = PyObject_CallMethod(sys_stream, "isatty", ""); if (!sys_isatty) PyErr_Clear(); - if(sys_isatty && PyObject_IsTrue(sys_isatty) && + if ((overridden || + (sys_isatty && PyObject_IsTrue(sys_isatty))) && PyFile_Check(sys_stream)) { - if (!PyFile_SetEncoding(sys_stream, codeset)) + if (!PyFile_SetEncodingAndErrors(sys_stream, icodeset, errors)) Py_FatalError("Cannot set codeset of stdin"); } Py_XDECREF(sys_isatty); @@ -278,9 +324,10 @@ Py_InitializeEx(int install_sigs) sys_isatty = PyObject_CallMethod(sys_stream, "isatty", ""); if (!sys_isatty) PyErr_Clear(); - if(sys_isatty && PyObject_IsTrue(sys_isatty) && + if ((overridden || + (sys_isatty && PyObject_IsTrue(sys_isatty))) && PyFile_Check(sys_stream)) { - if (!PyFile_SetEncoding(sys_stream, codeset)) + if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors)) Py_FatalError("Cannot set codeset of stdout"); } 
Py_XDECREF(sys_isatty); @@ -289,19 +336,17 @@ Py_InitializeEx(int install_sigs) sys_isatty = PyObject_CallMethod(sys_stream, "isatty", ""); if (!sys_isatty) PyErr_Clear(); - if(sys_isatty && PyObject_IsTrue(sys_isatty) && + if((overridden || + (sys_isatty && PyObject_IsTrue(sys_isatty))) && PyFile_Check(sys_stream)) { - if (!PyFile_SetEncoding(sys_stream, codeset)) + if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors)) Py_FatalError("Cannot set codeset of stderr"); } Py_XDECREF(sys_isatty); - if (!Py_FileSystemDefaultEncoding) - Py_FileSystemDefaultEncoding = codeset; - else + if (free_codeset) free(codeset); } -#endif } void diff --git a/Python/sysmodule.c b/Python/sysmodule.c index b248429..e4fcc50 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1232,9 +1232,6 @@ _PySys_Init(void) PyObject *m, *v, *sysdict; PyObject *sysin, *sysout, *syserr; char *s; -#ifdef MS_WINDOWS - char buf[128]; -#endif m = Py_InitModule3("sys", sys_methods, sys_doc); if (m == NULL) @@ -1272,23 +1269,6 @@ _PySys_Init(void) syserr = PyFile_FromFile(stderr, "<stderr>", "w", _check_and_flush); if (PyErr_Occurred()) return NULL; -#ifdef MS_WINDOWS - if(isatty(_fileno(stdin)) && PyFile_Check(sysin)) { - sprintf(buf, "cp%d", GetConsoleCP()); - if (!PyFile_SetEncoding(sysin, buf)) - return NULL; - } - if(isatty(_fileno(stdout)) && PyFile_Check(sysout)) { - sprintf(buf, "cp%d", GetConsoleOutputCP()); - if (!PyFile_SetEncoding(sysout, buf)) - return NULL; - } - if(isatty(_fileno(stderr)) && PyFile_Check(syserr)) { - sprintf(buf, "cp%d", GetConsoleOutputCP()); - if (!PyFile_SetEncoding(syserr, buf)) - return NULL; - } -#endif PyDict_SetItemString(sysdict, "stdin", sysin); PyDict_SetItemString(sysdict, "stdout", sysout); |