summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2008-06-01 07:20:46 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2008-06-01 07:20:46 (GMT)
commit99815892f685e9ab20dfdade9c1e8a295139140c (patch)
tree92d2aeaee0a925df71471b296c5fa7ff28a451b8
parent7f7ca35f5bf22b698135de62d2179a13f5c94c7f (diff)
downloadcpython-99815892f685e9ab20dfdade9c1e8a295139140c.zip
cpython-99815892f685e9ab20dfdade9c1e8a295139140c.tar.gz
cpython-99815892f685e9ab20dfdade9c1e8a295139140c.tar.bz2
New environment variable PYTHONIOENCODING.
-rw-r--r--Doc/c-api/file.rst8
-rw-r--r--Doc/library/stdtypes.rst7
-rw-r--r--Doc/using/cmdline.rst7
-rw-r--r--Include/fileobject.h2
-rw-r--r--Lib/test/test_sys.py20
-rw-r--r--Misc/NEWS2
-rw-r--r--Modules/main.c1
-rw-r--r--Objects/fileobject.c35
-rw-r--r--Python/pythonrun.c103
-rw-r--r--Python/sysmodule.c20
10 files changed, 153 insertions, 52 deletions
diff --git a/Doc/c-api/file.rst b/Doc/c-api/file.rst
index e107268..aa753e5 100644
--- a/Doc/c-api/file.rst
+++ b/Doc/c-api/file.rst
@@ -130,6 +130,14 @@ change in future releases of Python.
.. versionadded:: 2.3
+.. cfunction:: int PyFile_SetEncodingAndErrors(PyFileObject *p, const char *enc, char *errors)
+
+ Set the file's encoding for Unicode output to *enc*, and its error
+ mode to *errors*. Return 1 on success and 0 on failure.
+
+ .. versionadded:: 2.6
+
+
.. cfunction:: int PyFile_SoftSpace(PyObject *p, int newflag)
.. index:: single: softspace (file attribute)
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
index c6679fd..c96ef10 100644
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -2165,6 +2165,13 @@ the particular object.
.. versionadded:: 2.3
+.. attribute:: file.errors
+
+ The Unicode error handler used along with the encoding.
+
+ .. versionadded:: 2.6
+
+
.. attribute:: file.mode
The I/O mode for the file. If the file was created using the :func:`open`
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index a6ab7ad..7d9ecad 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -481,6 +481,13 @@ These environment variables influence Python's behavior.
.. versionadded:: 2.6
+.. envvar:: PYTHONIOENCODING
+
+ Overrides the encoding used for stdin/stdout/stderr, in the syntax
+ encodingname:errorhandler, with the :errorhandler part being optional.
+
+ .. versionadded:: 2.6
+
.. envvar:: PYTHONNOUSERSITE
diff --git a/Include/fileobject.h b/Include/fileobject.h
index 56fae81..56cf40a 100644
--- a/Include/fileobject.h
+++ b/Include/fileobject.h
@@ -24,6 +24,7 @@ typedef struct {
int f_newlinetypes; /* Types of newlines seen */
int f_skipnextlf; /* Skip next \n */
PyObject *f_encoding;
+ PyObject *f_errors;
PyObject *weakreflist; /* List of weak references */
int unlocked_count; /* Num. currently running sections of code
using f_fp with the GIL released. */
@@ -37,6 +38,7 @@ PyAPI_DATA(PyTypeObject) PyFile_Type;
PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *);
PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int);
PyAPI_FUNC(int) PyFile_SetEncoding(PyObject *, const char *);
+PyAPI_FUNC(int) PyFile_SetEncodingAndErrors(PyObject *, const char *, char *errors);
PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *,
int (*)(FILE *));
PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *);
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 975795a..a4d8a72 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -385,6 +385,26 @@ class SysModuleTest(unittest.TestCase):
## self.assert_(r[0][2] > 100, r[0][2])
## self.assert_(r[1][2] > 100, r[1][2])
+ def test_ioencoding(self):
+ import subprocess,os
+ env = dict(os.environ)
+
+ # Test character: cent sign, encoded as 0x4A (ASCII J) in CP424,
+ # not representable in ASCII.
+
+ env["PYTHONIOENCODING"] = "cp424"
+ p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'],
+ stdout = subprocess.PIPE, env=env)
+ out = p.stdout.read().strip()
+ self.assertEqual(out, unichr(0xa2).encode("cp424"))
+
+ env["PYTHONIOENCODING"] = "ascii:replace"
+ p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'],
+ stdout = subprocess.PIPE, env=env)
+ out = p.stdout.read().strip()
+ self.assertEqual(out, '?')
+
+
def test_main():
test.test_support.run_unittest(SysModuleTest)
diff --git a/Misc/NEWS b/Misc/NEWS
index 21465f2..790e980 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,8 @@ What's New in Python 2.6 beta 1?
Core and Builtins
-----------------
+- New environment variable PYTHONIOENCODING.
+
- Patch #2488: Add sys.maxsize.
- Issue #2353: file.xreadlines() now emits a Py3k warning.
diff --git a/Modules/main.c b/Modules/main.c
index dc94a09..c1c4b45 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -99,6 +99,7 @@ static char *usage_5 = "\
PYTHONHOME : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n\
The default module search path uses %s.\n\
PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
+PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
";
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index 86f3a14..4ea97f5 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -155,6 +155,7 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
Py_DECREF(f->f_name);
Py_DECREF(f->f_mode);
Py_DECREF(f->f_encoding);
+ Py_DECREF(f->f_errors);
Py_INCREF(name);
f->f_name = name;
@@ -170,6 +171,8 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
f->f_skipnextlf = 0;
Py_INCREF(Py_None);
f->f_encoding = Py_None;
+ Py_INCREF(Py_None);
+ f->f_errors = Py_None;
if (f->f_mode == NULL)
return NULL;
@@ -435,19 +438,38 @@ PyFile_SetBufSize(PyObject *f, int bufsize)
}
/* Set the encoding used to output Unicode strings.
- Returh 1 on success, 0 on failure. */
+ Return 1 on success, 0 on failure. */
int
PyFile_SetEncoding(PyObject *f, const char *enc)
{
+ return PyFile_SetEncodingAndErrors(f, enc, NULL);
+}
+
+int
+PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
+{
PyFileObject *file = (PyFileObject*)f;
- PyObject *str = PyBytes_FromString(enc);
+ PyObject *str, *oerrors;
assert(PyFile_Check(f));
+ str = PyBytes_FromString(enc);
if (!str)
return 0;
+ if (errors) {
+ oerrors = PyString_FromString(errors);
+ if (!oerrors) {
+ Py_DECREF(str);
+ return 0;
+ }
+ } else {
+ oerrors = Py_None;
+ Py_INCREF(Py_None);
+ }
Py_DECREF(file->f_encoding);
file->f_encoding = str;
+ Py_DECREF(file->f_errors);
+ file->f_errors = oerrors;
return 1;
}
@@ -491,6 +513,7 @@ file_dealloc(PyFileObject *f)
Py_XDECREF(f->f_name);
Py_XDECREF(f->f_mode);
Py_XDECREF(f->f_encoding);
+ Py_XDECREF(f->f_errors);
drop_readahead(f);
Py_TYPE(f)->tp_free((PyObject *)f);
}
@@ -1879,6 +1902,8 @@ static PyMemberDef file_memberlist[] = {
"file name"},
{"encoding", T_OBJECT, OFF(f_encoding), RO,
"file encoding"},
+ {"errors", T_OBJECT, OFF(f_errors), RO,
+ "Unicode error handler"},
/* getattr(f, "closed") is implemented without this table */
{NULL} /* Sentinel */
};
@@ -2093,6 +2118,8 @@ file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
((PyFileObject *)self)->f_mode = not_yet_string;
Py_INCREF(Py_None);
((PyFileObject *)self)->f_encoding = Py_None;
+ Py_INCREF(Py_None);
+ ((PyFileObject *)self)->f_errors = Py_None;
((PyFileObject *)self)->weakreflist = NULL;
((PyFileObject *)self)->unlocked_count = 0;
}
@@ -2295,7 +2322,9 @@ PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
if ((flags & Py_PRINT_RAW) &&
PyUnicode_Check(v) && enc != Py_None) {
char *cenc = PyBytes_AS_STRING(enc);
- value = PyUnicode_AsEncodedString(v, cenc, "strict");
+ char *errors = fobj->f_errors == Py_None ?
+ "strict" : PyBytes_AS_STRING(fobj->f_errors);
+ value = PyUnicode_AsEncodedString(v, cenc, errors);
if (value == NULL)
return -1;
} else {
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index 6a9cb25..c30bf65 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -132,11 +132,20 @@ Py_InitializeEx(int install_sigs)
PyThreadState *tstate;
PyObject *bimod, *sysmod;
char *p;
+ char *icodeset; /* On Windows, input codeset may theoretically
+ differ from output codeset. */
+ char *codeset = NULL;
+ char *errors = NULL;
+ int free_codeset = 0;
+ int overridden = 0;
#if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
- char *codeset;
- char *saved_locale;
+ char *saved_locale, *loc_codeset;
PyObject *sys_stream, *sys_isatty;
#endif
+#ifdef MS_WINDOWS
+ char ibuf[128];
+ char buf[128];
+#endif
extern void _Py_ReadyTypes(void);
if (initialized)
@@ -238,38 +247,75 @@ Py_InitializeEx(int install_sigs)
_PyGILState_Init(interp, tstate);
#endif /* WITH_THREAD */
+ if ((p = Py_GETENV("PYTHONIOENCODING")) && *p != '\0') {
+ p = icodeset = codeset = strdup(p);
+ free_codeset = 1;
+ errors = strchr(p, ':');
+ if (errors) {
+ *errors = '\0';
+ errors++;
+ }
+ overridden = 1;
+ }
+
#if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
/* On Unix, set the file system encoding according to the
user's preference, if the CODESET names a well-known
Python codec, and Py_FileSystemDefaultEncoding isn't
initialized by other means. Also set the encoding of
- stdin and stdout if these are terminals. */
-
- saved_locale = strdup(setlocale(LC_CTYPE, NULL));
- setlocale(LC_CTYPE, "");
- codeset = nl_langinfo(CODESET);
- if (codeset && *codeset) {
- PyObject *enc = PyCodec_Encoder(codeset);
- if (enc) {
- codeset = strdup(codeset);
- Py_DECREF(enc);
- } else {
- codeset = NULL;
- PyErr_Clear();
+ stdin and stdout if these are terminals, unless overridden. */
+
+ if (!overridden || !Py_FileSystemDefaultEncoding) {
+ saved_locale = strdup(setlocale(LC_CTYPE, NULL));
+ setlocale(LC_CTYPE, "");
+ loc_codeset = nl_langinfo(CODESET);
+ if (loc_codeset && *loc_codeset) {
+ PyObject *enc = PyCodec_Encoder(loc_codeset);
+ if (enc) {
+ loc_codeset = strdup(loc_codeset);
+ Py_DECREF(enc);
+ } else {
+ loc_codeset = NULL;
+ PyErr_Clear();
+ }
+ } else
+ loc_codeset = NULL;
+ setlocale(LC_CTYPE, saved_locale);
+ free(saved_locale);
+
+ if (!overridden) {
+ codeset = icodeset = loc_codeset;
+ free_codeset = 1;
+ }
+
+ /* Initialize Py_FileSystemDefaultEncoding from
+ locale even if PYTHONIOENCODING is set. */
+ if (!Py_FileSystemDefaultEncoding) {
+ Py_FileSystemDefaultEncoding = loc_codeset;
+ if (!overridden)
+ free_codeset = 0;
}
- } else
- codeset = NULL;
- setlocale(LC_CTYPE, saved_locale);
- free(saved_locale);
+ }
+#endif
+
+#ifdef MS_WINDOWS
+ if (!overridden) {
+ icodeset = ibuf;
+ encoding = buf;
+ sprintf(ibuf, "cp%d", GetConsoleCP());
+ sprintf(buf, "cp%d", GetConsoleOutputCP());
+ }
+#endif
if (codeset) {
sys_stream = PySys_GetObject("stdin");
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
if (!sys_isatty)
PyErr_Clear();
- if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
+ if ((overridden ||
+ (sys_isatty && PyObject_IsTrue(sys_isatty))) &&
PyFile_Check(sys_stream)) {
- if (!PyFile_SetEncoding(sys_stream, codeset))
+ if (!PyFile_SetEncodingAndErrors(sys_stream, icodeset, errors))
Py_FatalError("Cannot set codeset of stdin");
}
Py_XDECREF(sys_isatty);
@@ -278,9 +324,10 @@ Py_InitializeEx(int install_sigs)
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
if (!sys_isatty)
PyErr_Clear();
- if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
+ if ((overridden ||
+ (sys_isatty && PyObject_IsTrue(sys_isatty))) &&
PyFile_Check(sys_stream)) {
- if (!PyFile_SetEncoding(sys_stream, codeset))
+ if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors))
Py_FatalError("Cannot set codeset of stdout");
}
Py_XDECREF(sys_isatty);
@@ -289,19 +336,17 @@ Py_InitializeEx(int install_sigs)
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
if (!sys_isatty)
PyErr_Clear();
- if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
+ if((overridden ||
+ (sys_isatty && PyObject_IsTrue(sys_isatty))) &&
PyFile_Check(sys_stream)) {
- if (!PyFile_SetEncoding(sys_stream, codeset))
+ if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors))
Py_FatalError("Cannot set codeset of stderr");
}
Py_XDECREF(sys_isatty);
- if (!Py_FileSystemDefaultEncoding)
- Py_FileSystemDefaultEncoding = codeset;
- else
+ if (free_codeset)
free(codeset);
}
-#endif
}
void
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index b248429..e4fcc50 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -1232,9 +1232,6 @@ _PySys_Init(void)
PyObject *m, *v, *sysdict;
PyObject *sysin, *sysout, *syserr;
char *s;
-#ifdef MS_WINDOWS
- char buf[128];
-#endif
m = Py_InitModule3("sys", sys_methods, sys_doc);
if (m == NULL)
@@ -1272,23 +1269,6 @@ _PySys_Init(void)
syserr = PyFile_FromFile(stderr, "<stderr>", "w", _check_and_flush);
if (PyErr_Occurred())
return NULL;
-#ifdef MS_WINDOWS
- if(isatty(_fileno(stdin)) && PyFile_Check(sysin)) {
- sprintf(buf, "cp%d", GetConsoleCP());
- if (!PyFile_SetEncoding(sysin, buf))
- return NULL;
- }
- if(isatty(_fileno(stdout)) && PyFile_Check(sysout)) {
- sprintf(buf, "cp%d", GetConsoleOutputCP());
- if (!PyFile_SetEncoding(sysout, buf))
- return NULL;
- }
- if(isatty(_fileno(stderr)) && PyFile_Check(syserr)) {
- sprintf(buf, "cp%d", GetConsoleOutputCP());
- if (!PyFile_SetEncoding(syserr, buf))
- return NULL;
- }
-#endif
PyDict_SetItemString(sysdict, "stdin", sysin);
PyDict_SetItemString(sysdict, "stdout", sysout);