summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@haypocalc.com> 2010-05-15 16:27:27 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2010-05-15 16:27:27 (GMT)
commit: ae6265f8d06dbec7d08c73ca23dad0f040d09b8e (patch)
tree: 3598426233e690b284bce322194f51be94ab6799
parent: 59e62db0a39eb89930ed3ae1730726cd15b7d640 (diff)
download: cpython-ae6265f8d06dbec7d08c73ca23dad0f040d09b8e.zip
cpython-ae6265f8d06dbec7d08c73ca23dad0f040d09b8e.tar.gz
cpython-ae6265f8d06dbec7d08c73ca23dad0f040d09b8e.tar.bz2
Issue #8715: Create PyUnicode_EncodeFSDefault() function: Encode a Unicode
object to Py_FileSystemDefaultEncoding with the "surrogateescape" error handler, return a bytes object. If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
-rw-r--r-- Doc/c-api/unicode.rst | 11
-rw-r--r-- Include/unicodeobject.h | 10
-rw-r--r-- Misc/NEWS | 5
-rw-r--r-- Modules/_io/fileio.c | 3
-rw-r--r-- Modules/_tkinter.c | 4
-rw-r--r-- Modules/grpmodule.c | 3
-rw-r--r-- Modules/pwdmodule.c | 4
-rw-r--r-- Modules/spwdmodule.c | 4
-rw-r--r-- Objects/unicodeobject.c | 16
-rw-r--r-- Python/import.c | 12
10 files changed, 46 insertions, 26 deletions
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 6e163d6..4222a05 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -396,6 +396,7 @@ used, passsing :func:PyUnicode_FSConverter as the conversion function:
Use :func:`PyUnicode_DecodeFSDefaultAndSize` if you know the string length.
+
.. cfunction:: PyObject* PyUnicode_DecodeFSDefault(const char *s)
Decode a string using :cdata:`Py_FileSystemDefaultEncoding` and
@@ -404,6 +405,16 @@ used, passsing :func:PyUnicode_FSConverter as the conversion function:
If :cdata:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8.
+.. cfunction:: PyObject* PyUnicode_EncodeFSDefault(PyObject *unicode)
+
+ Encode a Unicode object to :cdata:`Py_FileSystemDefaultEncoding` with the
+ ``'surrogateescape'`` error handler, return a :func:`bytes` object.
+
+ If :cdata:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8.
+
+ .. versionadded:: 3.2
+
+
wchar_t Support
"""""""""""""""
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 383187b..ddc9000 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -1268,6 +1268,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
Py_ssize_t size /* size */
);
+/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
+ "surrogateescape" error handler, return a bytes object.
+
+ If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
+*/
+
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
+ PyObject *unicode
+ );
+
/* --- Methods & Slots ----------------------------------------------------
These are capable of handling Unicode objects and strings on input
diff --git a/Misc/NEWS b/Misc/NEWS
index 31a063d..3da54ab 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,11 @@ What's New in Python 3.2 Alpha 1?
Core and Builtins
-----------------
+- Issue #8715: Create PyUnicode_EncodeFSDefault() function: Encode a Unicode
+ object to Py_FileSystemDefaultEncoding with the "surrogateescape" error
+ handler, return a bytes object. If Py_FileSystemDefaultEncoding is not set,
+ fall back to UTF-8.
+
- Enable shortcuts for common encodings in PyUnicode_AsEncodedString() for any
error handler, not only the default error handler (strict)
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
index 6ecce1b..4f450da 100644
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -247,8 +247,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
if (u == NULL)
return -1;
- stringobj = PyUnicode_AsEncodedString(
- u, Py_FileSystemDefaultEncoding, "surrogateescape");
+ stringobj = PyUnicode_EncodeFSDefault(u);
Py_DECREF(u);
if (stringobj == NULL)
return -1;
diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c
index 8552575..c7c1530 100644
--- a/Modules/_tkinter.c
+++ b/Modules/_tkinter.c
@@ -3147,9 +3147,7 @@ PyInit__tkinter(void)
it also helps Tcl find its encodings. */
uexe = PyUnicode_FromWideChar(Py_GetProgramName(), -1);
if (uexe) {
- cexe = PyUnicode_AsEncodedString(uexe,
- Py_FileSystemDefaultEncoding,
- NULL);
+ cexe = PyUnicode_EncodeFSDefault(uexe);
if (cexe)
Tcl_FindExecutable(PyBytes_AsString(cexe));
Py_XDECREF(cexe);
diff --git a/Modules/grpmodule.c b/Modules/grpmodule.c
index d10a79d..d64c142 100644
--- a/Modules/grpmodule.c
+++ b/Modules/grpmodule.c
@@ -111,8 +111,7 @@ grp_getgrnam(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "U:getgrnam", &arg))
return NULL;
- if ((bytes = PyUnicode_AsEncodedString(arg, Py_FileSystemDefaultEncoding,
- "surrogateescape")) == NULL)
+ if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL)
return NULL;
if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1)
goto out;
diff --git a/Modules/pwdmodule.c b/Modules/pwdmodule.c
index 35a387a..b303f95 100644
--- a/Modules/pwdmodule.c
+++ b/Modules/pwdmodule.c
@@ -132,9 +132,7 @@ pwd_getpwnam(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "U:getpwnam", &arg))
return NULL;
- if ((bytes = PyUnicode_AsEncodedString(arg,
- Py_FileSystemDefaultEncoding,
- "surrogateescape")) == NULL)
+ if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL)
return NULL;
if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1)
goto out;
diff --git a/Modules/spwdmodule.c b/Modules/spwdmodule.c
index da452e9..96707b4 100644
--- a/Modules/spwdmodule.c
+++ b/Modules/spwdmodule.c
@@ -118,9 +118,7 @@ static PyObject* spwd_getspnam(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "U:getspnam", &arg))
return NULL;
- if ((bytes = PyUnicode_AsEncodedString(arg,
- Py_FileSystemDefaultEncoding,
- "surrogateescape")) == NULL)
+ if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL)
return NULL;
if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1)
goto out;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 307027a..b97621b 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1461,6 +1461,18 @@ PyObject *PyUnicode_AsEncodedObject(PyObject *unicode,
return NULL;
}
+PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode)
+{
+ if (Py_FileSystemDefaultEncoding)
+ return PyUnicode_AsEncodedString(unicode,
+ Py_FileSystemDefaultEncoding,
+ "surrogateescape");
+ else
+ return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+ PyUnicode_GET_SIZE(unicode),
+ "surrogateescape");
+}
+
PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
const char *encoding,
const char *errors)
@@ -1646,9 +1658,7 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)
arg = PyUnicode_FromObject(arg);
if (!arg)
return 0;
- output = PyUnicode_AsEncodedObject(arg,
- Py_FileSystemDefaultEncoding,
- "surrogateescape");
+ output = PyUnicode_EncodeFSDefault(arg);
Py_DECREF(arg);
if (!output)
return 0;
diff --git a/Python/import.c b/Python/import.c
index 923888d..d23eb6a 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -1633,8 +1633,7 @@ find_module(char *fullname, char *subname, PyObject *path, char *buf,
if (!v)
return NULL;
if (PyUnicode_Check(v)) {
- v = PyUnicode_AsEncodedString(v,
- Py_FileSystemDefaultEncoding, NULL);
+ v = PyUnicode_EncodeFSDefault(v);
if (v == NULL)
return NULL;
}
@@ -2752,14 +2751,7 @@ ensure_fromlist(PyObject *mod, PyObject *fromlist, char *buf, Py_ssize_t buflen,
char *subname;
PyObject *submod;
char *p;
- if (!Py_FileSystemDefaultEncoding) {
- item8 = PyUnicode_EncodeASCII(PyUnicode_AsUnicode(item),
- PyUnicode_GetSize(item),
- NULL);
- } else {
- item8 = PyUnicode_AsEncodedString(item,
- Py_FileSystemDefaultEncoding, NULL);
- }
+ item8 = PyUnicode_EncodeFSDefault(item);
if (!item8) {
PyErr_SetString(PyExc_ValueError, "Cannot encode path item");
return 0;