summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/c-api/unicode.rst20
-rw-r--r--Include/unicodeobject.h12
-rw-r--r--Misc/NEWS4
-rw-r--r--Objects/unicodeobject.c44
4 files changed, 70 insertions, 10 deletions
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index fa460a8..913fd69 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -380,13 +380,25 @@ used, passsing :func:`PyUnicode_FSConverter` as the conversion function:
.. cfunction:: int PyUnicode_FSConverter(PyObject* obj, void* result)
- Convert *obj* into *result*, using :cdata:`Py_FileSystemDefaultEncoding`,
- and the ``"surrogateescape"`` error handler. *result* must be a
- ``PyObject*``, return a :func:`bytes` object which must be released if it
- is no longer used.
+ ParseTuple converter: encode :class:`str` objects to :class:`bytes` using
+ :cfunc:`PyUnicode_EncodeFSDefault`; :class:`bytes` objects are output as-is.
+ *result* must be a :ctype:`PyBytesObject*` which must be released when it is
+ no longer used.
.. versionadded:: 3.1
+To decode file names during argument parsing, the ``"O&"`` converter should be
+used, passing :func:`PyUnicode_FSDecoder` as the conversion function:
+
+.. cfunction:: int PyUnicode_FSDecoder(PyObject* obj, void* result)
+
+ ParseTuple converter: decode :class:`bytes` objects to :class:`str` using
+ :cfunc:`PyUnicode_DecodeFSDefaultAndSize`; :class:`str` objects are output
+ as-is. *result* must be the address of a :ctype:`PyUnicodeObject*`; the
+ reference stored there must be released when it is no longer used.
+
+ .. versionadded:: 3.2
+
.. cfunction:: PyObject* PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
Decode a null-terminated string using :cdata:`Py_FileSystemDefaultEncoding`
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 7f5e8fd..cee75cc 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -200,6 +200,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
# define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
+# define PyUnicode_FSDecoder PyUnicodeUCS2_FSDecoder
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
@@ -300,6 +301,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
# define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
+# define PyUnicode_FSDecoder PyUnicodeUCS4_FSDecoder
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
@@ -1239,12 +1241,16 @@ PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
/* --- File system encoding ---------------------------------------------- */
-/* ParseTuple converter which converts a Unicode object into the file
- system encoding as a bytes object, using the "surrogateescape" error
- handler; bytes objects are output as-is. */
+/* ParseTuple converter: encode str objects to bytes using
+ PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
+/* ParseTuple converter: decode bytes objects to str using
+ PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
+
+PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
+
/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
and the "surrogateescape" error handler.
diff --git a/Misc/NEWS b/Misc/NEWS
index 28e0314..3388aec 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 2?
Core and Builtins
-----------------
+- Issue #9542: Create PyUnicode_FSDecoder() function, a ParseTuple converter:
+ decode bytes objects to str using PyUnicode_DecodeFSDefaultAndSize();
+ str objects are output as-is.
+
- Issue #9203: Computed gotos are now enabled by default on supported
compilers (which are detected by the configure script). They can still
be disable selectively by specifying --without-computed-gotos.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7c9b882..676c693 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1652,9 +1652,6 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
}
}
-/* Convert the argument to a bytes object, according to the file
- system encoding. The addr param must be a PyObject**.
- This is designed to be used with "O&" in PyArg_Parse APIs. */
int
PyUnicode_FSConverter(PyObject* arg, void* addr)
@@ -1696,6 +1693,47 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)
}
+int
+PyUnicode_FSDecoder(PyObject* arg, void* addr)
+{ /* "O&" converter: on success stores an owned str reference in *(PyObject**)addr */
+ PyObject *output = NULL;
+ Py_ssize_t size;
+ void *data;
+ if (arg == NULL) { /* NULL arg: release the reference currently held in *addr */
+ Py_DECREF(*(PyObject**)addr);
+ return 1;
+ }
+ if (PyUnicode_Check(arg)) { /* str input is passed through unchanged */
+ output = arg;
+ Py_INCREF(output); /* take our own reference so both branches yield an owned object */
+ }
+ else {
+ arg = PyBytes_FromObject(arg); /* arg is rebound to a new bytes object owned by this function */
+ if (!arg)
+ return 0;
+ output = PyUnicode_DecodeFSDefaultAndSize(PyBytes_AS_STRING(arg),
+ PyBytes_GET_SIZE(arg));
+ Py_DECREF(arg); /* drop the temporary bytes object whether or not decoding succeeded */
+ if (!output)
+ return 0;
+ if (!PyUnicode_Check(output)) {
+ Py_DECREF(output);
+ PyErr_SetString(PyExc_TypeError, "decoder failed to return unicode");
+ return 0;
+ }
+ }
+ size = PyUnicode_GET_SIZE(output);
+ data = PyUnicode_AS_UNICODE(output);
+ if (size != Py_UNICODE_strlen(data)) { /* a length mismatch is reported as an embedded NUL */
+ PyErr_SetString(PyExc_TypeError, "embedded NUL character");
+ Py_DECREF(output);
+ return 0;
+ }
+ *(PyObject**)addr = output;
+ return Py_CLEANUP_SUPPORTED; /* NOTE(review): presumably requests the NULL-arg cleanup call handled above; confirm against the "O&" converter docs */
+}
+
+
char*
_PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
{