summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2014-08-01 10:28:48 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2014-08-01 10:28:48 (GMT)
commitf6a271ae980d2f3fb450f745b8f87624378156c4 (patch)
treeae0c09042455826ae38875945dadd4919ca8f235
parentc6f8c0a1de448e7ca62ece1d21f089194d31f0d9 (diff)
downloadcpython-f6a271ae980d2f3fb450f745b8f87624378156c4.zip
cpython-f6a271ae980d2f3fb450f745b8f87624378156c4.tar.gz
cpython-f6a271ae980d2f3fb450f745b8f87624378156c4.tar.bz2
Issue #18395: Rename ``_Py_char2wchar()`` to :c:func:`Py_DecodeLocale`, rename
``_Py_wchar2char()`` to :c:func:`Py_EncodeLocale`, and document these functions.
-rw-r--r--Doc/c-api/sys.rst54
-rw-r--r--Doc/c-api/unicode.rst35
-rw-r--r--Doc/library/codecs.rst1
-rw-r--r--Doc/library/os.rst7
-rw-r--r--Include/fileutils.h4
-rw-r--r--Misc/NEWS4
-rw-r--r--Misc/coverity_model.c2
-rw-r--r--Modules/getpath.c16
-rw-r--r--Modules/main.c4
-rw-r--r--Objects/unicodeobject.c8
-rw-r--r--Programs/python.c2
-rw-r--r--Python/fileutils.c67
-rw-r--r--Python/frozenmain.c2
13 files changed, 138 insertions, 68 deletions
diff --git a/Doc/c-api/sys.rst b/Doc/c-api/sys.rst
index 9760dca..a6a939c 100644
--- a/Doc/c-api/sys.rst
+++ b/Doc/c-api/sys.rst
@@ -47,6 +47,60 @@ Operating System Utilities
not call those functions directly! :c:type:`PyOS_sighandler_t` is a typedef
alias for :c:type:`void (\*)(int)`.
+.. c:function:: wchar_t* Py_DecodeLocale(const char* arg, size_t *size)
+
+ Decode a byte string from the locale encoding with the :ref:`surrogateescape
+ error handler <surrogateescape>`: undecodable bytes are decoded as
+ characters in range U+DC80..U+DCFF. If a byte sequence can be decoded as a
+ surrogate character, escape the bytes using the surrogateescape error
+ handler instead of decoding them.
+
+ Return a pointer to a newly allocated wide character string; use
+ :c:func:`PyMem_RawFree` to free the memory. If *size* is not ``NULL``, write
+ the number of wide characters excluding the null character into ``*size``.
+
+ Return ``NULL`` on decoding error or memory allocation error. If *size* is
+ not ``NULL``, ``*size`` is set to ``(size_t)-1`` on memory error or set to
+ ``(size_t)-2`` on decoding error.
+
+ Decoding errors should never happen, unless there is a bug in the C
+ library.
+
+ Use the :c:func:`Py_EncodeLocale` function to encode the character string
+ back to a byte string.
+
+ .. seealso::
+
+ The :c:func:`PyUnicode_DecodeFSDefaultAndSize` and
+ :c:func:`PyUnicode_DecodeLocaleAndSize` functions.
+
+ .. versionadded:: 3.5
+
+
+.. c:function:: char* Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
+
+ Encode a wide character string to the locale encoding with the
+ :ref:`surrogateescape error handler <surrogateescape>`: surrogate characters
+ in the range U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
+
+ Return a pointer to a newly allocated byte string; use :c:func:`PyMem_Free`
+ to free the memory. Return ``NULL`` on encoding error or memory allocation
+ error.
+
+ If *error_pos* is not ``NULL``, ``*error_pos`` is set to the index of the
+ invalid character on encoding error, or set to ``(size_t)-1`` otherwise.
+
+ Use the :c:func:`Py_DecodeLocale` function to decode the byte string back
+ to a wide character string.
+
+ .. seealso::
+
+ The :c:func:`PyUnicode_EncodeFSDefault` and
+ :c:func:`PyUnicode_EncodeLocale` functions.
+
+ .. versionadded:: 3.5
+
+
.. _systemfunctions:
System Functions
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 4352351..2d1bae1 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -758,11 +758,13 @@ system.
*errors* is ``NULL``. *str* must end with a null character but
cannot contain embedded null characters.
+ Use :c:func:`PyUnicode_DecodeFSDefaultAndSize` to decode a string from
+ :c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at
+ Python startup).
+
.. seealso::
- Use :c:func:`PyUnicode_DecodeFSDefaultAndSize` to decode a string from
- :c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at
- Python startup).
+ The :c:func:`Py_DecodeLocale` function.
.. versionadded:: 3.3
@@ -783,11 +785,13 @@ system.
*errors* is ``NULL``. Return a :class:`bytes` object. *str* cannot
contain embedded null characters.
+ Use :c:func:`PyUnicode_EncodeFSDefault` to encode a string to
+ :c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at
+ Python startup).
+
.. seealso::
- Use :c:func:`PyUnicode_EncodeFSDefault` to encode a string to
- :c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at
- Python startup).
+ The :c:func:`Py_EncodeLocale` function.
.. versionadded:: 3.3
@@ -832,12 +836,14 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function:
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
locale encoding.
+ :c:data:`Py_FileSystemDefaultEncoding` is initialized at startup from the
+ locale encoding and cannot be modified later. If you need to decode a string
+ from the current locale encoding, use
+ :c:func:`PyUnicode_DecodeLocaleAndSize`.
+
.. seealso::
- :c:data:`Py_FileSystemDefaultEncoding` is initialized at startup from the
- locale encoding and cannot be modified later. If you need to decode a
- string from the current locale encoding, use
- :c:func:`PyUnicode_DecodeLocaleAndSize`.
+ The :c:func:`Py_DecodeLocale` function.
.. versionchanged:: 3.2
Use ``"strict"`` error handler on Windows.
@@ -867,12 +873,13 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function:
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
locale encoding.
+ :c:data:`Py_FileSystemDefaultEncoding` is initialized at startup from the
+ locale encoding and cannot be modified later. If you need to encode a string
+ to the current locale encoding, use :c:func:`PyUnicode_EncodeLocale`.
+
.. seealso::
- :c:data:`Py_FileSystemDefaultEncoding` is initialized at startup from the
- locale encoding and cannot be modified later. If you need to encode a
- string to the current locale encoding, use
- :c:func:`PyUnicode_EncodeLocale`.
+ The :c:func:`Py_EncodeLocale` function.
.. versionadded:: 3.2
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
index 36144e9..4c2a023 100644
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -318,6 +318,7 @@ and writing to platform dependent files:
encodings.
+.. _surrogateescape:
.. _codec-base-classes:
Codec Base Classes
diff --git a/Doc/library/os.rst b/Doc/library/os.rst
index 9cfc472..bf3a8d5 100644
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -78,9 +78,10 @@ uses the file system encoding to perform this conversion (see
.. versionchanged:: 3.1
On some systems, conversion using the file system encoding may fail. In this
- case, Python uses the ``surrogateescape`` encoding error handler, which means
- that undecodable bytes are replaced by a Unicode character U+DCxx on
- decoding, and these are again translated to the original byte on encoding.
+ case, Python uses the :ref:`surrogateescape encoding error handler
+ <surrogateescape>`, which means that undecodable bytes are replaced by a
+ Unicode character U+DCxx on decoding, and these are again translated to the
+ original byte on encoding.
The file system encoding must guarantee to successfully decode all bytes
diff --git a/Include/fileutils.h b/Include/fileutils.h
index f2a43f7..c5eebc5 100644
--- a/Include/fileutils.h
+++ b/Include/fileutils.h
@@ -7,11 +7,11 @@ extern "C" {
PyAPI_FUNC(PyObject *) _Py_device_encoding(int);
-PyAPI_FUNC(wchar_t *) _Py_char2wchar(
+PyAPI_FUNC(wchar_t *) Py_DecodeLocale(
const char *arg,
size_t *size);
-PyAPI_FUNC(char*) _Py_wchar2char(
+PyAPI_FUNC(char*) Py_EncodeLocale(
const wchar_t *text,
size_t *error_pos);
diff --git a/Misc/NEWS b/Misc/NEWS
index e8e9ba5..f771885 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@ Release date: TBA
Core and Builtins
-----------------
+- Issue #18395: Rename ``_Py_char2wchar()`` to :c:func:`Py_DecodeLocale`,
+ rename ``_Py_wchar2char()`` to :c:func:`Py_EncodeLocale`, and document
+ these functions.
+
- Issue #20179: Apply Argument Clinic to bytes and bytearray.
Patch by Tal Einat.
diff --git a/Misc/coverity_model.c b/Misc/coverity_model.c
index 57f3aeb..421d54d 100644
--- a/Misc/coverity_model.c
+++ b/Misc/coverity_model.c
@@ -85,7 +85,7 @@ PyObject *PyErr_SetFromErrnoWithFilename(PyObject *exc, const char *filename)
}
/* Python/fileutils.c */
-wchar_t *_Py_char2wchar(const char* arg, size_t *size)
+wchar_t *Py_DecodeLocale(const char* arg, size_t *size)
{
wchar_t *w;
__coverity_tainted_data_sink__(arg);
diff --git a/Modules/getpath.c b/Modules/getpath.c
index f26b8e9..de803f8 100644
--- a/Modules/getpath.c
+++ b/Modules/getpath.c
@@ -336,7 +336,7 @@ search_for_prefix(wchar_t *argv0_path, wchar_t *home, wchar_t *_prefix,
joinpath(prefix, L"Modules/Setup");
if (isfile(prefix)) {
/* Check VPATH to see if argv0_path is in the build directory. */
- vpath = _Py_char2wchar(VPATH, NULL);
+ vpath = Py_DecodeLocale(VPATH, NULL);
if (vpath != NULL) {
wcsncpy(prefix, argv0_path, MAXPATHLEN);
prefix[MAXPATHLEN] = L'\0';
@@ -491,10 +491,10 @@ calculate_path(void)
wchar_t *_pythonpath, *_prefix, *_exec_prefix;
wchar_t *lib_python;
- _pythonpath = _Py_char2wchar(PYTHONPATH, NULL);
- _prefix = _Py_char2wchar(PREFIX, NULL);
- _exec_prefix = _Py_char2wchar(EXEC_PREFIX, NULL);
- lib_python = _Py_char2wchar("lib/python" VERSION, NULL);
+ _pythonpath = Py_DecodeLocale(PYTHONPATH, NULL);
+ _prefix = Py_DecodeLocale(PREFIX, NULL);
+ _exec_prefix = Py_DecodeLocale(EXEC_PREFIX, NULL);
+ lib_python = Py_DecodeLocale("lib/python" VERSION, NULL);
if (!_pythonpath || !_prefix || !_exec_prefix || !lib_python) {
Py_FatalError(
@@ -503,7 +503,7 @@ calculate_path(void)
}
if (_path) {
- path_buffer = _Py_char2wchar(_path, NULL);
+ path_buffer = Py_DecodeLocale(_path, NULL);
path = path_buffer;
}
@@ -584,7 +584,7 @@ calculate_path(void)
** be running the interpreter in the build directory, so we use the
** build-directory-specific logic to find Lib and such.
*/
- wchar_t* wbuf = _Py_char2wchar(modPath, NULL);
+ wchar_t* wbuf = Py_DecodeLocale(modPath, NULL);
if (wbuf == NULL) {
Py_FatalError("Cannot decode framework location");
}
@@ -709,7 +709,7 @@ calculate_path(void)
if (_rtpypath && _rtpypath[0] != '\0') {
size_t rtpypath_len;
- rtpypath = _Py_char2wchar(_rtpypath, &rtpypath_len);
+ rtpypath = Py_DecodeLocale(_rtpypath, &rtpypath_len);
if (rtpypath != NULL)
bufsz += rtpypath_len + 1;
}
diff --git a/Modules/main.c b/Modules/main.c
index 1c25326..8a9f5a2 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -647,7 +647,7 @@ Py_Main(int argc, wchar_t **argv)
/* Used by Mac/Tools/pythonw.c to forward
* the argv0 of the stub executable
*/
- wchar_t* wbuf = _Py_char2wchar(pyvenv_launcher, NULL);
+ wchar_t* wbuf = Py_DecodeLocale(pyvenv_launcher, NULL);
if (wbuf == NULL) {
Py_FatalError("Cannot decode __PYVENV_LAUNCHER__");
@@ -730,7 +730,7 @@ Py_Main(int argc, wchar_t **argv)
char *cfilename_buffer;
const char *cfilename;
int err = errno;
- cfilename_buffer = _Py_wchar2char(filename, NULL);
+ cfilename_buffer = Py_EncodeLocale(filename, NULL);
if (cfilename_buffer != NULL)
cfilename = cfilename_buffer;
else
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 72272c7..263ca85 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3255,7 +3255,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
/* "surrogateescape" error handler */
char *str;
- str = _Py_wchar2char(wstr, &error_pos);
+ str = Py_EncodeLocale(wstr, &error_pos);
if (str == NULL) {
if (error_pos == (size_t)-1) {
PyErr_NoMemory();
@@ -3308,7 +3308,7 @@ encode_error:
if (errmsg != NULL) {
size_t errlen;
- wstr = _Py_char2wchar(errmsg, &errlen);
+ wstr = Py_DecodeLocale(errmsg, &errlen);
if (wstr != NULL) {
reason = PyUnicode_FromWideChar(wstr, errlen);
PyMem_RawFree(wstr);
@@ -3526,7 +3526,7 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
if (surrogateescape) {
/* "surrogateescape" error handler */
- wstr = _Py_char2wchar(str, &wlen);
+ wstr = Py_DecodeLocale(str, &wlen);
if (wstr == NULL) {
if (wlen == (size_t)-1)
PyErr_NoMemory();
@@ -3581,7 +3581,7 @@ decode_error:
error_pos = mbstowcs_errorpos(str, len);
if (errmsg != NULL) {
size_t errlen;
- wstr = _Py_char2wchar(errmsg, &errlen);
+ wstr = Py_DecodeLocale(errmsg, &errlen);
if (wstr != NULL) {
reason = PyUnicode_FromWideChar(wstr, errlen);
PyMem_RawFree(wstr);
diff --git a/Programs/python.c b/Programs/python.c
index 9811c01..2e5e4e3 100644
--- a/Programs/python.c
+++ b/Programs/python.c
@@ -52,7 +52,7 @@ main(int argc, char **argv)
setlocale(LC_ALL, "");
for (i = 0; i < argc; i++) {
- argv_copy[i] = _Py_char2wchar(argv[i], NULL);
+ argv_copy[i] = Py_DecodeLocale(argv[i], NULL);
if (!argv_copy[i]) {
PyMem_RawFree(oldloc);
fprintf(stderr, "Fatal Python error: "
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 065d3fd..227e92a 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -82,11 +82,11 @@ extern int _Py_normalize_encoding(const char *, char *, size_t);
Values of force_ascii:
- 1: the workaround is used: _Py_wchar2char() uses
- encode_ascii_surrogateescape() and _Py_char2wchar() uses
+ 1: the workaround is used: Py_EncodeLocale() uses
+ encode_ascii_surrogateescape() and Py_DecodeLocale() uses
decode_ascii_surrogateescape()
- 0: the workaround is not used: _Py_wchar2char() uses wcstombs() and
- _Py_char2wchar() uses mbstowcs()
+ 0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
+ Py_DecodeLocale() uses mbstowcs()
-1: unknown, need to call check_force_ascii() to get the value
*/
static int force_ascii = -1;
@@ -241,24 +241,26 @@ decode_ascii_surrogateescape(const char *arg, size_t *size)
/* Decode a byte string from the locale encoding with the
- surrogateescape error handler (undecodable bytes are decoded as characters
- in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
+ surrogateescape error handler: undecodable bytes are decoded as characters
+ in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
character, escape the bytes using the surrogateescape error handler instead
of decoding them.
- Use _Py_wchar2char() to encode the character string back to a byte string.
+ Return a pointer to a newly allocated wide character string; use
+ PyMem_RawFree() to free the memory. If size is not NULL, write the number of
+ wide characters excluding the null character into *size.
- Return a pointer to a newly allocated wide character string (use
- PyMem_RawFree() to free the memory) and write the number of written wide
- characters excluding the null character into *size if size is not NULL, or
- NULL on error (decoding or memory allocation error). If size is not NULL,
- *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
- error.
+ Return NULL on decoding error or memory allocation error. If size is not
+ NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
+ decoding error.
- Conversion errors should never happen, unless there is a bug in the C
- library. */
+ Decoding errors should never happen, unless there is a bug in the C
+ library.
+
+ Use the Py_EncodeLocale() function to encode the character string back to a
+ byte string. */
wchar_t*
-_Py_char2wchar(const char* arg, size_t *size)
+Py_DecodeLocale(const char* arg, size_t *size)
{
#ifdef __APPLE__
wchar_t *wstr;
@@ -389,19 +391,20 @@ oom:
#endif /* __APPLE__ */
}
-/* Encode a (wide) character string to the locale encoding with the
- surrogateescape error handler (characters in range U+DC80..U+DCFF are
- converted to bytes 0x80..0xFF).
+/* Encode a wide character string to the locale encoding with the
+ surrogateescape error handler: surrogate characters in the range
+ U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
- This function is the reverse of _Py_char2wchar().
+ Return a pointer to a newly allocated byte string; use PyMem_Free() to free
+ the memory. Return NULL on encoding or memory allocation error.
- Return a pointer to a newly allocated byte string (use PyMem_Free() to free
- the memory), or NULL on encoding or memory allocation error.
+ If error_pos is not NULL, *error_pos is set to the index of the invalid
+ character on encoding error, or set to (size_t)-1 otherwise.
- If error_pos is not NULL: *error_pos is the index of the invalid character
- on encoding error, or (size_t)-1 otherwise. */
+ Use the Py_DecodeLocale() function to decode the byte string back to a wide
+ character string. */
char*
-_Py_wchar2char(const wchar_t *text, size_t *error_pos)
+Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
#ifdef __APPLE__
Py_ssize_t len;
@@ -520,7 +523,7 @@ _Py_wstat(const wchar_t* path, struct stat *buf)
{
int err;
char *fname;
- fname = _Py_wchar2char(path, NULL);
+ fname = Py_EncodeLocale(path, NULL);
if (fname == NULL) {
errno = EINVAL;
return -1;
@@ -784,7 +787,7 @@ _Py_wfopen(const wchar_t *path, const wchar_t *mode)
errno = EINVAL;
return NULL;
}
- cpath = _Py_wchar2char(path, NULL);
+ cpath = Py_EncodeLocale(path, NULL);
if (cpath == NULL)
return NULL;
f = fopen(cpath, cmode);
@@ -875,7 +878,7 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
int res;
size_t r1;
- cpath = _Py_wchar2char(path, NULL);
+ cpath = Py_EncodeLocale(path, NULL);
if (cpath == NULL) {
errno = EINVAL;
return -1;
@@ -889,7 +892,7 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
return -1;
}
cbuf[res] = '\0'; /* buf will be null terminated */
- wbuf = _Py_char2wchar(cbuf, &r1);
+ wbuf = Py_DecodeLocale(cbuf, &r1);
if (wbuf == NULL) {
errno = EINVAL;
return -1;
@@ -920,7 +923,7 @@ _Py_wrealpath(const wchar_t *path,
wchar_t *wresolved_path;
char *res;
size_t r;
- cpath = _Py_wchar2char(path, NULL);
+ cpath = Py_EncodeLocale(path, NULL);
if (cpath == NULL) {
errno = EINVAL;
return NULL;
@@ -930,7 +933,7 @@ _Py_wrealpath(const wchar_t *path,
if (res == NULL)
return NULL;
- wresolved_path = _Py_char2wchar(cresolved_path, &r);
+ wresolved_path = Py_DecodeLocale(cresolved_path, &r);
if (wresolved_path == NULL) {
errno = EINVAL;
return NULL;
@@ -963,7 +966,7 @@ _Py_wgetcwd(wchar_t *buf, size_t size)
if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
return NULL;
- wname = _Py_char2wchar(fname, &len);
+ wname = Py_DecodeLocale(fname, &len);
if (wname == NULL)
return NULL;
if (size <= len) {
diff --git a/Python/frozenmain.c b/Python/frozenmain.c
index 55d05fc..cb84ed5 100644
--- a/Python/frozenmain.c
+++ b/Python/frozenmain.c
@@ -52,7 +52,7 @@ Py_FrozenMain(int argc, char **argv)
setlocale(LC_ALL, "");
for (i = 0; i < argc; i++) {
- argv_copy[i] = _Py_char2wchar(argv[i], NULL);
+ argv_copy[i] = Py_DecodeLocale(argv[i], NULL);
argv_copy2[i] = argv_copy[i];
if (!argv_copy[i]) {
fprintf(stderr, "Unable to decode the command line argument #%i\n",