diff options
author | Jordon Xu <46997731+qigangxu@users.noreply.github.com> | 2019-08-21 13:26:20 (GMT) |
---|---|---|
committer | Victor Stinner <vstinner@redhat.com> | 2019-08-21 13:26:20 (GMT) |
commit | 20f59fe1f7748ae899aceee4cb560e5e1f528a1f (patch) | |
tree | 388519ee0b0caee244104278660644dbf1598b21 | |
parent | 87bc3b7a0b0c41ea5da2aa137c4145f437a1c9fd (diff) | |
download | cpython-20f59fe1f7748ae899aceee4cb560e5e1f528a1f.zip cpython-20f59fe1f7748ae899aceee4cb560e5e1f528a1f.tar.gz cpython-20f59fe1f7748ae899aceee4cb560e5e1f528a1f.tar.bz2 |
bpo-37751: Fix codecs.lookup() normalization (GH-15092)
Fix codecs.lookup() to normalize the encoding name the same way
as encodings.normalize_encoding(), except that codecs.lookup()
also converts the name to lower case.
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2019-08-20-04-36-37.bpo-37751.CSFzUd.rst | 1 | ||||
-rw-r--r-- | Python/codecs.c | 32 |
2 files changed, 17 insertions, 16 deletions
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-08-20-04-36-37.bpo-37751.CSFzUd.rst b/Misc/NEWS.d/next/Core and Builtins/2019-08-20-04-36-37.bpo-37751.CSFzUd.rst new file mode 100644 index 0000000..4da59ff --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-08-20-04-36-37.bpo-37751.CSFzUd.rst @@ -0,0 +1 @@ +Fix :func:`codecs.lookup` to normalize the encoding name the same way than :func:`encodings.normalize_encoding`, except that :func:`codecs.lookup` also converts the name to lower case. diff --git a/Python/codecs.c b/Python/codecs.c index 4f38b33..08e9b91 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -49,15 +49,16 @@ int PyCodec_Register(PyObject *search_function) return -1; } -/* Convert a string to a normalized Python string: all characters are - converted to lower case, spaces are replaced with underscores. */ +extern int _Py_normalize_encoding(const char *, char *, size_t); + +/* Convert a string to a normalized Python string(decoded from UTF-8): all characters are + converted to lower case, spaces and hyphens are replaced with underscores. */ static PyObject *normalizestring(const char *string) { - size_t i; size_t len = strlen(string); - char *p; + char *encoding; PyObject *v; if (len > PY_SSIZE_T_MAX) { @@ -65,20 +66,19 @@ PyObject *normalizestring(const char *string) return NULL; } - p = PyMem_Malloc(len + 1); - if (p == NULL) + encoding = PyMem_Malloc(len + 1); + if (encoding == NULL) return PyErr_NoMemory(); - for (i = 0; i < len; i++) { - char ch = string[i]; - if (ch == ' ') - ch = '-'; - else - ch = Py_TOLOWER(Py_CHARMASK(ch)); - p[i] = ch; + + if (!_Py_normalize_encoding(string, encoding, len + 1)) + { + PyErr_SetString(PyExc_RuntimeError, "_Py_normalize_encoding() failed"); + PyMem_Free(encoding); + return NULL; } - p[i] = '\0'; - v = PyUnicode_FromString(p); - PyMem_Free(p); + + v = PyUnicode_FromString(encoding); + PyMem_Free(encoding); return v; } |