summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jordon Xu <46997731+qigangxu@users.noreply.github.com> 2019-08-21 13:26:20 (GMT)
committer: Victor Stinner <vstinner@redhat.com> 2019-08-21 13:26:20 (GMT)
commit: 20f59fe1f7748ae899aceee4cb560e5e1f528a1f (patch)
tree: 388519ee0b0caee244104278660644dbf1598b21
parent: 87bc3b7a0b0c41ea5da2aa137c4145f437a1c9fd (diff)
downloadcpython-20f59fe1f7748ae899aceee4cb560e5e1f528a1f.zip
cpython-20f59fe1f7748ae899aceee4cb560e5e1f528a1f.tar.gz
cpython-20f59fe1f7748ae899aceee4cb560e5e1f528a1f.tar.bz2
bpo-37751: Fix codecs.lookup() normalization (GH-15092)
Fix codecs.lookup() to normalize the encoding name the same way as encodings.normalize_encoding(), except that codecs.lookup() also converts the name to lower case.
-rw-r--r-- Misc/NEWS.d/next/Core and Builtins/2019-08-20-04-36-37.bpo-37751.CSFzUd.rst | 1
-rw-r--r-- Python/codecs.c | 32
2 files changed, 17 insertions, 16 deletions
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-08-20-04-36-37.bpo-37751.CSFzUd.rst b/Misc/NEWS.d/next/Core and Builtins/2019-08-20-04-36-37.bpo-37751.CSFzUd.rst
new file mode 100644
index 0000000..4da59ff
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-08-20-04-36-37.bpo-37751.CSFzUd.rst
@@ -0,0 +1 @@
+Fix :func:`codecs.lookup` to normalize the encoding name the same way as :func:`encodings.normalize_encoding`, except that :func:`codecs.lookup` also converts the name to lower case.
diff --git a/Python/codecs.c b/Python/codecs.c
index 4f38b33..08e9b91 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -49,15 +49,16 @@ int PyCodec_Register(PyObject *search_function)
return -1;
}
-/* Convert a string to a normalized Python string: all characters are
- converted to lower case, spaces are replaced with underscores. */
+extern int _Py_normalize_encoding(const char *, char *, size_t);
+
+/* Convert a string to a normalized Python string(decoded from UTF-8): all characters are
+ converted to lower case, spaces and hyphens are replaced with underscores. */
static
PyObject *normalizestring(const char *string)
{
- size_t i;
size_t len = strlen(string);
- char *p;
+ char *encoding;
PyObject *v;
if (len > PY_SSIZE_T_MAX) {
@@ -65,20 +66,19 @@ PyObject *normalizestring(const char *string)
return NULL;
}
- p = PyMem_Malloc(len + 1);
- if (p == NULL)
+ encoding = PyMem_Malloc(len + 1);
+ if (encoding == NULL)
return PyErr_NoMemory();
- for (i = 0; i < len; i++) {
- char ch = string[i];
- if (ch == ' ')
- ch = '-';
- else
- ch = Py_TOLOWER(Py_CHARMASK(ch));
- p[i] = ch;
+
+ if (!_Py_normalize_encoding(string, encoding, len + 1))
+ {
+ PyErr_SetString(PyExc_RuntimeError, "_Py_normalize_encoding() failed");
+ PyMem_Free(encoding);
+ return NULL;
}
- p[i] = '\0';
- v = PyUnicode_FromString(p);
- PyMem_Free(p);
+
+ v = PyUnicode_FromString(encoding);
+ PyMem_Free(encoding);
return v;
}