Cut disused recode_encoding logic in _PyBytes_DecodeEscape. (GH-16013)

All call sites pass NULL for `recode_encoding`, so this path is completely untested. That's been true since before Python 3.0. It adds significant complexity to this logic, so it's best to take it out. All call sites now have a literal NULL, and that's been true since commit 768921cf3 eliminated a conditional (`foo ? bar : NULL`) at the call site in Python/ast.c where we're parsing a bytes literal. But even before then, that condition `foo` had been a constant since unadorned string literals started meaning Unicode, in commit 572dbf8f1 aka v3.0a1~1035. The `unicode` parameter is already unused, so mark it as unused too. The code that acted on it was also taken out before Python 3.0, in commit 8d30cc014 aka v3.0a1~1031. The function (PyBytes_DecodeEscape) is exposed in the API, but it's never been documented.
author: Greg Price <gnprice@gmail.com> 2019-09-12 18:12:22 (GMT)
committer: Benjamin Peterson <benjamin@python.org> 2019-09-12 18:12:22 (GMT)
commit: 3a4f66707e824ef3a8384827590ebaa6ca463dc0 (patch)
tree: ed74c3415de2582188da0fa52ac3a9ffb2ac4f63 /Objects
parent: a44f3dc21d8b59ceeffa7dd1d9a7c05ed6be3e04 (diff)
download: cpython-3a4f66707e824ef3a8384827590ebaa6ca463dc0.zip
cpython-3a4f66707e824ef3a8384827590ebaa6ca463dc0.tar.gz
cpython-3a4f66707e824ef3a8384827590ebaa6ca463dc0.tar.bz2
1 files changed, 5 insertions, 58 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index e1f5ee2..4b2a77b 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1077,52 +1077,10 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
     return NULL;
 }
 
-/* Unescape a backslash-escaped string. If unicode is non-zero,
-   the string is a u-literal. If recode_encoding is non-zero,
-   the string is UTF-8 encoded and should be re-encoded in the
-   specified encoding.  */
-
-static char *
-_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
-                            const char *errors, const char *recode_encoding,
-                            _PyBytesWriter *writer, char *p)
-{
-    PyObject *u, *w;
-    const char* t;
-
-    t = *s;
-    /* Decode non-ASCII bytes as UTF-8. */
-    while (t < end && (*t & 0x80))
-        t++;
-    u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
-    if (u == NULL)
-        return NULL;
-
-    /* Recode them in target encoding. */
-    w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
-    Py_DECREF(u);
-    if  (w == NULL)
-        return NULL;
-    assert(PyBytes_Check(w));
-
-    /* Append bytes to output buffer. */
-    writer->min_size--;   /* subtract 1 preallocated byte */
-    p = _PyBytesWriter_WriteBytes(writer, p,
-                                  PyBytes_AS_STRING(w),
-                                  PyBytes_GET_SIZE(w));
-    Py_DECREF(w);
-    if (p == NULL)
-        return NULL;
-
-    *s = t;
-    return p;
-}
-
+/* Unescape a backslash-escaped string. */
 PyObject *_PyBytes_DecodeEscape(const char *s,
                                 Py_ssize_t len,
                                 const char *errors,
-                                Py_ssize_t unicode,
-                                const char *recode_encoding,
                                 const char **first_invalid_escape)
 {
     int c;
@@ -1142,17 +1100,7 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
     end = s + len;
     while (s < end) {
         if (*s != '\\') {
-            if (!(recode_encoding && (*s & 0x80))) {
-                *p++ = *s++;
-            }
-            else {
-                /* non-ASCII character and need to recode */
-                p = _PyBytes_DecodeEscapeRecode(&s, end,
-                                                errors, recode_encoding,
-                                                &writer, p);
-                if (p == NULL)
-                    goto failed;
-            }
+            *p++ = *s++;
             continue;
         }
 
@@ -1241,12 +1189,11 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
 PyObject *PyBytes_DecodeEscape(const char *s,
                                 Py_ssize_t len,
                                 const char *errors,
-                                Py_ssize_t unicode,
-                                const char *recode_encoding)
+                                Py_ssize_t Py_UNUSED(unicode),
+                                const char *Py_UNUSED(recode_encoding))
 {
     const char* first_invalid_escape;
-    PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
-                                             recode_encoding,
+    PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
                                              &first_invalid_escape);
     if (result == NULL)
         return NULL;
author	Greg Price <gnprice@gmail.com>	2019-09-12 18:12:22 (GMT)
committer	Benjamin Peterson <benjamin@python.org>	2019-09-12 18:12:22 (GMT)
commit	3a4f66707e824ef3a8384827590ebaa6ca463dc0 (patch)
tree	ed74c3415de2582188da0fa52ac3a9ffb2ac4f63 /Objects
parent	a44f3dc21d8b59ceeffa7dd1d9a7c05ed6be3e04 (diff)
download	cpython-3a4f66707e824ef3a8384827590ebaa6ca463dc0.zip cpython-3a4f66707e824ef3a8384827590ebaa6ca463dc0.tar.gz cpython-3a4f66707e824ef3a8384827590ebaa6ca463dc0.tar.bz2