summaryrefslogtreecommitdiffstats
path: root/Modules
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2021-10-14 17:04:19 (GMT)
committerGitHub <noreply@github.com>2021-10-14 17:04:19 (GMT)
commit39aa98346d5dd8ac591a7cafb467af21c53f1e5d (patch)
treed363b62f299171467fce0cd3e1fe155c2ca41a09 /Modules
parentd413c503636cde2a6ab0ada25dccb0134633a8e6 (diff)
downloadcpython-39aa98346d5dd8ac591a7cafb467af21c53f1e5d.zip
cpython-39aa98346d5dd8ac591a7cafb467af21c53f1e5d.tar.gz
cpython-39aa98346d5dd8ac591a7cafb467af21c53f1e5d.tar.bz2
bpo-45467: Fix IncrementalDecoder and StreamReader in the "raw-unicode-escape" codec (GH-28944)
They support now splitting escape sequences between input chunks. Add the third parameter "final" in codecs.raw_unicode_escape_decode(). It is True by default to match the former behavior.
Diffstat (limited to 'Modules')
-rw-r--r--Modules/_codecsmodule.c13
-rw-r--r--Modules/clinic/_codecsmodule.c.h18
2 files changed, 21 insertions, 10 deletions
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index fc74127..50afc09 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -509,17 +509,20 @@ _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
_codecs.raw_unicode_escape_decode
data: Py_buffer(accept={str, buffer})
errors: str(accept={str, NoneType}) = None
+ final: bool(accept={int}) = True
/
[clinic start generated code]*/
static PyObject *
_codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
- const char *errors)
-/*[clinic end generated code: output=c98eeb56028070a6 input=d2f5159ce3b3392f]*/
+ const char *errors, int final)
+/*[clinic end generated code: output=11dbd96301e2879e input=2d166191beb3235a]*/
{
- PyObject *decoded = PyUnicode_DecodeRawUnicodeEscape(data->buf, data->len,
- errors);
- return codec_tuple(decoded, data->len);
+ Py_ssize_t consumed = data->len;
+ PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len,
+ errors,
+ final ? NULL : &consumed);
+ return codec_tuple(decoded, consumed);
}
/*[clinic input]
diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h
index a7086dd..855ac77 100644
--- a/Modules/clinic/_codecsmodule.c.h
+++ b/Modules/clinic/_codecsmodule.c.h
@@ -1143,7 +1143,7 @@ exit:
}
PyDoc_STRVAR(_codecs_raw_unicode_escape_decode__doc__,
-"raw_unicode_escape_decode($module, data, errors=None, /)\n"
+"raw_unicode_escape_decode($module, data, errors=None, final=True, /)\n"
"--\n"
"\n");
@@ -1152,7 +1152,7 @@ PyDoc_STRVAR(_codecs_raw_unicode_escape_decode__doc__,
static PyObject *
_codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
- const char *errors);
+ const char *errors, int final);
static PyObject *
_codecs_raw_unicode_escape_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
@@ -1160,8 +1160,9 @@ _codecs_raw_unicode_escape_decode(PyObject *module, PyObject *const *args, Py_ss
PyObject *return_value = NULL;
Py_buffer data = {NULL, NULL};
const char *errors = NULL;
+ int final = 1;
- if (!_PyArg_CheckPositional("raw_unicode_escape_decode", nargs, 1, 2)) {
+ if (!_PyArg_CheckPositional("raw_unicode_escape_decode", nargs, 1, 3)) {
goto exit;
}
if (PyUnicode_Check(args[0])) {
@@ -1202,8 +1203,15 @@ _codecs_raw_unicode_escape_decode(PyObject *module, PyObject *const *args, Py_ss
_PyArg_BadArgument("raw_unicode_escape_decode", "argument 2", "str or None", args[1]);
goto exit;
}
+ if (nargs < 3) {
+ goto skip_optional;
+ }
+ final = _PyLong_AsInt(args[2]);
+ if (final == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
skip_optional:
- return_value = _codecs_raw_unicode_escape_decode_impl(module, &data, errors);
+ return_value = _codecs_raw_unicode_escape_decode_impl(module, &data, errors, final);
exit:
/* Cleanup for data */
@@ -2809,4 +2817,4 @@ exit:
#ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
#define _CODECS_CODE_PAGE_ENCODE_METHODDEF
#endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
-/*[clinic end generated code: output=9e9fb1d5d81577e0 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=814dae36b6f885cb input=a9049054013a1b77]*/