diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2021-10-14 18:23:52 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-10-14 18:23:52 (GMT) |
commit | 684860280687561f6312e206c4ccfbe4baa17e89 (patch) | |
tree | caee170a4350f45cdfe0a5517da5098b48afc2d7 /Modules | |
parent | 7c722e32bf582108680f49983cf01eaed710ddb9 (diff) | |
download | cpython-684860280687561f6312e206c4ccfbe4baa17e89.zip cpython-684860280687561f6312e206c4ccfbe4baa17e89.tar.gz cpython-684860280687561f6312e206c4ccfbe4baa17e89.tar.bz2 |
bpo-45467: Fix IncrementalDecoder and StreamReader in the "raw-unicode-escape" codec (GH-28944) (GH-28953)
The incremental decoder and stream reader now support splitting escape sequences between input chunks.
Add a third parameter, "final", to codecs.raw_unicode_escape_decode().
It is True by default to match the former behavior.
(cherry picked from commit 39aa98346d5dd8ac591a7cafb467af21c53f1e5d)
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/_codecsmodule.c | 13 | ||||
-rw-r--r-- | Modules/clinic/_codecsmodule.c.h | 23 |
2 files changed, 26 insertions, 10 deletions
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index f22d4da..cbe5cc5 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -507,17 +507,20 @@ _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
 _codecs.raw_unicode_escape_decode
     data: Py_buffer(accept={str, buffer})
     errors: str(accept={str, NoneType}) = None
+    final: bool(accept={int}) = True
     /
 [clinic start generated code]*/
 
 static PyObject *
 _codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
-                                       const char *errors)
-/*[clinic end generated code: output=c98eeb56028070a6 input=d2f5159ce3b3392f]*/
+                                       const char *errors, int final)
+/*[clinic end generated code: output=11dbd96301e2879e input=2d166191beb3235a]*/
 {
-    PyObject *decoded = PyUnicode_DecodeRawUnicodeEscape(data->buf, data->len,
-                                                         errors);
-    return codec_tuple(decoded, data->len);
+    Py_ssize_t consumed = data->len;
+    PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len,
+                                                                  errors,
+                                                                  final ? NULL : &consumed);
+    return codec_tuple(decoded, consumed);
 }
 
 /*[clinic input]
diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h
index 4e2c057..7ddc36d 100644
--- a/Modules/clinic/_codecsmodule.c.h
+++ b/Modules/clinic/_codecsmodule.c.h
@@ -1234,7 +1234,7 @@ exit:
 }
 
 PyDoc_STRVAR(_codecs_raw_unicode_escape_decode__doc__,
-"raw_unicode_escape_decode($module, data, errors=None, /)\n"
+"raw_unicode_escape_decode($module, data, errors=None, final=True, /)\n"
 "--\n"
 "\n");
 
@@ -1243,7 +1243,7 @@ PyDoc_STRVAR(_codecs_raw_unicode_escape_decode__doc__,
 
 static PyObject *
 _codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
-                                       const char *errors);
+                                       const char *errors, int final);
 
 static PyObject *
 _codecs_raw_unicode_escape_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
@@ -1251,8 +1251,9 @@ _codecs_raw_unicode_escape_decode(PyObject *module, PyObject *const *args, Py_ss
     PyObject *return_value = NULL;
     Py_buffer data = {NULL, NULL};
     const char *errors = NULL;
+    int final = 1;
 
-    if (!_PyArg_CheckPositional("raw_unicode_escape_decode", nargs, 1, 2)) {
+    if (!_PyArg_CheckPositional("raw_unicode_escape_decode", nargs, 1, 3)) {
         goto exit;
     }
     if (PyUnicode_Check(args[0])) {
@@ -1293,8 +1294,20 @@ _codecs_raw_unicode_escape_decode(PyObject *module, PyObject *const *args, Py_ss
         _PyArg_BadArgument("raw_unicode_escape_decode", "argument 2", "str or None", args[1]);
         goto exit;
     }
+    if (nargs < 3) {
+        goto skip_optional;
+    }
+    if (PyFloat_Check(args[2])) {
+        PyErr_SetString(PyExc_TypeError,
+                        "integer argument expected, got float" );
+        goto exit;
+    }
+    final = _PyLong_AsInt(args[2]);
+    if (final == -1 && PyErr_Occurred()) {
+        goto exit;
+    }
 skip_optional:
-    return_value = _codecs_raw_unicode_escape_decode_impl(module, &data, errors);
+    return_value = _codecs_raw_unicode_escape_decode_impl(module, &data, errors, final);
 
 exit:
     /* Cleanup for data */
@@ -2935,4 +2948,4 @@ exit:
 #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
     #define _CODECS_CODE_PAGE_ENCODE_METHODDEF
 #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
-/*[clinic end generated code: output=d4b696fe54cfee8f input=a9049054013a1b77]*/
+/*[clinic end generated code: output=eed7dc9312baf252 input=a9049054013a1b77]*/