author    | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2023-05-31 10:11:53 (GMT)
committer | GitHub <noreply@github.com> | 2023-05-31 10:11:53 (GMT)
commit    | c687946f6815a17bc5ceacaf3bbceba5b41e73fd (patch)
tree      | 232c64d0c0190d8da0f3d6b9c3ab4528e4bcba0c /Python
parent    | 2f8c22f1d6c22f018c78264937db66d52fb18869 (diff)
[3.12] gh-105069: Add a readline-like callable to the tokenizer to consume input iteratively (GH-105070) (#105119)
gh-105069: Add a readline-like callable to the tokenizer to consume input iteratively (GH-105070)
(cherry picked from commit 9216e69a87d16d871625721ed5a8aa302511f367)
Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
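
For context, the "readline-like callable" named in the title follows the convention the `tokenize` module has always used: each call returns the next line of source, and an empty string signals end of input. Feeding input line by line is what lets the tokenizer consume it iteratively instead of requiring the whole source up front. A minimal, illustrative example using the public API (it does not exercise the private C iterator changed below):

    import io
    import tokenize

    source = "def greet(name):\n    return f'hello {name}'\n"

    # StringIO.readline is a readline-like callable: each call yields the
    # next line of the source, and an empty string signals end of input.
    readline = io.StringIO(source).readline

    for tok in tokenize.generate_tokens(readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))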
Diffstat (limited to 'Python')
-rw-r--r-- | Python/Python-tokenize.c          | 12
-rw-r--r-- | Python/clinic/Python-tokenize.c.h | 41
2 files changed, 31 insertions, 22 deletions
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
index 2de1daa..a7933b2 100644
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -37,15 +37,17 @@ typedef struct
 @classmethod
 _tokenizer.tokenizeriter.__new__ as tokenizeriter_new
 
-    source: str
+    readline: object
+    /
     *
     extra_tokens: bool
+    encoding: str(c_default="NULL") = 'utf-8'
 [clinic start generated code]*/
 
 static PyObject *
-tokenizeriter_new_impl(PyTypeObject *type, const char *source,
-                       int extra_tokens)
-/*[clinic end generated code: output=f6f9d8b4beec8106 input=90dc5b6a5df180c2]*/
+tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline,
+                       int extra_tokens, const char *encoding)
+/*[clinic end generated code: output=7501a1211683ce16 input=f7dddf8a613ae8bd]*/
 {
     tokenizeriterobject *self = (tokenizeriterobject *)type->tp_alloc(type, 0);
     if (self == NULL) {
@@ -55,7 +57,7 @@ tokenizeriter_new_impl(PyTypeObject *type, const char *source,
     if (filename == NULL) {
         return NULL;
     }
-    self->tok = _PyTokenizer_FromUTF8(source, 1, 1);
+    self->tok = _PyTokenizer_FromReadline(readline, encoding, 1, 1);
     if (self->tok == NULL) {
         Py_DECREF(filename);
         return NULL;
diff --git a/Python/clinic/Python-tokenize.c.h b/Python/clinic/Python-tokenize.c.h
index 7e77938..28f5075 100644
--- a/Python/clinic/Python-tokenize.c.h
+++ b/Python/clinic/Python-tokenize.c.h
@@ -9,8 +9,8 @@ preserve
 
 
 static PyObject *
-tokenizeriter_new_impl(PyTypeObject *type, const char *source,
-                       int extra_tokens);
+tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline,
+                       int extra_tokens, const char *encoding);
 
 static PyObject *
 tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
@@ -25,7 +25,7 @@ tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
         PyObject *ob_item[NUM_KEYWORDS];
     } _kwtuple = {
         .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
-        .ob_item = { &_Py_ID(source), &_Py_ID(extra_tokens), },
+        .ob_item = { &_Py_ID(extra_tokens), &_Py_ID(encoding), },
     };
     #undef NUM_KEYWORDS
     #define KWTUPLE (&_kwtuple.ob_base.ob_base)
@@ -34,43 +34,50 @@ tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
 #  define KWTUPLE NULL
 #endif  // !Py_BUILD_CORE
 
-    static const char * const _keywords[] = {"source", "extra_tokens", NULL};
+    static const char * const _keywords[] = {"", "extra_tokens", "encoding", NULL};
     static _PyArg_Parser _parser = {
         .keywords = _keywords,
         .fname = "tokenizeriter",
         .kwtuple = KWTUPLE,
     };
     #undef KWTUPLE
-    PyObject *argsbuf[2];
+    PyObject *argsbuf[3];
     PyObject * const *fastargs;
     Py_ssize_t nargs = PyTuple_GET_SIZE(args);
-    const char *source;
+    Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 2;
+    PyObject *readline;
     int extra_tokens;
+    const char *encoding = NULL;
 
     fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 1, 1, 1, argsbuf);
     if (!fastargs) {
         goto exit;
     }
-    if (!PyUnicode_Check(fastargs[0])) {
-        _PyArg_BadArgument("tokenizeriter", "argument 'source'", "str", fastargs[0]);
+    readline = fastargs[0];
+    extra_tokens = PyObject_IsTrue(fastargs[1]);
+    if (extra_tokens < 0) {
         goto exit;
     }
-    Py_ssize_t source_length;
-    source = PyUnicode_AsUTF8AndSize(fastargs[0], &source_length);
-    if (source == NULL) {
+    if (!noptargs) {
+        goto skip_optional_kwonly;
+    }
+    if (!PyUnicode_Check(fastargs[2])) {
+        _PyArg_BadArgument("tokenizeriter", "argument 'encoding'", "str", fastargs[2]);
         goto exit;
     }
-    if (strlen(source) != (size_t)source_length) {
-        PyErr_SetString(PyExc_ValueError, "embedded null character");
+    Py_ssize_t encoding_length;
+    encoding = PyUnicode_AsUTF8AndSize(fastargs[2], &encoding_length);
+    if (encoding == NULL) {
         goto exit;
     }
-    extra_tokens = PyObject_IsTrue(fastargs[1]);
-    if (extra_tokens < 0) {
+    if (strlen(encoding) != (size_t)encoding_length) {
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
         goto exit;
     }
-    return_value = tokenizeriter_new_impl(type, source, extra_tokens);
+skip_optional_kwonly:
+    return_value = tokenizeriter_new_impl(type, readline, extra_tokens, encoding);
 
 exit:
     return return_value;
 }
 
-/*[clinic end generated code: output=940b564c67f6e0e2 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=48be65a2808bdfa6 input=a9049054013a1b77]*/
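
Read alongside the clinic input block at the top of the diff, the regenerated parser enforces this parameter layout: `readline` is positional-only, `extra_tokens` is a required keyword-only flag, and `encoding` is an optional keyword-only string (its C default is NULL, shown as 'utf-8' at the Python level). A pure-Python sketch of the equivalent signature, for illustration only; the real constructor is the C code above:

    # Equivalent parameter layout to the clinic declaration:
    #   readline     - object, positional-only
    #   extra_tokens - bool, required keyword-only
    #   encoding     - str, optional keyword-only
    def tokenizeriter_new(readline, /, *, extra_tokens, encoding='utf-8'):
        return (readline, extra_tokens, encoding)

    # Accepted: readline passed positionally, the rest by keyword.
    tokenizeriter_new(lambda: '', extra_tokens=False)

    # Rejected, just as the generated _PyArg_UnpackKeywords call rejects them:
    # tokenizeriter_new(readline=lambda: '', extra_tokens=False)  # readline is positional-only
    # tokenizeriter_new(lambda: '', False)                        # extra_tokens is keyword-only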