author     Pablo Galindo Salgado <Pablogsal@gmail.com>  2023-05-30 21:43:34 (GMT)
committer  GitHub <noreply@github.com>                  2023-05-30 21:43:34 (GMT)
commit     9216e69a87d16d871625721ed5a8aa302511f367 (patch)
tree       0e8f7f0689a7f873f34066d254bba74ec919a04d /Python/clinic
parent     2ea34cfb3a21182b4d16f57dd6c1cfce46362fe2 (diff)
gh-105069: Add a readline-like callable to the tokenizer to consume input iteratively (#105070)
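This change replaces the old single source-string parameter with a readline-style callable, so the tokenizer can pull input one line at a time instead of requiring the whole source up front. The same contract is used by the documented pure-Python API: the callable returns successive lines and an empty string at EOF. Below is a minimal sketch of that contract using only the public tokenize module; the private C iterator rewired by this commit is an implementation detail and is not called directly here.

    import io
    import tokenize

    source = "x = 1\nprint(x)\n"

    # A readline-like callable: each call returns the next line of input,
    # and an empty string signals EOF -- the contract io.StringIO.readline
    # already satisfies.
    readline = io.StringIO(source).readline

    # The tokenizer pulls lines on demand, so input is consumed
    # iteratively rather than materialized as one big string.
    for tok in tokenize.generate_tokens(readline):
        print(tok.type, repr(tok.string))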
Diffstat (limited to 'Python/clinic')
-rw-r--r--  Python/clinic/Python-tokenize.c.h  41
1 file changed, 24 insertions, 17 deletions
diff --git a/Python/clinic/Python-tokenize.c.h b/Python/clinic/Python-tokenize.c.h
index 7e77938..28f5075 100644
--- a/Python/clinic/Python-tokenize.c.h
+++ b/Python/clinic/Python-tokenize.c.h
@@ -9,8 +9,8 @@ preserve


 static PyObject *
-tokenizeriter_new_impl(PyTypeObject *type, const char *source,
-                       int extra_tokens);
+tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline,
+                       int extra_tokens, const char *encoding);

 static PyObject *
 tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
@@ -25,7 +25,7 @@ tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
         PyObject *ob_item[NUM_KEYWORDS];
     } _kwtuple = {
         .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
-        .ob_item = { &_Py_ID(source), &_Py_ID(extra_tokens), },
+        .ob_item = { &_Py_ID(extra_tokens), &_Py_ID(encoding), },
     };
     #undef NUM_KEYWORDS
     #define KWTUPLE (&_kwtuple.ob_base.ob_base)
@@ -34,43 +34,50 @@ tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
 #  define KWTUPLE NULL
 #endif  // !Py_BUILD_CORE

-    static const char * const _keywords[] = {"source", "extra_tokens", NULL};
+    static const char * const _keywords[] = {"", "extra_tokens", "encoding", NULL};
     static _PyArg_Parser _parser = {
         .keywords = _keywords,
         .fname = "tokenizeriter",
         .kwtuple = KWTUPLE,
     };
     #undef KWTUPLE
-    PyObject *argsbuf[2];
+    PyObject *argsbuf[3];
     PyObject * const *fastargs;
     Py_ssize_t nargs = PyTuple_GET_SIZE(args);
-    const char *source;
+    Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 2;
+    PyObject *readline;
     int extra_tokens;
+    const char *encoding = NULL;

     fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 1, 1, 1, argsbuf);
     if (!fastargs) {
         goto exit;
     }
-    if (!PyUnicode_Check(fastargs[0])) {
-        _PyArg_BadArgument("tokenizeriter", "argument 'source'", "str", fastargs[0]);
+    readline = fastargs[0];
+    extra_tokens = PyObject_IsTrue(fastargs[1]);
+    if (extra_tokens < 0) {
         goto exit;
     }
-    Py_ssize_t source_length;
-    source = PyUnicode_AsUTF8AndSize(fastargs[0], &source_length);
-    if (source == NULL) {
+    if (!noptargs) {
+        goto skip_optional_kwonly;
+    }
+    if (!PyUnicode_Check(fastargs[2])) {
+        _PyArg_BadArgument("tokenizeriter", "argument 'encoding'", "str", fastargs[2]);
         goto exit;
     }
-    if (strlen(source) != (size_t)source_length) {
-        PyErr_SetString(PyExc_ValueError, "embedded null character");
+    Py_ssize_t encoding_length;
+    encoding = PyUnicode_AsUTF8AndSize(fastargs[2], &encoding_length);
+    if (encoding == NULL) {
         goto exit;
     }
-    extra_tokens = PyObject_IsTrue(fastargs[1]);
-    if (extra_tokens < 0) {
+    if (strlen(encoding) != (size_t)encoding_length) {
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
         goto exit;
     }
-    return_value = tokenizeriter_new_impl(type, source, extra_tokens);
+skip_optional_kwonly:
+    return_value = tokenizeriter_new_impl(type, readline, extra_tokens, encoding);

 exit:
     return return_value;
 }
-/*[clinic end generated code: output=940b564c67f6e0e2 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=48be65a2808bdfa6 input=a9049054013a1b77]*/
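Read back from the generated parser, the new calling convention is: the empty string at the head of _keywords makes the first parameter positional-only, extra_tokens and encoding are keyword-only, and the noptargs/skip_optional_kwonly path lets encoding be omitted (it stays NULL in C). The argsbuf[3] and the "- 2" in noptargs line up with three parameters, two of them required. A rough pure-Python stand-in for that shape, with hypothetical names, an illustrative encoding default, and "omitted" folded together with None for brevity:

    import io

    # Hypothetical pure-Python stand-in; the real constructor is the
    # C-level tokenizeriter_new_impl() behind the generated parser above.
    def tokenizeriter(readline, /, *, extra_tokens, encoding=None):
        # The generated code converts extra_tokens with PyObject_IsTrue(),
        # i.e. plain truthiness.
        extra_tokens = bool(extra_tokens)
        if encoding is not None:
            if not isinstance(encoding, str):
                # _PyArg_BadArgument() raises TypeError in the C version
                raise TypeError("argument 'encoding' must be str")
            if "\x00" in encoding:
                # mirrors the strlen()-vs-length check in the C version
                raise ValueError("embedded null character")
        return readline, extra_tokens, encoding

    rl = io.StringIO("pass\n").readline
    tokenizeriter(rl, extra_tokens=True)                    # accepted
    tokenizeriter(rl, extra_tokens=True, encoding="utf-8")  # accepted
    # tokenizeriter(readline=rl, extra_tokens=True)  # TypeError: positional-only
    # tokenizeriter(rl, True)                        # TypeError: keyword-only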