From 713bb19356bce9b8f2b95461834fe1dae505f889 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Wed, 13 Oct 2021 17:22:14 +0200
Subject: bpo-45434: Mark the PyTokenizer C API as private (GH-28924)

Rename PyTokenizer functions to mark them as private:

* PyTokenizer_FindEncodingFilename() => _PyTokenizer_FindEncodingFilename()
* PyTokenizer_FromString() => _PyTokenizer_FromString()
* PyTokenizer_FromFile() => _PyTokenizer_FromFile()
* PyTokenizer_FromUTF8() => _PyTokenizer_FromUTF8()
* PyTokenizer_Free() => _PyTokenizer_Free()
* PyTokenizer_Get() => _PyTokenizer_Get()

Remove the unused PyTokenizer_FindEncoding() function.

import.c: remove unused #include "errcode.h".
---
 Parser/pegen.c           | 16 ++++++++--------
 Parser/string_parser.c   |  4 ++--
 Parser/tokenizer.c       | 47 ++++++++++++++++++++---------------------
 Parser/tokenizer.h       | 10 +++++-----
 Python/Python-tokenize.c |  6 +++---
 Python/import.c          |  1 -
 Python/traceback.c       |  4 ++--
 7 files changed, 40 insertions(+), 48 deletions(-)

diff --git a/Parser/pegen.c b/Parser/pegen.c
index a989635..e4d2692 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -729,7 +729,7 @@ _PyPegen_fill_token(Parser *p)
 {
     const char *start;
     const char *end;
-    int type = PyTokenizer_Get(p->tok, &start, &end);
+    int type = _PyTokenizer_Get(p->tok, &start, &end);

     // Record and skip '# type: ignore' comments
     while (type == TYPE_IGNORE) {
@@ -746,7 +746,7 @@ _PyPegen_fill_token(Parser *p)
             PyErr_NoMemory();
             return -1;
         }
-        type = PyTokenizer_Get(p->tok, &start, &end);
+        type = _PyTokenizer_Get(p->tok, &start, &end);
     }

     // If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing
@@ -1306,7 +1306,7 @@ _PyPegen_check_tokenizer_errors(Parser *p) {
     for (;;) {
         const char *start;
         const char *end;
-        switch (PyTokenizer_Get(p->tok, &start, &end)) {
+        switch (_PyTokenizer_Get(p->tok, &start, &end)) {
             case ERRORTOKEN:
                 if (p->tok->level != 0) {
                     int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
@@ -1411,7 +1411,7 @@ _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filena
                                       const char *enc, const char *ps1, const char *ps2,
                                       PyCompilerFlags *flags, int *errcode, PyArena *arena)
 {
-    struct tok_state *tok = PyTokenizer_FromFile(fp, enc, ps1, ps2);
+    struct tok_state *tok = _PyTokenizer_FromFile(fp, enc, ps1, ps2);
     if (tok == NULL) {
         if (PyErr_Occurred()) {
             raise_tokenizer_init_error(filename_ob);
@@ -1441,7 +1441,7 @@ _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filena
     _PyPegen_Parser_Free(p);

 error:
-    PyTokenizer_Free(tok);
+    _PyTokenizer_Free(tok);
     return result;
 }

@@ -1453,9 +1453,9 @@ _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filen
     struct tok_state *tok;

     if (flags == NULL || flags->cf_flags & PyCF_IGNORE_COOKIE) {
-        tok = PyTokenizer_FromUTF8(str, exec_input);
+        tok = _PyTokenizer_FromUTF8(str, exec_input);
     } else {
-        tok = PyTokenizer_FromString(str, exec_input);
+        tok = _PyTokenizer_FromString(str, exec_input);
     }
     if (tok == NULL) {
         if (PyErr_Occurred()) {
@@ -1483,7 +1483,7 @@ _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filen
     _PyPegen_Parser_Free(p);

 error:
-    PyTokenizer_Free(tok);
+    _PyTokenizer_Free(tok);
     return result;
 }

diff --git a/Parser/string_parser.c b/Parser/string_parser.c
index 2880d07..cffe24e 100644
--- a/Parser/string_parser.c
+++ b/Parser/string_parser.c
@@ -386,7 +386,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
     str[0] = '(';
     str[len+1] = ')';

-    struct tok_state* tok = PyTokenizer_FromString(str, 1);
+    struct tok_state* tok = _PyTokenizer_FromString(str, 1);
     if (tok == NULL) {
         PyMem_Free(str);
         return NULL;
@@ -409,7 +409,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
 exit:
     PyMem_Free(str);
     _PyPegen_Parser_Free(p2);
-    PyTokenizer_Free(tok);
+    _PyTokenizer_Free(tok);
     return result;
 }

diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index c7a014d..ae3874b 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -108,7 +108,7 @@ static char *
 error_ret(struct tok_state *tok) /* XXX */
 {
     tok->decoding_erred = 1;
-    if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
+    if (tok->fp != NULL && tok->buf != NULL) /* see _PyTokenizer_Free */
         PyMem_Free(tok->buf);
     tok->buf = tok->cur = tok->inp = NULL;
     tok->start = NULL;
@@ -702,7 +702,7 @@ decode_str(const char *input, int single, struct tok_state *tok)
 /* Set up tokenizer for string */

 struct tok_state *
-PyTokenizer_FromString(const char *str, int exec_input)
+_PyTokenizer_FromString(const char *str, int exec_input)
 {
     struct tok_state *tok = tok_new();
     char *decoded;
@@ -711,7 +711,7 @@ PyTokenizer_FromString(const char *str, int exec_input)
         return NULL;
     decoded = decode_str(str, exec_input, tok);
     if (decoded == NULL) {
-        PyTokenizer_Free(tok);
+        _PyTokenizer_Free(tok);
         return NULL;
     }

@@ -723,7 +723,7 @@ PyTokenizer_FromString(const char *str, int exec_input)
 /* Set up tokenizer for UTF-8 string */

 struct tok_state *
-PyTokenizer_FromUTF8(const char *str, int exec_input)
+_PyTokenizer_FromUTF8(const char *str, int exec_input)
 {
     struct tok_state *tok = tok_new();
     char *translated;
@@ -731,7 +731,7 @@ PyTokenizer_FromUTF8(const char *str, int exec_input)
         return NULL;
     tok->input = translated = translate_newlines(str, exec_input, tok);
     if (translated == NULL) {
-        PyTokenizer_Free(tok);
+        _PyTokenizer_Free(tok);
         return NULL;
     }
     tok->decoding_state = STATE_NORMAL;
@@ -739,7 +739,7 @@ PyTokenizer_FromUTF8(const char *str, int exec_input)
     tok->str = translated;
     tok->encoding = new_string("utf-8", 5, tok);
     if (!tok->encoding) {
-        PyTokenizer_Free(tok);
+        _PyTokenizer_Free(tok);
         return NULL;
     }

@@ -751,14 +751,14 @@ PyTokenizer_FromUTF8(const char *str, int exec_input)
 /* Set up tokenizer for file */

 struct tok_state *
-PyTokenizer_FromFile(FILE *fp, const char* enc,
-                     const char *ps1, const char *ps2)
+_PyTokenizer_FromFile(FILE *fp, const char* enc,
+                      const char *ps1, const char *ps2)
 {
     struct tok_state *tok = tok_new();
     if (tok == NULL)
         return NULL;
     if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) {
-        PyTokenizer_Free(tok);
+        _PyTokenizer_Free(tok);
         return NULL;
     }
     tok->cur = tok->inp = tok->buf;
@@ -771,7 +771,7 @@ PyTokenizer_FromFile(FILE *fp, const char* enc,
            gets copied into the parse tree. */
         tok->encoding = new_string(enc, strlen(enc), tok);
         if (!tok->encoding) {
-            PyTokenizer_Free(tok);
+            _PyTokenizer_Free(tok);
             return NULL;
         }
         tok->decoding_state = STATE_NORMAL;
@@ -782,7 +782,7 @@ PyTokenizer_FromFile(FILE *fp, const char* enc,
 /* Free a tok_state structure */

 void
-PyTokenizer_Free(struct tok_state *tok)
+_PyTokenizer_Free(struct tok_state *tok)
 {
     if (tok->encoding != NULL) {
         PyMem_Free(tok->encoding);
@@ -2049,7 +2049,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
 }

 int
-PyTokenizer_Get(struct tok_state *tok, const char **p_start, const char **p_end)
+_PyTokenizer_Get(struct tok_state *tok,
+                 const char **p_start, const char **p_end)
 {
     int result = tok_get(tok, p_start, p_end);
     if (tok->decoding_erred) {
@@ -2062,7 +2063,7 @@ PyTokenizer_Get(struct tok_state *tok, const char **p_start, const char **p_end)
 /* Get the encoding of a Python file. Check for the coding cookie and check if
    the file starts with a BOM.

-   PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
+   _PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
    encoding in the first or second line of the file (in which case the encoding
    should be assumed to be UTF-8).

@@ -2070,7 +2071,7 @@
    by the caller. */

 char *
-PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
+_PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
 {
     struct tok_state *tok;
     FILE *fp;
@@ -2087,7 +2088,7 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
     if (fp == NULL) {
         return NULL;
     }
-    tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL);
+    tok = _PyTokenizer_FromFile(fp, NULL, NULL, NULL);
     if (tok == NULL) {
         fclose(fp);
         return NULL;
     }
@@ -2100,12 +2101,12 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
         tok->filename = PyUnicode_FromString("<string>");
         if (tok->filename == NULL) {
             fclose(fp);
-            PyTokenizer_Free(tok);
+            _PyTokenizer_Free(tok);
             return encoding;
         }
     }
     while (tok->lineno < 2 && tok->done == E_OK) {
-        PyTokenizer_Get(tok, &p_start, &p_end);
+        _PyTokenizer_Get(tok, &p_start, &p_end);
     }
     fclose(fp);
     if (tok->encoding) {
@@ -2114,18 +2115,11 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
             strcpy(encoding, tok->encoding);
         }
     }
-    PyTokenizer_Free(tok);
+    _PyTokenizer_Free(tok);
     return encoding;
 }

-char *
-PyTokenizer_FindEncoding(int fd)
-{
-    return PyTokenizer_FindEncodingFilename(fd, NULL);
-}
-
 #ifdef Py_DEBUG
-
 void
 tok_dump(int type, char *start, char *end)
 {
@@ -2133,5 +2127,4 @@ tok_dump(int type, char *start, char *end)
     if (type == NAME || type == NUMBER || type == STRING || type == OP)
         printf("(%.*s)", (int)(end - start), start);
 }
-
-#endif
+#endif // Py_DEBUG

diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 677f9db..1d1cfd6 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -86,12 +86,12 @@ struct tok_state {
     enum interactive_underflow_t interactive_underflow;
 };

-extern struct tok_state *PyTokenizer_FromString(const char *, int);
-extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
-extern struct tok_state *PyTokenizer_FromFile(FILE *, const char*,
+extern struct tok_state *_PyTokenizer_FromString(const char *, int);
+extern struct tok_state *_PyTokenizer_FromUTF8(const char *, int);
+extern struct tok_state *_PyTokenizer_FromFile(FILE *, const char*,
                                               const char *, const char *);
-extern void PyTokenizer_Free(struct tok_state *);
-extern int PyTokenizer_Get(struct tok_state *, const char **, const char **);
+extern void _PyTokenizer_Free(struct tok_state *);
+extern int _PyTokenizer_Get(struct tok_state *, const char **, const char **);

 #define tok_dump _Py_tok_dump

diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
index fa71328..d3ebbe1 100644
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -47,7 +47,7 @@ tokenizeriter_new_impl(PyTypeObject *type, const char *source)
     if (filename == NULL) {
         return NULL;
     }
-    self->tok = PyTokenizer_FromUTF8(source, 1);
+    self->tok = _PyTokenizer_FromUTF8(source, 1);
     if (self->tok == NULL) {
         Py_DECREF(filename);
         return NULL;
@@ -61,7 +61,7 @@ tokenizeriter_next(tokenizeriterobject *it)
 {
     const char *start;
     const char *end;
-    int type = PyTokenizer_Get(it->tok, &start, &end);
+    int type = _PyTokenizer_Get(it->tok, &start, &end);
     if (type == ERRORTOKEN && PyErr_Occurred()) {
         return NULL;
     }
@@ -105,7 +105,7 @@ static void
 tokenizeriter_dealloc(tokenizeriterobject *it)
 {
     PyTypeObject *tp = Py_TYPE(it);
-    PyTokenizer_Free(it->tok);
+    _PyTokenizer_Free(it->tok);
     tp->tp_free(it);
     Py_DECREF(tp);
 }

diff --git a/Python/import.c b/Python/import.c
index 731f0f5..4bc1e51 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -11,7 +11,6 @@
 #include "pycore_interp.h"        // _PyInterpreterState_ClearModules()
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 #include "pycore_sysmodule.h"
-#include "errcode.h"
 #include "marshal.h"
 #include "code.h"
 #include "importdl.h"

diff --git a/Python/traceback.c b/Python/traceback.c
index ffa7c34..b18cbb9 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -29,7 +29,7 @@
 #define MAX_NTHREADS 100

 /* Function from Parser/tokenizer.c */
-extern char * PyTokenizer_FindEncodingFilename(int, PyObject *);
+extern char* _PyTokenizer_FindEncodingFilename(int, PyObject *);

 _Py_IDENTIFIER(TextIOWrapper);
 _Py_IDENTIFIER(close);
@@ -431,7 +431,7 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, i
         Py_DECREF(binary);
         return 0;
     }
-    found_encoding = PyTokenizer_FindEncodingFilename(fd, filename);
+    found_encoding = _PyTokenizer_FindEncodingFilename(fd, filename);
     if (found_encoding == NULL)
         PyErr_Clear();
     encoding = (found_encoding != NULL) ? found_encoding : "utf-8";
-- 
cgit v0.12
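
After this change, in-tree callers use the underscore-prefixed names exactly as
the patched call sites above do. Below is a minimal sketch of such a caller,
modeled on _PyPegen_run_parser_from_string() and _PyPegen_fill_token(); it
assumes an in-tree CPython build where the internal Parser/tokenizer.h header
and the ENDMARKER/ERRORTOKEN constants from token.h are available. These
functions are private and not part of the public C API.

/* Sketch only: drives the now-private tokenizer API the way the pegen
 * call sites above do.  Assumes an in-tree CPython build; tokenizer.h
 * is an internal header and none of this is stable public C API. */
#include <stdio.h>
#include "Python.h"
#include "token.h"       /* ENDMARKER, ERRORTOKEN */
#include "tokenizer.h"   /* struct tok_state, _PyTokenizer_* */

static int
dump_tokens(const char *source)
{
    /* exec_input=1, as in _PyPegen_run_parser_from_string() above. */
    struct tok_state *tok = _PyTokenizer_FromString(source, 1);
    if (tok == NULL) {
        return -1;                 /* error already set by the tokenizer */
    }
    for (;;) {
        const char *start;
        const char *end;
        int type = _PyTokenizer_Get(tok, &start, &end);
        if (type == ERRORTOKEN || type == ENDMARKER) {
            break;                 /* error or end of input */
        }
        if (start != NULL && end != NULL) {
            printf("%d: %.*s\n", type, (int)(end - start), start);
        }
    }
    _PyTokenizer_Free(tok);        /* the caller owns the tok_state */
    return 0;
}

The rename itself is mechanical: the Py-prefixed names looked like public API,
while the leading underscore follows CPython's convention for private C
functions, so out-of-tree code gets no accidental stability guarantee.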