| author | Pablo Galindo <Pablogsal@gmail.com> | 2019-03-01 23:34:44 (GMT) |
| --- | --- | --- |
| committer | GitHub <noreply@github.com> | 2019-03-01 23:34:44 (GMT) |
| commit | 1f24a719e7be5e49b876a5dc7daf21d01ee69faa (patch) | |
| tree | 8f8f56cab78ef671a8cb7f54b8ec2495d9a435e6 /Parser/tokenizer.c | |
| parent | 7eebbbd5b3907447eddadf5cb7cb1cc9230d15b2 (diff) | |
bpo-35808: Retire pgen and use pgen2 to generate the parser (GH-11814)
Pgen is the oldest piece of technology in the CPython repository; building it requires various #if[n]def PGEN hacks in other parts of the code, and it depends more and more on CPython internals. This commit removes the old pgen C code and replaces it with a new version implemented in pure Python. This is a modified and adapted version of lib2to3/pgen2 that can generate grammar files compatible with the current parser.
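As a rough sketch of the pgen2-style workflow the commit message refers to, the snippet below drives the lib2to3 copy of pgen2 that ships with CPython to build grammar tables from a Grammar file at runtime. The path and printed attributes are illustrative only; this is not the new Parser/pgen entry point added by this commit.

```python
# Minimal sketch, assuming a CPython checkout with Grammar/Grammar present.
# It only illustrates the pgen2 approach (read a Grammar file, build DFA
# tables in pure Python); it is not the Parser/pgen module added here.
from lib2to3.pgen2 import driver

grammar = driver.load_grammar("Grammar/Grammar", save=False)

print(len(grammar.dfas), "nonterminal DFAs")   # one DFA per grammar rule
print(sorted(grammar.keywords)[:5], "...")     # keywords found in the grammar
```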
This commit also eliminates all the #ifdef and code branches related to pgen, simplifying the code and making it more maintainable. The regen-grammar step now uses $(PYTHON_FOR_REGEN), which can be any version of the interpreter, so the new pgen code maintains compatibility with older versions of the interpreter (this also allows regenerating the grammar with the current CI setup, which uses Python 3.5). The new pgen Python module also makes use of the Grammar/Tokens file that holds the token specification, so it is always kept in sync and avoids having to maintain duplicate token definitions.
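For context on the Grammar/Tokens point, that file is a plain-text token specification. The hypothetical helper below shows how such a single specification could be read from Python; the assumed layout (one token name per line, optionally followed by its quoted literal, with '#' comments) is inferred from the commit message rather than copied from CPython.

```python
# Hypothetical helper: parse a Grammar/Tokens-style file into {name: literal}.
# The format assumed here is inferred from the description above, not taken
# verbatim from CPython.
from typing import Dict, Optional


def read_token_spec(path: str) -> Dict[str, Optional[str]]:
    tokens: Dict[str, Optional[str]] = {}
    with open(path, encoding="utf-8") as fp:
        for line in fp:
            line = line.split("#", 1)[0].strip()   # drop comments and blanks
            if not line:
                continue
            parts = line.split(None, 1)
            name = parts[0]
            literal = parts[1].strip("'\"") if len(parts) > 1 else None
            tokens[name] = literal
    return tokens


if __name__ == "__main__":
    spec = read_token_spec("Grammar/Tokens")
    print(len(spec), "tokens defined")   # e.g. spec.get("LPAR") would be "("
```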
Diffstat (limited to 'Parser/tokenizer.c')
-rw-r--r-- | Parser/tokenizer.c | 56 |
1 file changed, 0 insertions, 56 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 1ded9ad..44ec415 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -10,13 +10,11 @@
 #include "tokenizer.h"
 #include "errcode.h"
 
-#ifndef PGEN
 #include "unicodeobject.h"
 #include "bytesobject.h"
 #include "fileobject.h"
 #include "codecs.h"
 #include "abstract.h"
-#endif /* PGEN */
 
 /* Alternate tab spacing */
 #define ALTTABSIZE 1
@@ -81,11 +79,9 @@ tok_new(void)
     tok->enc = NULL;
     tok->encoding = NULL;
     tok->cont_line = 0;
-#ifndef PGEN
     tok->filename = NULL;
     tok->decoding_readline = NULL;
     tok->decoding_buffer = NULL;
-#endif
     tok->type_comments = 0;
 
     return tok;
@@ -104,28 +100,6 @@ new_string(const char *s, Py_ssize_t len, struct tok_state *tok)
     return result;
 }
 
-#ifdef PGEN
-
-static char *
-decoding_fgets(char *s, int size, struct tok_state *tok)
-{
-    return fgets(s, size, tok->fp);
-}
-
-static int
-decoding_feof(struct tok_state *tok)
-{
-    return feof(tok->fp);
-}
-
-static char *
-decode_str(const char *str, int exec_input, struct tok_state *tok)
-{
-    return new_string(str, strlen(str), tok);
-}
-
-#else /* PGEN */
-
 static char *
 error_ret(struct tok_state *tok) /* XXX */
 {
@@ -551,7 +525,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
             return error_ret(tok);
         }
     }
-#ifndef PGEN
     /* The default encoding is UTF-8, so make sure we don't have any
        non-UTF-8 sequences in it. */
     if (line && !tok->encoding) {
@@ -574,7 +547,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
                      badchar, tok->filename, tok->lineno + 1);
         return error_ret(tok);
     }
-#endif
     return line;
 }
 
@@ -738,8 +710,6 @@ decode_str(const char *input, int single, struct tok_state *tok)
     return str;
 }
 
-#endif /* PGEN */
-
 /* Set up tokenizer for string */
 
 struct tok_state *
@@ -765,9 +735,7 @@ PyTokenizer_FromUTF8(const char *str, int exec_input)
     struct tok_state *tok = tok_new();
     if (tok == NULL)
         return NULL;
-#ifndef PGEN
     tok->input = str = translate_newlines(str, exec_input, tok);
-#endif
     if (str == NULL) {
         PyTokenizer_Free(tok);
         return NULL;
     }
@@ -828,11 +796,9 @@ PyTokenizer_Free(struct tok_state *tok)
 {
     if (tok->encoding != NULL)
         PyMem_FREE(tok->encoding);
-#ifndef PGEN
     Py_XDECREF(tok->decoding_readline);
     Py_XDECREF(tok->decoding_buffer);
     Py_XDECREF(tok->filename);
-#endif
     if (tok->fp != NULL && tok->buf != NULL)
         PyMem_FREE(tok->buf);
     if (tok->input)
@@ -871,7 +837,6 @@ tok_nextc(struct tok_state *tok)
         }
         if (tok->prompt != NULL) {
             char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);
-#ifndef PGEN
             if (newtok != NULL) {
                 char *translated = translate_newlines(newtok, 0, tok);
                 PyMem_FREE(newtok);
@@ -900,7 +865,6 @@
                 strcpy(newtok, buf);
                 Py_DECREF(u);
             }
-#endif
             if (tok->nextprompt != NULL)
                 tok->prompt = tok->nextprompt;
             if (newtok == NULL)
@@ -1056,7 +1020,6 @@ tok_backup(struct tok_state *tok, int c)
 static int
 syntaxerror(struct tok_state *tok, const char *format, ...)
 {
-#ifndef PGEN
     va_list vargs;
 #ifdef HAVE_STDARG_PROTOTYPES
     va_start(vargs, format);
@@ -1069,9 +1032,6 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
                                tok->lineno,
                                (int)(tok->cur - tok->line_start));
     tok->done = E_ERROR;
-#else
-    tok->done = E_TOKEN;
-#endif
     return ERRORTOKEN;
 }
 
@@ -1083,9 +1043,6 @@ indenterror(struct tok_state *tok)
     return ERRORTOKEN;
 }
 
-#ifdef PGEN
-#define verify_identifier(tok) 1
-#else
 /* Verify that the identifier follows PEP 3131.
    All identifier strings are guaranteed to be "ready" unicode objects.
  */
@@ -1112,7 +1069,6 @@ verify_identifier(struct tok_state *tok)
         tok->done = E_IDENTIFIER;
     return result;
 }
-#endif
 
 static int
 tok_decimal_tail(struct tok_state *tok)
@@ -1667,25 +1623,20 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
     case '(':
     case '[':
     case '{':
-#ifndef PGEN
         if (tok->level >= MAXLEVEL) {
            return syntaxerror(tok, "too many nested parentheses");
         }
         tok->parenstack[tok->level] = c;
         tok->parenlinenostack[tok->level] = tok->lineno;
-#endif
         tok->level++;
         break;
     case ')':
     case ']':
     case '}':
-#ifndef PGEN
         if (!tok->level) {
             return syntaxerror(tok, "unmatched '%c'", c);
         }
-#endif
         tok->level--;
-#ifndef PGEN
         int opening = tok->parenstack[tok->level];
         if (!((opening == '(' && c == ')') ||
               (opening == '[' && c == ']') ||
@@ -1704,7 +1655,6 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                         c, opening);
             }
         }
-#endif
         break;
     }
 
@@ -1742,11 +1692,7 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
     FILE *fp;
     char *p_start =NULL , *p_end =NULL , *encoding = NULL;
 
-#ifndef PGEN
     fd = _Py_dup(fd);
-#else
-    fd = dup(fd);
-#endif
     if (fd < 0) {
         return NULL;
     }
@@ -1760,7 +1706,6 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
         fclose(fp);
         return NULL;
     }
-#ifndef PGEN
     if (filename != NULL) {
         Py_INCREF(filename);
         tok->filename = filename;
@@ -1773,7 +1718,6 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
             return encoding;
         }
     }
-#endif
     while (tok->lineno < 2 && tok->done == E_OK) {
         PyTokenizer_Get(tok, &p_start, &p_end);
     }