diff options
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/Python.asdl | 13 | ||||
-rw-r--r-- | Parser/asdl.py | 3 | ||||
-rw-r--r--[-rwxr-xr-x] | Parser/asdl_c.py | 16 | ||||
-rw-r--r-- | Parser/grammar.c | 27 | ||||
-rw-r--r-- | Parser/myreadline.c | 118 | ||||
-rw-r--r-- | Parser/node.c | 2 | ||||
-rw-r--r-- | Parser/parser.c | 9 | ||||
-rw-r--r-- | Parser/parsetok.c | 6 | ||||
-rw-r--r-- | Parser/pgen.c | 24 | ||||
-rw-r--r-- | Parser/pgenmain.c | 14 | ||||
-rw-r--r-- | Parser/tokenizer.c | 246 |
11 files changed, 375 insertions, 103 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl index cd0832d..f470ad1 100644 --- a/Parser/Python.asdl +++ b/Parser/Python.asdl @@ -1,4 +1,8 @@ --- ASDL's six builtin types are identifier, int, string, bytes, object, singleton +-- ASDL's 7 builtin types are: +-- identifier, int, string, bytes, object, singleton, constant +-- +-- singleton: None, True or False +-- constant can be None, whereas None means "no value" for object. module Python { @@ -24,6 +28,8 @@ module Python | Delete(expr* targets) | Assign(expr* targets, expr value) | AugAssign(expr target, operator op, expr value) + -- 'simple' indicates that we annotate simple name without parens + | AnnAssign(expr target, expr annotation, expr? value, int simple) -- use 'orelse' because else is a keyword in target languages | For(expr target, expr iter, stmt* body, stmt* orelse) @@ -71,9 +77,12 @@ module Python | Call(expr func, expr* args, keyword* keywords) | Num(object n) -- a number as a PyObject. | Str(string s) -- need to specify raw, unicode, etc? + | FormattedValue(expr value, int? conversion, expr? format_spec) + | JoinedStr(expr* values) | Bytes(bytes s) | NameConstant(singleton value) | Ellipsis + | Constant(constant value) -- the following expression can appear in assignment context | Attribute(expr value, identifier attr, expr_context ctx) @@ -101,7 +110,7 @@ module Python cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn - comprehension = (expr target, expr iter, expr* ifs) + comprehension = (expr target, expr iter, expr* ifs, int is_async) excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body) attributes (int lineno, int col_offset) diff --git a/Parser/asdl.py b/Parser/asdl.py index 121cdab..62f5c19 100644 --- a/Parser/asdl.py +++ b/Parser/asdl.py @@ -33,7 +33,8 @@ __all__ = [ # See the EBNF at the top of the file to understand the logical connection # between the various node types. -builtin_types = {'identifier', 'string', 'bytes', 'int', 'object', 'singleton'} +builtin_types = {'identifier', 'string', 'bytes', 'int', 'object', 'singleton', + 'constant'} class AST: def __repr__(self): diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index f38c253..17c8517 100755..100644 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -834,6 +834,7 @@ static PyObject* ast2obj_object(void *o) return (PyObject*)o; } #define ast2obj_singleton ast2obj_object +#define ast2obj_constant ast2obj_object #define ast2obj_identifier ast2obj_object #define ast2obj_string ast2obj_object #define ast2obj_bytes ast2obj_object @@ -871,6 +872,19 @@ static int obj2ast_object(PyObject* obj, PyObject** out, PyArena* arena) return 0; } +static int obj2ast_constant(PyObject* obj, PyObject** out, PyArena* arena) +{ + if (obj) { + if (PyArena_AddPyObject(arena, obj) < 0) { + *out = NULL; + return -1; + } + Py_INCREF(obj); + } + *out = obj; + return 0; +} + static int obj2ast_identifier(PyObject* obj, PyObject** out, PyArena* arena) { if (!PyUnicode_CheckExact(obj) && obj != Py_None) { @@ -906,7 +920,7 @@ static int obj2ast_int(PyObject* obj, int* out, PyArena* arena) return 1; } - i = (int)PyLong_AsLong(obj); + i = _PyLong_AsInt(obj); if (i == -1 && PyErr_Occurred()) return 1; *out = i; diff --git a/Parser/grammar.c b/Parser/grammar.c index b598294..75fd5b9 100644 --- a/Parser/grammar.c +++ b/Parser/grammar.c @@ -28,6 +28,23 @@ newgrammar(int start) return g; } +void +freegrammar(grammar *g) +{ + int i; + for (i = 0; i < g->g_ndfas; i++) { + free(g->g_dfa[i].d_name); + for (int j = 0; j < g->g_dfa[i].d_nstates; j++) + PyObject_FREE(g->g_dfa[i].d_state[j].s_arc); + PyObject_FREE(g->g_dfa[i].d_state); + } + PyObject_FREE(g->g_dfa); + for (i = 0; i < g->g_ll.ll_nlabels; i++) + free(g->g_ll.ll_label[i].lb_str); + PyObject_FREE(g->g_ll.ll_label); + PyObject_FREE(g); +} + dfa * adddfa(grammar *g, int type, const char *name) { @@ -63,7 +80,7 @@ addstate(dfa *d) s->s_upper = 0; s->s_accel = NULL; s->s_accept = 0; - return Py_SAFE_DOWNCAST(s - d->d_state, Py_intptr_t, int); + return Py_SAFE_DOWNCAST(s - d->d_state, intptr_t, int); } void @@ -105,7 +122,7 @@ addlabel(labellist *ll, int type, const char *str) if (Py_DebugFlag) printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels, PyGrammar_LabelRepr(lb)); - return Py_SAFE_DOWNCAST(lb - ll->ll_label, Py_intptr_t, int); + return Py_SAFE_DOWNCAST(lb - ll->ll_label, intptr_t, int); } /* Same, but rather dies than adds */ @@ -122,7 +139,13 @@ findlabel(labellist *ll, int type, const char *str) } fprintf(stderr, "Label %d/'%s' not found\n", type, str); Py_FatalError("grammar.c:findlabel()"); + + /* Py_FatalError() is declared with __attribute__((__noreturn__)). + GCC emits a warning without "return 0;" (compiler bug!), but Clang is + smarter and emits a warning on the return... */ +#ifndef __clang__ return 0; /* Make gcc -Wall happy */ +#endif } /* Forward */ diff --git a/Parser/myreadline.c b/Parser/myreadline.c index 28c7b6d..c8b92da 100644 --- a/Parser/myreadline.c +++ b/Parser/myreadline.c @@ -41,10 +41,7 @@ my_fgets(char *buf, int len, FILE *fp) (void)(PyOS_InputHook)(); errno = 0; clearerr(fp); - if (_PyVerify_fd(fileno(fp))) - p = fgets(buf, len, fp); - else - p = NULL; + p = fgets(buf, len, fp); if (p != NULL) return 0; /* No error */ err = errno; @@ -101,6 +98,100 @@ my_fgets(char *buf, int len, FILE *fp) /* NOTREACHED */ } +#ifdef MS_WINDOWS +/* Readline implementation using ReadConsoleW */ + +extern char _get_console_type(HANDLE handle); + +char * +_PyOS_WindowsConsoleReadline(HANDLE hStdIn) +{ + static wchar_t wbuf_local[1024 * 16]; + const DWORD chunk_size = 1024; + + DWORD n_read, total_read, wbuflen, u8len; + wchar_t *wbuf; + char *buf = NULL; + int err = 0; + + n_read = 0; + total_read = 0; + wbuf = wbuf_local; + wbuflen = sizeof(wbuf_local) / sizeof(wbuf_local[0]) - 1; + while (1) { + if (!ReadConsoleW(hStdIn, &wbuf[total_read], wbuflen - total_read, &n_read, NULL)) { + err = GetLastError(); + goto exit; + } + if (n_read == 0) { + int s; + err = GetLastError(); + if (err != ERROR_OPERATION_ABORTED) + goto exit; + err = 0; + HANDLE hInterruptEvent = _PyOS_SigintEvent(); + if (WaitForSingleObjectEx(hInterruptEvent, 100, FALSE) + == WAIT_OBJECT_0) { + ResetEvent(hInterruptEvent); +#ifdef WITH_THREAD + PyEval_RestoreThread(_PyOS_ReadlineTState); +#endif + s = PyErr_CheckSignals(); +#ifdef WITH_THREAD + PyEval_SaveThread(); +#endif + if (s < 0) + goto exit; + } + break; + } + + total_read += n_read; + if (total_read == 0 || wbuf[total_read - 1] == L'\n') { + break; + } + wbuflen += chunk_size; + if (wbuf == wbuf_local) { + wbuf[total_read] = '\0'; + wbuf = (wchar_t*)PyMem_RawMalloc(wbuflen * sizeof(wchar_t)); + if (wbuf) + wcscpy_s(wbuf, wbuflen, wbuf_local); + } + else + wbuf = (wchar_t*)PyMem_RawRealloc(wbuf, wbuflen * sizeof(wchar_t)); + } + + if (wbuf[0] == '\x1a') { + buf = PyMem_RawMalloc(1); + if (buf) + buf[0] = '\0'; + goto exit; + } + + u8len = WideCharToMultiByte(CP_UTF8, 0, wbuf, total_read, NULL, 0, NULL, NULL); + buf = PyMem_RawMalloc(u8len + 1); + u8len = WideCharToMultiByte(CP_UTF8, 0, wbuf, total_read, buf, u8len, NULL, NULL); + buf[u8len] = '\0'; + +exit: + if (wbuf != wbuf_local) + PyMem_RawFree(wbuf); + + if (err) { +#ifdef WITH_THREAD + PyEval_RestoreThread(_PyOS_ReadlineTState); +#endif + PyErr_SetFromWindowsErr(err); +#ifdef WITH_THREAD + PyEval_SaveThread(); +#endif + } + + return buf; +} + +#endif + /* Readline implementation using fgets() */ @@ -110,6 +201,25 @@ PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt) size_t n; char *p, *pr; +#ifdef MS_WINDOWS + if (!Py_LegacyWindowsStdioFlag && sys_stdin == stdin) { + HANDLE hStdIn; + + _Py_BEGIN_SUPPRESS_IPH + hStdIn = (HANDLE)_get_osfhandle(fileno(sys_stdin)); + _Py_END_SUPPRESS_IPH + + if (_get_console_type(hStdIn) == 'r') { + fflush(sys_stdout); + if (prompt) + fprintf(stderr, "%s", prompt); + fflush(stderr); + clearerr(sys_stdin); + return _PyOS_WindowsConsoleReadline(hStdIn); + } + } +#endif + n = 100; p = (char *)PyMem_RawMalloc(n); if (p == NULL) diff --git a/Parser/node.c b/Parser/node.c index 0010324..240d290 100644 --- a/Parser/node.c +++ b/Parser/node.c @@ -91,7 +91,7 @@ PyNode_AddChild(node *n1, int type, char *str, int lineno, int col_offset) if (current_capacity < 0 || required_capacity < 0) return E_OVERFLOW; if (current_capacity < required_capacity) { - if ((size_t)required_capacity > PY_SIZE_MAX / sizeof(node)) { + if ((size_t)required_capacity > SIZE_MAX / sizeof(node)) { return E_NOMEM; } n = n1->n_child; diff --git a/Parser/parser.c b/Parser/parser.c index 56ec514..41072c4 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -140,21 +140,20 @@ classify(parser_state *ps, int type, const char *str) int n = g->g_ll.ll_nlabels; if (type == NAME) { - const char *s = str; label *l = g->g_ll.ll_label; int i; for (i = n; i > 0; i--, l++) { if (l->lb_type != NAME || l->lb_str == NULL || - l->lb_str[0] != s[0] || - strcmp(l->lb_str, s) != 0) + l->lb_str[0] != str[0] || + strcmp(l->lb_str, str) != 0) continue; #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD #if 0 /* Leaving this in as an example */ if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) { - if (s[0] == 'w' && strcmp(s, "with") == 0) + if (str[0] == 'w' && strcmp(str, "with") == 0) break; /* not a keyword yet */ - else if (s[0] == 'a' && strcmp(s, "as") == 0) + else if (str[0] == 'a' && strcmp(str, "as") == 0) break; /* not a keyword yet */ } #endif diff --git a/Parser/parsetok.c b/Parser/parsetok.c index 629dee5..1f467d6 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -161,10 +161,10 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD #if 0 -static char with_msg[] = +static const char with_msg[] = "%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n"; -static char as_msg[] = +static const char as_msg[] = "%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n"; static void @@ -255,7 +255,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, #endif if (a >= tok->line_start) col_offset = Py_SAFE_DOWNCAST(a - tok->line_start, - Py_intptr_t, int); + intptr_t, int); else col_offset = -1; diff --git a/Parser/pgen.c b/Parser/pgen.c index f3031ae..6451a1d 100644 --- a/Parser/pgen.c +++ b/Parser/pgen.c @@ -117,6 +117,16 @@ newnfagrammar(void) return gr; } +static void +freenfagrammar(nfagrammar *gr) +{ + for (int i = 0; i < gr->gr_nnfas; i++) { + PyObject_FREE(gr->gr_nfa[i]->nf_state); + } + PyObject_FREE(gr->gr_nfa); + PyObject_FREE(gr); +} + static nfa * addnfa(nfagrammar *gr, char *name) { @@ -134,7 +144,7 @@ addnfa(nfagrammar *gr, char *name) #ifdef Py_DEBUG -static char REQNFMT[] = "metacompile: less than %d children\n"; +static const char REQNFMT[] = "metacompile: less than %d children\n"; #define REQN(i, count) do { \ if (i < count) { \ @@ -379,7 +389,7 @@ typedef struct _ss_dfa { /* Forward */ static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits, - labellist *ll, char *msg); + labellist *ll, const char *msg); static void simplify(int xx_nstates, ss_state *xx_state); static void convert(dfa *d, int xx_nstates, ss_state *xx_state); @@ -488,13 +498,17 @@ makedfa(nfagrammar *gr, nfa *nf, dfa *d) convert(d, xx_nstates, xx_state); - /* XXX cleanup */ + for (int i = 0; i < xx_nstates; i++) { + for (int j = 0; j < xx_state[i].ss_narcs; j++) + delbitset(xx_state[i].ss_arc[j].sa_bitset); + PyObject_FREE(xx_state[i].ss_arc); + } PyObject_FREE(xx_state); } static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits, - labellist *ll, char *msg) + labellist *ll, const char *msg) { int i, ibit, iarc; ss_state *yy; @@ -669,7 +683,7 @@ pgen(node *n) g = maketables(gr); translatelabels(g); addfirstsets(g); - PyObject_FREE(gr); + freenfagrammar(gr); return g; } diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c index 0f055d6..e386248 100644 --- a/Parser/pgenmain.c +++ b/Parser/pgenmain.c @@ -27,7 +27,7 @@ int Py_VerboseFlag; int Py_IgnoreEnvironmentFlag; /* Forward */ -grammar *getgrammar(char *filename); +grammar *getgrammar(const char *filename); void Py_Exit(int) _Py_NO_RETURN; @@ -37,6 +37,15 @@ Py_Exit(int sts) exit(sts); } +#ifdef WITH_THREAD +/* Needed by obmalloc.c */ +int PyGILState_Check(void) +{ return 1; } +#endif + +void _PyMem_DumpTraceback(int fd, const void *ptr) +{} + int main(int argc, char **argv) { @@ -71,12 +80,13 @@ main(int argc, char **argv) printf("Writing %s ...\n", graminit_h); printnonterminals(g, fp); fclose(fp); + freegrammar(g); Py_Exit(0); return 0; /* Make gcc -Wall happy */ } grammar * -getgrammar(char *filename) +getgrammar(const char *filename) { FILE *fp; node *n; diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 612cb23..8317293 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -202,8 +202,8 @@ error_ret(struct tok_state *tok) /* XXX */ } -static char * -get_normal_name(char *s) /* for utf-8 and latin-1 */ +static const char * +get_normal_name(const char *s) /* for utf-8 and latin-1 */ { char buf[13]; int i; @@ -264,7 +264,7 @@ get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *t if (begin < t) { char* r = new_string(begin, t - begin, tok); - char* q; + const char* q; if (!r) return 0; q = get_normal_name(r); @@ -1335,6 +1335,28 @@ verify_identifier(struct tok_state *tok) } #endif +static int +tok_decimal_tail(struct tok_state *tok) +{ + int c; + + while (1) { + do { + c = tok_nextc(tok); + } while (isdigit(c)); + if (c != '_') { + break; + } + c = tok_nextc(tok); + if (!isdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return 0; + } + } + return c; +} + /* Get next token, after space stripping etc. */ static int @@ -1355,17 +1377,20 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) tok->atbol = 0; for (;;) { c = tok_nextc(tok); - if (c == ' ') + if (c == ' ') { col++, altcol++; + } else if (c == '\t') { col = (col/tok->tabsize + 1) * tok->tabsize; altcol = (altcol/tok->alttabsize + 1) * tok->alttabsize; } - else if (c == '\014') /* Control-L (formfeed) */ + else if (c == '\014') {/* Control-L (formfeed) */ col = altcol = 0; /* For Emacs users */ - else + } + else { break; + } } tok_backup(tok, c); if (c == '#' || c == '\n') { @@ -1374,10 +1399,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) not passed to the parser as NEWLINE tokens, except *totally* empty lines in interactive mode, which signal the end of a command group. */ - if (col == 0 && c == '\n' && tok->prompt != NULL) + if (col == 0 && c == '\n' && tok->prompt != NULL) { blankline = 0; /* Let it through */ - else + } + else { blankline = 1; /* Ignore completely */ + } /* We can't jump back right here since we still may need to skip to the end of a comment */ } @@ -1385,8 +1412,9 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) if (col == tok->indstack[tok->indent]) { /* No change */ if (altcol != tok->altindstack[tok->indent]) { - if (indenterror(tok)) + if (indenterror(tok)) { return ERRORTOKEN; + } } } else if (col > tok->indstack[tok->indent]) { @@ -1397,8 +1425,9 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) return ERRORTOKEN; } if (altcol <= tok->altindstack[tok->indent]) { - if (indenterror(tok)) + if (indenterror(tok)) { return ERRORTOKEN; + } } tok->pendin++; tok->indstack[++tok->indent] = col; @@ -1417,8 +1446,9 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) return ERRORTOKEN; } if (altcol != tok->altindstack[tok->indent]) { - if (indenterror(tok)) + if (indenterror(tok)) { return ERRORTOKEN; + } } } } @@ -1464,9 +1494,11 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) tok->start = tok->cur - 1; /* Skip comment */ - if (c == '#') - while (c != EOF && c != '\n') + if (c == '#') { + while (c != EOF && c != '\n') { c = tok_nextc(tok); + } + } /* Check for EOF and errors now */ if (c == EOF) { @@ -1477,31 +1509,41 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) nonascii = 0; if (is_potential_identifier_start(c)) { /* Process b"", r"", u"", br"" and rb"" */ - int saw_b = 0, saw_r = 0, saw_u = 0; + int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0; while (1) { - if (!(saw_b || saw_u) && (c == 'b' || c == 'B')) + if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B')) saw_b = 1; /* Since this is a backwards compatibility support literal we don't want to support it in arbitrary order like byte literals. */ - else if (!(saw_b || saw_u || saw_r) && (c == 'u' || c == 'U')) + else if (!(saw_b || saw_u || saw_r || saw_f) + && (c == 'u'|| c == 'U')) { saw_u = 1; + } /* ur"" and ru"" are not supported */ - else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) + else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) { saw_r = 1; - else + } + else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) { + saw_f = 1; + } + else { break; + } c = tok_nextc(tok); - if (c == '"' || c == '\'') + if (c == '"' || c == '\'') { goto letter_quote; + } } while (is_potential_identifier_char(c)) { - if (c >= 128) + if (c >= 128) { nonascii = 1; + } c = tok_nextc(tok); } tok_backup(tok, c); - if (nonascii && !verify_identifier(tok)) + if (nonascii && !verify_identifier(tok)) { return ERRORTOKEN; + } *p_start = tok->start; *p_end = tok->cur; @@ -1510,10 +1552,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) /* Current token length is 5. */ if (tok->async_def) { /* We're inside an 'async def' function. */ - if (memcmp(tok->start, "async", 5) == 0) + if (memcmp(tok->start, "async", 5) == 0) { return ASYNC; - if (memcmp(tok->start, "await", 5) == 0) + } + if (memcmp(tok->start, "await", 5) == 0) { return AWAIT; + } } else if (memcmp(tok->start, "async", 5) == 0) { /* The current token is 'async'. @@ -1546,8 +1590,9 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) /* Newline */ if (c == '\n') { tok->atbol = 1; - if (blankline || tok->level > 0) + if (blankline || tok->level > 0) { goto nextline; + } *p_start = tok->start; *p_end = tok->cur - 1; /* Leave '\n' out of the string */ tok->cont_line = 0; @@ -1570,11 +1615,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) *p_start = tok->start; *p_end = tok->cur; return ELLIPSIS; - } else { + } + else { tok_backup(tok, c); } tok_backup(tok, '.'); - } else { + } + else { tok_backup(tok, c); } *p_start = tok->start; @@ -1587,64 +1634,94 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) if (c == '0') { /* Hex, octal or binary -- maybe. */ c = tok_nextc(tok); - if (c == '.') - goto fraction; - if (c == 'j' || c == 'J') - goto imaginary; if (c == 'x' || c == 'X') { - /* Hex */ c = tok_nextc(tok); - if (!isxdigit(c)) { - tok->done = E_TOKEN; - tok_backup(tok, c); - return ERRORTOKEN; - } do { - c = tok_nextc(tok); - } while (isxdigit(c)); + if (c == '_') { + c = tok_nextc(tok); + } + if (!isxdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while (isxdigit(c)); + } while (c == '_'); } else if (c == 'o' || c == 'O') { /* Octal */ c = tok_nextc(tok); - if (c < '0' || c >= '8') { - tok->done = E_TOKEN; - tok_backup(tok, c); - return ERRORTOKEN; - } do { - c = tok_nextc(tok); - } while ('0' <= c && c < '8'); + if (c == '_') { + c = tok_nextc(tok); + } + if (c < '0' || c >= '8') { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while ('0' <= c && c < '8'); + } while (c == '_'); } else if (c == 'b' || c == 'B') { /* Binary */ c = tok_nextc(tok); - if (c != '0' && c != '1') { - tok->done = E_TOKEN; - tok_backup(tok, c); - return ERRORTOKEN; - } do { - c = tok_nextc(tok); - } while (c == '0' || c == '1'); + if (c == '_') { + c = tok_nextc(tok); + } + if (c != '0' && c != '1') { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while (c == '0' || c == '1'); + } while (c == '_'); } else { int nonzero = 0; /* maybe old-style octal; c is first char of it */ /* in any case, allow '0' as a literal */ - while (c == '0') + while (1) { + if (c == '_') { + c = tok_nextc(tok); + if (!isdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + } + if (c != '0') { + break; + } c = tok_nextc(tok); - while (isdigit(c)) { + } + if (isdigit(c)) { nonzero = 1; - c = tok_nextc(tok); + c = tok_decimal_tail(tok); + if (c == 0) { + return ERRORTOKEN; + } } - if (c == '.') + if (c == '.') { + c = tok_nextc(tok); goto fraction; - else if (c == 'e' || c == 'E') + } + else if (c == 'e' || c == 'E') { goto exponent; - else if (c == 'j' || c == 'J') + } + else if (c == 'j' || c == 'J') { goto imaginary; + } else if (nonzero) { + /* Old-style octal: now disallowed. */ tok->done = E_TOKEN; tok_backup(tok, c); return ERRORTOKEN; @@ -1653,17 +1730,22 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) } else { /* Decimal */ - do { - c = tok_nextc(tok); - } while (isdigit(c)); + c = tok_decimal_tail(tok); + if (c == 0) { + return ERRORTOKEN; + } { /* Accept floating point numbers. */ if (c == '.') { + c = tok_nextc(tok); fraction: /* Fraction */ - do { - c = tok_nextc(tok); - } while (isdigit(c)); + if (isdigit(c)) { + c = tok_decimal_tail(tok); + if (c == 0) { + return ERRORTOKEN; + } + } } if (c == 'e' || c == 'E') { int e; @@ -1685,14 +1767,16 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) *p_end = tok->cur; return NUMBER; } - do { - c = tok_nextc(tok); - } while (isdigit(c)); + c = tok_decimal_tail(tok); + if (c == 0) { + return ERRORTOKEN; + } } - if (c == 'j' || c == 'J') + if (c == 'j' || c == 'J') { /* Imaginary part */ imaginary: c = tok_nextc(tok); + } } } tok_backup(tok, c); @@ -1712,22 +1796,27 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) c = tok_nextc(tok); if (c == quote) { c = tok_nextc(tok); - if (c == quote) + if (c == quote) { quote_size = 3; - else + } + else { end_quote_size = 1; /* empty string found */ + } } - if (c != quote) + if (c != quote) { tok_backup(tok, c); + } /* Get rest of string */ while (end_quote_size != quote_size) { c = tok_nextc(tok); if (c == EOF) { - if (quote_size == 3) + if (quote_size == 3) { tok->done = E_EOFS; - else + } + else { tok->done = E_EOLS; + } tok->cur = tok->inp; return ERRORTOKEN; } @@ -1736,12 +1825,14 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) tok->cur = tok->inp; return ERRORTOKEN; } - if (c == quote) + if (c == quote) { end_quote_size += 1; + } else { end_quote_size = 0; - if (c == '\\') - c = tok_nextc(tok); /* skip escaped char */ + if (c == '\\') { + tok_nextc(tok); /* skip escaped char */ + } } } @@ -1771,7 +1862,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) int token3 = PyToken_ThreeChars(c, c2, c3); if (token3 != OP) { token = token3; - } else { + } + else { tok_backup(tok, c3); } *p_start = tok->start; |