diff options
Diffstat (limited to 'Parser/tokenizer.c')
-rw-r--r-- | Parser/tokenizer.c | 22 |
1 files changed, 18 insertions, 4 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index d60b256..fbbd0bc 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -93,6 +93,21 @@ char *_PyParser_TokenNames[] = {
 };
 
 
+/* Ensure that the locale does not interfere with tokenization. */
+
+static int
+ascii_isalpha(int c)
+{
+    return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
+}
+
+static int
+ascii_isalnum(int c)
+{
+    return ascii_isalpha(c) || ('0' <= c && c <= '9');
+}
+
+
 /* Create and initialize a new tok_state structure */
 
 static struct tok_state *
@@ -230,7 +245,7 @@ get_coding_spec(const char *s, Py_ssize_t size)
                 } while (t[0] == '\x20' || t[0] == '\t');
 
                 begin = t;
-                while (isalnum(Py_CHARMASK(t[0])) ||
+                while (ascii_isalnum(Py_CHARMASK(t[0])) ||
                        t[0] == '-' || t[0] == '_' || t[0] == '.')
                     t++;
 
@@ -1185,7 +1200,6 @@ indenterror(struct tok_state *tok)
     return 0;
 }
 
-
 /* Get next token, after space stripping etc. */
 
 static int
@@ -1341,7 +1355,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
     }
 
     /* Identifier (most frequent token!) */
-    if (isalpha(c) || c == '_') {
+    if (ascii_isalpha(c) || c == '_') {
         /* Process r"", u"" and ur"" */
         switch (c) {
         case 'b':
@@ -1367,7 +1381,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
                 goto letter_quote;
             break;
         }
-        while (isalnum(c) || c == '_') {
+        while (ascii_isalnum(c) || c == '_') {
             c = tok_nextc(tok);
         }
         tok_backup(tok, c);