diff options
author | Benjamin Peterson <benjamin@python.org> | 2010-04-03 22:48:51 (GMT) |
---|---|---|
committer | Benjamin Peterson <benjamin@python.org> | 2010-04-03 22:48:51 (GMT) |
commit | 4ceeeb09d8ff445888b24aa324bc06175d141cb9 (patch) | |
tree | cdc10ef5b26e95ea8397ed57b88b36bfec6d88ab | |
parent | ab8b9cae7e2f1e714b35d8e5cdadca4df52290d5 (diff) | |
download | cpython-4ceeeb09d8ff445888b24aa324bc06175d141cb9.zip cpython-4ceeeb09d8ff445888b24aa324bc06175d141cb9.tar.gz cpython-4ceeeb09d8ff445888b24aa324bc06175d141cb9.tar.bz2 |
ensure that the locale does not affect the tokenization of identifiers
-rw-r--r-- | Misc/NEWS | 2 | ||||
-rw-r--r-- | Parser/tokenizer.c | 22 |
2 files changed, 20 insertions, 4 deletions
```diff
@@ -12,6 +12,8 @@ What's New in Python 2.7 beta 1?
 Core and Builtins
 -----------------
 
+- Ensure that tokenization of identifiers is not affected by locale.
+
 - Issue #1222585: Added LDCXXSHARED for C++ support. Patch by Arfrever.
 
 - Raise a TypeError when trying to delete a T_STRING_INPLACE struct member.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index d60b256..fbbd0bc 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -93,6 +93,21 @@ char *_PyParser_TokenNames[] = {
 };
 
 
+/* Ensure that the locale does not interfere with tokenization. */
+
+static int
+ascii_isalpha(int c)
+{
+    return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
+}
+
+static int
+ascii_isalnum(int c)
+{
+    return ascii_isalpha(c) || ('0' <= c && c <= '9');
+}
+
+
 /* Create and initialize a new tok_state structure */
 
 static struct tok_state *
@@ -230,7 +245,7 @@ get_coding_spec(const char *s, Py_ssize_t size)
         } while (t[0] == '\x20' || t[0] == '\t');
 
         begin = t;
-        while (isalnum(Py_CHARMASK(t[0])) ||
+        while (ascii_isalnum(Py_CHARMASK(t[0])) ||
                t[0] == '-' || t[0] == '_' || t[0] == '.')
             t++;
 
@@ -1185,7 +1200,6 @@ indenterror(struct tok_state *tok)
     return 0;
 }
 
-
 /* Get next token, after space stripping etc. */
 
 static int
@@ -1341,7 +1355,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
     }
 
     /* Identifier (most frequent token!) */
-    if (isalpha(c) || c == '_') {
+    if (ascii_isalpha(c) || c == '_') {
         /* Process r"", u"" and ur"" */
         switch (c) {
         case 'b':
@@ -1367,7 +1381,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
                 goto letter_quote;
             break;
         }
-        while (isalnum(c) || c == '_') {
+        while (ascii_isalnum(c) || c == '_') {
             c = tok_nextc(tok);
         }
         tok_backup(tok, c);
```