diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2007-08-15 07:32:56 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2007-08-15 07:32:56 (GMT) |
commit | 47383403a0a11259acb640406a8efc38981d2255 (patch) | |
tree | ad461e275dc3f2607bab86bb596366d71489b453 /Parser/tokenizer.c | |
parent | 32c4ac014387d3bffea5461339b8ad3044d0dafb (diff) | |
download | cpython-47383403a0a11259acb640406a8efc38981d2255.zip cpython-47383403a0a11259acb640406a8efc38981d2255.tar.gz cpython-47383403a0a11259acb640406a8efc38981d2255.tar.bz2 |
Implement PEP 3131. Add isidentifier to str.
Diffstat (limited to 'Parser/tokenizer.c')
-rw-r--r-- | Parser/tokenizer.c | 29 |
1 files changed, 26 insertions, 3 deletions
```diff
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 2e700bc..8f30fef 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -21,13 +21,15 @@
 #define is_potential_identifier_start(c) (\
               (c >= 'a' && c <= 'z')\
                || (c >= 'A' && c <= 'Z')\
-               || c == '_')
+               || c == '_'\
+               || (c >= 128))
 
 #define is_potential_identifier_char(c) (\
               (c >= 'a' && c <= 'z')\
                || (c >= 'A' && c <= 'Z')\
                || (c >= '0' && c <= '9')\
-               || c == '_')
+               || c == '_'\
+               || (c >= 128))
 
 extern char *PyOS_Readline(FILE *, FILE *, char *);
 /* Return malloc'ed string including trailing \n;
@@ -1070,6 +1072,19 @@ indenterror(struct tok_state *tok)
     return 0;
 }
 
+#ifdef PGEN
+#define verify_identifier(s,e) 1
+#else
+/* Verify that the identifier follows PEP 3131. */
+static int
+verify_identifier(char *start, char *end)
+{
+    PyObject *s = PyUnicode_DecodeUTF8(start, end-start, NULL);
+    int result = PyUnicode_IsIdentifier(s);
+    Py_DECREF(s);
+    return result;
+}
+#endif
 
 /* Get next token, after space stripping etc. */
 
@@ -1077,7 +1092,7 @@ static int
 tok_get(register struct tok_state *tok, char **p_start, char **p_end)
 {
     register int c;
-    int blankline;
+    int blankline, nonascii;
 
     *p_start = *p_end = NULL;
   nextline:
@@ -1195,6 +1210,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
     }
 
     /* Identifier (most frequent token!) */
+    nonascii = 0;
     if (is_potential_identifier_start(c)) {
         /* Process r"", u"" and ur"" */
         switch (c) {
@@ -1214,9 +1230,16 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
             break;
         }
         while (is_potential_identifier_char(c)) {
+            if (c >= 128)
+                nonascii = 1;
             c = tok_nextc(tok);
         }
         tok_backup(tok, c);
+        if (nonascii &&
+            !verify_identifier(tok->start, tok->cur)) {
+            tok->done = E_IDENTIFIER;
+            return ERRORTOKEN;
+        }
         *p_start = tok->start;
         *p_end = tok->cur;
         return NAME;
```