diff options
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/parsetok.c | 6 | ||||
-rw-r--r-- | Parser/tokenizer.c | 22 | ||||
-rw-r--r-- | Parser/tokenizer.h | 1 |
3 files changed, 28 insertions, 1 deletions
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index d8ff6ee..4c3b506 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -49,7 +49,11 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
 
     initerr(err_ret, filename);
 
-    if ((tok = PyTokenizer_FromString(s)) == NULL) {
+    if (*flags & PyPARSE_IGNORE_COOKIE)
+        tok = PyTokenizer_FromUTF8(s);
+    else
+        tok = PyTokenizer_FromString(s);
+    if (tok == NULL) {
         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
         return NULL;
     }
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 3d52bed..c4f447d 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -715,6 +715,28 @@ PyTokenizer_FromString(const char *str)
     return tok;
 }
 
+struct tok_state *
+PyTokenizer_FromUTF8(const char *str)
+{
+    struct tok_state *tok = tok_new();
+    if (tok == NULL)
+        return NULL;
+    tok->decoding_state = STATE_RAW;
+    tok->read_coding_spec = 1;
+    tok->enc = NULL;
+    tok->str = str;
+    tok->encoding = (char *)PyMem_MALLOC(6);
+    if (!tok->encoding) {
+        PyTokenizer_Free(tok);
+        return NULL;
+    }
+    strcpy(tok->encoding, "utf-8");
+
+    /* XXX: constify members. */
+    tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
+    return tok;
+}
+
 
 /* Set up tokenizer for file */
 
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index df9cbc7..e3328f1 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -61,6 +61,7 @@ struct tok_state {
 };
 
 extern struct tok_state *PyTokenizer_FromString(const char *);
+extern struct tok_state *PyTokenizer_FromUTF8(const char *);
 extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
                                               char *, char *);
 extern void PyTokenizer_Free(struct tok_state *);