summary refs log tree commit diff stats
path: root/Parser
diff options
context:
space:
mode:
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/parsetok.c 6
-rw-r--r-- Parser/tokenizer.c 22
-rw-r--r-- Parser/tokenizer.h 1
3 files changed, 28 insertions, 1 deletion
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index d8ff6ee..4c3b506 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -49,7 +49,11 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
initerr(err_ret, filename);
- if ((tok = PyTokenizer_FromString(s)) == NULL) {
+ if (*flags & PyPARSE_IGNORE_COOKIE)
+ tok = PyTokenizer_FromUTF8(s);
+ else
+ tok = PyTokenizer_FromString(s);
+ if (tok == NULL) {
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
return NULL;
}
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 3d52bed..c4f447d 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -715,6 +715,28 @@ PyTokenizer_FromString(const char *str)
return tok;
}
+struct tok_state *
+PyTokenizer_FromUTF8(const char *str) /* Set up tokenizer for a string, recording its encoding as "utf-8"; returns NULL on failure */
+{
+ struct tok_state *tok = tok_new();
+ if (tok == NULL)
+ return NULL; /* tok_new() failed; there is no state to free */
+ tok->decoding_state = STATE_RAW;
+ tok->read_coding_spec = 1; /* NOTE(review): presumably marks the coding cookie as already handled so it is not looked for -- confirm */
+ tok->enc = NULL;
+ tok->str = str;
+ tok->encoding = (char *)PyMem_MALLOC(6); /* 6 == strlen("utf-8") + 1 for the terminating NUL */
+ if (!tok->encoding) {
+ PyTokenizer_Free(tok); /* release the partially set-up state before bailing out */
+ return NULL;
+ }
+ strcpy(tok->encoding, "utf-8");
+
+ /* XXX: constify members. */
+ tok->buf = tok->cur = tok->end = tok->inp = (char*)str; /* all four buffer pointers start at str; the cast drops const */
+ return tok;
+}
+
/* Set up tokenizer for file */
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index df9cbc7..e3328f1 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -61,6 +61,7 @@ struct tok_state {
};
extern struct tok_state *PyTokenizer_FromString(const char *);
+extern struct tok_state *PyTokenizer_FromUTF8(const char *);
extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
char *, char *);
extern void PyTokenizer_Free(struct tok_state *);