diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2007-09-04 09:18:06 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2007-09-04 09:18:06 (GMT) |
commit | 85bcc66bb492931b6ca3de21ca53ca53b754be33 (patch) | |
tree | 3e46f8d106d8ac338238c146480568dd4a3c0083 /Parser | |
parent | 53de1902e7a9788d2d4b917b1b14b2a76171f0f4 (diff) | |
download | cpython-85bcc66bb492931b6ca3de21ca53ca53b754be33.zip cpython-85bcc66bb492931b6ca3de21ca53ca53b754be33.tar.gz cpython-85bcc66bb492931b6ca3de21ca53ca53b754be33.tar.bz2 |
Convert code from sys.stdin.encoding to UTF-8 in
interactive mode. Fixes #1100.
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/parsetok.c | 9 | ||||
-rw-r--r-- | Parser/tokenizer.c | 36 | ||||
-rw-r--r-- | Parser/tokenizer.h | 3 |
3 files changed, 42 insertions, 6 deletions
```diff
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 71bed29..b9664ea 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -59,19 +59,20 @@ node *
 PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
                    char *ps1, char *ps2, perrdetail *err_ret)
 {
-    return PyParser_ParseFileFlags(fp, filename, g, start, ps1, ps2,
-                                   err_ret, 0);
+    return PyParser_ParseFileFlags(fp, filename, NULL,
+                                   g, start, ps1, ps2, err_ret, 0);
 }
 
 node *
-PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start,
+PyParser_ParseFileFlags(FILE *fp, const char *filename, const char* enc,
+                        grammar *g, int start,
                         char *ps1, char *ps2, perrdetail *err_ret, int flags)
 {
     struct tok_state *tok;
 
     initerr(err_ret, filename);
 
-    if ((tok = PyTokenizer_FromFile(fp, ps1, ps2)) == NULL) {
+    if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) {
         err_ret->error = E_NOMEM;
         return NULL;
     }
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 776183d..7f51e14 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -677,7 +677,7 @@ PyTokenizer_FromString(const char *str)
 /* Set up tokenizer for file */
 
 struct tok_state *
-PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2)
+PyTokenizer_FromFile(FILE *fp, char* enc, char *ps1, char *ps2)
 {
     struct tok_state *tok = tok_new();
     if (tok == NULL)
@@ -691,6 +691,17 @@ PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2)
     tok->fp = fp;
     tok->prompt = ps1;
     tok->nextprompt = ps2;
+    if (enc != NULL) {
+        /* Must copy encoding declaration since it
+           gets copied into the parse tree. */
+        tok->encoding = PyMem_MALLOC(strlen(enc)+1);
+        if (!tok->encoding) {
+            PyTokenizer_Free(tok);
+            return NULL;
+        }
+        strcpy(tok->encoding, enc);
+        tok->decoding_state = -1;
+    }
     return tok;
 }
 
@@ -742,6 +753,29 @@ tok_nextc(register struct tok_state *tok)
             }
             if (tok->prompt != NULL) {
                 char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);
+#ifndef PGEN
+                if (tok->encoding && newtok && *newtok) {
+                    /* Recode to UTF-8 */
+                    Py_ssize_t buflen;
+                    const char* buf;
+                    PyObject *u = translate_into_utf8(newtok, tok->encoding);
+                    PyMem_FREE(newtok);
+                    if (!u) {
+                        tok->done = E_DECODE;
+                        return EOF;
+                    }
+                    buflen = PyBytes_Size(u);
+                    buf = PyBytes_AsString(u);
+                    if (!buf) {
+                        Py_DECREF(u);
+                        tok->done = E_DECODE;
+                        return EOF;
+                    }
+                    newtok = PyMem_MALLOC(buflen+1);
+                    strcpy(newtok, buf);
+                    Py_DECREF(u);
+                }
+#endif
                 if (tok->nextprompt != NULL)
                     tok->prompt = tok->nextprompt;
                 if (newtok == NULL)
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 5e7ebf7..ba90a5f 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -55,7 +55,8 @@ struct tok_state {
 };
 
 extern struct tok_state *PyTokenizer_FromString(const char *);
-extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
+extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
+                                              char *, char *);
 extern void PyTokenizer_Free(struct tok_state *);
 extern int PyTokenizer_Get(struct tok_state *, char **, char **);
 
```