diff options
author | Brett Cannon <bcannon@gmail.com> | 2008-10-17 03:38:50 (GMT) |
---|---|---|
committer | Brett Cannon <bcannon@gmail.com> | 2008-10-17 03:38:50 (GMT) |
commit | da780432378e6298463889557ab43e0c156758cd (patch) | |
tree | dc622a9b62874851f90abc45524d3d2653cab9ba /Parser | |
parent | 9e9dcd6d4225faa6a8b19120f009e0253d16ab92 (diff) | |
download | cpython-da780432378e6298463889557ab43e0c156758cd.zip cpython-da780432378e6298463889557ab43e0c156758cd.tar.gz cpython-da780432378e6298463889557ab43e0c156758cd.tar.bz2 |
Latin-1 source code was not being properly decoded when passed through
compile(). This was due to left-over special-casing before UTF-8 became the
default source encoding.
Closes issue #3574. Thanks to Victor Stinner for help with the patch.
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/tokenizer.c | 4 | ||||
-rw-r--r-- | Parser/tokenizer.h | 4 |
2 files changed, 4 insertions, 4 deletions
```diff
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 4edf6d0..ce8129d 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -135,6 +135,7 @@ tok_new(void)
     tok->decoding_state = STATE_INIT;
     tok->decoding_erred = 0;
     tok->read_coding_spec = 0;
+    tok->enc = NULL;
     tok->encoding = NULL;
     tok->cont_line = 0;
 #ifndef PGEN
@@ -274,8 +275,7 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
     tok->read_coding_spec = 1;
     if (tok->encoding == NULL) {
         assert(tok->decoding_state == STATE_RAW);
-        if (strcmp(cs, "utf-8") == 0 ||
-            strcmp(cs, "iso-8859-1") == 0) {
+        if (strcmp(cs, "utf-8") == 0) {
             tok->encoding = cs;
         } else {
             r = set_readline(tok, cs);
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index c45dea1..df9cbc7 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -49,14 +49,14 @@ struct tok_state {
     enum decoding_state decoding_state;
     int decoding_erred; /* whether erred in decoding */
     int read_coding_spec; /* whether 'coding:...' has been read */
-    char *encoding;
+    char *encoding; /* Source encoding. */
     int cont_line; /* whether we are in a continuation line. */
     const char* line_start; /* pointer to start of current line */
 #ifndef PGEN
     PyObject *decoding_readline; /* codecs.open(...).readline */
     PyObject *decoding_buffer;
 #endif
-    const char* enc;
+    const char* enc; /* Encoding for the current str. */
     const char* str;
 };
```