diff options
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/tokenizer.c | 82 |
1 files changed, 46 insertions, 36 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 62b1a91..cbbadfb 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -147,13 +147,15 @@ tok_new(void) } static char * -new_string(const char *s, Py_ssize_t len) +new_string(const char *s, Py_ssize_t len, struct tok_state *tok) { char* result = (char *)PyMem_MALLOC(len + 1); - if (result != NULL) { - memcpy(result, s, len); - result[len] = '\0'; + if (!result) { + tok->done = E_NOMEM; + return NULL; } + memcpy(result, s, len); + result[len] = '\0'; return result; } @@ -174,7 +176,7 @@ decoding_feof(struct tok_state *tok) static char * decode_str(const char *str, int exec_input, struct tok_state *tok) { - return new_string(str, strlen(str)); + return new_string(str, strlen(str), tok); } #else /* PGEN */ @@ -221,17 +223,18 @@ get_normal_name(char *s) /* for utf-8 and latin-1 */ /* Return the coding spec in S, or NULL if none is found. */ -static char * -get_coding_spec(const char *s, Py_ssize_t size) +static int +get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *tok) { Py_ssize_t i; + *spec = NULL; /* Coding spec must be in a comment, and that comment must be * the only statement on the source code line. */ for (i = 0; i < size - 6; i++) { if (s[i] == '#') break; if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014') - return NULL; + return 1; } for (; i < size - 6; i++) { /* XXX inefficient search */ const char* t = s + i; @@ -250,17 +253,21 @@ get_coding_spec(const char *s, Py_ssize_t size) t++; if (begin < t) { - char* r = new_string(begin, t - begin); + char* r = new_string(begin, t - begin, tok); + if (!r) + return 0; char* q = get_normal_name(r); if (r != q) { PyMem_FREE(r); - r = new_string(q, strlen(q)); + r = new_string(q, strlen(q), tok); + if (!r) + return 0; } - return r; + *spec = r; } } } - return NULL; + return 1; } /* Check whether the line contains a coding spec. If it does, @@ -272,38 +279,39 @@ static int check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, int set_readline(struct tok_state *, const char *)) { - char * cs; + char *cs; int r = 1; if (tok->cont_line) /* It's a continuation line, so it can't be a coding spec. */ return 1; - cs = get_coding_spec(line, size); - if (cs != NULL) { - tok->read_coding_spec = 1; - if (tok->encoding == NULL) { - assert(tok->decoding_state == STATE_RAW); - if (strcmp(cs, "utf-8") == 0) { + if (!get_coding_spec(line, &cs, size, tok)) + return 0; + if (!cs) + return 1; + tok->read_coding_spec = 1; + if (tok->encoding == NULL) { + assert(tok->decoding_state == STATE_RAW); + if (strcmp(cs, "utf-8") == 0) { + tok->encoding = cs; + } else { + r = set_readline(tok, cs); + if (r) { tok->encoding = cs; - } else { - r = set_readline(tok, cs); - if (r) { - tok->encoding = cs; - tok->decoding_state = STATE_NORMAL; - } - else { - PyErr_Format(PyExc_SyntaxError, - "encoding problem: %s", cs); - PyMem_FREE(cs); - } + tok->decoding_state = STATE_NORMAL; } - } else { /* then, compare cs with BOM */ - r = (strcmp(tok->encoding, cs) == 0); - if (!r) + else { PyErr_Format(PyExc_SyntaxError, - "encoding problem: %s with BOM", cs); - PyMem_FREE(cs); + "encoding problem: %s", cs); + PyMem_FREE(cs); + } } + } else { /* then, compare cs with BOM */ + r = (strcmp(tok->encoding, cs) == 0); + if (!r) + PyErr_Format(PyExc_SyntaxError, + "encoding problem: %s with BOM", cs); + PyMem_FREE(cs); } return r; } @@ -367,7 +375,9 @@ check_bom(int get_char(struct tok_state *), } if (tok->encoding != NULL) PyMem_FREE(tok->encoding); - tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */ + tok->encoding = new_string("utf-8", 5, tok); + if (!tok->encoding) + return 0; /* No need to set_readline: input is already utf-8 */ return 1; } |