summary refs log tree commit diff stats
path: root/Parser
diff options
context:
space:
mode:
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/tokenizer.c 82
1 files changed, 46 insertions, 36 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 62b1a91..cbbadfb 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -147,13 +147,15 @@ tok_new(void)
}
static char *
-new_string(const char *s, Py_ssize_t len)
+new_string(const char *s, Py_ssize_t len, struct tok_state *tok)
{
char* result = (char *)PyMem_MALLOC(len + 1);
- if (result != NULL) {
- memcpy(result, s, len);
- result[len] = '\0';
+ if (!result) {
+ tok->done = E_NOMEM;
+ return NULL;
}
+ memcpy(result, s, len);
+ result[len] = '\0';
return result;
}
@@ -174,7 +176,7 @@ decoding_feof(struct tok_state *tok)
static char *
decode_str(const char *str, int exec_input, struct tok_state *tok)
{
- return new_string(str, strlen(str));
+ return new_string(str, strlen(str), tok);
}
#else /* PGEN */
@@ -221,17 +223,18 @@ get_normal_name(char *s) /* for utf-8 and latin-1 */
/* Return the coding spec in S, or NULL if none is found. */
-static char *
-get_coding_spec(const char *s, Py_ssize_t size)
+static int
+get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *tok)
{
Py_ssize_t i;
+ *spec = NULL;
/* Coding spec must be in a comment, and that comment must be
* the only statement on the source code line. */
for (i = 0; i < size - 6; i++) {
if (s[i] == '#')
break;
if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
- return NULL;
+ return 1;
}
for (; i < size - 6; i++) { /* XXX inefficient search */
const char* t = s + i;
@@ -250,17 +253,21 @@ get_coding_spec(const char *s, Py_ssize_t size)
t++;
if (begin < t) {
- char* r = new_string(begin, t - begin);
+ char* r = new_string(begin, t - begin, tok);
+ if (!r)
+ return 0;
char* q = get_normal_name(r);
if (r != q) {
PyMem_FREE(r);
- r = new_string(q, strlen(q));
+ r = new_string(q, strlen(q), tok);
+ if (!r)
+ return 0;
}
- return r;
+ *spec = r;
}
}
}
- return NULL;
+ return 1;
}
/* Check whether the line contains a coding spec. If it does,
@@ -272,38 +279,39 @@ static int
check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
int set_readline(struct tok_state *, const char *))
{
- char * cs;
+ char *cs;
int r = 1;
if (tok->cont_line)
/* It's a continuation line, so it can't be a coding spec. */
return 1;
- cs = get_coding_spec(line, size);
- if (cs != NULL) {
- tok->read_coding_spec = 1;
- if (tok->encoding == NULL) {
- assert(tok->decoding_state == STATE_RAW);
- if (strcmp(cs, "utf-8") == 0) {
+ if (!get_coding_spec(line, &cs, size, tok))
+ return 0;
+ if (!cs)
+ return 1;
+ tok->read_coding_spec = 1;
+ if (tok->encoding == NULL) {
+ assert(tok->decoding_state == STATE_RAW);
+ if (strcmp(cs, "utf-8") == 0) {
+ tok->encoding = cs;
+ } else {
+ r = set_readline(tok, cs);
+ if (r) {
tok->encoding = cs;
- } else {
- r = set_readline(tok, cs);
- if (r) {
- tok->encoding = cs;
- tok->decoding_state = STATE_NORMAL;
- }
- else {
- PyErr_Format(PyExc_SyntaxError,
- "encoding problem: %s", cs);
- PyMem_FREE(cs);
- }
+ tok->decoding_state = STATE_NORMAL;
}
- } else { /* then, compare cs with BOM */
- r = (strcmp(tok->encoding, cs) == 0);
- if (!r)
+ else {
PyErr_Format(PyExc_SyntaxError,
- "encoding problem: %s with BOM", cs);
- PyMem_FREE(cs);
+ "encoding problem: %s", cs);
+ PyMem_FREE(cs);
+ }
}
+ } else { /* then, compare cs with BOM */
+ r = (strcmp(tok->encoding, cs) == 0);
+ if (!r)
+ PyErr_Format(PyExc_SyntaxError,
+ "encoding problem: %s with BOM", cs);
+ PyMem_FREE(cs);
}
return r;
}
@@ -367,7 +375,9 @@ check_bom(int get_char(struct tok_state *),
}
if (tok->encoding != NULL)
PyMem_FREE(tok->encoding);
- tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */
+ tok->encoding = new_string("utf-8", 5, tok);
+ if (!tok->encoding)
+ return 0;
/* No need to set_readline: input is already utf-8 */
return 1;
}