summary refs log tree commit diff stats
path: root/Parser
diff options
context:
space:
mode:
author Benjamin Peterson <benjamin@python.org> 2009-11-12 23:39:44 (GMT)
committer Benjamin Peterson <benjamin@python.org> 2009-11-12 23:39:44 (GMT)
commit e36199b49df77c96bad687c6681d8e54c5053b84 (patch)
tree 81b9aaa74f92b9de459ede5dc6ed2ca4ec508998 /Parser
parent c4cd6d3765d054ac1b23f0f9765a2eaf3f1e7be7 (diff)
download cpython-e36199b49df77c96bad687c6681d8e54c5053b84.zip
cpython-e36199b49df77c96bad687c6681d8e54c5053b84.tar.gz
cpython-e36199b49df77c96bad687c6681d8e54c5053b84.tar.bz2
fix several compile() issues by translating newlines in the tokenizer
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/parsetok.c 2
-rw-r--r-- Parser/tokenizer.c 82
-rw-r--r-- Parser/tokenizer.h 3
3 files changed, 69 insertions, 18 deletions
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 3994add..7f2fb36 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -51,7 +51,7 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
initerr(err_ret, filename);
- if ((tok = PyTokenizer_FromString(s)) == NULL) {
+ if ((tok = PyTokenizer_FromString(s, start == file_input)) == NULL) {
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
return NULL;
}
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index a49e9f0..1808c41 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -105,6 +105,7 @@ tok_new(void)
tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
tok->done = E_OK;
tok->fp = NULL;
+ tok->input = NULL;
tok->tabsize = TABSIZE;
tok->indent = 0;
tok->indstack[0] = 0;
@@ -130,6 +131,17 @@ tok_new(void)
return tok;
}
+static char *
+new_string(const char *s, Py_ssize_t len)
+{
+ char* result = (char *)PyMem_MALLOC(len + 1);
+ if (result != NULL) {
+ memcpy(result, s, len);
+ result[len] = '\0';
+ }
+ return result;
+}
+
#ifdef PGEN
static char *
@@ -144,10 +156,10 @@ decoding_feof(struct tok_state *tok)
return feof(tok->fp);
}
-static const char *
-decode_str(const char *str, struct tok_state *tok)
+static char *
+decode_str(const char *str, int exec_input, struct tok_state *tok)
{
- return str;
+ return new_string(str, strlen(str));
}
#else /* PGEN */
@@ -162,16 +174,6 @@ error_ret(struct tok_state *tok) /* XXX */
return NULL; /* as if it were EOF */
}
-static char *
-new_string(const char *s, Py_ssize_t len)
-{
- char* result = (char *)PyMem_MALLOC(len + 1);
- if (result != NULL) {
- memcpy(result, s, len);
- result[len] = '\0';
- }
- return result;
-}
static char *
get_normal_name(char *s) /* for utf-8 and latin-1 */
@@ -586,17 +588,63 @@ translate_into_utf8(const char* str, const char* enc) {
}
#endif
+
+static char *
+translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
+ int skip_next_lf = 0, length = strlen(s), final_length;
+ char *buf, *current;
+ char c;
+ buf = PyMem_MALLOC(length + 2);
+ if (buf == NULL) {
+ tok->done = E_NOMEM;
+ return NULL;
+ }
+ for (current = buf; (c = *s++);) {
+ if (skip_next_lf) {
+ skip_next_lf = 0;
+ if (c == '\n') {
+ c = *s;
+ s++;
+ if (!c)
+ break;
+ }
+ }
+ if (c == '\r') {
+ skip_next_lf = 1;
+ c = '\n';
+ }
+ *current = c;
+ current++;
+ }
+ /* If this is exec input, add a newline to the end of the file if
+ there isn't one already. */
+ if (exec_input && *current != '\n') {
+ *current = '\n';
+ current++;
+ }
+ *current = '\0';
+ final_length = current - buf;
+ if (final_length < length && final_length)
+ /* should never fail */
+ buf = PyMem_REALLOC(buf, final_length + 1);
+ return buf;
+}
+
/* Decode a byte string STR for use as the buffer of TOK.
Look for encoding declarations inside STR, and record them
inside TOK. */
static const char *
-decode_str(const char *str, struct tok_state *tok)
+decode_str(const char *input, int single, struct tok_state *tok)
{
PyObject* utf8 = NULL;
+ const char *str;
const char *s;
const char *newl[2] = {NULL, NULL};
int lineno = 0;
+ tok->input = str = translate_newlines(input, single, tok);
+ if (str == NULL)
+ return NULL;
tok->enc = NULL;
tok->str = str;
if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
@@ -651,12 +699,12 @@ decode_str(const char *str, struct tok_state *tok)
/* Set up tokenizer for string */
struct tok_state *
-PyTokenizer_FromString(const char *str)
+PyTokenizer_FromString(const char *str, int exec_input)
{
struct tok_state *tok = tok_new();
if (tok == NULL)
return NULL;
- str = (char *)decode_str(str, tok);
+ str = (char *)decode_str(str, exec_input, tok);
if (str == NULL) {
PyTokenizer_Free(tok);
return NULL;
@@ -702,6 +750,8 @@ PyTokenizer_Free(struct tok_state *tok)
#endif
if (tok->fp != NULL && tok->buf != NULL)
PyMem_FREE(tok->buf);
+ if (tok->input)
+ PyMem_FREE((char *)tok->input);
PyMem_FREE(tok);
}
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index e10972c..79c9e28 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -52,9 +52,10 @@ struct tok_state {
#endif
const char* enc;
const char* str;
+ const char* input; /* Tokenizer's newline translated copy of the string. */
};
-extern struct tok_state *PyTokenizer_FromString(const char *);
+extern struct tok_state *PyTokenizer_FromString(const char *, int);
extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
extern void PyTokenizer_Free(struct tok_state *);
extern int PyTokenizer_Get(struct tok_state *, char **, char **);