From aeaa592516c8ea8a0a6318f69635baa817ced82f Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Fri, 13 Nov 2009 00:17:59 +0000 Subject: Merged revisions 76230 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r76230 | benjamin.peterson | 2009-11-12 17:39:44 -0600 (Thu, 12 Nov 2009) | 2 lines fix several compile() issues by translating newlines in the tokenizer ........ --- Doc/library/functions.rst | 14 +++++--- Lib/test/test_codeop.py | 4 --- Lib/test/test_compile.py | 13 +++++++ Lib/test/test_parser.py | 6 ++-- Lib/test/test_pep263.py | 2 +- Parser/parsetok.c | 5 +-- Parser/tokenizer.c | 92 +++++++++++++++++++++++++++++++++++++---------- Parser/tokenizer.h | 5 +-- 8 files changed, 106 insertions(+), 35 deletions(-) diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 6008e97..7eb838a 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -176,11 +176,15 @@ are always available. They are listed here in alphabetical order. .. note:: - When compiling a string with multi-line statements, line endings must be - represented by a single newline character (``'\n'``), and the input must - be terminated by at least one newline character. If line endings are - represented by ``'\r\n'``, use :meth:`str.replace` to change them into - ``'\n'``. + When compiling a string with multi-line statements in ``'single'`` or + ``'eval'`` mode, input must be terminated by at least one newline + character. This is to facilitate detection of incomplete and complete + statements in the :mod:`code` module. + + + .. versionchanged:: 3.2 + Allowed use of Windows and Mac newlines. Also input in ``'exec'`` mode + does not have to end in a newline anymore. .. 
function:: complex([real[, imag]]) diff --git a/Lib/test/test_codeop.py b/Lib/test/test_codeop.py index ad44121..80a73f3 100644 --- a/Lib/test/test_codeop.py +++ b/Lib/test/test_codeop.py @@ -295,10 +295,6 @@ class CodeopTests(unittest.TestCase): self.assertNotEquals(compile_command("a = 1\n", "abc").co_filename, compile("a = 1\n", "def", 'single').co_filename) - def test_no_universal_newlines(self): - code = compile_command("'\rfoo\r'", symbol='eval') - self.assertEqual(eval(code), '\rfoo\r') - def test_main(): run_unittest(CodeopTests) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 32dd656..563a7ee 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -5,6 +5,19 @@ from test import support class TestSpecifics(unittest.TestCase): + def test_no_ending_newline(self): + compile("hi", "", "exec") + compile("hi\r", "", "exec") + + def test_empty(self): + compile("", "", "exec") + + def test_other_newlines(self): + compile("\r\n", "", "exec") + compile("\r", "", "exec") + compile("hi\r\nstuff\r\ndef f():\n pass\r", "", "exec") + compile("this_is\rreally_old_mac\rdef f():\n pass", "", "exec") + def test_debug_assignment(self): # catch assignments to __debug__ self.assertRaises(SyntaxError, compile, '__debug__ = 1', '?', 'single') diff --git a/Lib/test/test_parser.py b/Lib/test/test_parser.py index 0ac49da..d8df6a8 100644 --- a/Lib/test/test_parser.py +++ b/Lib/test/test_parser.py @@ -237,9 +237,9 @@ class RoundtripLegalSyntaxTestCase(unittest.TestCase): (14, '+', 2, 13), (2, '1', 2, 15), (4, '', 2, 16), - (6, '', 2, -1), - (4, '', 2, -1), - (0, '', 2, -1)], + (6, '', 3, -1), + (4, '', 3, -1), + (0, '', 3, -1)], terminals) def test_extended_unpacking(self): diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py index 587b2fc..8c1fbe7 100644 --- a/Lib/test/test_pep263.py +++ b/Lib/test/test_pep263.py @@ -26,7 +26,7 @@ class PEP263Test(unittest.TestCase): try: compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec") 
except SyntaxError as v: - self.assertEquals(v.text, "print '\u5e74'") + self.assertEquals(v.text, "print '\u5e74'\n") else: self.fail() diff --git a/Parser/parsetok.c b/Parser/parsetok.c index 90a90a7..ff4ca70 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -46,13 +46,14 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, perrdetail *err_ret, int *flags) { struct tok_state *tok; + int exec_input = start == file_input; initerr(err_ret, filename); if (*flags & PyPARSE_IGNORE_COOKIE) - tok = PyTokenizer_FromUTF8(s); + tok = PyTokenizer_FromUTF8(s, exec_input); else - tok = PyTokenizer_FromString(s); + tok = PyTokenizer_FromString(s, exec_input); if (tok == NULL) { err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM; return NULL; diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index daf18dc..e637cb3 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -119,6 +119,7 @@ tok_new(void) tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL; tok->done = E_OK; tok->fp = NULL; + tok->input = NULL; tok->tabsize = TABSIZE; tok->indent = 0; tok->indstack[0] = 0; @@ -145,6 +146,17 @@ tok_new(void) return tok; } +static char * +new_string(const char *s, Py_ssize_t len) +{ + char* result = (char *)PyMem_MALLOC(len + 1); + if (result != NULL) { + memcpy(result, s, len); + result[len] = '\0'; + } + return result; +} + #ifdef PGEN static char * @@ -159,10 +171,10 @@ decoding_feof(struct tok_state *tok) return feof(tok->fp); } -static const char * -decode_str(const char *str, struct tok_state *tok) +static char * +decode_str(const char *str, int exec_input, struct tok_state *tok) { - return str; + return new_string(str, strlen(str)); } #else /* PGEN */ @@ -177,16 +189,6 @@ error_ret(struct tok_state *tok) /* XXX */ return NULL; /* as if it were EOF */ } -static char * -new_string(const char *s, Py_ssize_t len) -{ - char* result = (char *)PyMem_MALLOC(len + 1); - if (result != NULL) { - memcpy(result, s, len); - result[len] = 
'\0'; - } - return result; -} static char * get_normal_name(char *s) /* for utf-8 and latin-1 */ @@ -635,17 +637,63 @@ translate_into_utf8(const char* str, const char* enc) { return utf8; } +/* Return a new PyMem_MALLOC'ed copy of S in which every "\r\n" and lone "\r" is replaced by "\n"; for exec input a final "\n" is appended when missing. On allocation failure set tok->done to E_NOMEM and return NULL. */ +static char * +translate_newlines(const char *s, int exec_input, struct tok_state *tok) { + int skip_next_lf = 0, length = strlen(s), final_length; + char *buf, *current; + char c; + buf = PyMem_MALLOC(length + 2); + if (buf == NULL) { + tok->done = E_NOMEM; + return NULL; + } + for (current = buf; (c = *s++);) { + if (skip_next_lf) { + skip_next_lf = 0; + if (c == '\n') { + c = *s; + s++; + if (!c) + break; + } + } + if (c == '\r') { + skip_next_lf = 1; + c = '\n'; + } + *current = c; + current++; + } + /* If this is exec input, add a newline to the end of the string if + there isn't one already. Test the last byte written (current[-1]): *current has not been stored yet. */ + if (exec_input && (current == buf || current[-1] != '\n')) { + *current = '\n'; + current++; + } + *current = '\0'; + final_length = current - buf; + if (final_length < length && final_length) + /* should never fail */ + buf = PyMem_REALLOC(buf, final_length + 1); + return buf; +} + /* Decode a byte string STR for use as the buffer of TOK. Look for encoding declarations inside STR, and record them inside TOK. 
*/ static const char * -decode_str(const char *str, struct tok_state *tok) +decode_str(const char *input, int single, struct tok_state *tok) { PyObject* utf8 = NULL; + const char *str; const char *s; const char *newl[2] = {NULL, NULL}; int lineno = 0; + tok->input = str = translate_newlines(input, single, tok); + if (str == NULL) + return NULL; tok->enc = NULL; tok->str = str; if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok)) @@ -696,12 +744,12 @@ decode_str(const char *str, struct tok_state *tok) /* Set up tokenizer for string */ struct tok_state * -PyTokenizer_FromString(const char *str) +PyTokenizer_FromString(const char *str, int exec_input) { struct tok_state *tok = tok_new(); if (tok == NULL) return NULL; - str = (char *)decode_str(str, tok); + str = (char *)decode_str(str, exec_input, tok); if (str == NULL) { PyTokenizer_Free(tok); return NULL; @@ -713,11 +761,18 @@ PyTokenizer_FromString(const char *str) } struct tok_state * -PyTokenizer_FromUTF8(const char *str) +PyTokenizer_FromUTF8(const char *str, int exec_input) { struct tok_state *tok = tok_new(); if (tok == NULL) return NULL; +#ifndef PGEN + tok->input = str = translate_newlines(str, exec_input, tok); +#endif + if (str == NULL) { + PyTokenizer_Free(tok); + return NULL; + } tok->decoding_state = STATE_RAW; tok->read_coding_spec = 1; tok->enc = NULL; @@ -734,7 +789,6 @@ PyTokenizer_FromUTF8(const char *str) return tok; } - /* Set up tokenizer for file */ struct tok_state * @@ -780,6 +834,8 @@ PyTokenizer_Free(struct tok_state *tok) #endif if (tok->fp != NULL && tok->buf != NULL) PyMem_FREE(tok->buf); + if (tok->input) + PyMem_FREE((char *)tok->input); PyMem_FREE(tok); } diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index e3328f1..1a81e33 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -58,10 +58,11 @@ struct tok_state { #endif const char* enc; /* Encoding for the current str. */ const char* str; + const char* input; /* Tokenizer's newline translated copy of the string. 
*/ }; -extern struct tok_state *PyTokenizer_FromString(const char *); -extern struct tok_state *PyTokenizer_FromUTF8(const char *); +extern struct tok_state *PyTokenizer_FromString(const char *, int); +extern struct tok_state *PyTokenizer_FromUTF8(const char *, int); extern struct tok_state *PyTokenizer_FromFile(FILE *, char*, char *, char *); extern void PyTokenizer_Free(struct tok_state *); -- cgit v0.12