diff options
-rw-r--r--  Doc/library/functions.rst |  13
-rw-r--r--  Lib/test/test_codeop.py   |   4
-rw-r--r--  Lib/test/test_compile.py  |  13
-rw-r--r--  Lib/test/test_parser.py   |   6
-rw-r--r--  Misc/NEWS                 |   3
-rw-r--r--  Parser/parsetok.c         |   2
-rw-r--r--  Parser/tokenizer.c        |  82
-rw-r--r--  Parser/tokenizer.h        |   3
8 files changed, 96 insertions(+), 30 deletions(-)
diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index ff7c4b6..ddece5a 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -173,11 +173,10 @@ available.  They are listed here in alphabetical order.     .. note:: -      When compiling a string with multi-line statements, line endings must be -      represented by a single newline character (``'\n'``), and the input must -      be terminated by at least one newline character.  If line endings are -      represented by ``'\r\n'``, use :meth:`str.replace` to change them into -      ``'\n'``. +      When compiling a string with multi-line statements in ``'single'`` or +      ``'eval'`` mode, input must be terminated by at least one newline +      character.  This is to facilitate detection of incomplete and complete +      statements in the :mod:`code` module.     .. versionchanged:: 2.3        The *flags* and *dont_inherit* arguments were added. @@ -185,6 +184,10 @@ available.  They are listed here in alphabetical order.     .. versionchanged:: 2.6        Support for compiling AST objects. +   .. versionchanged:: 2.7 +      Allowed use of Windows and Mac newlines.  Also input in ``'exec'`` mode +      does not have to end in a newline anymore. +  .. 
function:: complex([real[, imag]]) diff --git a/Lib/test/test_codeop.py b/Lib/test/test_codeop.py index c8fa990..da3b83f 100644 --- a/Lib/test/test_codeop.py +++ b/Lib/test/test_codeop.py @@ -295,10 +295,6 @@ class CodeopTests(unittest.TestCase):          self.assertNotEquals(compile_command("a = 1\n", "abc").co_filename,                               compile("a = 1\n", "def", 'single').co_filename) -    def test_no_universal_newlines(self): -        code = compile_command("'\rfoo\r'", symbol='eval') -        self.assertEqual(eval(code), '\rfoo\r') -  def test_main():      run_unittest(CodeopTests) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 75c983a..28b7332 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -5,6 +5,19 @@ from test import test_support  class TestSpecifics(unittest.TestCase): +    def test_no_ending_newline(self): +        compile("hi", "<test>", "exec") +        compile("hi\r", "<test>", "exec") + +    def test_empty(self): +        compile("", "<test>", "exec") + +    def test_other_newlines(self): +        compile("\r\n", "<test>", "exec") +        compile("\r", "<test>", "exec") +        compile("hi\r\nstuff\r\ndef f():\n    pass\r", "<test>", "exec") +        compile("this_is\rreally_old_mac\rdef f():\n    pass", "<test>", "exec") +      def test_debug_assignment(self):          # catch assignments to __debug__          self.assertRaises(SyntaxError, compile, '__debug__ = 1', '?', 'single') diff --git a/Lib/test/test_parser.py b/Lib/test/test_parser.py index 7d059c2..ad5c5be 100644 --- a/Lib/test/test_parser.py +++ b/Lib/test/test_parser.py @@ -243,9 +243,9 @@ class RoundtripLegalSyntaxTestCase(unittest.TestCase):              (14, '+', 2, 13),              (2, '1', 2, 15),              (4, '', 2, 16), -            (6, '', 2, -1), -            (4, '', 2, -1), -            (0, '', 2, -1)], +            (6, '', 3, -1), +            (4, '', 3, -1), +            (0, '', 3, -1)],                        
   terminals) @@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 1  Core and Builtins  ----------------- +- Fix several issues with compile().  The input can now contain Windows and Mac +  newlines and is no longer required to end in a newline. +  - Remove length limitation when constructing a complex number from a    unicode string. diff --git a/Parser/parsetok.c b/Parser/parsetok.c index 3994add..7f2fb36 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -51,7 +51,7 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,  	initerr(err_ret, filename); -	if ((tok = PyTokenizer_FromString(s)) == NULL) { +	if ((tok = PyTokenizer_FromString(s, start == file_input)) == NULL) {  		err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;  		return NULL;  	} diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index a49e9f0..1808c41 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -105,6 +105,7 @@ tok_new(void)  	tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;  	tok->done = E_OK;  	tok->fp = NULL; +	tok->input = NULL;  	tok->tabsize = TABSIZE;  	tok->indent = 0;  	tok->indstack[0] = 0; @@ -130,6 +131,17 @@ tok_new(void)  	return tok;  } +static char * +new_string(const char *s, Py_ssize_t len) +{ +	char* result = (char *)PyMem_MALLOC(len + 1); +	if (result != NULL) { +		memcpy(result, s, len); +		result[len] = '\0'; +	} +	return result; +} +  #ifdef PGEN  static char * @@ -144,10 +156,10 @@ decoding_feof(struct tok_state *tok)  	return feof(tok->fp);  } -static const char * -decode_str(const char *str, struct tok_state *tok) +static char * +decode_str(const char *str, int exec_input, struct tok_state *tok)  { -	return str; +	return new_string(str, strlen(str));  }  #else /* PGEN */ @@ -162,16 +174,6 @@ error_ret(struct tok_state *tok) /* XXX */  	return NULL;		/* as if it were EOF */  } -static char * -new_string(const char *s, Py_ssize_t len) -{ -	char* result = (char *)PyMem_MALLOC(len + 1); -	if (result != NULL) { -		
memcpy(result, s, len); -		result[len] = '\0'; -	} -	return result; -}  static char *  get_normal_name(char *s)	/* for utf-8 and latin-1 */ @@ -586,17 +588,63 @@ translate_into_utf8(const char* str, const char* enc) {  }  #endif + +static char * +translate_newlines(const char *s, int exec_input, struct tok_state *tok) { +	int skip_next_lf = 0, length = strlen(s), final_length; +	char *buf, *current; +	char c; +	buf = PyMem_MALLOC(length + 2); +	if (buf == NULL) { +		tok->done = E_NOMEM; +		return NULL; +	} +	for (current = buf; (c = *s++);) { +		if (skip_next_lf) { +			skip_next_lf = 0; +			if (c == '\n') { +				c = *s; +				s++; +				if (!c) +					break; +			} +		} +		if (c == '\r') { +			skip_next_lf = 1; +			c = '\n'; +		} +		*current = c; +		current++; +	} +	/* If this is exec input, add a newline to the end of the file if +	   there isn't one already. */ +	if (exec_input && *current != '\n') { +		*current = '\n'; +		current++; +	} +	*current = '\0'; +	final_length = current - buf; +	if (final_length < length && final_length) +		/* should never fail */ +		buf = PyMem_REALLOC(buf, final_length + 1); +	return buf; +} +  /* Decode a byte string STR for use as the buffer of TOK.     Look for encoding declarations inside STR, and record them     inside TOK.  
*/  static const char * -decode_str(const char *str, struct tok_state *tok) +decode_str(const char *input, int single, struct tok_state *tok)  {  	PyObject* utf8 = NULL; +	const char *str;  	const char *s;  	const char *newl[2] = {NULL, NULL};  	int lineno = 0; +	tok->input = str = translate_newlines(input, single, tok); +	if (str == NULL) +		return NULL;  	tok->enc = NULL;  	tok->str = str;  	if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok)) @@ -651,12 +699,12 @@ decode_str(const char *str, struct tok_state *tok)  /* Set up tokenizer for string */  struct tok_state * -PyTokenizer_FromString(const char *str) +PyTokenizer_FromString(const char *str, int exec_input)  {  	struct tok_state *tok = tok_new();  	if (tok == NULL)  		return NULL; -	str = (char *)decode_str(str, tok); +	str = (char *)decode_str(str, exec_input, tok);  	if (str == NULL) {  		PyTokenizer_Free(tok);  		return NULL; @@ -702,6 +750,8 @@ PyTokenizer_Free(struct tok_state *tok)  #endif  	if (tok->fp != NULL && tok->buf != NULL)  		PyMem_FREE(tok->buf); +	if (tok->input) +		PyMem_FREE((char *)tok->input);  	PyMem_FREE(tok);  } diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index e10972c..79c9e28 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -52,9 +52,10 @@ struct tok_state {  #endif  	const char* enc;  	const char* str; +	const char* input; /* Tokenizer's newline translated copy of the string. */  }; -extern struct tok_state *PyTokenizer_FromString(const char *); +extern struct tok_state *PyTokenizer_FromString(const char *, int);  extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);  extern void PyTokenizer_Free(struct tok_state *);  extern int PyTokenizer_Get(struct tok_state *, char **, char **);  | 
