Correction for issue1134: all source files with a coding spec, except latin-1

and utf-8, crashed when parsing a multiline string, or a line longer than 512 columns.
author: Amaury Forgeot d'Arc <amauryfa@gmail.com> 2007-11-15 23:19:43 (GMT)
committer: Amaury Forgeot d'Arc <amauryfa@gmail.com> 2007-11-15 23:19:43 (GMT)
commit: 65f9aced6ebecf418a91d273e314e40bd153e113 (patch)
tree: 2acf8d49fdd15697e8de8c50cba3e61678a15fa7
parent: c05f42a8a76800fc5a6a8a019710351dfd58dec2 (diff)
download: cpython-65f9aced6ebecf418a91d273e314e40bd153e113.zip
cpython-65f9aced6ebecf418a91d273e314e40bd153e113.tar.gz
cpython-65f9aced6ebecf418a91d273e314e40bd153e113.tar.bz2
2 files changed, 50 insertions, 15 deletions
diff --git a/Lib/test/test_coding.py b/Lib/test/test_coding.py
index 4d4b3f9..0ff1bdf 100644
--- a/Lib/test/test_coding.py
+++ b/Lib/test/test_coding.py
@@ -1,6 +1,6 @@
 
 import test.test_support, unittest
-import os
+import os, sys
 
 class CodingTest(unittest.TestCase):
     def test_bad_coding(self):
@@ -26,6 +26,26 @@ class CodingTest(unittest.TestCase):
         exec('# coding: cp949\na = 5\n', d)
         self.assertEqual(d['a'], 5)
 
+    def test_file_parse(self):
+        # issue1134: all encodings outside latin-1 and utf-8 fail on
+        # multiline strings and long lines (>512 columns)
+        sys.path.insert(0, ".")
+        filename = test.test_support.TESTFN+".py"
+        f = open(filename, "w")
+        try:
+            f.write("# -*- coding: cp1252 -*-\n")
+            f.write("'''A short string\n")
+            f.write("'''\n")
+            f.write("'A very long string %s'\n" % ("X" * 1000))
+            f.close()
+
+            __import__(test.test_support.TESTFN)
+        finally:
+            f.close()
+            os.remove(test.test_support.TESTFN+".py")
+            os.remove(test.test_support.TESTFN+".pyc")
+            sys.path.pop(0)
+
 def test_main():
     test.test_support.run_unittest(CodingTest)
 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 099f6df..710c566 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -369,46 +369,61 @@ check_bom(int get_char(struct tok_state *),
 static char *
 fp_readl(char *s, int size, struct tok_state *tok)
 {
-	PyObject* bufobj = tok->decoding_buffer;
+	PyObject* bufobj;
 	const char *buf;
 	Py_ssize_t buflen;
-	int allocated = 0;
 
 	/* Ask for one less byte so we can terminate it */
 	assert(size > 0);
 	size--;
 
-	if (bufobj == NULL) {
+	if (tok->decoding_buffer) {
+		bufobj = tok->decoding_buffer;
+		Py_INCREF(bufobj);
+	}
+	else
+	{
 		bufobj = PyObject_CallObject(tok->decoding_readline, NULL);
 		if (bufobj == NULL)
 			goto error;
-		allocated = 1;
 	}
-	buf = PyUnicode_AsStringAndSize(bufobj, &buflen);
-	if (buf == NULL) {
-		goto error;
+	if (PyUnicode_CheckExact(bufobj))
+	{
+		buf = PyUnicode_AsStringAndSize(bufobj, &buflen);
+		if (buf == NULL) {
+			goto error;
+		}
 	}
+	else
+	{
+		buf = PyBytes_AsString(bufobj);
+		if (buf == NULL) {
+			goto error;
+		}
+		buflen = PyBytes_GET_SIZE(bufobj);
+	}
+
+	Py_XDECREF(tok->decoding_buffer);
 	if (buflen > size) {
-		Py_XDECREF(tok->decoding_buffer);
+		/* Too many chars, the rest goes into tok->decoding_buffer */
 		tok->decoding_buffer = PyBytes_FromStringAndSize(buf+size,
 								 buflen-size);
 		if (tok->decoding_buffer == NULL)
 			goto error;
 		buflen = size;
 	}
+	else
+		tok->decoding_buffer = NULL;
+
 	memcpy(s, buf, buflen);
 	s[buflen] = '\0';
 	if (buflen == 0) /* EOF */
 		s = NULL;
-	if (allocated) {
-		Py_DECREF(bufobj);
-	}
+	Py_DECREF(bufobj);
 	return s;
 
 error:
-	if (allocated) {
-		Py_XDECREF(bufobj);
-	}
+	Py_XDECREF(bufobj);
 	return error_ret(tok);
 }
author	Amaury Forgeot d'Arc <amauryfa@gmail.com>	2007-11-15 23:19:43 (GMT)
committer	Amaury Forgeot d'Arc <amauryfa@gmail.com>	2007-11-15 23:19:43 (GMT)
commit	65f9aced6ebecf418a91d273e314e40bd153e113 (patch)
tree	2acf8d49fdd15697e8de8c50cba3e61678a15fa7
parent	c05f42a8a76800fc5a6a8a019710351dfd58dec2 (diff)
download	cpython-65f9aced6ebecf418a91d273e314e40bd153e113.zip cpython-65f9aced6ebecf418a91d273e314e40bd153e113.tar.gz cpython-65f9aced6ebecf418a91d273e314e40bd153e113.tar.bz2