author     Amaury Forgeot d'Arc <amauryfa@gmail.com>   2007-11-15 23:19:43 (GMT)
committer  Amaury Forgeot d'Arc <amauryfa@gmail.com>   2007-11-15 23:19:43 (GMT)
commit     65f9aced6ebecf418a91d273e314e40bd153e113 (patch)
tree       2acf8d49fdd15697e8de8c50cba3e61678a15fa7
parent     c05f42a8a76800fc5a6a8a019710351dfd58dec2 (diff)
Correction for issue1134: all source files with a coding spec other than latin-1
or utf-8 crashed when parsing a multiline string, or a line longer than 512 columns.
-rw-r--r--  Lib/test/test_coding.py  22
-rw-r--r--  Parser/tokenizer.c       43
2 files changed, 50 insertions, 15 deletions
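Before the patch itself, here is a hedged, stand-alone sketch of the failure mode the message describes, modelled on the new test added below; the file name issue1134_repro.py and the choice of cp1252 are illustrative, not part of the commit.

    # Illustrative reproduction of the crash described above: a module with a
    # non-latin-1/utf-8 coding declaration, a multiline string, and a >512-column line.
    import os, sys

    filename = "issue1134_repro.py"           # hypothetical module name
    f = open(filename, "w")
    f.write("# -*- coding: cp1252 -*-\n")     # any codec other than latin-1 / utf-8
    f.write("'''A short string\n")            # string literal spanning two lines
    f.write("'''\n")
    f.write("'A very long string %s'\n" % ("X" * 1000))  # line longer than 512 columns
    f.close()

    sys.path.insert(0, ".")
    try:
        __import__("issue1134_repro")         # tokenizing this module crashed before the fix
    finally:
        sys.path.pop(0)
        os.remove(filename)

Importing the module forces the tokenizer to decode the file through its coding-spec machinery, which is where fp_readl in Parser/tokenizer.c comes in.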
diff --git a/Lib/test/test_coding.py b/Lib/test/test_coding.py
index 4d4b3f9..0ff1bdf 100644
--- a/Lib/test/test_coding.py
+++ b/Lib/test/test_coding.py
@@ -1,6 +1,6 @@
 import test.test_support, unittest
-import os
+import os, sys
 
 class CodingTest(unittest.TestCase):
     def test_bad_coding(self):
@@ -26,6 +26,26 @@ class CodingTest(unittest.TestCase):
         exec('# coding: cp949\na = 5\n', d)
         self.assertEqual(d['a'], 5)
 
+    def test_file_parse(self):
+        # issue1134: all encodings outside latin-1 and utf-8 fail on
+        # multiline strings and long lines (>512 columns)
+        sys.path.insert(0, ".")
+        filename = test.test_support.TESTFN+".py"
+        f = open(filename, "w")
+        try:
+            f.write("# -*- coding: cp1252 -*-\n")
+            f.write("'''A short string\n")
+            f.write("'''\n")
+            f.write("'A very long string %s'\n" % ("X" * 1000))
+            f.close()
+
+            __import__(test.test_support.TESTFN)
+        finally:
+            f.close()
+            os.remove(test.test_support.TESTFN+".py")
+            os.remove(test.test_support.TESTFN+".pyc")
+            sys.path.pop(0)
+
 def test_main():
     test.test_support.run_unittest(CodingTest)
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 099f6df..710c566 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -369,46 +369,61 @@ check_bom(int get_char(struct tok_state *),
 static char *
 fp_readl(char *s, int size, struct tok_state *tok)
 {
-        PyObject* bufobj = tok->decoding_buffer;
+        PyObject* bufobj;
         const char *buf;
         Py_ssize_t buflen;
-        int allocated = 0;
 
         /* Ask for one less byte so we can terminate it */
         assert(size > 0);
         size--;
 
-        if (bufobj == NULL) {
+        if (tok->decoding_buffer) {
+                bufobj = tok->decoding_buffer;
+                Py_INCREF(bufobj);
+        }
+        else
+        {
                 bufobj = PyObject_CallObject(tok->decoding_readline, NULL);
                 if (bufobj == NULL)
                         goto error;
-                allocated = 1;
         }
-        buf = PyUnicode_AsStringAndSize(bufobj, &buflen);
-        if (buf == NULL) {
-                goto error;
+        if (PyUnicode_CheckExact(bufobj))
+        {
+                buf = PyUnicode_AsStringAndSize(bufobj, &buflen);
+                if (buf == NULL) {
+                        goto error;
+                }
         }
+        else
+        {
+                buf = PyBytes_AsString(bufobj);
+                if (buf == NULL) {
+                        goto error;
+                }
+                buflen = PyBytes_GET_SIZE(bufobj);
+        }
+
+        Py_XDECREF(tok->decoding_buffer);
         if (buflen > size) {
-                Py_XDECREF(tok->decoding_buffer);
+                /* Too many chars, the rest goes into tok->decoding_buffer */
                 tok->decoding_buffer = PyBytes_FromStringAndSize(buf+size,
                                                                  buflen-size);
                 if (tok->decoding_buffer == NULL)
                         goto error;
                 buflen = size;
         }
+        else
+                tok->decoding_buffer = NULL;
+
         memcpy(s, buf, buflen);
         s[buflen] = '\0';
         if (buflen == 0) /* EOF */
                 s = NULL;
-        if (allocated) {
-                Py_DECREF(bufobj);
-        }
+        Py_DECREF(bufobj);
         return s;
 
 error:
-        if (allocated) {
-                Py_XDECREF(bufobj);
-        }
+        Py_XDECREF(bufobj);
         return error_ret(tok);
 }
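The decoded line returned by tok->decoding_readline can be longer than the chunk the tokenizer asks fp_readl for (hence the 512-column limit mentioned in the commit message), so the rewritten function copies at most size bytes into the caller's buffer and parks the remainder in tok->decoding_buffer for the next call. What follows is only a rough Python sketch of that leftover-buffer pattern under those assumptions; ChunkedLineReader and read_chunk are invented names, not CPython APIs.

    # Sketch of the leftover-buffer pattern used by the new fp_readl (illustrative only).
    class ChunkedLineReader:
        def __init__(self, readline, size):
            self.readline = readline   # callable returning one decoded line, '' at EOF
            self.size = size           # largest chunk a caller may receive
            self.leftover = None       # plays the role of tok->decoding_buffer

        def read_chunk(self):
            # Prefer data left over from a previous oversized line.
            if self.leftover is not None:
                buf, self.leftover = self.leftover, None
            else:
                buf = self.readline()
            if len(buf) > self.size:
                # Too long: hand out the first `size` characters now,
                # keep the rest for the next call.
                self.leftover = buf[self.size:]
                buf = buf[:self.size]
            return buf or None         # None signals EOF, like s == NULL above

A caller simply loops on read_chunk() until it returns None, much as the tokenizer keeps calling fp_readl until it reports EOF. Keeping the leftover in a separate object is also why the C function now distinguishes a unicode result fresh from readline from the bytes object it stored itself, via the PyUnicode_CheckExact branch above.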