summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Brett Cannon <bcannon@gmail.com>, 2009-08-13 19:27:12 (GMT)
committer: Brett Cannon <bcannon@gmail.com>, 2009-08-13 19:27:12 (GMT)
commit: 764465f31506601d7e3e8a56ecf576caed09bf15 (patch)
tree: 5819594bea0ab8f40f214a516a50b3fea6c2cf9a
parent: c4ad0345cf7789dc432ff57ab644db230d8baf1c (diff)
download: cpython-764465f31506601d7e3e8a56ecf576caed09bf15.zip
cpython-764465f31506601d7e3e8a56ecf576caed09bf15.tar.gz
cpython-764465f31506601d7e3e8a56ecf576caed09bf15.tar.bz2
Expat could crash when given malformed input because its tokenizing step
would never stop. Thanks to Ivan Krstić for the patch.
-rw-r--r-- Lib/test/test_pyexpat.py 21
-rw-r--r-- Misc/ACKS 3
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Modules/expat/xmltok_impl.c 2
4 files changed, 26 insertions, 3 deletions
diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index de5cded..649d5d2 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -559,6 +559,24 @@ class ChardataBufferTest(unittest.TestCase):
parser.Parse(xml2, 1)
self.assertEquals(self.n, 4)
+class MalformedInputText(unittest.TestCase):
+ def test1(self):
+ xml = "\0\r\n"
+ parser = expat.ParserCreate()
+ try:
+ parser.Parse(xml, True)
+ self.fail()
+ except expat.ExpatError as e:
+ self.assertEquals(str(e), 'no element found: line 2, column 1')
+
+ def test2(self):
+ xml = "<?xml version\xc2\x85='1.0'?>\r\n"
+ parser = expat.ParserCreate()
+ try:
+ parser.Parse(xml, True)
+ self.fail()
+ except expat.ExpatError as e:
+ self.assertEquals(str(e), 'XML declaration not well-formed: line 1, column 14')
def test_main():
run_unittest(SetAttributeTest,
@@ -569,7 +587,8 @@ def test_main():
HandlerExceptionTest,
PositionTest,
sf1296433Test,
- ChardataBufferTest)
+ ChardataBufferTest,
+ MalformedInputText)
if __name__ == "__main__":
test_main()
diff --git a/Misc/ACKS b/Misc/ACKS
index defa802..dc87dc2 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -183,6 +183,7 @@ Ismail Donmez
Dima Dorfman
Cesar Douady
Dean Draayer
+Fred L. Drake, Jr.
John DuBois
Paul Dubois
Graham Dumpleton
@@ -371,7 +372,6 @@ Irmen de Jong
Lucas de Jonge
John Jorgensen
Jens B. Jorgensen
-Fred L. Drake, Jr.
Andreas Jung
Tattoo Mabonzo K.
Bob Kahn
@@ -408,6 +408,7 @@ Holger Krekel
Michael Kremer
Fabian Kreutz
Hannu Krosing
+Ivan Krstić
Andrew Kuchling
Vladimir Kushnir
Cameron Laird
diff --git a/Misc/NEWS b/Misc/NEWS
index 54758ae..42e14a2 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -1227,6 +1227,9 @@ C-API
Extension Modules
-----------------
+- Fix a segfault in expat when a specially crafted input led to the
+ tokenizer not stopping.
+
- Issue #6561: '\d' in a regex now matches only characters with
Unicode category 'Nd' (Number, Decimal Digit). Previously it also
matched characters with category 'No'.
diff --git a/Modules/expat/xmltok_impl.c b/Modules/expat/xmltok_impl.c
index 0ee57ab..f793a6b 100644
--- a/Modules/expat/xmltok_impl.c
+++ b/Modules/expat/xmltok_impl.c
@@ -1741,7 +1741,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
const char *end,
POSITION *pos)
{
- while (ptr != end) {
+ while (ptr < end) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: \