diff options
author | Brett Cannon <bcannon@gmail.com> | 2009-08-13 19:27:12 (GMT) |
---|---|---|
committer | Brett Cannon <bcannon@gmail.com> | 2009-08-13 19:27:12 (GMT) |
commit | 764465f31506601d7e3e8a56ecf576caed09bf15 (patch) | |
tree | 5819594bea0ab8f40f214a516a50b3fea6c2cf9a | |
parent | c4ad0345cf7789dc432ff57ab644db230d8baf1c (diff) | |
download | cpython-764465f31506601d7e3e8a56ecf576caed09bf15.zip cpython-764465f31506601d7e3e8a56ecf576caed09bf15.tar.gz cpython-764465f31506601d7e3e8a56ecf576caed09bf15.tar.bz2 |
Expat could crash if given the wrong kind of input by never stopping its
tokenizing step.
Thanks to Ivan Krstić for the patch.
-rw-r--r-- | Lib/test/test_pyexpat.py | 21 | ||||
-rw-r--r-- | Misc/ACKS | 3 | ||||
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Modules/expat/xmltok_impl.c | 2 |
4 files changed, 26 insertions, 3 deletions
diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index de5cded..649d5d2 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -559,6 +559,24 @@ class ChardataBufferTest(unittest.TestCase):
         parser.Parse(xml2, 1)
         self.assertEquals(self.n, 4)
 
+class MalformedInputText(unittest.TestCase):
+    def test1(self):
+        xml = "\0\r\n"
+        parser = expat.ParserCreate()
+        try:
+            parser.Parse(xml, True)
+            self.fail()
+        except expat.ExpatError as e:
+            self.assertEquals(str(e), 'no element found: line 2, column 1')
+
+    def test2(self):
+        xml = "<?xml version\xc2\x85='1.0'?>\r\n"
+        parser = expat.ParserCreate()
+        try:
+            parser.Parse(xml, True)
+            self.fail()
+        except expat.ExpatError as e:
+            self.assertEquals(str(e), 'XML declaration not well-formed: line 1, column 14')
 
 def test_main():
     run_unittest(SetAttributeTest,
@@ -569,7 +587,8 @@ def test_main():
                  HandlerExceptionTest,
                  PositionTest,
                  sf1296433Test,
-                 ChardataBufferTest)
+                 ChardataBufferTest,
+                 MalformedInputText)
 
 if __name__ == "__main__":
     test_main()
diff --git a/Misc/ACKS b/Misc/ACKS
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -183,6 +183,7 @@ Ismail Donmez
 Dima Dorfman
 Cesar Douady
 Dean Draayer
+Fred L. Drake, Jr.
 John DuBois
 Paul Dubois
 Graham Dumpleton
@@ -371,7 +372,6 @@ Irmen de Jong
 Lucas de Jonge
 John Jorgensen
 Jens B. Jorgensen
-Fred L. Drake, Jr.
 Andreas Jung
 Tattoo Mabonzo K.
 Bob Kahn
@@ -408,6 +408,7 @@ Holger Krekel
 Michael Kremer
 Fabian Kreutz
 Hannu Krosing
+Ivan Krstić
 Andrew Kuchling
 Vladimir Kushnir
 Cameron Laird
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -1227,6 +1227,9 @@ C-API
 Extension Modules
 -----------------
 
+- Fix a segfault in expat when given a specially crafted input lead to the
+  tokenizer not stopping.
+
 - Issue #6561: '\d' in a regex now matches only characters with Unicode
   category 'Nd' (Number, Decimal Digit). Previously it also matched
   characters with category 'No'.
diff --git a/Modules/expat/xmltok_impl.c b/Modules/expat/xmltok_impl.c
index 0ee57ab..f793a6b 100644
--- a/Modules/expat/xmltok_impl.c
+++ b/Modules/expat/xmltok_impl.c
@@ -1741,7 +1741,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
                        const char *end,
                        POSITION *pos)
 {
-  while (ptr != end) {
+  while (ptr < end) {
     switch (BYTE_TYPE(enc, ptr)) {
 #define LEAD_CASE(n) \
     case BT_LEAD ## n: \