From 764465f31506601d7e3e8a56ecf576caed09bf15 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Thu, 13 Aug 2009 19:27:12 +0000 Subject: Expat could crash if given the wrong kind of input by never stopping its tokenizing step. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thanks to Ivan Krstić for the patch. --- Lib/test/test_pyexpat.py | 21 ++++++++++++++++++++- Misc/ACKS | 3 ++- Misc/NEWS | 3 +++ Modules/expat/xmltok_impl.c | 2 +- 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index de5cded..649d5d2 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -559,6 +559,24 @@ class ChardataBufferTest(unittest.TestCase): parser.Parse(xml2, 1) self.assertEquals(self.n, 4) +class MalformedInputText(unittest.TestCase): + def test1(self): + xml = "\0\r\n" + parser = expat.ParserCreate() + try: + parser.Parse(xml, True) + self.fail() + except expat.ExpatError as e: + self.assertEquals(str(e), 'no element found: line 2, column 1') + + def test2(self): + xml = "\r\n" + parser = expat.ParserCreate() + try: + parser.Parse(xml, True) + self.fail() + except expat.ExpatError as e: + self.assertEquals(str(e), 'XML declaration not well-formed: line 1, column 14') def test_main(): run_unittest(SetAttributeTest, @@ -569,7 +587,8 @@ def test_main(): HandlerExceptionTest, PositionTest, sf1296433Test, - ChardataBufferTest) + ChardataBufferTest, + MalformedInputText) if __name__ == "__main__": test_main() diff --git a/Misc/ACKS b/Misc/ACKS index defa802..dc87dc2 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -183,6 +183,7 @@ Ismail Donmez Dima Dorfman Cesar Douady Dean Draayer +Fred L. Drake, Jr. John DuBois Paul Dubois Graham Dumpleton @@ -371,7 +372,6 @@ Irmen de Jong Lucas de Jonge John Jorgensen Jens B. Jorgensen -Fred L. Drake, Jr. Andreas Jung Tattoo Mabonzo K. 
Bob Kahn @@ -408,6 +408,7 @@ Holger Krekel Michael Kremer Fabian Kreutz Hannu Krosing +Ivan Krstić Andrew Kuchling Vladimir Kushnir Cameron Laird diff --git a/Misc/NEWS b/Misc/NEWS index 54758ae..42e14a2 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -1227,6 +1227,9 @@ C-API Extension Modules ----------------- +- Fix a segfault in expat when given specially crafted input that leads to the + tokenizer not stopping. + - Issue #6561: '\d' in a regex now matches only characters with Unicode category 'Nd' (Number, Decimal Digit). Previously it also matched characters with category 'No'. diff --git a/Modules/expat/xmltok_impl.c b/Modules/expat/xmltok_impl.c index 0ee57ab..f793a6b 100644 --- a/Modules/expat/xmltok_impl.c +++ b/Modules/expat/xmltok_impl.c @@ -1741,7 +1741,7 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *end, POSITION *pos) { - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ -- cgit v0.12