diff options
-rw-r--r-- | Doc/library/pyexpat.rst | 8 | ||||
-rw-r--r-- | Doc/whatsnew/2.6.rst | 7 | ||||
-rw-r--r-- | Lib/test/test_pyexpat.py | 130 | ||||
-rw-r--r-- | Misc/ACKS | 1 | ||||
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Modules/pyexpat.c | 44 |
6 files changed, 189 insertions, 4 deletions
diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index a4cc1d2..9a0f914 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -182,11 +182,15 @@ XMLParser Objects .. attribute:: xmlparser.buffer_size - The size of the buffer used when :attr:`buffer_text` is true. This value cannot - be changed at this time. + The size of the buffer used when :attr:`buffer_text` is true. + A new buffer size can be set by assigning a new integer value + to this attribute. + When the size is changed, the buffer will be flushed. .. versionadded:: 2.3 + .. versionchanged:: 2.6 + The buffer size can now be changed. .. attribute:: xmlparser.buffer_text diff --git a/Doc/whatsnew/2.6.rst b/Doc/whatsnew/2.6.rst index fee298d..4d90d35 100644 --- a/Doc/whatsnew/2.6.rst +++ b/Doc/whatsnew/2.6.rst @@ -875,6 +875,13 @@ complete list of changes, or look through the CVS logs for all the details. changed and :const:`UF_APPEND` to indicate that data can only be appended to the file. (Contributed by M. Levinson.) +* The :mod:`pyexpat` module's :class:`Parser` objects now allow setting + their :attr:`buffer_size` attribute to change the size of the buffer + used to hold character data. + (Contributed by Achim Gaedke.) + + .. Patch 1137 + * The :mod:`random` module's :class:`Random` objects can now be pickled on a 32-bit system and unpickled on a 64-bit system, and vice versa. Unfortunately, this change also means diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 69cf08c..de5cded 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -1,7 +1,7 @@ # XXX TypeErrors on calling handlers, or on bad return values from a # handler, are obscure and unhelpful. -import StringIO +import StringIO, sys import unittest import pyexpat @@ -434,6 +434,131 @@ class sf1296433Test(unittest.TestCase): self.assertRaises(Exception, parser.Parse, xml) +class ChardataBufferTest(unittest.TestCase): + """ + test setting of chardata buffer size + """ + + def test_1025_bytes(self): + self.assertEquals(self.small_buffer_test(1025), 2) + + def test_1000_bytes(self): + self.assertEquals(self.small_buffer_test(1000), 1) + + def test_wrong_size(self): + parser = expat.ParserCreate() + parser.buffer_text = 1 + def f(size): + parser.buffer_size = size + + self.assertRaises(TypeError, f, sys.maxint+1) + self.assertRaises(ValueError, f, -1) + self.assertRaises(ValueError, f, 0) + + def test_unchanged_size(self): + xml1 = ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512)) + xml2 = 'a'*512 + '</s>' + parser = expat.ParserCreate() + parser.CharacterDataHandler = self.counting_handler + parser.buffer_size = 512 + parser.buffer_text = 1 + + # Feed 512 bytes of character data: the handler should be called + # once. + self.n = 0 + parser.Parse(xml1) + self.assertEquals(self.n, 1) + + # Reassign to buffer_size, but assign the same size. + parser.buffer_size = parser.buffer_size + self.assertEquals(self.n, 1) + + # Try parsing rest of the document + parser.Parse(xml2) + self.assertEquals(self.n, 2) + + + def test_disabling_buffer(self): + xml1 = "<?xml version='1.0' encoding='iso8859'?><a>%s" % ('a' * 512) + xml2 = ('b' * 1024) + xml3 = "%s</a>" % ('c' * 1024) + parser = expat.ParserCreate() + parser.CharacterDataHandler = self.counting_handler + parser.buffer_text = 1 + parser.buffer_size = 1024 + self.assertEquals(parser.buffer_size, 1024) + + # Parse one chunk of XML + self.n = 0 + parser.Parse(xml1, 0) + self.assertEquals(parser.buffer_size, 1024) + self.assertEquals(self.n, 1) + + # Turn off buffering and parse the next chunk. + parser.buffer_text = 0 + self.assertFalse(parser.buffer_text) + self.assertEquals(parser.buffer_size, 1024) + for i in range(10): + parser.Parse(xml2, 0) + self.assertEquals(self.n, 11) + + parser.buffer_text = 1 + self.assertTrue(parser.buffer_text) + self.assertEquals(parser.buffer_size, 1024) + parser.Parse(xml3, 1) + self.assertEquals(self.n, 12) + + + + def make_document(self, bytes): + return ("<?xml version='1.0'?><tag>" + bytes * 'a' + '</tag>') + + def counting_handler(self, text): + self.n += 1 + + def small_buffer_test(self, buffer_len): + xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * buffer_len) + parser = expat.ParserCreate() + parser.CharacterDataHandler = self.counting_handler + parser.buffer_size = 1024 + parser.buffer_text = 1 + + self.n = 0 + parser.Parse(xml) + return self.n + + def test_change_size_1(self): + xml1 = "<?xml version='1.0' encoding='iso8859'?><a><s>%s" % ('a' * 1024) + xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025) + parser = expat.ParserCreate() + parser.CharacterDataHandler = self.counting_handler + parser.buffer_text = 1 + parser.buffer_size = 1024 + self.assertEquals(parser.buffer_size, 1024) + + self.n = 0 + parser.Parse(xml1, 0) + parser.buffer_size *= 2 + self.assertEquals(parser.buffer_size, 2048) + parser.Parse(xml2, 1) + self.assertEquals(self.n, 2) + + def test_change_size_2(self): + xml1 = "<?xml version='1.0' encoding='iso8859'?><a>a<s>%s" % ('a' * 1023) + xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025) + parser = expat.ParserCreate() + parser.CharacterDataHandler = self.counting_handler + parser.buffer_text = 1 + parser.buffer_size = 2048 + self.assertEquals(parser.buffer_size, 2048) + + self.n=0 + parser.Parse(xml1, 0) + parser.buffer_size /= 2 + self.assertEquals(parser.buffer_size, 1024) + parser.Parse(xml2, 1) + self.assertEquals(self.n, 4) + def test_main(): run_unittest(SetAttributeTest, @@ -443,7 +568,8 @@ def test_main(): BufferTextTest, HandlerExceptionTest, PositionTest, - sf1296433Test) + sf1296433Test, + ChardataBufferTest) if __name__ == "__main__": test_main() @@ -225,6 +225,7 @@ Gyro Funch Peter Funk Geoff Furnish Ulisses Furquim +Achim Gaedke Lele Gaifax Santiago Gala Yitzchak Gale @@ -348,6 +348,9 @@ Core and builtins Library ------- +- pyexpat, patch #1137: allow setting buffer_size attribute + on Parser objects to set the character data buffer size. + - Issue #1757: The hash of a Decimal instance is no longer affected by the current context. diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 2d2c4c5..01971b7 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1649,6 +1649,50 @@ xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v) self->specified_attributes = 0; return 0; } + + if (strcmp(name, "buffer_size") == 0) { + long new_buffer_size; + if (!PyInt_Check(v)) { + PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer"); + return -1; + } + + new_buffer_size=PyInt_AS_LONG(v); + /* trivial case -- no change */ + if (new_buffer_size == self->buffer_size) { + return 0; + } + + if (new_buffer_size <= 0) { + PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero"); + return -1; + } + + /* check maximum */ + if (new_buffer_size > INT_MAX) { + char errmsg[100]; + sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX); + PyErr_SetString(PyExc_ValueError, errmsg); + return -1; + } + + if (self->buffer != NULL) { + /* there is already a buffer */ + if (self->buffer_used != 0) { + flush_character_buffer(self); + } + /* free existing buffer */ + free(self->buffer); + } + self->buffer = malloc(new_buffer_size); + if (self->buffer == NULL) { + PyErr_NoMemory(); + return -1; + } + self->buffer_size = new_buffer_size; + return 0; + } + if (strcmp(name, "CharacterDataHandler") == 0) { /* If we're changing the character data handler, flush all * cached data with the old handler. Not sure there's a |