From 778db289b5a1968c67db195572f2384489cca20c Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka
Date: Sat, 4 Apr 2015 10:12:26 +0300
Subject: Issue #10590: xml.sax.parseString() now supports string argument.

---
 Doc/library/xml.sax.rst | 6 +++++-
 Lib/test/test_sax.py    | 7 +++++++
 Lib/xml/sax/__init__.py | 8 +++++---
 Misc/NEWS               | 2 ++
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/Doc/library/xml.sax.rst b/Doc/library/xml.sax.rst
index e95d6b0..55f9799 100644
--- a/Doc/library/xml.sax.rst
+++ b/Doc/library/xml.sax.rst
@@ -47,7 +47,11 @@ The convenience functions are:
 .. function:: parseString(string, handler, error_handler=handler.ErrorHandler())
 
    Similar to :func:`parse`, but parses from a buffer *string* received as a
-   parameter.
+   parameter. *string* must be a :class:`str` instance or a
+   :term:`bytes-like object`.
+
+   .. versionchanged:: 3.5
+      Added support of :class:`str` instances.
 
 A typical SAX application uses three kinds of objects: readers, handlers and
 input sources. "Reader" in this context is another term for parser, i.e. some
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
index ecfb391..85c1cfe 100644
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -200,6 +200,13 @@ class ParseTest(unittest.TestCase):
         parseString(s, XMLGenerator(result, 'utf-8'))
         self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))
 
+    def test_parseString_text(self):
+        encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
+                     'utf-16', 'utf-16le', 'utf-16be')
+        for encoding in encodings:
+            self.check_parseString(xml_str(self.data, encoding))
+        self.check_parseString(self.data)
+
     def test_parseString_bytes(self):
         # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
         # UTF-16 is autodetected
diff --git a/Lib/xml/sax/__init__.py b/Lib/xml/sax/__init__.py
index b161b1f..ef67ae6 100644
--- a/Lib/xml/sax/__init__.py
+++ b/Lib/xml/sax/__init__.py
@@ -33,8 +33,7 @@ def parse(source, handler, errorHandler=ErrorHandler()):
     parser.parse(source)
 
 def parseString(string, handler, errorHandler=ErrorHandler()):
-    from io import BytesIO
-
+    import io
     if errorHandler is None:
         errorHandler = ErrorHandler()
     parser = make_parser()
@@ -42,7 +41,10 @@ def parseString(string, handler, errorHandler=ErrorHandler()):
     parser.setErrorHandler(errorHandler)
 
     inpsrc = InputSource()
-    inpsrc.setByteStream(BytesIO(string))
+    if isinstance(string, str):
+        inpsrc.setCharacterStream(io.StringIO(string))
+    else:
+        inpsrc.setByteStream(io.BytesIO(string))
     parser.parse(inpsrc)
 
 # this is the parser list used by the make_parser function if no
diff --git a/Misc/NEWS b/Misc/NEWS
index a15600b..2d07fd1 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -19,6 +19,8 @@ Core and Builtins
 
 Library
 -------
 
+- Issue #10590: xml.sax.parseString() now supports string argument.
+
 - Issue #23338: Fixed formatting ctypes error messages on Cygwin.
   Patch by Makoto Kato.
 
-- 
cgit v0.12