summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2015-04-02 18:00:13 (GMT)
committerSerhiy Storchaka <storchaka@gmail.com>2015-04-02 18:00:13 (GMT)
commit61de087f0f838f5b69592827d3d592c06aa9b655 (patch)
tree302f1a8799a529de0213a395e30fb4705b53f6bf
parent278ba2690c9367d36f138c880130aa1390fbaa19 (diff)
downloadcpython-61de087f0f838f5b69592827d3d592c06aa9b655.zip
cpython-61de087f0f838f5b69592827d3d592c06aa9b655.tar.gz
cpython-61de087f0f838f5b69592827d3d592c06aa9b655.tar.bz2
Issue #2175: SAX parsers now support a character stream of InputSource object.
-rw-r--r--Doc/library/xml.sax.reader.rst12
-rw-r--r--Doc/whatsnew/3.5.rst7
-rw-r--r--Lib/test/test_sax.py33
-rw-r--r--Lib/xml/sax/expatreader.py11
-rw-r--r--Lib/xml/sax/saxutils.py7
-rw-r--r--Lib/xml/sax/xmlreader.py4
-rw-r--r--Misc/NEWS2
7 files changed, 64 insertions, 12 deletions
diff --git a/Doc/library/xml.sax.reader.rst b/Doc/library/xml.sax.reader.rst
index 3ab6063..31ca260 100644
--- a/Doc/library/xml.sax.reader.rst
+++ b/Doc/library/xml.sax.reader.rst
@@ -100,8 +100,10 @@ The :class:`XMLReader` interface supports the following methods:
system identifier (a string identifying the input source -- typically a file
name or an URL), a file-like object, or an :class:`InputSource` object. When
:meth:`parse` returns, the input is completely processed, and the parser object
- can be discarded or reset. As a limitation, the current implementation only
- accepts byte streams; processing of character streams is for further study.
+ can be discarded or reset.
+
+ .. versionchanged:: 3.5
+ Added support of character streams.
.. method:: XMLReader.getContentHandler()
@@ -288,8 +290,7 @@ InputSource Objects
.. method:: InputSource.setByteStream(bytefile)
- Set the byte stream (a Python file-like object which does not perform
- byte-to-character conversion) for this input source.
+ Set the byte stream (a :term:`binary file`) for this input source.
The SAX parser will ignore this if there is also a character stream specified,
but it will use a byte stream in preference to opening a URI connection itself.
@@ -308,8 +309,7 @@ InputSource Objects
.. method:: InputSource.setCharacterStream(charfile)
- Set the character stream for this input source. (The stream must be a Python 1.6
- Unicode-wrapped file-like that performs conversion to strings.)
+ Set the character stream (a :term:`text file`) for this input source.
If there is a character stream specified, the SAX parser will ignore any byte
stream and will not attempt to open a URI connection to the system identifier.
diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst
index 4fea8af..e3381ae 100644
--- a/Doc/whatsnew/3.5.rst
+++ b/Doc/whatsnew/3.5.rst
@@ -499,6 +499,13 @@ xmlrpc
* :class:`xmlrpc.client.ServerProxy` is now a :term:`context manager`.
(Contributed by Claudiu Popa in :issue:`20627`.)
+xml.sax
+-------
+
+* SAX parsers now support a character stream of
+ :class:`~xml.sax.xmlreader.InputSource` object.
+ (Contributed by Serhiy Storchaka in :issue:`2175`.)
+
faulthandler
------------
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
index c8d5b21..813dc2e 100644
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -185,12 +185,24 @@ class PrepareInputSourceTest(unittest.TestCase):
def make_byte_stream(self):
return BytesIO(b"This is a byte stream.")
+ def make_character_stream(self):
+ return StringIO("This is a character stream.")
+
def checkContent(self, stream, content):
self.assertIsNotNone(stream)
self.assertEqual(stream.read(), content)
stream.close()
+ def test_character_stream(self):
+ # If the source is an InputSource with a character stream, use it.
+ src = InputSource(self.file)
+ src.setCharacterStream(self.make_character_stream())
+ prep = prepare_input_source(src)
+ self.assertIsNone(prep.getByteStream())
+ self.checkContent(prep.getCharacterStream(),
+ "This is a character stream.")
+
def test_byte_stream(self):
# If the source is an InputSource that does not have a character
# stream but does have a byte stream, use the byte stream.
@@ -225,6 +237,14 @@ class PrepareInputSourceTest(unittest.TestCase):
self.checkContent(prep.getByteStream(),
b"This is a byte stream.")
+ def test_text_file(self):
+ # If the source is a text file-like object, use it as a character
+ # stream.
+ prep = prepare_input_source(self.make_character_stream())
+ self.assertIsNone(prep.getByteStream())
+ self.checkContent(prep.getCharacterStream(),
+ "This is a character stream.")
+
# ===== XMLGenerator
@@ -904,6 +924,19 @@ class ExpatReaderTest(XmlTestBase):
self.assertEqual(result.getvalue(), xml_test_out)
+ def test_expat_inpsource_character_stream(self):
+ parser = create_parser()
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ inpsrc = InputSource()
+ with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
+ inpsrc.setCharacterStream(f)
+ parser.parse(inpsrc)
+
+ self.assertEqual(result.getvalue(), xml_test_out)
+
# ===== IncrementalParser support
def test_expat_incremental(self):
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
index a227cda..65ac7e3 100644
--- a/Lib/xml/sax/expatreader.py
+++ b/Lib/xml/sax/expatreader.py
@@ -219,9 +219,14 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
self._parsing = 0
# break cycle created by expat handlers pointing to our methods
self._parser = None
- bs = self._source.getByteStream()
- if bs is not None:
- bs.close()
+ try:
+ file = self._source.getCharacterStream()
+ if file is not None:
+ file.close()
+ finally:
+ file = self._source.getByteStream()
+ if file is not None:
+ file.close()
def _reset_cont_handler(self):
self._parser.ProcessingInstructionHandler = \
diff --git a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py
index 1d3d0ec..a69c7f7 100644
--- a/Lib/xml/sax/saxutils.py
+++ b/Lib/xml/sax/saxutils.py
@@ -345,11 +345,14 @@ def prepare_input_source(source, base=""):
elif hasattr(source, "read"):
f = source
source = xmlreader.InputSource()
- source.setByteStream(f)
+ if isinstance(f.read(0), str):
+ source.setCharacterStream(f)
+ else:
+ source.setByteStream(f)
if hasattr(f, "name") and isinstance(f.name, str):
source.setSystemId(f.name)
- if source.getByteStream() is None:
+ if source.getCharacterStream() is None and source.getByteStream() is None:
sysid = source.getSystemId()
basehead = os.path.dirname(os.path.normpath(base))
sysidfilename = os.path.join(basehead, sysid)
diff --git a/Lib/xml/sax/xmlreader.py b/Lib/xml/sax/xmlreader.py
index 7ef497f..716f228 100644
--- a/Lib/xml/sax/xmlreader.py
+++ b/Lib/xml/sax/xmlreader.py
@@ -117,7 +117,9 @@ class IncrementalParser(XMLReader):
source = saxutils.prepare_input_source(source)
self.prepareParser(source)
- file = source.getByteStream()
+ file = source.getCharacterStream()
+ if file is None:
+ file = source.getByteStream()
buffer = file.read(self._bufsize)
while buffer:
self.feed(buffer)
diff --git a/Misc/NEWS b/Misc/NEWS
index 9563a5b..b230674 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -16,6 +16,8 @@ Core and Builtins
Library
-------
+- Issue #2175: SAX parsers now support a character stream of InputSource object.
+
- Issue #16840: Tkinter now supports 64-bit integers added in Tcl 8.4 and
arbitrary precision integers added in Tcl 8.5.