summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2015-04-02 18:00:13 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2015-04-02 18:00:13 (GMT)
commit: 61de087f0f838f5b69592827d3d592c06aa9b655 (patch)
tree: 302f1a8799a529de0213a395e30fb4705b53f6bf /Lib
parent: 278ba2690c9367d36f138c880130aa1390fbaa19 (diff)
download: cpython-61de087f0f838f5b69592827d3d592c06aa9b655.zip
cpython-61de087f0f838f5b69592827d3d592c06aa9b655.tar.gz
cpython-61de087f0f838f5b69592827d3d592c06aa9b655.tar.bz2
Issue #2175: SAX parsers now support a character stream of InputSource object.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/test/test_sax.py 33
-rw-r--r-- Lib/xml/sax/expatreader.py 11
-rw-r--r-- Lib/xml/sax/saxutils.py 7
-rw-r--r-- Lib/xml/sax/xmlreader.py 4
4 files changed, 49 insertions, 6 deletions
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
index c8d5b21..813dc2e 100644
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -185,12 +185,24 @@ class PrepareInputSourceTest(unittest.TestCase):
def make_byte_stream(self):
return BytesIO(b"This is a byte stream.")
+ def make_character_stream(self):
+ return StringIO("This is a character stream.")
+
def checkContent(self, stream, content):
self.assertIsNotNone(stream)
self.assertEqual(stream.read(), content)
stream.close()
+ def test_character_stream(self):
+ # If the source is an InputSource with a character stream, use it.
+ src = InputSource(self.file)
+ src.setCharacterStream(self.make_character_stream())
+ prep = prepare_input_source(src)
+ self.assertIsNone(prep.getByteStream())
+ self.checkContent(prep.getCharacterStream(),
+ "This is a character stream.")
+
def test_byte_stream(self):
# If the source is an InputSource that does not have a character
# stream but does have a byte stream, use the byte stream.
@@ -225,6 +237,14 @@ class PrepareInputSourceTest(unittest.TestCase):
self.checkContent(prep.getByteStream(),
b"This is a byte stream.")
+ def test_text_file(self):
+ # If the source is a text file-like object, use it as a character
+ # stream.
+ prep = prepare_input_source(self.make_character_stream())
+ self.assertIsNone(prep.getByteStream())
+ self.checkContent(prep.getCharacterStream(),
+ "This is a character stream.")
+
# ===== XMLGenerator
@@ -904,6 +924,19 @@ class ExpatReaderTest(XmlTestBase):
self.assertEqual(result.getvalue(), xml_test_out)
+ def test_expat_inpsource_character_stream(self):
+ parser = create_parser()
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ inpsrc = InputSource()
+ with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
+ inpsrc.setCharacterStream(f)
+ parser.parse(inpsrc)
+
+ self.assertEqual(result.getvalue(), xml_test_out)
+
# ===== IncrementalParser support
def test_expat_incremental(self):
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
index a227cda..65ac7e3 100644
--- a/Lib/xml/sax/expatreader.py
+++ b/Lib/xml/sax/expatreader.py
@@ -219,9 +219,14 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
self._parsing = 0
# break cycle created by expat handlers pointing to our methods
self._parser = None
- bs = self._source.getByteStream()
- if bs is not None:
- bs.close()
+ try:
+ file = self._source.getCharacterStream()
+ if file is not None:
+ file.close()
+ finally:
+ file = self._source.getByteStream()
+ if file is not None:
+ file.close()
def _reset_cont_handler(self):
self._parser.ProcessingInstructionHandler = \
diff --git a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py
index 1d3d0ec..a69c7f7 100644
--- a/Lib/xml/sax/saxutils.py
+++ b/Lib/xml/sax/saxutils.py
@@ -345,11 +345,14 @@ def prepare_input_source(source, base=""):
elif hasattr(source, "read"):
f = source
source = xmlreader.InputSource()
- source.setByteStream(f)
+ if isinstance(f.read(0), str):
+ source.setCharacterStream(f)
+ else:
+ source.setByteStream(f)
if hasattr(f, "name") and isinstance(f.name, str):
source.setSystemId(f.name)
- if source.getByteStream() is None:
+ if source.getCharacterStream() is None and source.getByteStream() is None:
sysid = source.getSystemId()
basehead = os.path.dirname(os.path.normpath(base))
sysidfilename = os.path.join(basehead, sysid)
diff --git a/Lib/xml/sax/xmlreader.py b/Lib/xml/sax/xmlreader.py
index 7ef497f..716f228 100644
--- a/Lib/xml/sax/xmlreader.py
+++ b/Lib/xml/sax/xmlreader.py
@@ -117,7 +117,9 @@ class IncrementalParser(XMLReader):
source = saxutils.prepare_input_source(source)
self.prepareParser(source)
- file = source.getByteStream()
+ file = source.getCharacterStream()
+ if file is None:
+ file = source.getByteStream()
buffer = file.read(self._bufsize)
while buffer:
self.feed(buffer)