summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/test/test_sax.py 50
-rw-r--r-- Lib/xml/sax/expatreader.py 5
-rw-r--r-- Lib/xml/sax/saxutils.py 28
-rw-r--r-- Misc/NEWS 2
4 files changed, 79 insertions, 6 deletions
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
index c3b44f8..c7604a1 100644
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -14,6 +14,8 @@ from xml.sax.expatreader import create_parser
from xml.sax.handler import feature_namespaces
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
from cStringIO import StringIO
+import shutil
+import test.test_support as support
from test.test_support import findfile, run_unittest
import unittest
@@ -384,6 +386,22 @@ class ExpatReaderTest(XmlTestBase):
self.assertEqual(result.getvalue(), xml_test_out)
+ @unittest.skipUnless(hasattr(support, 'TESTFN_UNICODE'),
+ 'Requires unicode filenames support')
+ def test_expat_file_unicode(self):
+ fname = support.TESTFN_UNICODE
+ shutil.copyfile(TEST_XMLFILE, fname)
+ self.addCleanup(support.unlink, fname)
+
+ parser = create_parser()
+ result = StringIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ parser.parse(open(fname))
+
+ self.assertEqual(result.getvalue(), xml_test_out)
+
# ===== DTDHandler support
class TestDTDHandler:
@@ -523,6 +541,22 @@ class ExpatReaderTest(XmlTestBase):
self.assertEqual(result.getvalue(), xml_test_out)
+ @unittest.skipUnless(hasattr(support, 'TESTFN_UNICODE'),
+ 'Requires unicode filenames support')
+ def test_expat_inpsource_sysid_unicode(self):
+ fname = support.TESTFN_UNICODE
+ shutil.copyfile(TEST_XMLFILE, fname)
+ self.addCleanup(support.unlink, fname)
+
+ parser = create_parser()
+ result = StringIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ parser.parse(InputSource(fname))
+
+ self.assertEqual(result.getvalue(), xml_test_out)
+
def test_expat_inpsource_stream(self):
parser = create_parser()
result = StringIO()
@@ -596,6 +630,22 @@ class ExpatReaderTest(XmlTestBase):
self.assertEqual(parser.getSystemId(), TEST_XMLFILE)
self.assertEqual(parser.getPublicId(), None)
+ @unittest.skipUnless(hasattr(support, 'TESTFN_UNICODE'),
+ 'Requires unicode filenames support')
+ def test_expat_locator_withinfo_unicode(self):
+ fname = support.TESTFN_UNICODE
+ shutil.copyfile(TEST_XMLFILE, fname)
+ self.addCleanup(support.unlink, fname)
+
+ result = StringIO()
+ xmlgen = XMLGenerator(result)
+ parser = create_parser()
+ parser.setContentHandler(xmlgen)
+ parser.parse(fname)
+
+ self.assertEqual(parser.getSystemId(), fname)
+ self.assertEqual(parser.getPublicId(), None)
+
# ===========================================================================
#
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
index 92a79c1..9de3e72 100644
--- a/Lib/xml/sax/expatreader.py
+++ b/Lib/xml/sax/expatreader.py
@@ -108,7 +108,10 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
def prepareParser(self, source):
if source.getSystemId() is not None:
- self._parser.SetBase(source.getSystemId())
+ base = source.getSystemId()
+ if isinstance(base, unicode):
+ base = base.encode('utf-8')
+ self._parser.SetBase(base)
# Redefined setContentHandler to allow changing handlers during parsing
diff --git a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py
index 97d65d8..7989713 100644
--- a/Lib/xml/sax/saxutils.py
+++ b/Lib/xml/sax/saxutils.py
@@ -4,6 +4,7 @@ convenience of application and driver writers.
"""
import os, urlparse, urllib, types
+import sys
import handler
import xmlreader
@@ -293,14 +294,31 @@ def prepare_input_source(source, base = ""):
source.setSystemId(f.name)
if source.getByteStream() is None:
- sysid = source.getSystemId()
- basehead = os.path.dirname(os.path.normpath(base))
- sysidfilename = os.path.join(basehead, sysid)
- if os.path.isfile(sysidfilename):
+ try:
+ sysid = source.getSystemId()
+ basehead = os.path.dirname(os.path.normpath(base))
+ encoding = sys.getfilesystemencoding()
+ if isinstance(sysid, unicode):
+ if not isinstance(basehead, unicode):
+ try:
+ basehead = basehead.decode(encoding)
+ except UnicodeDecodeError:
+ sysid = sysid.encode(encoding)
+ else:
+ if isinstance(basehead, unicode):
+ try:
+ sysid = sysid.decode(encoding)
+ except UnicodeDecodeError:
+ basehead = basehead.encode(encoding)
+ sysidfilename = os.path.join(basehead, sysid)
+ isfile = os.path.isfile(sysidfilename)
+ except UnicodeError:
+ isfile = False
+ if isfile:
source.setSystemId(sysidfilename)
f = open(sysidfilename, "rb")
else:
- source.setSystemId(urlparse.urljoin(base, sysid))
+ source.setSystemId(urlparse.urljoin(base, source.getSystemId()))
f = urllib.urlopen(source.getSystemId())
source.setByteStream(f)
diff --git a/Misc/NEWS b/Misc/NEWS
index fb68dde..cfe99c9 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -202,6 +202,8 @@ Core and Builtins
Library
-------
+- Issue #11159: SAX parser now supports unicode file names.
+
- Issue #6972: The zipfile module no longer overwrites files outside of
its destination path when extracting malicious zip files.