summary | refs | log | tree | commit | diff | stats
path: root/Lib/xml
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2013-02-02 08:28:30 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2013-02-02 08:28:30 (GMT)
commit: 8673ab97cc1930f5f2c5d96667386e09d22d60ec (patch)
tree: 545b8f1ae56ace1a2e227f39da68d80b452911d4 /Lib/xml
parent: 6e7da1527969897a408dc23c16f47729edb04558 (diff)
download: cpython-8673ab97cc1930f5f2c5d96667386e09d22d60ec.zip
cpython-8673ab97cc1930f5f2c5d96667386e09d22d60ec.tar.gz
cpython-8673ab97cc1930f5f2c5d96667386e09d22d60ec.tar.bz2
Issue #11159: SAX parser now supports unicode file names.
Diffstat (limited to 'Lib/xml')
-rw-r--r-- Lib/xml/sax/expatreader.py 5
-rw-r--r-- Lib/xml/sax/saxutils.py 28
2 files changed, 27 insertions, 6 deletions
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
index 92a79c1..9de3e72 100644
--- a/Lib/xml/sax/expatreader.py
+++ b/Lib/xml/sax/expatreader.py
@@ -108,7 +108,10 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
def prepareParser(self, source):
if source.getSystemId() is not None:
- self._parser.SetBase(source.getSystemId())
+ base = source.getSystemId()
+ if isinstance(base, unicode):
+ base = base.encode('utf-8')
+ self._parser.SetBase(base)
# Redefined setContentHandler to allow changing handlers during parsing
diff --git a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py
index 97d65d8..7989713 100644
--- a/Lib/xml/sax/saxutils.py
+++ b/Lib/xml/sax/saxutils.py
@@ -4,6 +4,7 @@ convenience of application and driver writers.
"""
import os, urlparse, urllib, types
+import sys
import handler
import xmlreader
@@ -293,14 +294,31 @@ def prepare_input_source(source, base = ""):
source.setSystemId(f.name)
if source.getByteStream() is None:
- sysid = source.getSystemId()
- basehead = os.path.dirname(os.path.normpath(base))
- sysidfilename = os.path.join(basehead, sysid)
- if os.path.isfile(sysidfilename):
+ try:
+ sysid = source.getSystemId()
+ basehead = os.path.dirname(os.path.normpath(base))
+ encoding = sys.getfilesystemencoding()
+ if isinstance(sysid, unicode):
+ if not isinstance(basehead, unicode):
+ try:
+ basehead = basehead.decode(encoding)
+ except UnicodeDecodeError:
+ sysid = sysid.encode(encoding)
+ else:
+ if isinstance(basehead, unicode):
+ try:
+ sysid = sysid.decode(encoding)
+ except UnicodeDecodeError:
+ basehead = basehead.encode(encoding)
+ sysidfilename = os.path.join(basehead, sysid)
+ isfile = os.path.isfile(sysidfilename)
+ except UnicodeError:
+ isfile = False
+ if isfile:
source.setSystemId(sysidfilename)
f = open(sysidfilename, "rb")
else:
- source.setSystemId(urlparse.urljoin(base, sysid))
+ source.setSystemId(urlparse.urljoin(base, source.getSystemId()))
f = urllib.urlopen(source.getSystemId())
source.setByteStream(f)