diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-02-02 08:28:30 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-02-02 08:28:30 (GMT) |
commit | 8673ab97cc1930f5f2c5d96667386e09d22d60ec (patch) | |
tree | 545b8f1ae56ace1a2e227f39da68d80b452911d4 /Lib/xml | |
parent | 6e7da1527969897a408dc23c16f47729edb04558 (diff) | |
download | cpython-8673ab97cc1930f5f2c5d96667386e09d22d60ec.zip cpython-8673ab97cc1930f5f2c5d96667386e09d22d60ec.tar.gz cpython-8673ab97cc1930f5f2c5d96667386e09d22d60ec.tar.bz2 |
Issue #11159: SAX parser now supports unicode file names.
Diffstat (limited to 'Lib/xml')
-rw-r--r-- | Lib/xml/sax/expatreader.py | 5 | ||||
-rw-r--r-- | Lib/xml/sax/saxutils.py | 28 |
2 files changed, 27 insertions, 6 deletions
```diff
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
index 92a79c1..9de3e72 100644
--- a/Lib/xml/sax/expatreader.py
+++ b/Lib/xml/sax/expatreader.py
@@ -108,7 +108,10 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
 
     def prepareParser(self, source):
         if source.getSystemId() is not None:
-            self._parser.SetBase(source.getSystemId())
+            base = source.getSystemId()
+            if isinstance(base, unicode):
+                base = base.encode('utf-8')
+            self._parser.SetBase(base)
 
     # Redefined setContentHandler to allow changing handlers during parsing
 
diff --git a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py
index 97d65d8..7989713 100644
--- a/Lib/xml/sax/saxutils.py
+++ b/Lib/xml/sax/saxutils.py
@@ -4,6 +4,7 @@ convenience of application and driver writers.
 """
 
 import os, urlparse, urllib, types
+import sys
 import handler
 import xmlreader
 
@@ -293,14 +294,31 @@ def prepare_input_source(source, base = ""):
             source.setSystemId(f.name)
 
     if source.getByteStream() is None:
-        sysid = source.getSystemId()
-        basehead = os.path.dirname(os.path.normpath(base))
-        sysidfilename = os.path.join(basehead, sysid)
-        if os.path.isfile(sysidfilename):
+        try:
+            sysid = source.getSystemId()
+            basehead = os.path.dirname(os.path.normpath(base))
+            encoding = sys.getfilesystemencoding()
+            if isinstance(sysid, unicode):
+                if not isinstance(basehead, unicode):
+                    try:
+                        basehead = basehead.decode(encoding)
+                    except UnicodeDecodeError:
+                        sysid = sysid.encode(encoding)
+            else:
+                if isinstance(basehead, unicode):
+                    try:
+                        sysid = sysid.decode(encoding)
+                    except UnicodeDecodeError:
+                        basehead = basehead.encode(encoding)
+            sysidfilename = os.path.join(basehead, sysid)
+            isfile = os.path.isfile(sysidfilename)
+        except UnicodeError:
+            isfile = False
+        if isfile:
             source.setSystemId(sysidfilename)
             f = open(sysidfilename, "rb")
         else:
-            source.setSystemId(urlparse.urljoin(base, sysid))
+            source.setSystemId(urlparse.urljoin(base, source.getSystemId()))
             f = urllib.urlopen(source.getSystemId())
 
         source.setByteStream(f)
```