diff options
-rw-r--r-- | Tools/idle/IOBinding.py | 185 | ||||
-rw-r--r-- | Tools/idle/PyShell.py | 4 |
2 files changed, 188 insertions, 1 deletions
diff --git a/Tools/idle/IOBinding.py b/Tools/idle/IOBinding.py index 925015f..0ea4524 100644 --- a/Tools/idle/IOBinding.py +++ b/Tools/idle/IOBinding.py @@ -1,4 +1,8 @@ import os +import types +import sys +import codecs +import re import tempfile import tkFileDialog import tkMessageBox @@ -24,6 +28,71 @@ from IdleConf import idleconf #$ win <Control-p> #$ unix <Control-x><Control-p> +try: + from codecs import BOM_UTF8 +except ImportError: + # only available since Python 2.3 + BOM_UTF8 = '\xef\xbb\xbf' + +# Try setting the locale, so that we can find out +# what encoding to use +try: + import locale + locale.setlocale(locale.LC_CTYPE, "") +except ImportError: + pass + +encoding = "ascii" +if sys.platform == 'win32': + # On Windows, we could use "mbcs". However, to give the user + # a portable encoding name, we need to find the code page + try: + encoding = locale.getdefaultlocale()[1] + codecs.lookup(encoding) + except LookupError: + pass +else: + try: + # Different things can fail here: the locale module may not be + # loaded, it may not offer nl_langinfo, or CODESET, or the + # resulting codeset may be unknown to Python. We ignore all + # these problems, falling back to ASCII + encoding = locale.nl_langinfo(locale.CODESET) + codecs.lookup(encoding) + except (NameError, AttributeError, LookupError): + # Try getdefaultlocale well: it parses environment variables, + # which may give a clue. Unfortunately, getdefaultlocale has + # bugs that can cause ValueError. + try: + encoding = locale.getdefaultlocale()[1] + codecs.lookup(encoding) + except (ValueError, LookupError): + pass + +encoding = encoding.lower() + +coding_re = re.compile("coding[:=]\s*([-\w_.]+)") +def coding_spec(str): + + """Return the encoding declaration according to PEP 263. + Raise LookupError if the encoding is declared but unknown.""" + + # Only consider the first two lines + str = str.split("\n")[:2] + str = "\n".join(str) + + match = coding_re.search(str) + if not match: + return None + name = match.group(1) + # Check whether the encoding is known + import codecs + try: + codecs.lookup(name) + except LookupError: + # The standard encoding error does not indicate the encoding + raise LookupError, "Unknown encoding "+name + return name class IOBinding: @@ -37,6 +106,7 @@ class IOBinding: self.__id_savecopy = self.text.bind("<<save-copy-of-window-as-file>>", self.save_a_copy) self.__id_print = self.text.bind("<<print-window>>", self.print_window) + self.fileencoding = None def close(self): # Undo command bindings @@ -101,6 +171,9 @@ class IOBinding: except IOError, msg: tkMessageBox.showerror("I/O Error", str(msg), master=self.text) return False + + chars = self.decode(chars) + self.text.delete("1.0", "end") self.set_filename(None) self.text.insert("1.0", chars) @@ -110,6 +183,54 @@ class IOBinding: self.text.see("insert") return True + def decode(self, chars): + # Try to create a Unicode string. If that fails, let Tcl try + # its best + + # Check presence of a UTF-8 signature first + if chars.startswith(BOM_UTF8): + try: + chars = chars[3:].decode("utf-8") + except UnicodeError: + # has UTF-8 signature, but fails to decode... + return chars + else: + # Indicates that this file originally had a BOM + self.fileencoding = BOM_UTF8 + return chars + + # Next look for coding specification + try: + enc = coding_spec(chars) + except LookupError, name: + tkMessageBox.showerror( + title="Error loading the file", + message="The encoding '%s' is not known to this Python "\ + "installation. The file may not display correctly" % name, + master = self.text) + enc = None + + if enc: + try: + return unicode(chars, enc) + except UnicodeError: + pass + + # If it is ASCII, we need not to record anything + try: + return unicode(chars, 'ascii') + except UnicodeError: + pass + + # Finally, try the locale's encoding. This is deprecated; + # the user should declare a non-ASCII encoding + try: + chars = unicode(chars, encoding) + self.fileencoding = encoding + except UnicodeError: + pass + return chars + def maybesave(self): if self.get_saved(): return "yes" @@ -180,7 +301,7 @@ class IOBinding: def writefile(self, filename): self.fixlastline() - chars = str(self.text.get("1.0", "end-1c")) + chars = self.encode(self.text.get("1.0", "end-1c")) try: f = open(filename, "w") f.write(chars) @@ -192,6 +313,68 @@ class IOBinding: master=self.text) return False + def encode(self, chars): + if isinstance(chars, types.StringType): + # This is either plain ASCII, or Tk was returning mixed-encoding + # text to us. Don't try to guess further. + return chars + + # See whether there is anything non-ASCII in it. + # If not, no need to figure out the encoding. + try: + return chars.encode('ascii') + except UnicodeError: + pass + + # If there is an encoding declared, try this first. + try: + enc = coding_spec(chars) + failed = None + except LookupError, msg: + failed = msg + enc = None + if enc: + try: + return chars.encode(enc) + except UnicodeError: + failed = "Invalid encoding '%s'" % enc + + if failed: + tkMessageBox.showerror( + "I/O Error", + "%s. Saving as UTF-8" % failed, + master = self.text) + + # If there was a UTF-8 signature, use that. This should not fail + if self.fileencoding == BOM_UTF8 or failed: + return BOM_UTF8 + chars.encode("utf-8") + + # Try the original file encoding next, if any + if self.fileencoding: + try: + return chars.encode(self.fileencoding) + except UnicodeError: + tkMessageBox.showerror( + "I/O Error", + "Cannot save this as '%s' anymore. Saving as UTF-8" % self.fileencoding, + master = self.text) + return BOM_UTF8 + chars.encode("utf-8") + + # Nothing was declared, and we had not determined an encoding + # on loading. Recommend an encoding line. + try: + chars = chars.encode(encoding) + enc = encoding + except UnicodeError: + chars = BOM_UTF8 + chars.encode("utf-8") + enc = "utf-8" + tkMessageBox.showerror( + "I/O Error", + "Non-ASCII found, yet no encoding declared. Add a line like\n" + "# -*- coding: %s -*- \nto your file" % enc, + master = self.text) + return chars + def fixlastline(self): c = self.text.get("end-2c") if c != '\n': diff --git a/Tools/idle/PyShell.py b/Tools/idle/PyShell.py index cf854d3..31a8940 100644 --- a/Tools/idle/PyShell.py +++ b/Tools/idle/PyShell.py @@ -6,6 +6,7 @@ import string import getopt import re import warnings +import types import linecache from code import InteractiveInterpreter @@ -188,6 +189,9 @@ class ModifiedInterpreter(InteractiveInterpreter): self.more = 0 self.save_warnings_filters = warnings.filters[:] warnings.filterwarnings(action="error", category=SyntaxWarning) + if isinstance(source, types.UnicodeType): + import IOBinding + source = source.encode(IOBinding.encoding) try: return InteractiveInterpreter.runsource(self, source, filename) finally: |