summaryrefslogtreecommitdiffstats
path: root/Tools/idle/IOBinding.py
diff options
context:
space:
mode:
Diffstat (limited to 'Tools/idle/IOBinding.py')
-rw-r--r--Tools/idle/IOBinding.py185
1 files changed, 184 insertions, 1 deletions
diff --git a/Tools/idle/IOBinding.py b/Tools/idle/IOBinding.py
index 925015f..0ea4524 100644
--- a/Tools/idle/IOBinding.py
+++ b/Tools/idle/IOBinding.py
@@ -1,4 +1,8 @@
import os
+import types
+import sys
+import codecs
+import re
import tempfile
import tkFileDialog
import tkMessageBox
@@ -24,6 +28,71 @@ from IdleConf import idleconf
#$ win <Control-p>
#$ unix <Control-x><Control-p>
+try:
+ from codecs import BOM_UTF8
+except ImportError:
+ # only available since Python 2.3
+ BOM_UTF8 = '\xef\xbb\xbf'
+
+# Try setting the locale, so that we can find out
+# what encoding to use
+try:
+ import locale
+ locale.setlocale(locale.LC_CTYPE, "")
+except ImportError:
+ pass
+
+encoding = "ascii"
+if sys.platform == 'win32':
+ # On Windows, we could use "mbcs". However, to give the user
+ # a portable encoding name, we need to find the code page
+ try:
+ encoding = locale.getdefaultlocale()[1]
+ codecs.lookup(encoding)
+ except LookupError:
+ pass
+else:
+ try:
+ # Different things can fail here: the locale module may not be
+ # loaded, it may not offer nl_langinfo, or CODESET, or the
+ # resulting codeset may be unknown to Python. We ignore all
+ # these problems, falling back to ASCII
+ encoding = locale.nl_langinfo(locale.CODESET)
+ codecs.lookup(encoding)
+ except (NameError, AttributeError, LookupError):
+ # Try getdefaultlocale well: it parses environment variables,
+ # which may give a clue. Unfortunately, getdefaultlocale has
+ # bugs that can cause ValueError.
+ try:
+ encoding = locale.getdefaultlocale()[1]
+ codecs.lookup(encoding)
+ except (ValueError, LookupError):
+ pass
+
+encoding = encoding.lower()
+
+coding_re = re.compile("coding[:=]\s*([-\w_.]+)")
+def coding_spec(str):
+
+ """Return the encoding declaration according to PEP 263.
+ Raise LookupError if the encoding is declared but unknown."""
+
+ # Only consider the first two lines
+ str = str.split("\n")[:2]
+ str = "\n".join(str)
+
+ match = coding_re.search(str)
+ if not match:
+ return None
+ name = match.group(1)
+ # Check whether the encoding is known
+ import codecs
+ try:
+ codecs.lookup(name)
+ except LookupError:
+ # The standard encoding error does not indicate the encoding
+ raise LookupError, "Unknown encoding "+name
+ return name
class IOBinding:
@@ -37,6 +106,7 @@ class IOBinding:
self.__id_savecopy = self.text.bind("<<save-copy-of-window-as-file>>",
self.save_a_copy)
self.__id_print = self.text.bind("<<print-window>>", self.print_window)
+ self.fileencoding = None
def close(self):
# Undo command bindings
@@ -101,6 +171,9 @@ class IOBinding:
except IOError, msg:
tkMessageBox.showerror("I/O Error", str(msg), master=self.text)
return False
+
+ chars = self.decode(chars)
+
self.text.delete("1.0", "end")
self.set_filename(None)
self.text.insert("1.0", chars)
@@ -110,6 +183,54 @@ class IOBinding:
self.text.see("insert")
return True
+ def decode(self, chars):
+ # Try to create a Unicode string. If that fails, let Tcl try
+ # its best
+
+ # Check presence of a UTF-8 signature first
+ if chars.startswith(BOM_UTF8):
+ try:
+ chars = chars[3:].decode("utf-8")
+ except UnicodeError:
+ # has UTF-8 signature, but fails to decode...
+ return chars
+ else:
+ # Indicates that this file originally had a BOM
+ self.fileencoding = BOM_UTF8
+ return chars
+
+ # Next look for coding specification
+ try:
+ enc = coding_spec(chars)
+ except LookupError, name:
+ tkMessageBox.showerror(
+ title="Error loading the file",
+ message="The encoding '%s' is not known to this Python "\
+ "installation. The file may not display correctly" % name,
+ master = self.text)
+ enc = None
+
+ if enc:
+ try:
+ return unicode(chars, enc)
+ except UnicodeError:
+ pass
+
+ # If it is ASCII, we need not to record anything
+ try:
+ return unicode(chars, 'ascii')
+ except UnicodeError:
+ pass
+
+ # Finally, try the locale's encoding. This is deprecated;
+ # the user should declare a non-ASCII encoding
+ try:
+ chars = unicode(chars, encoding)
+ self.fileencoding = encoding
+ except UnicodeError:
+ pass
+ return chars
+
def maybesave(self):
if self.get_saved():
return "yes"
@@ -180,7 +301,7 @@ class IOBinding:
def writefile(self, filename):
self.fixlastline()
- chars = str(self.text.get("1.0", "end-1c"))
+ chars = self.encode(self.text.get("1.0", "end-1c"))
try:
f = open(filename, "w")
f.write(chars)
@@ -192,6 +313,68 @@ class IOBinding:
master=self.text)
return False
+ def encode(self, chars):
+ if isinstance(chars, types.StringType):
+ # This is either plain ASCII, or Tk was returning mixed-encoding
+ # text to us. Don't try to guess further.
+ return chars
+
+ # See whether there is anything non-ASCII in it.
+ # If not, no need to figure out the encoding.
+ try:
+ return chars.encode('ascii')
+ except UnicodeError:
+ pass
+
+ # If there is an encoding declared, try this first.
+ try:
+ enc = coding_spec(chars)
+ failed = None
+ except LookupError, msg:
+ failed = msg
+ enc = None
+ if enc:
+ try:
+ return chars.encode(enc)
+ except UnicodeError:
+ failed = "Invalid encoding '%s'" % enc
+
+ if failed:
+ tkMessageBox.showerror(
+ "I/O Error",
+ "%s. Saving as UTF-8" % failed,
+ master = self.text)
+
+ # If there was a UTF-8 signature, use that. This should not fail
+ if self.fileencoding == BOM_UTF8 or failed:
+ return BOM_UTF8 + chars.encode("utf-8")
+
+ # Try the original file encoding next, if any
+ if self.fileencoding:
+ try:
+ return chars.encode(self.fileencoding)
+ except UnicodeError:
+ tkMessageBox.showerror(
+ "I/O Error",
+ "Cannot save this as '%s' anymore. Saving as UTF-8" % self.fileencoding,
+ master = self.text)
+ return BOM_UTF8 + chars.encode("utf-8")
+
+ # Nothing was declared, and we had not determined an encoding
+ # on loading. Recommend an encoding line.
+ try:
+ chars = chars.encode(encoding)
+ enc = encoding
+ except UnicodeError:
+ chars = BOM_UTF8 + chars.encode("utf-8")
+ enc = "utf-8"
+ tkMessageBox.showerror(
+ "I/O Error",
+ "Non-ASCII found, yet no encoding declared. Add a line like\n"
+ "# -*- coding: %s -*- \nto your file" % enc,
+ master = self.text)
+ return chars
+
def fixlastline(self):
c = self.text.get("end-2c")
if c != '\n':