summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Martin v. Löwis <martin@v.loewis.de> 2009-01-18 20:15:42 (GMT)
committer Martin v. Löwis <martin@v.loewis.de> 2009-01-18 20:15:42 (GMT)
commit 975a079794cf1672c2b917f946da25966f75c6b6 (patch)
tree 1e5e536be53abddc607e7fdaf87df0b1613a1c0f
parent 1cbb17a818a70aacd6cbecd3411a2f91a08b7826 (diff)
download cpython-975a079794cf1672c2b917f946da25966f75c6b6.zip
cpython-975a079794cf1672c2b917f946da25966f75c6b6.tar.gz
cpython-975a079794cf1672c2b917f946da25966f75c6b6.tar.bz2
Issue #4008: Fix problems with non-ASCII source files.
-rw-r--r-- Lib/idlelib/IOBinding.py 9
-rw-r--r-- Lib/idlelib/NEWS.txt 2
-rw-r--r-- Lib/idlelib/ScriptBinding.py 22
3 files changed, 21 insertions, 12 deletions
diff --git a/Lib/idlelib/IOBinding.py b/Lib/idlelib/IOBinding.py
index 516cb75..71c0163 100644
--- a/Lib/idlelib/IOBinding.py
+++ b/Lib/idlelib/IOBinding.py
@@ -74,10 +74,11 @@ def coding_spec(data):
Raise a LookupError if the encoding is declared but unknown.
"""
if isinstance(data, bytes):
- try:
- lines = data.decode('utf-8')
- except UnicodeDecodeError:
- return None
+ # This encoding might be wrong. However, the coding
+ # spec must be ASCII-only, so any non-ASCII characters
+ # around here will be ignored. Decoding to Latin-1 should
+ # never fail (except for memory outage)
+ lines = data.decode('iso-8859-1')
else:
lines = data
# consider only the first two lines
diff --git a/Lib/idlelib/NEWS.txt b/Lib/idlelib/NEWS.txt
index c91a57b..94ab7f7 100644
--- a/Lib/idlelib/NEWS.txt
+++ b/Lib/idlelib/NEWS.txt
@@ -3,6 +3,8 @@ What's New in IDLE 3.1a1?
*Release date: XX-XXX-XXXX*
+- Issue #4008: Fix problems with non-ASCII source files.
+
- Issue #4323: Always encode source as UTF-8 without asking
the user (unless a different encoding is declared); remove
user configuration of source encoding; all according to
diff --git a/Lib/idlelib/ScriptBinding.py b/Lib/idlelib/ScriptBinding.py
index 1e98f1e..37b7b51 100644
--- a/Lib/idlelib/ScriptBinding.py
+++ b/Lib/idlelib/ScriptBinding.py
@@ -24,7 +24,7 @@ import tabnanny
import tokenize
import tkinter.messagebox as tkMessageBox
from idlelib.EditorWindow import EditorWindow
-from idlelib import PyShell
+from idlelib import PyShell, IOBinding
from idlelib.configHandler import idleConf
@@ -62,7 +62,13 @@ class ScriptBinding:
return 'break'
def tabnanny(self, filename):
- f = open(filename, 'r')
+ # XXX: tabnanny should work on binary files as well
+ with open(filename, 'r', encoding='iso-8859-1') as f:
+ two_lines = f.readline() + f.readline()
+ encoding = IOBinding.coding_spec(two_lines)
+ if not encoding:
+ encoding = 'utf-8'
+ f = open(filename, 'r', encoding=encoding)
try:
tabnanny.process_tokens(tokenize.generate_tokens(f.readline))
except tokenize.TokenError as msg:
@@ -82,14 +88,14 @@ class ScriptBinding:
self.shell = shell = self.flist.open_shell()
saved_stream = shell.get_warning_stream()
shell.set_warning_stream(shell.stderr)
- f = open(filename, 'r')
+ f = open(filename, 'rb')
source = f.read()
f.close()
- if '\r' in source:
- source = re.sub(r"\r\n", "\n", source)
- source = re.sub(r"\r", "\n", source)
- if source and source[-1] != '\n':
- source = source + '\n'
+ if b'\r' in source:
+ source = source.replace(b'\r\n', b'\n')
+ source = source.replace(b'\r', b'\n')
+ if source and source[-1] != ord(b'\n'):
+ source = source + b'\n'
editwin = self.editwin
text = editwin.text
text.tag_remove("ERROR", "1.0", "end")