From 975a079794cf1672c2b917f946da25966f75c6b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= Date: Sun, 18 Jan 2009 20:15:42 +0000 Subject: Issue #4008: Fix problems with non-ASCII source files. --- Lib/idlelib/IOBinding.py | 9 +++++---- Lib/idlelib/NEWS.txt | 2 ++ Lib/idlelib/ScriptBinding.py | 22 ++++++++++++++-------- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/Lib/idlelib/IOBinding.py b/Lib/idlelib/IOBinding.py index 516cb75..71c0163 100644 --- a/Lib/idlelib/IOBinding.py +++ b/Lib/idlelib/IOBinding.py @@ -74,10 +74,11 @@ def coding_spec(data): Raise a LookupError if the encoding is declared but unknown. """ if isinstance(data, bytes): - try: - lines = data.decode('utf-8') - except UnicodeDecodeError: - return None + # This encoding might be wrong. However, the coding + # spec must be ASCII-only, so any non-ASCII characters + # around here will be ignored. Decoding to Latin-1 should + # never fail (except for memory outage) + lines = data.decode('iso-8859-1') else: lines = data # consider only the first two lines diff --git a/Lib/idlelib/NEWS.txt b/Lib/idlelib/NEWS.txt index c91a57b..94ab7f7 100644 --- a/Lib/idlelib/NEWS.txt +++ b/Lib/idlelib/NEWS.txt @@ -3,6 +3,8 @@ What's New in IDLE 3.1a1? *Release date: XX-XXX-XXXX* +- Issue #4008: Fix problems with non-ASCII source files. + - Issue #4323: Always encode source as UTF-8 without asking the user (unless a different encoding is declared); remove user configuration of source encoding; all according to diff --git a/Lib/idlelib/ScriptBinding.py b/Lib/idlelib/ScriptBinding.py index 1e98f1e..37b7b51 100644 --- a/Lib/idlelib/ScriptBinding.py +++ b/Lib/idlelib/ScriptBinding.py @@ -24,7 +24,7 @@ import tabnanny import tokenize import tkinter.messagebox as tkMessageBox from idlelib.EditorWindow import EditorWindow -from idlelib import PyShell +from idlelib import PyShell, IOBinding from idlelib.configHandler import idleConf @@ -62,7 +62,13 @@ class ScriptBinding: return 'break' def tabnanny(self, filename): - f = open(filename, 'r') + # XXX: tabnanny should work on binary files as well + with open(filename, 'r', encoding='iso-8859-1') as f: + two_lines = f.readline() + f.readline() + encoding = IOBinding.coding_spec(two_lines) + if not encoding: + encoding = 'utf-8' + f = open(filename, 'r', encoding=encoding) try: tabnanny.process_tokens(tokenize.generate_tokens(f.readline)) except tokenize.TokenError as msg: @@ -82,14 +88,14 @@ class ScriptBinding: self.shell = shell = self.flist.open_shell() saved_stream = shell.get_warning_stream() shell.set_warning_stream(shell.stderr) - f = open(filename, 'r') + f = open(filename, 'rb') source = f.read() f.close() - if '\r' in source: - source = re.sub(r"\r\n", "\n", source) - source = re.sub(r"\r", "\n", source) - if source and source[-1] != '\n': - source = source + '\n' + if b'\r' in source: + source = source.replace(b'\r\n', b'\n') + source = source.replace(b'\r', b'\n') + if source and source[-1] != ord(b'\n'): + source = source + b'\n' editwin = self.editwin text = editwin.text text.tag_remove("ERROR", "1.0", "end") -- cgit v0.12