summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Greg Ward <gward@python.net> 2002-12-12 17:24:35 (GMT)
committer: Greg Ward <gward@python.net> 2002-12-12 17:24:35 (GMT)
commit: afd44de812d5243beadffaaa2c8d11cc8cdaf8ad (patch)
tree: 24440eacf02e909bad919d096c119d9eb7d1d193
parent: b5bfb9f38c786c3330b2d52d93b664588c42283d (diff)
download: cpython-afd44de812d5243beadffaaa2c8d11cc8cdaf8ad.zip
cpython-afd44de812d5243beadffaaa2c8d11cc8cdaf8ad.tar.gz
cpython-afd44de812d5243beadffaaa2c8d11cc8cdaf8ad.tar.bz2
Hardcode the recognized whitespace characters to the US-ASCII whitespace
chars. See the comment for rationale.
-rw-r--r-- Lib/textwrap.py 15
1 file changed, 12 insertions, 3 deletions
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index be85878..de07c8d 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -12,6 +12,16 @@ __revision__ = "$Id$"
import string, re
+# Hardcode the recognized whitespace characters to the US-ASCII
+# whitespace characters. The main reason for doing this is that in
+# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
+# that character winds up in string.whitespace. Respecting
+# string.whitespace in those cases would 1) make textwrap treat 0xa0 the
+# same as any other whitespace char, which is clearly wrong (it's a
+# *non-breaking* space), 2) possibly cause problems with Unicode,
+# since 0xa0 is not in range(128).
+whitespace = '\t\n\x0b\x0c\r '
+
class TextWrapper:
"""
Object for wrapping/filling text. The public interface consists of
@@ -48,12 +58,11 @@ class TextWrapper:
be broken, and some lines might be longer than 'width'.
"""
- whitespace_trans = string.maketrans(string.whitespace,
- ' ' * len(string.whitespace))
+ whitespace_trans = string.maketrans(whitespace, ' ' * len(whitespace))
unicode_whitespace_trans = {}
uspace = ord(u' ')
- for x in map(ord, string.whitespace):
+ for x in map(ord, whitespace):
unicode_whitespace_trans[x] = uspace
# This funky little regex is just the trick for splitting