diff options
author | Greg Ward <gward@python.net> | 2002-12-12 17:24:35 (GMT) |
---|---|---|
committer | Greg Ward <gward@python.net> | 2002-12-12 17:24:35 (GMT) |
commit | afd44de812d5243beadffaaa2c8d11cc8cdaf8ad (patch) | |
tree | 24440eacf02e909bad919d096c119d9eb7d1d193 /Lib | |
parent | b5bfb9f38c786c3330b2d52d93b664588c42283d (diff) | |
download | cpython-afd44de812d5243beadffaaa2c8d11cc8cdaf8ad.zip cpython-afd44de812d5243beadffaaa2c8d11cc8cdaf8ad.tar.gz cpython-afd44de812d5243beadffaaa2c8d11cc8cdaf8ad.tar.bz2 |
Hardcode the recognized whitespace characters to the US-ASCII whitespace
chars. See the comment for rationale.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/textwrap.py | 15 |
1 file changed, 12 insertions, 3 deletions
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index be85878..de07c8d 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -12,6 +12,16 @@ __revision__ = "$Id$"
 
 import string, re
 
+# Hardcode the recognized whitespace characters to the US-ASCII
+# whitespace characters. The main reason for doing this is that in
+# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
+# that character winds up in string.whitespace. Respecting
+# string.whitespace in those cases would 1) make textwrap treat 0xa0 the
+# same as any other whitespace char, which is clearly wrong (it's a
+# *non-breaking* space), 2) possibly cause problems with Unicode,
+# since 0xa0 is not in range(128).
+whitespace = '\t\n\x0b\x0c\r '
+
 class TextWrapper:
     """
     Object for wrapping/filling text. The public interface consists of
@@ -48,12 +58,11 @@ class TextWrapper:
         be broken, and some lines might be longer than 'width'.
     """
 
-    whitespace_trans = string.maketrans(string.whitespace,
-                                        ' ' * len(string.whitespace))
+    whitespace_trans = string.maketrans(whitespace, ' ' * len(whitespace))
 
     unicode_whitespace_trans = {}
     uspace = ord(u' ')
-    for x in map(ord, string.whitespace):
+    for x in map(ord, whitespace):
         unicode_whitespace_trans[x] = uspace
 
     # This funky little regex is just the trick for splitting