summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Antoine Pitrou <solipsis@pitrou.net> 2008-12-13 23:20:54 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net> 2008-12-13 23:20:54 (GMT)
commit: 7c59bc6f2faefbf8c4ef4692fee1f658aa53c6a6 (patch)
tree: 192915cee35e899422ec26ad83e34d35c198ae5b
parent: d9c03e0da300bc52d5503fd18a95f897d92bf454 (diff)
download: cpython-7c59bc6f2faefbf8c4ef4692fee1f658aa53c6a6.zip
cpython-7c59bc6f2faefbf8c4ef4692fee1f658aa53c6a6.tar.gz
cpython-7c59bc6f2faefbf8c4ef4692fee1f658aa53c6a6.tar.bz2
Issue #4163: textwrap module: allow word splitting on a hyphen preceded by a non-ASCII letter.
-rw-r--r-- Lib/test/test_textwrap.py 8
-rw-r--r-- Lib/textwrap.py 2
-rw-r--r-- Misc/NEWS 3
3 files changed, 12 insertions, 1 deletion
diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index 1cbd9ce..ffd59c3 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -365,6 +365,14 @@ What a mess!
self.assertRaises(ValueError, wrap, text, 0)
self.assertRaises(ValueError, wrap, text, -1)
+ def test_no_split_at_umlaut(self):
+ text = "Die Empf\xe4nger-Auswahl"
+ self.check_wrap(text, 13, ["Die", "Empf\xe4nger-", "Auswahl"])
+
+ def test_umlaut_followed_by_dash(self):
+ text = "aa \xe4\xe4-\xe4\xe4"
+ self.check_wrap(text, 7, ["aa \xe4\xe4-", "\xe4\xe4"])
+
class LongWordTestCase (BaseTestCase):
def setUp(self):
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 867b9d9..1f2e9b4 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -76,7 +76,7 @@ class TextWrapper:
# (after stripping out empty strings).
wordsep_re = re.compile(
r'(\s+|' # any whitespace
- r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
+ r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' # hyphenated words
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
# This less funky little regex just split on recognized spaces. E.g.
diff --git a/Misc/NEWS b/Misc/NEWS
index aaa65c5..e31bb0b 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -45,6 +45,9 @@ Core and Builtins
Library
-------
+- Issue #4163: textwrap module: allow word splitting on a hyphen preceded by
+ a non-ASCII letter.
+
- Issue #4616: TarFile.utime(): Restore directory times on Windows.
- Issue #4021: tokenize.detect_encoding() now raises a SyntaxError when the