From 7c59bc6f2faefbf8c4ef4692fee1f658aa53c6a6 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou
Date: Sat, 13 Dec 2008 23:20:54 +0000
Subject: Issue #4163: textwrap module: allow word splitting on a hyphen preceded by a non-ASCII letter.

---
 Lib/test/test_textwrap.py | 8 ++++++++
 Lib/textwrap.py           | 2 +-
 Misc/NEWS                 | 3 +++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index 1cbd9ce..ffd59c3 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -365,6 +365,14 @@ What a mess!
         self.assertRaises(ValueError, wrap, text, 0)
         self.assertRaises(ValueError, wrap, text, -1)
 
+    def test_no_split_at_umlaut(self):
+        text = "Die Empf\xe4nger-Auswahl"
+        self.check_wrap(text, 13, ["Die", "Empf\xe4nger-", "Auswahl"])
+
+    def test_umlaut_followed_by_dash(self):
+        text = "aa \xe4\xe4-\xe4\xe4"
+        self.check_wrap(text, 7, ["aa \xe4\xe4-", "\xe4\xe4"])
+
 
 class LongWordTestCase (BaseTestCase):
     def setUp(self):
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 867b9d9..1f2e9b4 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -76,7 +76,7 @@ class TextWrapper:
     # (after stripping out empty strings).
     wordsep_re = re.compile(
         r'(\s+|'                                  # any whitespace
-        r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'   # hyphenated words
+        r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|'   # hyphenated words
         r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
 
     # This less funky little regex just split on recognized spaces. E.g.
diff --git a/Misc/NEWS b/Misc/NEWS
index aaa65c5..e31bb0b 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -45,6 +45,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #4163: textwrap module: allow word splitting on a hyphen preceded by
+  a non-ASCII letter.
+
 - Issue #4616: TarFile.utime(): Restore directory times on Windows.
 
 - Issue #4021: tokenize.detect_encoding() now raises a SyntaxError when the
-- 
cgit v0.12