summaryrefslogtreecommitdiffstats
path: root/Lib/textwrap.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/textwrap.py')
-rw-r--r--Lib/textwrap.py23
1 files changed, 19 insertions, 4 deletions
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 1759b0d..05e0306 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -79,10 +79,25 @@ class TextWrapper:
# splits into
# Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
# (after stripping out empty strings).
- wordsep_re = re.compile(
- r'(\s+|' # any whitespace
- r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' # hyphenated words
- r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
+ word_punct = r'[\w!"\'&.,?]'
+ letter = r'[^\d\W]'
+ wordsep_re = re.compile(r'''
+ ( # any whitespace
+ \s+
+ | # em-dash between words
+ (?<=%(wp)s) -{2,} (?=\w)
+ | # word, possibly hyphenated
+ \S+? (?:
+ # hyphenated word
+ -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
+ (?= %(lt)s -? %(lt)s)
+ | # end of word
+ (?=\s|\Z)
+ | # em-dash
+ (?<=%(wp)s) (?=-{2,}\w)
+ )
+ )''' % {'wp': word_punct, 'lt': letter}, re.VERBOSE)
+ del word_punct, letter
# This less funky little regex just split on recognized spaces. E.g.
# "Hello there -- you goof-ball, use the -b option!"