summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Greg Ward <gward@python.net> 2004-06-03 01:59:41 (GMT)
committer: Greg Ward <gward@python.net> 2004-06-03 01:59:41 (GMT)
commit: 6186410db06f28b3032a882d5ba7a2e3dcf95a80 (patch)
tree: 9ddf7a949974f652fe6012007d0650312634b51b /Lib
parent: 29eb8c315d23fd6a36a22be50ad4c1a229f94342 (diff)
download: cpython-6186410db06f28b3032a882d5ba7a2e3dcf95a80.zip
cpython-6186410db06f28b3032a882d5ba7a2e3dcf95a80.tar.gz
cpython-6186410db06f28b3032a882d5ba7a2e3dcf95a80.tar.bz2
SF #965425: fix so hyphenated words surrounded by punctuation are
wrapped correctly.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/test/test_textwrap.py 19
-rw-r--r-- Lib/textwrap.py 6
2 files changed, 21 insertions, 4 deletions
diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index 5ff4bcc..8c7279d 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -1,5 +1,5 @@
#
-# Test script for the textwrap module.
+# Test suite for the textwrap module.
#
# Original tests written by Greg Ward <gward@python.net>.
# Converted to PyUnit by Peter Hansen <peter@engcorp.com>.
@@ -271,6 +271,23 @@ What a mess!
self.check_split("foo --option-opt bar",
["foo", " ", "--option-", "opt", " ", "bar"])
+ def test_punct_hyphens(self):
+ # Oh bother, SF #965425 found another problem with hyphens --
+ # hyphenated words in single quotes weren't handled correctly.
+ # In fact, the bug is that *any* punctuation around a hyphenated
+ # word was handled incorrectly, except for a leading "--", which
+ # was special-cased for Optik and Docutils. So test a variety
+ # of styles of punctuation around a hyphenated word.
+ # (Actually this is based on an Optik bug report, #813077).
+ self.check_split("the 'wibble-wobble' widget",
+ ['the', ' ', "'wibble-", "wobble'", ' ', 'widget'])
+ self.check_split('the "wibble-wobble" widget',
+ ['the', ' ', '"wibble-', 'wobble"', ' ', 'widget'])
+ self.check_split("the (wibble-wobble) widget",
+ ['the', ' ', "(wibble-", "wobble)", ' ', 'widget'])
+ self.check_split("the ['wibble-wobble'] widget",
+ ['the', ' ', "['wibble-", "wobble']", ' ', 'widget'])
+
def test_funky_parens (self):
# Second part of SF bug #596434: long option strings inside
# parentheses.
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index d9df019..32ab10b 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -79,11 +79,11 @@ class TextWrapper:
# Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
# (after stripping out empty strings).
wordsep_re = re.compile(r'(\s+|' # any whitespace
- r'-*\w{2,}-(?=\w{2,})|' # hyphenated words
+ r'[^\s\w]*\w{2,}-(?=\w{2,})|' # hyphenated words
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
- # XXX will there be a locale-or-charset-aware version of
- # string.lowercase in 2.3?
+ # XXX this is not locale- or charset-aware -- string.lowercase
+ # is US-ASCII only (and therefore English-only)
sentence_end_re = re.compile(r'[%s]' # lowercase letter
r'[\.\!\?]' # sentence-ending punct.
r'[\"\']?' # optional end-of-quote