summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/textwrap.rst18
-rw-r--r--Lib/test/test_textwrap.py8
-rw-r--r--Lib/textwrap.py23
-rw-r--r--Misc/NEWS2
4 files changed, 48 insertions, 3 deletions
diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst
index 7a52eb9b..a2db567 100644
--- a/Doc/library/textwrap.rst
+++ b/Doc/library/textwrap.rst
@@ -41,6 +41,10 @@ instance and calling a single method on it. That instance is not reused, so for
applications that wrap/fill many text strings, it will be more efficient for you
to create your own :class:`TextWrapper` object.
+Text is preferably wrapped on whitespace and right after the hyphens in
+hyphenated words; only then will long words be broken if necessary, unless
+:attr:`TextWrapper.break_long_words` is set to false.
+
An additional utility function, :func:`dedent`, is provided to remove
indentation from strings that have unwanted whitespace to the left of the text.
@@ -174,10 +178,22 @@ indentation from strings that have unwanted whitespace to the left of the text.
than :attr:`width`. (Long words will be put on a line by themselves, in
order to minimize the amount by which :attr:`width` is exceeded.)
+
+ .. attribute:: break_on_hyphens
+
+ (default: ``True``) If true, wrapping will occur preferably on whitespace
+ and right after hyphens in compound words, as is customary in English.
+ If false, only whitespace will be considered as a potentially good place
+ for line breaks, but you need to set :attr:`break_long_words` to false if
+ you want truly unbreakable words. The default behaviour in previous versions
+ was to always allow breaking hyphenated words.
+
+ .. versionadded:: 2.6
+
+
:class:`TextWrapper` also provides two public methods, analogous to the
module-level convenience functions:
-
.. method:: wrap(text)
Wraps the single paragraph in *text* (a string) so every line is at most
diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index c1c09f6..1eab90c 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -364,6 +364,14 @@ What a mess!
["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
"ball,", " ", "use", " ", "the", " ", "-b", " ", "option!"])
+ def test_break_on_hyphens(self):
+ # Ensure that the break_on_hyphens attribute works
+ text = "yaba daba-doo"
+ self.check_wrap(text, 10, ["yaba daba-", "doo"],
+ break_on_hyphens=True)
+ self.check_wrap(text, 10, ["yaba", "daba-doo"],
+ break_on_hyphens=False)
+
def test_bad_width(self):
# Ensure that width <= 0 is caught.
text = "Whatever, it doesn't matter."
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index ffbb9d1..7cd0597 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -63,6 +63,10 @@ class TextWrapper:
break_long_words (default: true)
Break words longer than 'width'. If false, those words will not
be broken, and some lines might be longer than 'width'.
+ break_on_hyphens (default: true)
+ Allow breaking hyphenated words. If true, wrapping will occur
+ preferably on whitespace and right after hyphens that are part of
+ compound words.
drop_whitespace (default: true)
Drop leading and trailing whitespace from lines.
"""
@@ -85,6 +89,12 @@ class TextWrapper:
r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
+ # This less funky little regex just splits on recognized spaces. E.g.
+ # "Hello there -- you goof-ball, use the -b option!"
+ # splits into
+ # Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
+ wordsep_simple_re = re.compile(r'(\s+)')
+
# XXX this is not locale- or charset-aware -- string.lowercase
# is US-ASCII only (and therefore English-only)
sentence_end_re = re.compile(r'[%s]' # lowercase letter
@@ -102,7 +112,8 @@ class TextWrapper:
replace_whitespace=True,
fix_sentence_endings=False,
break_long_words=True,
- drop_whitespace=True):
+ drop_whitespace=True,
+ break_on_hyphens=True):
self.width = width
self.initial_indent = initial_indent
self.subsequent_indent = subsequent_indent
@@ -111,6 +122,7 @@ class TextWrapper:
self.fix_sentence_endings = fix_sentence_endings
self.break_long_words = break_long_words
self.drop_whitespace = drop_whitespace
+ self.break_on_hyphens = break_on_hyphens
# -- Private methods -----------------------------------------------
@@ -143,8 +155,15 @@ class TextWrapper:
breaks into the following chunks:
'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
'use', ' ', 'the', ' ', '-b', ' ', 'option!'
+ if break_on_hyphens is True, or into:
+ 'Look,', ' ', 'goof-ball', ' ', '--', ' ',
+ 'use', ' ', 'the', ' ', '-b', ' ', 'option!'
+ otherwise.
"""
- chunks = self.wordsep_re.split(text)
+ if self.break_on_hyphens is True:
+ chunks = self.wordsep_re.split(text)
+ else:
+ chunks = self.wordsep_simple_re.split(text)
chunks = filter(None, chunks) # remove empty chunks
return chunks
diff --git a/Misc/NEWS b/Misc/NEWS
index d796735..9007462 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -23,6 +23,8 @@ Extension Modules
Library
-------
+- #2659: Added ``break_on_hyphens`` option to textwrap TextWrapper class.
+
- The mhlib module has been deprecated for removal in Python 3.0.
- The linuxaudiodev module has been deprecated for removal in Python 3.0.